4 # Errors raised by the wrappers when some tree handling failed.
7 def __init__(self, msg):
13 def __init__(self, msg):
19 def __init__(self, msg):
25 def __init__(self, msg):
31 def __init__(self, _obj):
48 def io_read(self, len = -1):
52 return(self.__io.read())
53 return(self.__io.read(len))
55 def io_write(self, str, len = -1):
59 return(self.__io.write(str))
60 return(self.__io.write(str, len))
62 class ioReadWrapper(ioWrapper):
def __init__(self, _obj, enc = ""):
    """Wrap _obj and create the libxml2 input buffer bound to it.

    _obj is handed to ioWrapper.__init__; enc is passed, together with
    this wrapper, to libxml2mod.xmlCreateInputBuffer and the result is
    stored in self._o.
    """
    ioWrapper.__init__(self, _obj)
    input_buffer = libxml2mod.xmlCreateInputBuffer(self, enc)
    self._o = input_buffer
71 libxml2mod.xmlFreeParserInputBuffer(self._o)
77 libxml2mod.xmlFreeParserInputBuffer(self._o)
80 class ioWriteWrapper(ioWrapper):
def __init__(self, _obj, enc = ""):
    """Wrap _obj and create the libxml2 output buffer bound to it.

    _obj is handed to ioWrapper.__init__; enc is passed, together with
    this wrapper, to libxml2mod.xmlCreateOutputBuffer and the result is
    stored in self._o.
    """
    ioWrapper.__init__(self, _obj)
    output_buffer = libxml2mod.xmlCreateOutputBuffer(self, enc)
    self._o = output_buffer
89 libxml2mod.xmlOutputBufferClose(self._o)
95 libxml2mod.xmlOutputBufferClose(self._o)
99 # Example of a class to handle SAX events
102 """Base class for SAX handlers"""
def startDocument(self):
    """Callback invoked at the start of the document.

    The visible base implementation takes no action.
    """
def endDocument(self):
    """Callback invoked at the end of the document.

    The visible base implementation takes no action.
    """
def startElement(self, tag, attrs):
    """Callback invoked at the start of every element.

    tag is the name of the element and attrs is a dictionary of the
    element's attributes.
    """
116 def endElement(self, tag):
117 """called at the end of every element, tag is the name of
121 def characters(self, data):
122 """called when character data have been read, data is the string
123 containing the data, multiple consecutive characters() callback
127 def cdataBlock(self, data):
128 """called when CDATA section have been read, data is the string
129 containing the data, multiple consecutive cdataBlock() callback
def reference(self, name):
    """Callback invoked when an entity reference has been found.

    name is the single argument supplied with the reference.
    """
def ignorableWhitespace(self, data):
    """Callback invoked when potentially ignorable white space is found.

    data is the single argument supplied with the event.
    """
def processingInstruction(self, target, data):
    """Callback invoked when a processing instruction (PI) is found.

    target contains the PI name and data is the associated data in
    the PI.
    """
def comment(self, content):
    """Callback invoked when a comment is found.

    content contains the comment.
    """
def externalSubset(self, name, externalID, systemID):
    """Callback invoked when a DOCTYPE declaration has been found.

    name is the DTD name; externalID and systemID are the DTD public
    and system identifiers for that DTD, if available.
    """
def internalSubset(self, name, externalID, systemID):
    """Callback invoked when a DOCTYPE declaration has been found.

    name is the DTD name; externalID and systemID are the DTD public
    and system identifiers for that DTD, if available.
    """
def entityDecl(self, name, type, externalID, systemID, content):
    """Callback invoked when an ENTITY declaration has been found.

    name is the entity name; externalID and systemID are the entity
    public and system identifiers, if available; type indicates the
    entity type and content reports its string content.
    """
def notationDecl(self, name, externalID, systemID):
    """Callback invoked when a NOTATION declaration has been found.

    name is the notation name; externalID and systemID are the
    notation public and system identifiers, if available.
    """
def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
    """Callback invoked when an ATTRIBUTE definition has been found.

    The six arguments are accepted as given; the visible base
    implementation does nothing with them.
    """
def elementDecl(self, name, type, content):
    """Callback invoked when an ELEMENT definition has been found.

    The three arguments are accepted as given; the visible base
    implementation does nothing with them.
    """
def entityDecl(self, name, publicId, systemID, notationName):
    """Callback invoked when an unparsed ENTITY declaration is found.

    name is the entity name; publicId and systemID are the entity
    public and system identifiers, if available; notationName
    indicates the associated NOTATION.

    NOTE(review): this definition has the same name as the
    five-argument entityDecl handler for parsed ENTITY declarations.
    If both live in the same class, this later definition replaces
    the earlier one -- presumably this callback was meant to have its
    own name (e.g. unparsedEntityDecl); confirm against the SAX glue
    before renaming.
    """
190 def warning(self, msg):
def error(self, msg):
    """Raise parserError built from msg."""
    failure = parserError(msg)
    raise failure
def fatalError(self, msg):
    """Raise parserError built from msg."""
    failure = parserError(msg)
    raise failure
200 # This class is the ancestor of all the Node classes. It provides
201 # the basic functionality shared by all nodes (and gracefully
202 # handles exceptions), like name, navigation in the tree,
203 # doc reference, content access and serializing to a string or URI
206 def __init__(self, _obj=None):
211 def get_parent(self):
212 ret = libxml2mod.parent(self._o)
215 return xmlNode(_obj=ret)
216 def get_children(self):
217 ret = libxml2mod.children(self._o)
220 return xmlNode(_obj=ret)
222 ret = libxml2mod.last(self._o)
225 return xmlNode(_obj=ret)
227 ret = libxml2mod.next(self._o)
230 return xmlNode(_obj=ret)
231 def get_properties(self):
232 ret = libxml2mod.properties(self._o)
235 return xmlAttr(_obj=ret)
237 ret = libxml2mod.prev(self._o)
240 return xmlNode(_obj=ret)
def get_content(self):
    """Return the content of this node.

    The value is whatever libxml2mod.xmlNodeGetContent yields for the
    wrapped object held in self._o.
    """
    wrapped = self._o
    return libxml2mod.xmlNodeGetContent(wrapped)
243 getContent = get_content # why is this duplicate naming needed ?
245 return libxml2mod.name(self._o)
247 return libxml2mod.type(self._o)
249 ret = libxml2mod.doc(self._o)
251 if self.type in ["document_xml", "document_html"]:
252 return xmlDoc(_obj=self._o)
255 return xmlDoc(_obj=ret)
257 # Those are common attributes to nearly all type of nodes
258 # defined as python2 properties
261 if float(sys.version[0:3]) < 2.2:
262 def __getattr__(self, attr):
264 ret = libxml2mod.parent(self._o)
267 return xmlNode(_obj=ret)
268 elif attr == "properties":
269 ret = libxml2mod.properties(self._o)
272 return xmlAttr(_obj=ret)
273 elif attr == "children":
274 ret = libxml2mod.children(self._o)
277 return xmlNode(_obj=ret)
279 ret = libxml2mod.last(self._o)
282 return xmlNode(_obj=ret)
284 ret = libxml2mod.next(self._o)
287 return xmlNode(_obj=ret)
289 ret = libxml2mod.prev(self._o)
292 return xmlNode(_obj=ret)
293 elif attr == "content":
294 return libxml2mod.xmlNodeGetContent(self._o)
296 return libxml2mod.name(self._o)
298 return libxml2mod.type(self._o)
300 ret = libxml2mod.doc(self._o)
302 if self.type == "document_xml" or self.type == "document_html":
303 return xmlDoc(_obj=self._o)
306 return xmlDoc(_obj=ret)
307 raise AttributeError,attr
309 parent = property(get_parent, None, None, "Parent node")
310 children = property(get_children, None, None, "First child node")
311 last = property(get_last, None, None, "Last sibling node")
312 next = property(get_next, None, None, "Next sibling node")
313 prev = property(get_prev, None, None, "Previous sibling node")
314 properties = property(get_properties, None, None, "List of properies")
315 content = property(get_content, None, None, "Content of this node")
316 name = property(get_name, None, None, "Node name")
317 type = property(get_type, None, None, "Node type")
318 doc = property(get_doc, None, None, "The document this node belongs to")
321 # Serialization routines, the optional arguments have the following
323 # encoding: string to ask saving in a specific encoding
324 # format: if 1 the serializer is asked to indent the output
def serialize(self, encoding = None, format = 0):
    """Serialize this node through libxml2mod.serializeNode.

    encoding and format are forwarded unchanged after the wrapped
    object self._o; the call's result is returned as is.
    """
    wrapped = self._o
    serialized = libxml2mod.serializeNode(wrapped, encoding, format)
    return serialized
def saveTo(self, file, encoding = None, format = 0):
    """Save this node to file through libxml2mod.saveNodeTo.

    file, encoding and format are forwarded unchanged after the
    wrapped object self._o; the call's result is returned as is.
    """
    wrapped = self._o
    outcome = libxml2mod.saveNodeTo(wrapped, file, encoding, format)
    return outcome
332 # Selecting nodes using XPath, a bit slow because the context
333 # is allocated/freed every time but convenient.
335 def xpathEval(self, expr):
339 ctxt = doc.xpathNewContext()
340 ctxt.setContextNode(self)
341 res = ctxt.xpathEval(expr)
342 ctxt.xpathFreeContext()
346 # Selecting nodes using XPath, faster because the context
347 # is allocated just once per xmlDoc.
349 def xpathEval2(self, expr):
354 doc._ctxt.setContextNode(self)
356 doc._ctxt = doc.xpathNewContext()
357 doc._ctxt.setContextNode(self)
358 res = doc._ctxt.xpathEval(expr)
361 # support for python2 iterators
def walk_depth_first(self):
    """Return an xmlCoreDepthFirstItertor built from this node."""
    walker = xmlCoreDepthFirstItertor(self)
    return walker
def walk_breadth_first(self):
    """Return an xmlCoreBreadthFirstItertor built from this node."""
    walker = xmlCoreBreadthFirstItertor(self)
    return walker
366 __iter__ = walk_depth_first
370 self.doc._ctxt.xpathFreeContext()
373 libxml2mod.freeDoc(self._o)
377 # implements the depth-first iterator for libxml2 DOM tree
379 class xmlCoreDepthFirstItertor:
380 def __init__(self, node):
389 self.parents.append(self.node)
390 self.node = self.node.children
393 parent = self.parents.pop()
396 self.node = parent.next
399 # implements the breadth-first iterator for libxml2 DOM tree
401 class xmlCoreBreadthFirstItertor:
402 def __init__(self, node):
411 self.parents.append(self.node)
412 self.node = self.node.next
415 parent = self.parents.pop()
418 self.node = parent.children
421 # converters to present a nicer view of the XPath returns
424 # TODO try to cast to the most appropriate node class
425 name = libxml2mod.name(o)
426 if name == "element" or name == "text":
427 return xmlNode(_obj=o)
428 if name == "attribute":
429 return xmlAttr(_obj=o)
430 if name[0:8] == "document":
431 return xmlDoc(_obj=o)
432 if name[0:8] == "namespace":
434 if name == "elem_decl":
435 return xmlElement(_obj=o)
436 if name == "attribute_decl":
437 return xmlAtribute(_obj=o)
438 if name == "entity_decl":
439 return xmlEntity(_obj=o)
441 return xmlDtd(_obj=o)
442 return xmlNode(_obj=o)
444 def xpathObjectRet(o):
445 if type(o) == type([]) or type(o) == type(()):
446 ret = map(lambda x: nodeWrap(x), o)
451 # register an XPath function
453 def registerXPathFunction(ctxt, name, ns_uri, f):
454 ret = libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
457 # For the xmlTextReader parser configuration
460 PARSER_DEFAULTATTRS=2
462 PARSER_SUBST_ENTITIES=4
465 # For the error callback severities
467 PARSER_SEVERITY_VALIDITY_WARNING=1
468 PARSER_SEVERITY_VALIDITY_ERROR=2
469 PARSER_SEVERITY_WARNING=3
470 PARSER_SEVERITY_ERROR=4
473 # register the libxml2 error handler
475 def registerErrorHandler(f, ctx):
476 """Register a Python function for error reporting.
477 The function is called back as f(ctx, error). """
479 if not sys.modules.has_key('libxslt'):
480 # normal behaviour when libxslt is not imported
481 ret = libxml2mod.xmlRegisterErrorHandler(f,ctx)
483 # when libxslt is already imported, one must
484 # use libxslt's error handler instead
486 ret = libxslt.registerErrorHandler(f,ctx)
489 class parserCtxtCore:
491 def __init__(self, _obj=None):
499 libxml2mod.xmlFreeParserCtxt(self._o)
def setErrorHandler(self,f,arg):
    """Install f as the error handler of this parser context.

    f will be called back as f(arg,msg,severity,reserved), where
    reserved is currently always None.
    """
    ctxt = self._o
    libxml2mod.xmlParserCtxtSetErrorHandler(ctxt, f, arg)
509 def getErrorHandler(self):
510 """Return (f,arg) as previously registered with setErrorHandler
512 return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
514 def _xmlTextReaderErrorFunc((f,arg),msg,severity,locator):
515 """Intermediate callback to wrap the locator"""
516 return f(arg,msg,severity,xmlTextReaderLocator(locator))
518 class xmlTextReaderCore:
520 def __init__(self, _obj=None):
522 if _obj != None:self._o = _obj;return
527 libxml2mod.xmlFreeTextReader(self._o)
530 def SetErrorHandler(self,f,arg):
531 """Register an error handler that will be called back as
532 f(arg,msg,severity,locator)."""
534 libxml2mod.xmlTextReaderSetErrorHandler(\
537 libxml2mod.xmlTextReaderSetErrorHandler(\
538 self._o,_xmlTextReaderErrorFunc,(f,arg))
540 def GetErrorHandler(self):
541 """Return (f,arg) as previously registered with SetErrorHandler
543 f,arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
547 # assert f is _xmlTextReaderErrorFunc
550 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
552 # Everything before this line comes from libxml.py
553 # Everything after this line is automatically generated
555 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING