7 CONTENT="Modular DocBook HTML Stylesheet Version 1.76b+
10 TITLE="Gnome XML Library Reference Manual"
11 HREF="book1.html"><LINK
13 TITLE="Libxml Library Reference"
14 HREF="libxml-lib.html"><LINK
17 HREF="libxml-xmlerror.html"><LINK
20 HREF="libxml-htmltree.html"></HEAD
43 >Gnome XML Library Reference Manual</FONT
52 HREF="libxml-xmlerror.html"
57 ><<< Previous Page</B
90 HREF="libxml-lib.html"
106 HREF="libxml-htmltree.html"
111 >Next Page >>></B
120 NAME="LIBXML-HTMLPARSER">HTMLparser</H1
128 >HTMLparser -- </DIV
130 CLASS="REFSYNOPSISDIV"
146 HREF="libxml-htmlparser.html#HTMLPARSERCTXT"
150 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
151 >htmlParserCtxtPtr</A
154 HREF="libxml-htmlparser.html#HTMLPARSERNODEINFO"
155 >htmlParserNodeInfo</A
158 HREF="libxml-htmlparser.html#HTMLSAXHANDLER"
162 HREF="libxml-htmlparser.html#HTMLSAXHANDLERPTR"
163 >htmlSAXHandlerPtr</A
166 HREF="libxml-htmlparser.html#HTMLPARSERINPUT"
170 HREF="libxml-htmlparser.html#HTMLPARSERINPUTPTR"
171 >htmlParserInputPtr</A
174 HREF="libxml-htmlparser.html#HTMLDOCPTR"
178 HREF="libxml-htmlparser.html#HTMLNODEPTR"
182 HREF="libxml-htmlparser.html#HTMLELEMDESC"
186 HREF="libxml-htmlparser.html#HTMLELEMDESCPTR"
190 HREF="libxml-htmlparser.html#HTMLENTITYDESC"
194 HREF="libxml-htmlparser.html#HTMLENTITYDESCPTR"
195 >htmlEntityDescPtr</A
198 HREF="libxml-htmlparser.html#HTMLELEMDESC"
201 HREF="libxml-htmlparser.html#HTMLTAGLOOKUP"
204 HREF="libxml-tree.html#XMLCHAR"
208 HREF="libxml-htmlparser.html#HTMLENTITYDESC"
211 HREF="libxml-htmlparser.html#HTMLENTITYLOOKUP"
214 HREF="libxml-tree.html#XMLCHAR"
218 HREF="libxml-htmlparser.html#HTMLENTITYDESC"
221 HREF="libxml-htmlparser.html#HTMLENTITYVALUELOOKUP"
222 >htmlEntityValueLookup</A
223 > (unsigned int value);
225 HREF="libxml-htmlparser.html#HTMLISAUTOCLOSED"
228 HREF="libxml-htmlparser.html#HTMLDOCPTR"
232 HREF="libxml-htmlparser.html#HTMLNODEPTR"
236 HREF="libxml-htmlparser.html#HTMLAUTOCLOSETAG"
239 HREF="libxml-htmlparser.html#HTMLDOCPTR"
243 HREF="libxml-tree.html#XMLCHAR"
247 HREF="libxml-htmlparser.html#HTMLNODEPTR"
251 HREF="libxml-htmlparser.html#HTMLENTITYDESC"
254 HREF="libxml-htmlparser.html#HTMLPARSEENTITYREF"
255 >htmlParseEntityRef</A
257 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
258 >htmlParserCtxtPtr</A
261 HREF="libxml-tree.html#XMLCHAR"
265 HREF="libxml-htmlparser.html#HTMLPARSECHARREF"
268 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
269 >htmlParserCtxtPtr</A
272 HREF="libxml-htmlparser.html#HTMLPARSEELEMENT"
275 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
276 >htmlParserCtxtPtr</A
279 HREF="libxml-htmlparser.html#HTMLPARSEDOCUMENT"
280 >htmlParseDocument</A
282 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
283 >htmlParserCtxtPtr</A
286 HREF="libxml-htmlparser.html#HTMLDOCPTR"
289 HREF="libxml-htmlparser.html#HTMLSAXPARSEDOC"
292 HREF="libxml-tree.html#XMLCHAR"
295 const char *encoding,
297 HREF="libxml-htmlparser.html#HTMLSAXHANDLERPTR"
298 >htmlSAXHandlerPtr</A
302 HREF="libxml-htmlparser.html#HTMLDOCPTR"
305 HREF="libxml-htmlparser.html#HTMLPARSEDOC"
308 HREF="libxml-tree.html#XMLCHAR"
311 const char *encoding);
313 HREF="libxml-htmlparser.html#HTMLDOCPTR"
316 HREF="libxml-htmlparser.html#HTMLSAXPARSEFILE"
318 > (const char *filename,
319 const char *encoding,
321 HREF="libxml-htmlparser.html#HTMLSAXHANDLERPTR"
322 >htmlSAXHandlerPtr</A
326 HREF="libxml-htmlparser.html#HTMLDOCPTR"
329 HREF="libxml-htmlparser.html#HTMLPARSEFILE"
331 > (const char *filename,
332 const char *encoding);
334 HREF="libxml-htmlparser.html#UTF8TOHTML"
336 > (unsigned char *out,
341 HREF="libxml-htmlparser.html#HTMLENCODEENTITIES"
342 >htmlEncodeEntities</A
343 > (unsigned char *out,
349 HREF="libxml-htmlparser.html#HTMLISSCRIPTATTRIBUTE"
350 >htmlIsScriptAttribute</A
352 HREF="libxml-tree.html#XMLCHAR"
356 HREF="libxml-htmlparser.html#HTMLHANDLEOMITTEDELEM"
357 >htmlHandleOmittedElem</A
360 HREF="libxml-htmlparser.html#HTMLFREEPARSERCTXT"
361 >htmlFreeParserCtxt</A
363 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
364 >htmlParserCtxtPtr</A
367 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
368 >htmlParserCtxtPtr</A
370 HREF="libxml-htmlparser.html#HTMLCREATEPUSHPARSERCTXT"
371 >htmlCreatePushParserCtxt</A
373 HREF="libxml-htmlparser.html#HTMLSAXHANDLERPTR"
374 >htmlSAXHandlerPtr</A
379 const char *filename,
381 HREF="libxml-encoding.html#XMLCHARENCODING"
385 HREF="libxml-htmlparser.html#HTMLPARSECHUNK"
388 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
389 >htmlParserCtxtPtr</A
422 NAME="HTMLPARSERCTXT"
433 CLASS="PROGRAMLISTING"
434 >typedef xmlParserCtxt htmlParserCtxt;</PRE
448 NAME="HTMLPARSERCTXTPTR"
450 >htmlParserCtxtPtr</H3
459 CLASS="PROGRAMLISTING"
460 >typedef xmlParserCtxtPtr htmlParserCtxtPtr;</PRE
474 NAME="HTMLPARSERNODEINFO"
476 >htmlParserNodeInfo</H3
485 CLASS="PROGRAMLISTING"
486 >typedef xmlParserNodeInfo htmlParserNodeInfo;</PRE
500 NAME="HTMLSAXHANDLER"
511 CLASS="PROGRAMLISTING"
512 >typedef xmlSAXHandler htmlSAXHandler;</PRE
526 NAME="HTMLSAXHANDLERPTR"
528 >htmlSAXHandlerPtr</H3
537 CLASS="PROGRAMLISTING"
538 >typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;</PRE
552 NAME="HTMLPARSERINPUT"
563 CLASS="PROGRAMLISTING"
564 >typedef xmlParserInput htmlParserInput;</PRE
578 NAME="HTMLPARSERINPUTPTR"
580 >htmlParserInputPtr</H3
589 CLASS="PROGRAMLISTING"
590 >typedef xmlParserInputPtr htmlParserInputPtr;</PRE
615 CLASS="PROGRAMLISTING"
616 >typedef xmlDocPtr htmlDocPtr;</PRE
641 CLASS="PROGRAMLISTING"
642 >typedef xmlNodePtr htmlNodePtr;</PRE
658 >struct htmlElemDesc</H3
667 CLASS="PROGRAMLISTING"
668 >struct htmlElemDesc {
669 const char *name; /* The tag name */
670 char startTag; /* Whether the start tag can be implied */
671 char endTag; /* Whether the end tag can be implied */
672 char saveEndTag; /* Whether the end tag should be saved */
673 char empty; /* Is this an empty element ? */
674 char depr; /* Is this a deprecated element ? */
675 char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
676 char isinline; /* is this a block 0 or inline 1 element */
677 const char *desc; /* the description */
692 NAME="HTMLELEMDESCPTR"
703 CLASS="PROGRAMLISTING"
704 >typedef htmlElemDesc *htmlElemDescPtr;</PRE
718 NAME="HTMLENTITYDESC"
720 >struct htmlEntityDesc</H3
729 CLASS="PROGRAMLISTING"
730 >struct htmlEntityDesc {
731 unsigned int value; /* the UNICODE value for the character */
732 const char *name; /* The entity name */
733 const char *desc; /* the description */
748 NAME="HTMLENTITYDESCPTR"
750 >htmlEntityDescPtr</H3
759 CLASS="PROGRAMLISTING"
760 >typedef htmlEntityDesc *htmlEntityDescPtr;</PRE
776 >htmlTagLookup ()</H3
785 CLASS="PROGRAMLISTING"
787 HREF="libxml-htmlparser.html#HTMLELEMDESC"
789 >* htmlTagLookup (const <A
790 HREF="libxml-tree.html#XMLCHAR"
797 >Lookup the HTML tag in the ElementTable</P
801 CLASS="INFORMALTABLE"
828 > The tag name in lowercase</TD
846 >the related htmlElemDescPtr or NULL if not found.</TD
861 NAME="HTMLENTITYLOOKUP"
863 >htmlEntityLookup ()</H3
872 CLASS="PROGRAMLISTING"
874 HREF="libxml-htmlparser.html#HTMLENTITYDESC"
876 >* htmlEntityLookup (const <A
877 HREF="libxml-tree.html#XMLCHAR"
884 >Lookup the given entity in EntitiesTable</P
886 >TODO: the linear scan is really ugly, a hash table is really needed.</P
890 CLASS="INFORMALTABLE"
917 > the entity name</TD
935 >the associated htmlEntityDescPtr if found, NULL otherwise.</TD
950 NAME="HTMLENTITYVALUELOOKUP"
952 >htmlEntityValueLookup ()</H3
961 CLASS="PROGRAMLISTING"
963 HREF="libxml-htmlparser.html#HTMLENTITYDESC"
965 >* htmlEntityValueLookup (unsigned int value);</PRE
970 >Lookup the given entity in EntitiesTable</P
972 >TODO: the linear scan is really ugly, a hash table is really needed.</P
976 CLASS="INFORMALTABLE"
1003 > the entity's unicode value</TD
1021 >the associated htmlEntityDescPtr if found, NULL otherwise.</TD
1036 NAME="HTMLISAUTOCLOSED"
1038 >htmlIsAutoClosed ()</H3
1047 CLASS="PROGRAMLISTING"
1048 >int htmlIsAutoClosed (<A
1049 HREF="libxml-htmlparser.html#HTMLDOCPTR"
1053 HREF="libxml-htmlparser.html#HTMLNODEPTR"
1060 >The HTML DTD allows a tag to implicitly close other tags.
1061 The list is kept in htmlStartClose array. This function checks
1062 if a tag is autoclosed by one of its children</P
1066 CLASS="INFORMALTABLE"
1093 > the HTML document</TD
1110 > the HTML element</TD
1128 >1 if autoclosed, 0 otherwise</TD
1143 NAME="HTMLAUTOCLOSETAG"
1145 >htmlAutoCloseTag ()</H3
1154 CLASS="PROGRAMLISTING"
1155 >int htmlAutoCloseTag (<A
1156 HREF="libxml-htmlparser.html#HTMLDOCPTR"
1160 HREF="libxml-tree.html#XMLCHAR"
1164 HREF="libxml-htmlparser.html#HTMLNODEPTR"
1171 >The HTML DTD allows a tag to implicitly close other tags.
1172 The list is kept in htmlStartClose array. This function checks
1173 if the element or one of its children would autoclose the
1178 CLASS="INFORMALTABLE"
1205 > the HTML document</TD
1239 > the HTML element</TD
1257 >1 if autoclose, 0 otherwise</TD
1272 NAME="HTMLPARSEENTITYREF"
1274 >htmlParseEntityRef ()</H3
1283 CLASS="PROGRAMLISTING"
1285 HREF="libxml-htmlparser.html#HTMLENTITYDESC"
1287 >* htmlParseEntityRef (<A
1288 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
1289 >htmlParserCtxtPtr</A
1292 HREF="libxml-tree.html#XMLCHAR"
1299 >parse an HTML ENTITY reference</P
1301 >[68] EntityRef ::= '&' Name ';'</P
1305 CLASS="INFORMALTABLE"
1332 > an HTML parser context</TD
1349 > location to store the entity name</TD
1367 >the associated htmlEntityDescPtr if found, or NULL otherwise,
1368 if non-NULL *str will have to be freed by the caller.</TD
1383 NAME="HTMLPARSECHARREF"
1385 >htmlParseCharRef ()</H3
1394 CLASS="PROGRAMLISTING"
1395 >int htmlParseCharRef (<A
1396 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
1397 >htmlParserCtxtPtr</A
1403 >parse Reference declarations</P
1405 >[66] CharRef ::= '&#' [0-9]+ ';' |
1409 >' [0-9a-fA-F]+ ';'</P
1413 CLASS="INFORMALTABLE"
1440 > an HTML parser context</TD
1458 >the value parsed (as an int)</TD
1473 NAME="HTMLPARSEELEMENT"
1475 >htmlParseElement ()</H3
1484 CLASS="PROGRAMLISTING"
1485 >void htmlParseElement (<A
1486 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
1487 >htmlParserCtxtPtr</A
1493 >parse an HTML element, this is highly recursive</P
1495 >[39] element ::= EmptyElemTag | STag content ETag</P
1497 >[41] Attribute ::= Name Eq AttValue</P
1501 CLASS="INFORMALTABLE"
1528 > an HTML parser context</TD
1543 NAME="HTMLPARSEDOCUMENT"
1545 >htmlParseDocument ()</H3
1554 CLASS="PROGRAMLISTING"
1555 >int htmlParseDocument (<A
1556 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
1557 >htmlParserCtxtPtr</A
1563 >parse an HTML document (and build a tree if using the standard SAX
1568 CLASS="INFORMALTABLE"
1595 > an HTML parser context</TD
1613 >0, or -1 in case of error. The parser context is augmented
1614 as a result of the parsing.</TD
1629 NAME="HTMLSAXPARSEDOC"
1631 >htmlSAXParseDoc ()</H3
1640 CLASS="PROGRAMLISTING"
1642 HREF="libxml-htmlparser.html#HTMLDOCPTR"
1644 > htmlSAXParseDoc (<A
1645 HREF="libxml-tree.html#XMLCHAR"
1648 const char *encoding,
1650 HREF="libxml-htmlparser.html#HTMLSAXHANDLERPTR"
1651 >htmlSAXHandlerPtr</A
1653 void *userData);</PRE
1658 >Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
1659 to handle parse events. If sax is NULL, fallback to the default DOM
1660 behavior and return a tree.</P
1664 CLASS="INFORMALTABLE"
1691 > a pointer to an array of xmlChar</TD
1708 > a free form C string describing the HTML document encoding, or NULL</TD
1725 > the SAX handler block</TD
1742 > if using SAX, this pointer will be provided on callbacks. </TD
1760 >the resulting document tree unless SAX is NULL or the document is
1761 not well formed.</TD
1778 >htmlParseDoc ()</H3
1787 CLASS="PROGRAMLISTING"
1789 HREF="libxml-htmlparser.html#HTMLDOCPTR"
1792 HREF="libxml-tree.html#XMLCHAR"
1795 const char *encoding);</PRE
1800 >parse an HTML in-memory document and build a tree.</P
1804 CLASS="INFORMALTABLE"
1831 > a pointer to an array of xmlChar</TD
1848 > a free form C string describing the HTML document encoding, or NULL</TD
1866 >the resulting document tree</TD
1881 NAME="HTMLSAXPARSEFILE"
1883 >htmlSAXParseFile ()</H3
1892 CLASS="PROGRAMLISTING"
1894 HREF="libxml-htmlparser.html#HTMLDOCPTR"
1896 > htmlSAXParseFile (const char *filename,
1897 const char *encoding,
1899 HREF="libxml-htmlparser.html#HTMLSAXHANDLERPTR"
1900 >htmlSAXHandlerPtr</A
1902 void *userData);</PRE
1907 >parse an HTML file and build a tree. Automatic support for ZLIB/Compress
1908 compressed document is provided by default if found at compile-time.
1909 It uses the given SAX function block to handle the parsing callback.
1910 If sax is NULL, fallback to the default DOM tree building routines.</P
1914 CLASS="INFORMALTABLE"
1958 > a free form C string describing the HTML document encoding, or NULL</TD
1975 > the SAX handler block</TD
1992 > if using SAX, this pointer will be provided on callbacks. </TD
2010 >the resulting document tree unless SAX is NULL or the document is
2011 not well formed.</TD
2026 NAME="HTMLPARSEFILE"
2028 >htmlParseFile ()</H3
2037 CLASS="PROGRAMLISTING"
2039 HREF="libxml-htmlparser.html#HTMLDOCPTR"
2041 > htmlParseFile (const char *filename,
2042 const char *encoding);</PRE
2047 >parse an HTML file and build a tree. Automatic support for ZLIB/Compress
2048 compressed document is provided by default if found at compile-time.</P
2052 CLASS="INFORMALTABLE"
2096 > a free form C string describing the HTML document encoding, or NULL</TD
2114 >the resulting document tree</TD
2140 CLASS="PROGRAMLISTING"
2141 >int UTF8ToHtml (unsigned char *out,
2149 >Take a block of UTF-8 chars in and try to convert it to an ASCII
2150 plus HTML entities block of chars out.</P
2154 CLASS="INFORMALTABLE"
2181 > a pointer to an array of bytes to store the result</TD
2220 > a pointer to an array of UTF-8 chars</TD
2260 >0 if success, -2 if the transcoding fails, or -1 otherwise
2266 > after return is the number of octets consumed
2267 as the return value is positive, else unpredictable.
2273 > after return is the number of octets consumed.</TD
2288 NAME="HTMLENCODEENTITIES"
2290 >htmlEncodeEntities ()</H3
2299 CLASS="PROGRAMLISTING"
2300 >int htmlEncodeEntities (unsigned char *out,
2304 int quoteChar);</PRE
2309 >Take a block of UTF-8 chars in and try to convert it to an ASCII
2310 plus HTML entities block of chars out.</P
2314 CLASS="INFORMALTABLE"
2341 > a pointer to an array of bytes to store the result</TD
2380 > a pointer to an array of UTF-8 chars</TD
2419 > the quote character to escape (' or ") or zero.</TD
2437 >0 if success, -2 if the transcoding fails, or -1 otherwise
2443 > after return is the number of octets consumed
2444 as the return value is positive, else unpredictable.
2450 > after return is the number of octets consumed.</TD
2465 NAME="HTMLISSCRIPTATTRIBUTE"
2467 >htmlIsScriptAttribute ()</H3
2476 CLASS="PROGRAMLISTING"
2477 >int htmlIsScriptAttribute (const <A
2478 HREF="libxml-tree.html#XMLCHAR"
2485 >Check if an attribute is of content type Script</P
2489 CLASS="INFORMALTABLE"
2516 > an attribute name</TD
2534 >1 if the attribute is a script, 0 otherwise</TD
2549 NAME="HTMLHANDLEOMITTEDELEM"
2551 >htmlHandleOmittedElem ()</H3
2560 CLASS="PROGRAMLISTING"
2561 >int htmlHandleOmittedElem (int val);</PRE
2566 >Set and return the previous value for handling HTML omitted tags.</P
2570 CLASS="INFORMALTABLE"
2615 >the previous value: 0 for no handling, 1 for auto insertion.</TD
2630 NAME="HTMLFREEPARSERCTXT"
2632 >htmlFreeParserCtxt ()</H3
2641 CLASS="PROGRAMLISTING"
2642 >void htmlFreeParserCtxt (<A
2643 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
2644 >htmlParserCtxtPtr</A
2650 >Free all the memory used by a parser context. However the parsed
2651 document in ctxt->myDoc is not freed.</P
2655 CLASS="INFORMALTABLE"
2682 > an HTML parser context</TD
2697 NAME="HTMLCREATEPUSHPARSERCTXT"
2699 >htmlCreatePushParserCtxt ()</H3
2708 CLASS="PROGRAMLISTING"
2710 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
2711 >htmlParserCtxtPtr</A
2712 > htmlCreatePushParserCtxt (<A
2713 HREF="libxml-htmlparser.html#HTMLSAXHANDLERPTR"
2714 >htmlSAXHandlerPtr</A
2719 const char *filename,
2721 HREF="libxml-encoding.html#XMLCHARENCODING"
2728 >Create a parser context for using the HTML parser in push mode.
2729 To allow content encoding detection, <TT
2740 > is used for fetching external entities
2741 and error/warning reports.</P
2745 CLASS="INFORMALTABLE"
2789 > The user data returned on SAX callbacks</TD
2806 > a pointer to an array of chars</TD
2823 > number of chars in the array</TD
2840 > an optional file name or URI</TD
2857 > an optional encoding</TD
2875 >the new parser context or NULL</TD
2890 NAME="HTMLPARSECHUNK"
2892 >htmlParseChunk ()</H3
2901 CLASS="PROGRAMLISTING"
2902 >int htmlParseChunk (<A
2903 HREF="libxml-htmlparser.html#HTMLPARSERCTXTPTR"
2904 >htmlParserCtxtPtr</A
2908 int terminate);</PRE
2913 >Parse a Chunk of memory</P
2917 CLASS="INFORMALTABLE"
2944 > an HTML parser context</TD
2978 > the size in bytes of the chunk</TD
2995 > last chunk indicator</TD
3013 >zero if no error, the xmlParserErrors otherwise.</TD
3025 CLEAR="all"><BR><TABLE
3037 HREF="libxml-xmlerror.html"
3042 ><<< Previous Page</B
3075 HREF="libxml-lib.html"
3091 HREF="libxml-htmltree.html"
3096 >Next Page >>></B