2 * HTMLtree.c : implementation of access functions for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
33 /************************************************************************
35 * Getting/Setting encoding meta tags *
37 ************************************************************************/
40 * htmlGetMetaEncoding:
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
48 htmlGetMetaEncoding(htmlDocPtr doc) {
50 const xmlChar *content;
51 const xmlChar *encoding;
61 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62 if (xmlStrEqual(cur->name, BAD_CAST"html"))
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80 if (xmlStrEqual(cur->name, BAD_CAST"head"))
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
93 * Search the meta elements
97 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
101 const xmlChar *value;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
109 value = attr->children->content;
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
116 if ((http != 0) && (content != NULL))
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
151 * htmlSetMetaEncoding:
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
171 if (encoding != NULL) {
172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
174 newcontent[sizeof(newcontent) - 1] = 0;
182 while (cur != NULL) {
183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
200 while (cur != NULL) {
201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
212 if (cur->children == NULL) {
213 if (encoding == NULL)
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
224 if (encoding != NULL) {
226 * Create a new Meta element with the right attributes
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
236 * Search and destroy all the remaining meta elements carrying
237 * encoding information
239 while (cur != NULL) {
240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
242 xmlAttrPtr attr = cur->properties;
244 const xmlChar *value;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
252 value = attr->children->content;
253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
258 if ((value != NULL) &&
259 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
262 if ((http != 0) && (content != NULL))
267 if ((http != 0) && (content != NULL)) {
285 * These are the HTML attributes which will be output
286 * in minimized form, i.e. <option selected="selected"> will be
287 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
290 static const char* htmlBooleanAttrs[] = {
291 "checked", "compact", "declare", "defer", "disabled", "ismap",
292 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
299 * @name: the name of the attribute to check
301 * Determine if a given attribute is a boolean attribute.
303 * returns: false if the attribute is not boolean, true otherwise.
306 htmlIsBooleanAttr(const xmlChar *name)
310 while (htmlBooleanAttrs[i] != NULL) {
311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
318 /************************************************************************
320 * Dumping HTML tree content to a simple buffer *
322 ************************************************************************/
325 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
329 * htmlNodeDumpFormat:
330 * @buf: the HTML buffer output
332 * @cur: the current node
333 * @format: should formatting spaces be added
335 * Dump an HTML node, recursive behaviour, children are printed too.
337 * Returns the number of bytes written or -1 in case of error
340 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
344 xmlOutputBufferPtr outbuf;
352 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
353 if (outbuf == NULL) {
354 xmlGenericError(xmlGenericErrorContext,
355 "htmlNodeDumpFormat: out of memory!\n");
358 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
359 outbuf->buffer = buf;
360 outbuf->encoder = NULL;
361 outbuf->writecallback = NULL;
362 outbuf->closecallback = NULL;
363 outbuf->context = NULL;
367 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
369 ret = buf->use - use;
375 * @buf: the HTML buffer output
377 * @cur: the current node
379 * Dump an HTML node, recursive behaviour, children are printed too,
380 * and formatting returns are added.
382 * Returns the number of bytes written or -1 in case of error
385 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
388 return(htmlNodeDumpFormat(buf, doc, cur, 1));
392 * htmlNodeDumpFileFormat:
393 * @out: the FILE pointer
395 * @cur: the current node
396 * @encoding: the document encoding
397 * @format: should formatting spaces be added
399 * Dump an HTML node, recursive behaviour, children are printed too.
401 * TODO: if encoding == NULL try to save in the doc encoding
403 * returns: the number of bytes written or -1 in case of failure.
406 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
407 xmlNodePtr cur, const char *encoding, int format) {
408 xmlOutputBufferPtr buf;
409 xmlCharEncodingHandlerPtr handler = NULL;
414 if (encoding != NULL) {
417 enc = xmlParseCharEncoding(encoding);
418 if (enc != XML_CHAR_ENCODING_UTF8) {
419 handler = xmlFindCharEncodingHandler(encoding);
426 * Fallback to HTML or ASCII when the encoding is unspecified
429 handler = xmlFindCharEncodingHandler("HTML");
431 handler = xmlFindCharEncodingHandler("ascii");
434 * save the content to a temp buffer.
436 buf = xmlOutputBufferCreateFile(out, handler);
437 if (buf == NULL) return(0);
439 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
441 ret = xmlOutputBufferClose(buf);
447 * @out: the FILE pointer
449 * @cur: the current node
451 * Dump an HTML node, recursive behaviour, children are printed too,
452 * and formatting returns are added.
455 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
456 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
462 * @mem: OUT: the memory pointer
463 * @size: OUT: the memory length
465 * Dump an HTML document in memory and return the xmlChar * and its size.
466 * It's up to the caller to free the memory.
469 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
470 xmlOutputBufferPtr buf;
471 xmlCharEncodingHandlerPtr handler = NULL;
472 const char *encoding;
478 xmlGenericError(xmlGenericErrorContext,
479 "htmlDocDumpMemory : document == NULL\n");
486 encoding = (const char *) htmlGetMetaEncoding(cur);
488 if (encoding != NULL) {
491 enc = xmlParseCharEncoding(encoding);
492 if (enc != cur->charset) {
493 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
502 handler = xmlFindCharEncodingHandler(encoding);
503 if (handler == NULL) {
512 * Fallback to HTML or ASCII when the encoding is unspecified
515 handler = xmlFindCharEncodingHandler("HTML");
517 handler = xmlFindCharEncodingHandler("ascii");
519 buf = xmlAllocOutputBuffer(handler);
526 htmlDocContentDumpOutput(buf, cur, NULL);
527 xmlOutputBufferFlush(buf);
528 if (buf->conv != NULL) {
529 *size = buf->conv->use;
530 *mem = xmlStrndup(buf->conv->content, *size);
532 *size = buf->buffer->use;
533 *mem = xmlStrndup(buf->buffer->content, *size);
535 (void)xmlOutputBufferClose(buf);
539 /************************************************************************
541 * Dumping HTML tree content to an I/O output buffer *
543 ************************************************************************/
545 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
549 * @buf: the HTML buffer output
551 * @encoding: the encoding string
553 * TODO: check whether encoding is needed
555 * Dump the HTML document DTD, if any.
558 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
559 const char *encoding ATTRIBUTE_UNUSED) {
560 xmlDtdPtr cur = doc->intSubset;
563 xmlGenericError(xmlGenericErrorContext,
564 "htmlDtdDumpOutput : no internal subset\n");
567 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
568 xmlOutputBufferWriteString(buf, (const char *)cur->name);
569 if (cur->ExternalID != NULL) {
570 xmlOutputBufferWriteString(buf, " PUBLIC ");
571 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
572 if (cur->SystemID != NULL) {
573 xmlOutputBufferWriteString(buf, " ");
574 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
576 } else if (cur->SystemID != NULL) {
577 xmlOutputBufferWriteString(buf, " SYSTEM ");
578 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
580 xmlOutputBufferWriteString(buf, ">\n");
584 * htmlAttrDumpOutput:
585 * @buf: the HTML buffer output
587 * @cur: the attribute pointer
588 * @encoding: the encoding string
590 * Dump an HTML attribute
593 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
594 const char *encoding ATTRIBUTE_UNUSED) {
598 * TODO: The html output method should not escape a & character
599 * occurring in an attribute value immediately followed by
600 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
604 xmlGenericError(xmlGenericErrorContext,
605 "htmlAttrDumpOutput : property == NULL\n");
608 xmlOutputBufferWriteString(buf, " ");
609 xmlOutputBufferWriteString(buf, (const char *)cur->name);
610 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
611 value = xmlNodeListGetString(doc, cur->children, 0);
613 xmlOutputBufferWriteString(buf, "=");
614 if ((cur->ns == NULL) && (cur->parent != NULL) &&
615 (cur->parent->ns == NULL) &&
616 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
617 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
618 (!xmlStrcasecmp(cur->name, BAD_CAST "src")))) {
620 xmlChar *tmp = value;
622 while (IS_BLANK(*tmp)) tmp++;
624 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
625 if (escaped != NULL) {
626 xmlBufferWriteQuotedString(buf->buffer, escaped);
629 xmlBufferWriteQuotedString(buf->buffer, value);
632 xmlBufferWriteQuotedString(buf->buffer, value);
636 xmlOutputBufferWriteString(buf, "=\"\"");
642 * htmlAttrListDumpOutput:
643 * @buf: the HTML buffer output
645 * @cur: the first attribute pointer
646 * @encoding: the encoding string
648 * Dump a list of HTML attributes
651 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
653 xmlGenericError(xmlGenericErrorContext,
654 "htmlAttrListDumpOutput : property == NULL\n");
657 while (cur != NULL) {
658 htmlAttrDumpOutput(buf, doc, cur, encoding);
666 * htmlNodeListDumpOutput:
667 * @buf: the HTML buffer output
669 * @cur: the first node
670 * @encoding: the encoding string
671 * @format: should formatting spaces be added
673 * Dump an HTML node list, recursive behaviour, children are printed too.
676 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
677 xmlNodePtr cur, const char *encoding, int format) {
679 xmlGenericError(xmlGenericErrorContext,
680 "htmlNodeListDumpOutput : node == NULL\n");
683 while (cur != NULL) {
684 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
690 * htmlNodeDumpFormatOutput:
691 * @buf: the HTML buffer output
693 * @cur: the current node
694 * @encoding: the encoding string
695 * @format: should formatting spaces be added
697 * Dump an HTML node, recursive behaviour, children are printed too.
700 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
701 xmlNodePtr cur, const char *encoding, int format) {
702 const htmlElemDesc * info;
707 xmlGenericError(xmlGenericErrorContext,
708 "htmlNodeDumpFormatOutput : node == NULL\n");
714 if (cur->type == XML_DTD_NODE)
716 if (cur->type == XML_HTML_DOCUMENT_NODE) {
717 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
720 if (cur->type == HTML_TEXT_NODE) {
721 if (cur->content != NULL) {
722 if (((cur->name == (const xmlChar *)xmlStringText) ||
723 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
724 ((cur->parent == NULL) ||
725 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
726 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
729 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
730 if (buffer != NULL) {
731 xmlOutputBufferWriteString(buf, (const char *)buffer);
735 xmlOutputBufferWriteString(buf, (const char *)cur->content);
740 if (cur->type == HTML_COMMENT_NODE) {
741 if (cur->content != NULL) {
742 xmlOutputBufferWriteString(buf, "<!--");
743 xmlOutputBufferWriteString(buf, (const char *)cur->content);
744 xmlOutputBufferWriteString(buf, "-->");
748 if (cur->type == HTML_PI_NODE) {
749 if (cur->name == NULL)
751 xmlOutputBufferWriteString(buf, "<?");
752 xmlOutputBufferWriteString(buf, (const char *)cur->name);
753 if (cur->content != NULL) {
754 xmlOutputBufferWriteString(buf, " ");
755 xmlOutputBufferWriteString(buf, (const char *)cur->content);
757 xmlOutputBufferWriteString(buf, ">");
760 if (cur->type == HTML_ENTITY_REF_NODE) {
761 xmlOutputBufferWriteString(buf, "&");
762 xmlOutputBufferWriteString(buf, (const char *)cur->name);
763 xmlOutputBufferWriteString(buf, ";");
766 if (cur->type == HTML_PRESERVE_NODE) {
767 if (cur->content != NULL) {
768 xmlOutputBufferWriteString(buf, (const char *)cur->content);
774 * Get specific HTML info for that node.
777 info = htmlTagLookup(cur->name);
781 xmlOutputBufferWriteString(buf, "<");
782 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
783 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
784 xmlOutputBufferWriteString(buf, ":");
786 xmlOutputBufferWriteString(buf, (const char *)cur->name);
788 xmlNsListDumpOutput(buf, cur->nsDef);
789 if (cur->properties != NULL)
790 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
792 if ((info != NULL) && (info->empty)) {
793 xmlOutputBufferWriteString(buf, ">");
794 if ((format) && (!info->isinline) && (cur->next != NULL)) {
795 if ((cur->next->type != HTML_TEXT_NODE) &&
796 (cur->next->type != HTML_ENTITY_REF_NODE) &&
797 (cur->parent != NULL) &&
798 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
799 xmlOutputBufferWriteString(buf, "\n");
803 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
804 (cur->children == NULL)) {
805 if ((info != NULL) && (info->saveEndTag != 0) &&
806 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
807 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
808 xmlOutputBufferWriteString(buf, ">");
810 xmlOutputBufferWriteString(buf, "></");
811 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
812 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
813 xmlOutputBufferWriteString(buf, ":");
815 xmlOutputBufferWriteString(buf, (const char *)cur->name);
816 xmlOutputBufferWriteString(buf, ">");
818 if ((format) && (cur->next != NULL) &&
819 (info != NULL) && (!info->isinline)) {
820 if ((cur->next->type != HTML_TEXT_NODE) &&
821 (cur->next->type != HTML_ENTITY_REF_NODE) &&
822 (cur->parent != NULL) &&
823 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
824 xmlOutputBufferWriteString(buf, "\n");
828 xmlOutputBufferWriteString(buf, ">");
829 if ((cur->type != XML_ELEMENT_NODE) &&
830 (cur->content != NULL)) {
832 * Uses the OutputBuffer property to automatically convert
833 * invalids to charrefs
836 xmlOutputBufferWriteString(buf, (const char *) cur->content);
838 if (cur->children != NULL) {
839 if ((format) && (info != NULL) && (!info->isinline) &&
840 (cur->children->type != HTML_TEXT_NODE) &&
841 (cur->children->type != HTML_ENTITY_REF_NODE) &&
842 (cur->children != cur->last) &&
843 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
844 xmlOutputBufferWriteString(buf, "\n");
845 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
846 if ((format) && (info != NULL) && (!info->isinline) &&
847 (cur->last->type != HTML_TEXT_NODE) &&
848 (cur->last->type != HTML_ENTITY_REF_NODE) &&
849 (cur->children != cur->last) &&
850 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
851 xmlOutputBufferWriteString(buf, "\n");
853 xmlOutputBufferWriteString(buf, "</");
854 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
855 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
856 xmlOutputBufferWriteString(buf, ":");
858 xmlOutputBufferWriteString(buf, (const char *)cur->name);
859 xmlOutputBufferWriteString(buf, ">");
860 if ((format) && (info != NULL) && (!info->isinline) &&
861 (cur->next != NULL)) {
862 if ((cur->next->type != HTML_TEXT_NODE) &&
863 (cur->next->type != HTML_ENTITY_REF_NODE) &&
864 (cur->parent != NULL) &&
865 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
866 xmlOutputBufferWriteString(buf, "\n");
871 * htmlNodeDumpOutput:
872 * @buf: the HTML buffer output
874 * @cur: the current node
875 * @encoding: the encoding string
877 * Dump an HTML node, recursive behaviour, children are printed too,
878 * and formatting returns/spaces are added.
881 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
882 xmlNodePtr cur, const char *encoding) {
883 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
887 * htmlDocContentDumpFormatOutput:
888 * @buf: the HTML buffer output
890 * @encoding: the encoding string
891 * @format: should formatting spaces be added
893 * Dump an HTML document.
896 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
897 const char *encoding, int format) {
903 * force to output the stuff as HTML, especially for entities
906 cur->type = XML_HTML_DOCUMENT_NODE;
907 if (cur->intSubset != NULL) {
908 htmlDtdDumpOutput(buf, cur, NULL);
910 if (cur->children != NULL) {
911 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
913 xmlOutputBufferWriteString(buf, "\n");
914 cur->type = (xmlElementType) type;
918 * htmlDocContentDumpOutput:
919 * @buf: the HTML buffer output
921 * @encoding: the encoding string
923 * Dump an HTML document. Formatting returns/spaces are added.
926 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
927 const char *encoding) {
928 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
931 /************************************************************************
933 * Saving functions front-ends *
935 ************************************************************************/
942 * Dump an HTML document to an open FILE.
944 * returns: the number of bytes written or -1 in case of failure.
947 htmlDocDump(FILE *f, xmlDocPtr cur) {
948 xmlOutputBufferPtr buf;
949 xmlCharEncodingHandlerPtr handler = NULL;
950 const char *encoding;
957 xmlGenericError(xmlGenericErrorContext,
958 "htmlDocDump : document == NULL\n");
963 encoding = (const char *) htmlGetMetaEncoding(cur);
965 if (encoding != NULL) {
968 enc = xmlParseCharEncoding(encoding);
969 if (enc != cur->charset) {
970 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
977 handler = xmlFindCharEncodingHandler(encoding);
984 * Fallback to HTML or ASCII when the encoding is unspecified
987 handler = xmlFindCharEncodingHandler("HTML");
989 handler = xmlFindCharEncodingHandler("ascii");
991 buf = xmlOutputBufferCreateFile(f, handler);
992 if (buf == NULL) return(-1);
993 htmlDocContentDumpOutput(buf, cur, NULL);
995 ret = xmlOutputBufferClose(buf);
1001 * @filename: the filename (or URL)
1002 * @cur: the document
1004 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1006 * returns: the number of bytes written or -1 in case of failure.
1009 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1010 xmlOutputBufferPtr buf;
1011 xmlCharEncodingHandlerPtr handler = NULL;
1012 const char *encoding;
1017 encoding = (const char *) htmlGetMetaEncoding(cur);
1019 if (encoding != NULL) {
1020 xmlCharEncoding enc;
1022 enc = xmlParseCharEncoding(encoding);
1023 if (enc != cur->charset) {
1024 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1031 handler = xmlFindCharEncodingHandler(encoding);
1032 if (handler == NULL)
1038 * Fallback to HTML or ASCII when the encoding is unspecified
1040 if (handler == NULL)
1041 handler = xmlFindCharEncodingHandler("HTML");
1042 if (handler == NULL)
1043 handler = xmlFindCharEncodingHandler("ascii");
1046 * save the content to a temp buffer.
1048 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1049 if (buf == NULL) return(0);
1051 htmlDocContentDumpOutput(buf, cur, NULL);
1053 ret = xmlOutputBufferClose(buf);
1058 * htmlSaveFileFormat:
1059 * @filename: the filename
1060 * @cur: the document
1061 * @format: should formatting spaces be added
1062 * @encoding: the document encoding
1064 * Dump an HTML document to a file using a given encoding.
1066 * returns: the number of bytes written or -1 in case of failure.
1069 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1070 const char *encoding, int format) {
1071 xmlOutputBufferPtr buf;
1072 xmlCharEncodingHandlerPtr handler = NULL;
1077 if (encoding != NULL) {
1078 xmlCharEncoding enc;
1080 enc = xmlParseCharEncoding(encoding);
1081 if (enc != cur->charset) {
1082 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1089 handler = xmlFindCharEncodingHandler(encoding);
1090 if (handler == NULL)
1092 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1095 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1099 * Fallback to HTML or ASCII when the encoding is unspecified
1101 if (handler == NULL)
1102 handler = xmlFindCharEncodingHandler("HTML");
1103 if (handler == NULL)
1104 handler = xmlFindCharEncodingHandler("ascii");
1107 * save the content to a temp buffer.
1109 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1110 if (buf == NULL) return(0);
1112 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1114 ret = xmlOutputBufferClose(buf);
1120 * @filename: the filename
1121 * @cur: the document
1122 * @encoding: the document encoding
1124 * Dump an HTML document to a file using a given encoding
1125 * and formatting returns/spaces are added.
1127 * returns: the number of bytes written or -1 in case of failure.
1130 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1131 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1136 #endif /* LIBXML_HTML_ENABLED */