2 * HTMLtree.c : implementation of access functions for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
33 /************************************************************************
35 * Getting/Setting encoding meta tags *
37 ************************************************************************/
40 * htmlGetMetaEncoding:
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
48 htmlGetMetaEncoding(htmlDocPtr doc) {
50 const xmlChar *content;
51 const xmlChar *encoding;
61 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62 if (xmlStrEqual(cur->name, BAD_CAST"html"))
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80 if (xmlStrEqual(cur->name, BAD_CAST"head"))
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
93 * Search the meta elements
97 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
101 const xmlChar *value;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
109 value = attr->children->content;
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
116 if ((http != 0) && (content != NULL))
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
151 * htmlSetMetaEncoding:
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
171 if (encoding != NULL) {
172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
174 newcontent[sizeof(newcontent) - 1] = 0;
182 while (cur != NULL) {
183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
200 while (cur != NULL) {
201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
212 if (cur->children == NULL) {
213 if (encoding == NULL)
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
224 if (encoding != NULL) {
226 * Create a new Meta element with the right attributes
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
236 * Search and destroy all the remaining meta elements carrying
237 * encoding information
239 while (cur != NULL) {
240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
242 xmlAttrPtr attr = cur->properties;
244 const xmlChar *value;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
252 value = attr->children->content;
253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
258 if ((value != NULL) &&
259 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
262 if ((http != 0) && (content != NULL))
267 if ((http != 0) && (content != NULL)) {
285 * These are the HTML attributes which will be output
286 * in minimized form, i.e. <option selected="selected"> will be
287 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
290 static const char* htmlBooleanAttrs[] = {
291 "checked", "compact", "declare", "defer", "disabled", "ismap",
292 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
299 * @name: the name of the attribute to check
301 * Determine if a given attribute is a boolean attribute.
303 * returns: false if the attribute is not boolean, true otherwise.
306 htmlIsBooleanAttr(const xmlChar *name)
310 while (htmlBooleanAttrs[i] != NULL) {
311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
318 /************************************************************************
320 * Dumping HTML tree content to a simple buffer *
322 ************************************************************************/
325 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
329 * htmlNodeDumpFormat:
330 * @buf: the HTML buffer output
332 * @cur: the current node
333 * @format: should formatting spaces be added
335 * Dump an HTML node, recursive behaviour, children are printed too.
337 * Returns the number of bytes written or -1 in case of error
340 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
344 xmlOutputBufferPtr outbuf;
352 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
353 if (outbuf == NULL) {
354 xmlGenericError(xmlGenericErrorContext,
355 "htmlNodeDumpFormat: out of memory!\n");
358 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
359 outbuf->buffer = buf;
360 outbuf->encoder = NULL;
361 outbuf->writecallback = NULL;
362 outbuf->closecallback = NULL;
363 outbuf->context = NULL;
367 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
369 ret = buf->use - use;
375 * @buf: the HTML buffer output
377 * @cur: the current node
379 * Dump an HTML node, recursive behaviour, children are printed too,
380 * and formatting returns are added.
382 * Returns the number of bytes written or -1 in case of error
385 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
386 return(htmlNodeDumpFormat(buf, doc, cur, 1));
390 * htmlNodeDumpFileFormat:
391 * @out: the FILE pointer
393 * @cur: the current node
394 * @encoding: the document encoding
395 * @format: should formatting spaces be added
397 * Dump an HTML node, recursive behaviour, children are printed too.
399 * TODO: if encoding == NULL try to save in the doc encoding
401 * returns: the number of bytes written, 0 if the output buffer could not be created, or -1 in case of failure.
404 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
405 xmlNodePtr cur, const char *encoding, int format) {
406 xmlOutputBufferPtr buf;
407 xmlCharEncodingHandlerPtr handler = NULL;
410 if (encoding != NULL) {
413 enc = xmlParseCharEncoding(encoding);
414 if (enc != XML_CHAR_ENCODING_UTF8) {
415 handler = xmlFindCharEncodingHandler(encoding);
422 * Fallback to HTML or ASCII when the encoding is unspecified
425 handler = xmlFindCharEncodingHandler("HTML");
427 handler = xmlFindCharEncodingHandler("ascii");
430 * save the content to a temp buffer.
432 buf = xmlOutputBufferCreateFile(out, handler);
433 if (buf == NULL) return(0);
435 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
437 ret = xmlOutputBufferClose(buf);
443 * @out: the FILE pointer
445 * @cur: the current node
447 * Dump an HTML node, recursive behaviour, children are printed too,
448 * and formatting returns are added.
451 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
452 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
458 * @mem: OUT: the memory pointer
459 * @size: OUT: the memory length
461 * Dump an HTML document in memory and return the xmlChar * and its size.
462 * It's up to the caller to free the memory.
465 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
466 xmlOutputBufferPtr buf;
467 xmlCharEncodingHandlerPtr handler = NULL;
468 const char *encoding;
472 xmlGenericError(xmlGenericErrorContext,
473 "htmlDocDumpMemory : document == NULL\n");
480 encoding = (const char *) htmlGetMetaEncoding(cur);
482 if (encoding != NULL) {
485 enc = xmlParseCharEncoding(encoding);
486 if (enc != cur->charset) {
487 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
496 handler = xmlFindCharEncodingHandler(encoding);
497 if (handler == NULL) {
506 * Fallback to HTML or ASCII when the encoding is unspecified
509 handler = xmlFindCharEncodingHandler("HTML");
511 handler = xmlFindCharEncodingHandler("ascii");
513 buf = xmlAllocOutputBuffer(handler);
520 htmlDocContentDumpOutput(buf, cur, NULL);
521 xmlOutputBufferFlush(buf);
522 if (buf->conv != NULL) {
523 *size = buf->conv->use;
524 *mem = xmlStrndup(buf->conv->content, *size);
526 *size = buf->buffer->use;
527 *mem = xmlStrndup(buf->buffer->content, *size);
529 (void)xmlOutputBufferClose(buf);
533 /************************************************************************
535 * Dumping HTML tree content to an I/O output buffer *
537 ************************************************************************/
539 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
543 * @buf: the HTML buffer output
545 * @encoding: the encoding string
547 * TODO: check whether encoding is needed
549 * Dump the HTML document DTD, if any.
552 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
553 const char *encoding ATTRIBUTE_UNUSED) {
554 xmlDtdPtr cur = doc->intSubset;
557 xmlGenericError(xmlGenericErrorContext,
558 "htmlDtdDumpOutput : no internal subset\n");
561 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
562 xmlOutputBufferWriteString(buf, (const char *)cur->name);
563 if (cur->ExternalID != NULL) {
564 xmlOutputBufferWriteString(buf, " PUBLIC ");
565 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
566 if (cur->SystemID != NULL) {
567 xmlOutputBufferWriteString(buf, " ");
568 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
570 } else if (cur->SystemID != NULL) {
571 xmlOutputBufferWriteString(buf, " SYSTEM ");
572 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
574 xmlOutputBufferWriteString(buf, ">\n");
578 * htmlAttrDumpOutput:
579 * @buf: the HTML buffer output
581 * @cur: the attribute pointer
582 * @encoding: the encoding string
584 * Dump an HTML attribute
587 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
588 const char *encoding ATTRIBUTE_UNUSED) {
592 * TODO: The html output method should not escape a & character
593 * occurring in an attribute value immediately followed by
594 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
598 xmlGenericError(xmlGenericErrorContext,
599 "htmlAttrDumpOutput : property == NULL\n");
602 xmlOutputBufferWriteString(buf, " ");
603 xmlOutputBufferWriteString(buf, (const char *)cur->name);
604 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
605 value = xmlNodeListGetString(doc, cur->children, 0);
607 xmlOutputBufferWriteString(buf, "=");
608 if ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
609 (!xmlStrcasecmp(cur->name, BAD_CAST "src"))) {
611 xmlChar *tmp = value;
613 while (IS_BLANK(*tmp)) tmp++;
615 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&");
616 if (escaped != NULL) {
617 xmlBufferWriteQuotedString(buf->buffer, escaped);
620 xmlBufferWriteQuotedString(buf->buffer, value);
623 xmlBufferWriteQuotedString(buf->buffer, value);
627 xmlOutputBufferWriteString(buf, "=\"\"");
633 * htmlAttrListDumpOutput:
634 * @buf: the HTML buffer output
636 * @cur: the first attribute pointer
637 * @encoding: the encoding string
639 * Dump a list of HTML attributes
642 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
644 xmlGenericError(xmlGenericErrorContext,
645 "htmlAttrListDumpOutput : property == NULL\n");
648 while (cur != NULL) {
649 htmlAttrDumpOutput(buf, doc, cur, encoding);
657 * htmlNodeListDumpOutput:
658 * @buf: the HTML buffer output
660 * @cur: the first node
661 * @encoding: the encoding string
662 * @format: should formatting spaces be added
664 * Dump an HTML node list, recursive behaviour, children are printed too.
667 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
668 xmlNodePtr cur, const char *encoding, int format) {
670 xmlGenericError(xmlGenericErrorContext,
671 "htmlNodeListDumpOutput : node == NULL\n");
674 while (cur != NULL) {
675 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
681 * htmlNodeDumpFormatOutput:
682 * @buf: the HTML buffer output
684 * @cur: the current node
685 * @encoding: the encoding string
686 * @format: should formatting spaces be added
688 * Dump an HTML node, recursive behaviour, children are printed too.
691 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
692 xmlNodePtr cur, const char *encoding, int format) {
693 const htmlElemDesc * info;
696 xmlGenericError(xmlGenericErrorContext,
697 "htmlNodeDumpFormatOutput : node == NULL\n");
703 if (cur->type == XML_DTD_NODE)
705 if (cur->type == XML_HTML_DOCUMENT_NODE) {
706 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
709 if (cur->type == HTML_TEXT_NODE) {
710 if (cur->content != NULL) {
711 if (((cur->name == (const xmlChar *)xmlStringText) ||
712 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
713 ((cur->parent == NULL) ||
714 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
715 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
718 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
719 if (buffer != NULL) {
720 xmlOutputBufferWriteString(buf, (const char *)buffer);
724 xmlOutputBufferWriteString(buf, (const char *)cur->content);
729 if (cur->type == HTML_COMMENT_NODE) {
730 if (cur->content != NULL) {
731 xmlOutputBufferWriteString(buf, "<!--");
732 xmlOutputBufferWriteString(buf, (const char *)cur->content);
733 xmlOutputBufferWriteString(buf, "-->");
737 if (cur->type == HTML_PI_NODE) {
738 if (cur->name == NULL)
740 xmlOutputBufferWriteString(buf, "<?");
741 xmlOutputBufferWriteString(buf, (const char *)cur->name);
742 if (cur->content != NULL) {
743 xmlOutputBufferWriteString(buf, " ");
744 xmlOutputBufferWriteString(buf, (const char *)cur->content);
746 xmlOutputBufferWriteString(buf, ">");
749 if (cur->type == HTML_ENTITY_REF_NODE) {
750 xmlOutputBufferWriteString(buf, "&");
751 xmlOutputBufferWriteString(buf, (const char *)cur->name);
752 xmlOutputBufferWriteString(buf, ";");
755 if (cur->type == HTML_PRESERVE_NODE) {
756 if (cur->content != NULL) {
757 xmlOutputBufferWriteString(buf, (const char *)cur->content);
763 * Get specific HTML info for that node.
766 info = htmlTagLookup(cur->name);
770 xmlOutputBufferWriteString(buf, "<");
771 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
772 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
773 xmlOutputBufferWriteString(buf, ":");
775 xmlOutputBufferWriteString(buf, (const char *)cur->name);
777 xmlNsListDumpOutput(buf, cur->nsDef);
778 if (cur->properties != NULL)
779 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
781 if ((info != NULL) && (info->empty)) {
782 xmlOutputBufferWriteString(buf, ">");
783 if ((format) && (!info->isinline) && (cur->next != NULL)) {
784 if ((cur->next->type != HTML_TEXT_NODE) &&
785 (cur->next->type != HTML_ENTITY_REF_NODE) &&
786 (cur->parent != NULL) &&
787 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
788 xmlOutputBufferWriteString(buf, "\n");
792 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
793 (cur->children == NULL)) {
794 if ((info != NULL) && (info->saveEndTag != 0) &&
795 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
796 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
797 xmlOutputBufferWriteString(buf, ">");
799 xmlOutputBufferWriteString(buf, "></");
800 xmlOutputBufferWriteString(buf, (const char *)cur->name);
801 xmlOutputBufferWriteString(buf, ">");
803 if ((format) && (cur->next != NULL) &&
804 (info != NULL) && (!info->isinline)) {
805 if ((cur->next->type != HTML_TEXT_NODE) &&
806 (cur->next->type != HTML_ENTITY_REF_NODE) &&
807 (cur->parent != NULL) &&
808 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
809 xmlOutputBufferWriteString(buf, "\n");
813 xmlOutputBufferWriteString(buf, ">");
814 if ((cur->type != XML_ELEMENT_NODE) &&
815 (cur->content != NULL)) {
817 * Uses the OutputBuffer property to automatically convert
818 * invalids to charrefs
821 xmlOutputBufferWriteString(buf, (const char *) cur->content);
823 if (cur->children != NULL) {
824 if ((format) && (info != NULL) && (!info->isinline) &&
825 (cur->children->type != HTML_TEXT_NODE) &&
826 (cur->children->type != HTML_ENTITY_REF_NODE) &&
827 (cur->children != cur->last) &&
828 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
829 xmlOutputBufferWriteString(buf, "\n");
830 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
831 if ((format) && (info != NULL) && (!info->isinline) &&
832 (cur->last->type != HTML_TEXT_NODE) &&
833 (cur->last->type != HTML_ENTITY_REF_NODE) &&
834 (cur->children != cur->last) &&
835 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
836 xmlOutputBufferWriteString(buf, "\n");
838 xmlOutputBufferWriteString(buf, "</");
839 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
840 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
841 xmlOutputBufferWriteString(buf, ":");
843 xmlOutputBufferWriteString(buf, (const char *)cur->name);
844 xmlOutputBufferWriteString(buf, ">");
845 if ((format) && (info != NULL) && (!info->isinline) &&
846 (cur->next != NULL)) {
847 if ((cur->next->type != HTML_TEXT_NODE) &&
848 (cur->next->type != HTML_ENTITY_REF_NODE) &&
849 (cur->parent != NULL) &&
850 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
851 xmlOutputBufferWriteString(buf, "\n");
856 * htmlNodeDumpOutput:
857 * @buf: the HTML buffer output
859 * @cur: the current node
860 * @encoding: the encoding string
862 * Dump an HTML node, recursive behaviour, children are printed too,
863 * and formatting returns/spaces are added.
866 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
867 xmlNodePtr cur, const char *encoding) {
868 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
872 * htmlDocContentDumpFormatOutput:
873 * @buf: the HTML buffer output
875 * @encoding: the encoding string
876 * @format: should formatting spaces be added
878 * Dump an HTML document.
881 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
882 const char *encoding, int format) {
886 * force to output the stuff as HTML, especially for entities
889 cur->type = XML_HTML_DOCUMENT_NODE;
890 if (cur->intSubset != NULL) {
891 htmlDtdDumpOutput(buf, cur, NULL);
893 if (cur->children != NULL) {
894 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
896 xmlOutputBufferWriteString(buf, "\n");
897 cur->type = (xmlElementType) type;
901 * htmlDocContentDumpOutput:
902 * @buf: the HTML buffer output
904 * @encoding: the encoding string
906 * Dump an HTML document. Formatting returns/spaces are added.
909 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
910 const char *encoding) {
911 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
914 /************************************************************************
916 * Saving functions front-ends *
918 ************************************************************************/
925 * Dump an HTML document to an open FILE.
927 * returns: the number of bytes written or -1 in case of failure.
930 htmlDocDump(FILE *f, xmlDocPtr cur) {
931 xmlOutputBufferPtr buf;
932 xmlCharEncodingHandlerPtr handler = NULL;
933 const char *encoding;
938 xmlGenericError(xmlGenericErrorContext,
939 "htmlDocDump : document == NULL\n");
944 encoding = (const char *) htmlGetMetaEncoding(cur);
946 if (encoding != NULL) {
949 enc = xmlParseCharEncoding(encoding);
950 if (enc != cur->charset) {
951 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
958 handler = xmlFindCharEncodingHandler(encoding);
965 * Fallback to HTML or ASCII when the encoding is unspecified
968 handler = xmlFindCharEncodingHandler("HTML");
970 handler = xmlFindCharEncodingHandler("ascii");
972 buf = xmlOutputBufferCreateFile(f, handler);
973 if (buf == NULL) return(-1);
974 htmlDocContentDumpOutput(buf, cur, NULL);
976 ret = xmlOutputBufferClose(buf);
982 * @filename: the filename (or URL)
985 * Dump an HTML document to a file. If @filename is "-" the stdout file is
987 * returns: the number of bytes written, 0 if the output buffer could not be created, or -1 in case of failure.
990 htmlSaveFile(const char *filename, xmlDocPtr cur) {
991 xmlOutputBufferPtr buf;
992 xmlCharEncodingHandlerPtr handler = NULL;
993 const char *encoding;
996 encoding = (const char *) htmlGetMetaEncoding(cur);
998 if (encoding != NULL) {
1001 enc = xmlParseCharEncoding(encoding);
1002 if (enc != cur->charset) {
1003 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1010 handler = xmlFindCharEncodingHandler(encoding);
1011 if (handler == NULL)
1017 * Fallback to HTML or ASCII when the encoding is unspecified
1019 if (handler == NULL)
1020 handler = xmlFindCharEncodingHandler("HTML");
1021 if (handler == NULL)
1022 handler = xmlFindCharEncodingHandler("ascii");
1025 * save the content to a temp buffer.
1027 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1028 if (buf == NULL) return(0);
1030 htmlDocContentDumpOutput(buf, cur, NULL);
1032 ret = xmlOutputBufferClose(buf);
1037 * htmlSaveFileFormat:
1038 * @filename: the filename
1039 * @cur: the document
1040 * @format: should formatting spaces be added
1041 * @encoding: the document encoding
1043 * Dump an HTML document to a file using a given encoding.
1045 * returns: the number of bytes written, 0 if the output buffer could not be created, or -1 in case of failure.
1048 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1049 const char *encoding, int format) {
1050 xmlOutputBufferPtr buf;
1051 xmlCharEncodingHandlerPtr handler = NULL;
1054 if (encoding != NULL) {
1055 xmlCharEncoding enc;
1057 enc = xmlParseCharEncoding(encoding);
1058 if (enc != cur->charset) {
1059 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1066 handler = xmlFindCharEncodingHandler(encoding);
1067 if (handler == NULL)
1069 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1072 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1076 * Fallback to HTML or ASCII when the encoding is unspecified
1078 if (handler == NULL)
1079 handler = xmlFindCharEncodingHandler("HTML");
1080 if (handler == NULL)
1081 handler = xmlFindCharEncodingHandler("ascii");
1084 * save the content to a temp buffer.
1086 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1087 if (buf == NULL) return(0);
1089 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1091 ret = xmlOutputBufferClose(buf);
1097 * @filename: the filename
1098 * @cur: the document
1099 * @encoding: the document encoding
1101 * Dump an HTML document to a file using a given encoding
1102 * and formatting returns/spaces are added.
1104 * returns: the number of bytes written or -1 in case of failure.
1107 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1108 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1113 #endif /* LIBXML_HTML_ENABLED */