2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
/*
 * XML_DIR_SEP: judging by its name and values, the directory separator
 * character. The backslash form sits under the test for WIN32 defined and
 * __CYGWIN__ not defined; the forward slash form is presumably the fallback
 * branch (the #else and #endif lines are not visible in this excerpt).
 */
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
44 #include <libxml/xmlmemory.h>
45 #include <libxml/threads.h>
46 #include <libxml/globals.h>
47 #include <libxml/tree.h>
48 #include <libxml/parser.h>
49 #include <libxml/parserInternals.h>
50 #include <libxml/valid.h>
51 #include <libxml/entities.h>
52 #include <libxml/xmlerror.h>
53 #include <libxml/encoding.h>
54 #include <libxml/xmlIO.h>
55 #include <libxml/uri.h>
56 #ifdef LIBXML_CATALOG_ENABLED
57 #include <libxml/catalog.h>
66 #ifdef HAVE_SYS_STAT_H
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
/*
 * MAX_DEPTH: limit compared against ctxt->nodeNr in nodePush; exceeding it
 * reports an error and sets the parser state to XML_PARSER_EOF.
 */
86 #define MAX_DEPTH 1024
/*
 * XML_PARSER_BIG_BUFFER_SIZE: initial size, in xmlChars, of the work buffer
 * allocated by xmlStringDecodeEntities.
 */
88 #define XML_PARSER_BIG_BUFFER_SIZE 300
/*
 * XML_PARSER_BUFFER_SIZE: headroom subtracted from buffer_size in the
 * fill-level checks of xmlStringDecodeEntities.
 */
89 #define XML_PARSER_BUFFER_SIZE 100
/* SAX_COMPAT_MODE: marker string; its use is not visible in this excerpt. */
91 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
94 * List of XML prefixed PI allowed by W3C specs
/*
 * Start of the xmlW3CPIs table (its entries are not visible in this excerpt)
 * followed by declarations of parser routines whose definitions are not part
 * of this excerpt. Several return-type lines are missing from the listing.
 * xmlParseStringPEReference is called from xmlStringDecodeEntities.
 */
97 static const char *xmlW3CPIs[] = {
102 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
103 xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
107 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
109 void *user_data, int depth, const xmlChar *URL,
110 const xmlChar *ID, xmlNodePtr *list);
113 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
117 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
119 /************************************************************************
121 * Parser stacks related functions and macros *
123 ************************************************************************/
/*
 * Declared ahead of use: xmlStringDecodeEntities calls this routine with the
 * address of its string cursor to parse an entity reference.
 */
125 xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
130 * @ctxt: an XML parser context
131 * @value: the parser input
133 * Pushes a new parser input on top of the input stack
135 * Returns 0 in case of error, the index in the stack otherwise
/*
 * Stores value in slot ctxt->inputNr of ctxt->inputTab and returns that
 * index (the value of inputNr before the increment), so the first push on an
 * empty stack returns 0. When inputNr has reached inputMax the table is
 * reallocated first; the lines computing the new size are not visible in
 * this excerpt.
 * NOTE(review): the NULL test after xmlRealloc is made on ctxt->inputTab
 * itself, which suggests the result overwrote the old pointer; if so the
 * previous table is unreachable when reallocation fails - confirm.
 */
138 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
140 if (ctxt->inputNr >= ctxt->inputMax) {
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
151 ctxt->inputTab[ctxt->inputNr] = value;
153 return (ctxt->inputNr++);
157 * @ctxt: an XML parser context
159 * Pops the top parser input from the input stack
161 * Returns the input just removed
/*
 * Removes the top entry of the input stack. An empty stack (inputNr <= 0)
 * is handled by an early exit whose statement is not visible here. While
 * entries remain, ctxt->input is repointed at the entry below the top. The
 * popped value is then read from slot inputNr and that slot is cleared,
 * so inputNr has presumably been decremented on a line missing from this
 * excerpt.
 */
163 extern xmlParserInputPtr
164 inputPop(xmlParserCtxtPtr ctxt)
166 xmlParserInputPtr ret;
168 if (ctxt->inputNr <= 0)
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
181 * @ctxt: an XML parser context
182 * @value: the element node
184 * Pushes a new element node on top of the node stack
186 * Returns 0 in case of error, the index in the stack otherwise
/*
 * Stores value in slot ctxt->nodeNr of ctxt->nodeTab and returns that index
 * (nodeNr before the increment). The table is reallocated first when nodeNr
 * has reached nodeMax. Before storing, nodeNr is compared with MAX_DEPTH:
 * when it is greater, an error is sent to the SAX error callback (if one is
 * set), wellFormed is cleared, the state becomes XML_PARSER_EOF and SAX is
 * disabled unless ctxt->recovery is set. What is returned on that path is
 * not visible in this excerpt.
 * NOTE(review): as in the other push routines, the NULL test is made on
 * ctxt->nodeTab after xmlRealloc, so the old table may be lost on failure -
 * confirm.
 */
189 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
214 ctxt->nodeTab[ctxt->nodeNr] = value;
216 return (ctxt->nodeNr++);
220 * @ctxt: an XML parser context
222 * Pops the top element node from the node stack
224 * Returns the node just removed
/*
 * Removes the top entry of the node stack. An empty stack (nodeNr <= 0) is
 * handled by an early exit whose statement is not visible here. While
 * entries remain, ctxt->node is repointed at the entry below the top. The
 * popped value is read from slot nodeNr and the slot is cleared, so nodeNr
 * has presumably been decremented on a line missing from this excerpt.
 */
227 nodePop(xmlParserCtxtPtr ctxt)
231 if (ctxt->nodeNr <= 0)
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
244 * @ctxt: an XML parser context
245 * @value: the element name
247 * Pushes a new element name on top of the name stack
249 * Returns 0 in case of error, the index in the stack otherwise
/*
 * Stores value in slot ctxt->nameNr of ctxt->nameTab and returns that index
 * (nameNr before the increment), so the first push on an empty stack
 * returns 0. The table is reallocated first when nameNr has reached nameMax.
 * The pointer is stored as given; no copy of the string is made on the
 * visible lines.
 * NOTE(review): the NULL test is made on ctxt->nameTab after xmlRealloc, so
 * the old table may be lost on failure - confirm.
 */
252 namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
254 if (ctxt->nameNr >= ctxt->nameMax) {
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
265 ctxt->nameTab[ctxt->nameNr] = value;
267 return (ctxt->nameNr++);
271 * @ctxt: an XML parser context
273 * Pops the top element name from the name stack
275 * Returns the name just removed
/*
 * Removes the top entry of the name stack. An empty stack (nameNr <= 0) is
 * handled by an early exit whose statement is not visible here. While
 * entries remain, ctxt->name is repointed at the entry below the top. The
 * popped value is read from slot nameNr and the slot is cleared, so nameNr
 * has presumably been decremented on a line missing from this excerpt.
 */
278 namePop(xmlParserCtxtPtr ctxt)
282 if (ctxt->nameNr <= 0)
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
/*
 * Pushes val on the stack of int values held in ctxt->spaceTab, points
 * ctxt->space at the new top slot and returns the index used (spaceNr
 * before the increment). The table is reallocated first when spaceNr has
 * reached spaceMax; the line updating spaceMax is not visible here.
 * NOTE(review): the xmlRealloc result is assigned straight back to
 * ctxt->spaceTab, so the previous table is unreachable if the call fails.
 */
294 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
295 if (ctxt->spaceNr >= ctxt->spaceMax) {
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
/*
 * Pops the top value of the spaceTab stack. Returns 0 at once when the
 * stack is empty. While entries remain, ctxt->space is repointed at the
 * entry below the top. The popped value is read from slot spaceNr and the
 * slot is overwritten with -1, so spaceNr has presumably been decremented
 * on a line missing from this excerpt.
 */
310 static int spacePop(xmlParserCtxtPtr ctxt) {
312 if (ctxt->spaceNr <= 0) return(0);
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
324 * Macros for accessing the content. Those should be used only by the parser,
327 * Dirty macros, i.e. one often need to make assumption on the context to
330 * CUR_PTR return the current pointer to the xmlChar to be parsed.
331 * To be used with extreme caution since operations consuming
332 * characters may move the input buffer to a different location !
333 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
334 * This should be used internally by the parser
335 * only to compare to ASCII values otherwise it would break when
336 * running with UTF-8 encoding.
337 * RAW same as CUR but in the input buffer, bypass any token
338 * extraction that may have been done
339 * NXT(n) returns the n'th next xmlChar. Like CUR, it should be used only
340 * to compare against ASCII-based substrings.
341 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
342 * strings without newlines within the parser.
343 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
344 * defined char within the parser.
345 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
347 * NEXT Skip to the next character, this does the proper decoding
348 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
349 * NEXTL(l) Skip the current unicode character of l xmlChars long.
350 * CUR_CHAR(l) returns the current unicode character (int), set l
351 * to the number of xmlChars used for the encoding [0-5].
352 * CUR_SCHAR same but operate on a string instead of the context
353 * COPY_BUF copy the current unicode char to the target buffer, increment
355 * GROW, SHRINK handling of input buffers
/*
 * Definitions of the input access macros. All of them expect a variable
 * named ctxt to be in scope.
 *  - RAW and CUR expand to the same expression, the byte at
 *    ctxt->input->cur; NXT(val) indexes from that pointer.
 *  - SKIP(val) advances nbChars, the cursor and the column by val, then
 *    calls xmlParserHandlePEReference when the new current byte is a
 *    percent sign, then checks for an exhausted input.
 *  - SHRINK and GROW are guarded by ctxt->progressive == 0 and by
 *    comparisons of the buffer offsets with INPUT_CHUNK; the statement they
 *    guard is not visible in this excerpt.
 *  - xmlSHRINK and xmlGROW call xmlParserInputShrink and xmlParserInputGrow
 *    respectively, then test for an exhausted input (current byte 0 and a
 *    further grow returning <= 0); the action taken is not visible here.
 *  - NEXTL(l) updates line and col from the current byte, advances the
 *    cursor by l, and also hands a percent sign to
 *    xmlParserHandlePEReference.
 *  - COPY_BUF expands to a bare if/else statement rather than a
 *    do/while(0) wrapper, so it must not be used as the unbraced body of
 *    another if. It stores v as one xmlChar when l == 1 and otherwise
 *    delegates to xmlCopyCharMultiByte.
 */
358 #define RAW (*ctxt->input->cur)
359 #define CUR (*ctxt->input->cur)
360 #define NXT(val) ctxt->input->cur[(val)]
361 #define CUR_PTR ctxt->input->cur
363 #define SKIP(val) do { \
364 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
365 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
366 if ((*ctxt->input->cur == 0) && \
367 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
371 #define SHRINK if ((ctxt->progressive == 0) && \
372 (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) && \
373 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
376 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
377 xmlParserInputShrink(ctxt->input);
378 if ((*ctxt->input->cur == 0) &&
379 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
383 #define GROW if ((ctxt->progressive == 0) && \
384 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
387 static void xmlGROW (xmlParserCtxtPtr ctxt) {
388 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
389 if ((*ctxt->input->cur == 0) &&
390 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
394 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
396 #define NEXT xmlNextChar(ctxt)
399 ctxt->input->col++; \
400 ctxt->input->cur++; \
402 if (*ctxt->input->cur == 0) \
403 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
406 #define NEXTL(l) do { \
407 if (*(ctxt->input->cur) == '\n') { \
408 ctxt->input->line++; ctxt->input->col = 1; \
409 } else ctxt->input->col++; \
410 ctxt->input->cur += l; \
411 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
414 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
415 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
417 #define COPY_BUF(l,b,i,v) \
418 if (l == 1) b[i++] = (xmlChar) v; \
419 else i += xmlCopyCharMultiByte(&b[i],v)
423 * @ctxt: the XML parser context
425 * Skip all blank characters found at that point in the input stream.
426 * It pops up finished entities in the process if allowable at that point.
428 * Returns the number of space chars skipped
/*
 * Two code paths are visible.
 * Fast path, taken when exactly one input is on the stack and the state is
 * not XML_PARSER_DTD: a local pointer walks over the blanks in the current
 * buffer, line and col are reset on a newline, and the input is grown with
 * xmlParserInputGrow before the pointer is re-read from ctxt->input->cur.
 * The local pointer is written back to ctxt->input->cur at the end.
 * General path: blanks are consumed through the cursor; while the current
 * byte is 0, more than one input is stacked and the state is not
 * XML_PARSER_COMMENT, an inner loop runs (its body is not visible here),
 * and a percent sign is handed to xmlParserHandlePEReference.
 */
432 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
436 * It's Okay to use CUR/NEXT here since all the blanks are on
439 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
442 * if we are in the document content, go really fast
444 cur = ctxt->input->cur;
445 while (IS_BLANK(*cur)) {
447 ctxt->input->line++; ctxt->input->col = 1;
452 ctxt->input->cur = cur;
453 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
454 cur = ctxt->input->cur;
457 ctxt->input->cur = cur;
462 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
467 while ((cur == 0) && (ctxt->inputNr > 1) &&
468 (ctxt->instate != XML_PARSER_COMMENT)) {
473 * Need to handle support of entities branching here
475 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
476 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
481 /************************************************************************
483 * Commodity functions to handle entities *
485 ************************************************************************/
489 * @ctxt: an XML parser context
491 * xmlPopInput: the current input pointed by ctxt->input came to an end
492 * pop it and return the next char.
494 * Returns the current xmlChar in the parser context
/*
 * Returns 0 without popping when only one input (the main one) is on the
 * stack. Otherwise the top input is popped with inputPop and released with
 * xmlFreeInputStream, with a debug trace when xmlParserDebugEntities is
 * set. If the input now on top is exhausted as well (current byte 0 and
 * xmlParserInputGrow returns <= 0) the function recurses to pop it too. The
 * final return statement is not visible in this excerpt.
 */
497 xmlPopInput(xmlParserCtxtPtr ctxt) {
498 if (ctxt->inputNr == 1) return(0); /* End of main Input */
499 if (xmlParserDebugEntities)
500 xmlGenericError(xmlGenericErrorContext,
501 "Popping input %d\n", ctxt->inputNr);
502 xmlFreeInputStream(inputPop(ctxt));
503 if ((*ctxt->input->cur == 0) &&
504 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
505 return(xmlPopInput(ctxt));
511 * @ctxt: an XML parser context
512 * @input: an XML parser input fragment (entity, XML fragment ...).
514 * xmlPushInput: switch to a new input stream which is stacked on top
515 * of the previous one(s).
/*
 * Does nothing when input is NULL. When xmlParserDebugEntities is set, a
 * trace is emitted first: the file name of the current input, when it has
 * one, and then the new stack depth with at most 30 bytes of the new input.
 * The input is then stacked with inputPush; the value returned by inputPush
 * is not examined on the visible lines.
 */
518 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
519 if (input == NULL) return;
521 if (xmlParserDebugEntities) {
522 if ((ctxt->input != NULL) && (ctxt->input->filename))
523 xmlGenericError(xmlGenericErrorContext,
524 "%s(%d): ", ctxt->input->filename,
526 xmlGenericError(xmlGenericErrorContext,
527 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
529 inputPush(ctxt, input);
535 * @ctxt: an XML parser context
537 * parse Reference declarations
539 * [66] CharRef ::= '&#' [0-9]+ ';' |
540 * '&#x' [0-9a-fA-F]+ ';'
542 * [ WFC: Legal Character ]
543 * Characters referred to using character references must match the
544 * production for Char.
546 * Returns the value parsed (as an int), 0 in case of error
/*
 * Decodes a character reference at the current input position into an
 * unsigned int accumulator. The first branch accumulates in base 16, the
 * second (ampersand followed by hash) in base 10; both loop until a
 * semicolon. A character outside the accepted digits raises
 * XML_ERR_INVALID_HEX_CHARREF or XML_ERR_INVALID_DEC_CHARREF; input that
 * matches neither prefix raises XML_ERR_INVALID_CHARREF; a decoded value
 * rejected by the final check (whose condition is not visible here) raises
 * XML_ERR_INVALID_CHAR. Every error path clears wellFormed and disables
 * SAX unless ctxt->recovery is set.
 * NOTE(review): in the hexadecimal loop the branches for a-f and A-F are
 * guarded by count < 20 but the branch for 0-9 is not; confirm whether
 * that asymmetry is intended.
 * NOTE(review): no overflow check on val is visible, so a long run of
 * digits may wrap the unsigned accumulator - confirm against the full
 * source.
 */
549 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
550 unsigned int val = 0;
554 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
556 if ((RAW == '&') && (NXT(1) == '#') &&
560 while (RAW != ';') { /* loop blocked by count */
565 if ((RAW >= '0') && (RAW <= '9'))
566 val = val * 16 + (CUR - '0');
567 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
568 val = val * 16 + (CUR - 'a') + 10;
569 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
570 val = val * 16 + (CUR - 'A') + 10;
572 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
574 ctxt->sax->error(ctxt->userData,
575 "xmlParseCharRef: invalid hexadecimal value\n");
576 ctxt->wellFormed = 0;
577 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
585 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
590 } else if ((RAW == '&') && (NXT(1) == '#')) {
593 while (RAW != ';') { /* loop blocked by count */
598 if ((RAW >= '0') && (RAW <= '9'))
599 val = val * 10 + (CUR - '0');
601 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "xmlParseCharRef: invalid decimal value\n");
605 ctxt->wellFormed = 0;
606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
614 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
620 ctxt->errNo = XML_ERR_INVALID_CHARREF;
621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
622 ctxt->sax->error(ctxt->userData,
623 "xmlParseCharRef: invalid value\n");
624 ctxt->wellFormed = 0;
625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
629 * [ WFC: Legal Character ]
630 * Characters referred to using character references must match the
631 * production for Char.
636 ctxt->errNo = XML_ERR_INVALID_CHAR;
637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
638 ctxt->sax->error(ctxt->userData,
639 "xmlParseCharRef: invalid xmlChar value %d\n",
641 ctxt->wellFormed = 0;
642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
648 * xmlParseStringCharRef:
649 * @ctxt: an XML parser context
650 * @str: a pointer to an index in the string
652 * parse Reference declarations, variant parsing from a string rather
653 * than an input flow.
655 * [66] CharRef ::= '&#' [0-9]+ ';' |
656 * '&#x' [0-9a-fA-F]+ ';'
658 * [ WFC: Legal Character ]
659 * Characters referred to using character references must match the
660 * production for Char.
662 * Returns the value parsed (as an int), 0 in case of error, str will be
663 * updated to the current value of the index
/*
 * String-based counterpart of xmlParseCharRef: the reference is read
 * through the cursor passed in str instead of the parser input. Returns 0
 * at once when str or *str is NULL. A prefix of ampersand, hash, x selects
 * base 16 accumulation; ampersand, hash alone selects base 10; both loop
 * until a semicolon. The error codes and the handling of wellFormed and
 * disableSAX mirror xmlParseCharRef, with messages naming this function.
 * NOTE(review): unlike xmlParseCharRef, no digit-count guard appears on
 * the visible lines of either loop, and no overflow check on val is
 * visible - confirm against the full source.
 */
666 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
671 if ((str == NULL) || (*str == NULL)) return(0);
674 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
677 while (cur != ';') { /* Non input consuming loop */
678 if ((cur >= '0') && (cur <= '9'))
679 val = val * 16 + (cur - '0');
680 else if ((cur >= 'a') && (cur <= 'f'))
681 val = val * 16 + (cur - 'a') + 10;
682 else if ((cur >= 'A') && (cur <= 'F'))
683 val = val * 16 + (cur - 'A') + 10;
685 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData,
688 "xmlParseStringCharRef: invalid hexadecimal value\n");
689 ctxt->wellFormed = 0;
690 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
699 } else if ((cur == '&') && (ptr[1] == '#')){
702 while (cur != ';') { /* Non input consuming loops */
703 if ((cur >= '0') && (cur <= '9'))
704 val = val * 10 + (cur - '0');
706 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
708 ctxt->sax->error(ctxt->userData,
709 "xmlParseStringCharRef: invalid decimal value\n");
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
721 ctxt->errNo = XML_ERR_INVALID_CHARREF;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData,
724 "xmlParseStringCharRef: invalid value\n");
725 ctxt->wellFormed = 0;
726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
732 * [ WFC: Legal Character ]
733 * Characters referred to using character references must match the
734 * production for Char.
739 ctxt->errNo = XML_ERR_INVALID_CHAR;
740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
741 ctxt->sax->error(ctxt->userData,
742 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
743 ctxt->wellFormed = 0;
744 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
750 * xmlNewBlanksWrapperInputStream:
751 * @ctxt: an XML parser context
752 * @entity: an Entity pointer
754 * Create a new input stream for wrapping
755 * blanks around a PEReference
757 * Returns the new input stream or NULL
/*
 * Release callback for the buffer of a blanks-wrapper input: it simply
 * passes the buffer to xmlFree. xmlNewBlanksWrapperInputStream installs it
 * as input->free, and xmlParserHandlePEReference compares input->free
 * against it to tell whether the current input is such a wrapper.
 */
760 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
/*
 * Builds an input stream over a freshly allocated buffer of
 * strlen(entity->name) + 5 bytes. The visible stores put the entity name at
 * offset 2, followed by a semicolon, a space and a terminating 0; the two
 * leading bytes are filled on lines not visible in this excerpt. The
 * stream records the buffer as base, length as its length,
 * deallocblankswrapper as its free callback, and end as &buffer[length],
 * that is one position past the terminating 0.
 * A NULL entity is reported as XML_ERR_INTERNAL_ERROR through the SAX error
 * callback.
 * NOTE(review): ctxt->errNo is assigned XML_ERR_INTERNAL_ERROR twice in
 * the NULL-entity branch; the second assignment looks redundant.
 * NOTE(review): what happens to the already created input when the buffer
 * allocation fails is not visible here - check that it is released.
 */
762 static xmlParserInputPtr
763 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
764 xmlParserInputPtr input;
767 if (entity == NULL) {
768 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
770 ctxt->sax->error(ctxt->userData,
771 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
772 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
775 if (xmlParserDebugEntities)
776 xmlGenericError(xmlGenericErrorContext,
777 "new blanks wrapper for entity: %s\n", entity->name);
778 input = xmlNewInputStream(ctxt);
782 length = xmlStrlen(entity->name) + 5;
783 buffer = xmlMallocAtomic(length);
784 if (buffer == NULL) {
789 buffer [length-3] = ';';
790 buffer [length-2] = ' ';
791 buffer [length-1] = 0;
792 memcpy(buffer + 2, entity->name, length - 5);
793 input->free = deallocblankswrapper;
794 input->base = buffer;
796 input->length = length;
797 input->end = &buffer[length];
802 * xmlParserHandlePEReference:
803 * @ctxt: the parser context
805 * [69] PEReference ::= '%' Name ';'
807 * [ WFC: No Recursion ]
808 * A parsed entity must not contain a recursive
809 * reference to itself, either directly or indirectly.
811 * [ WFC: Entity Declared ]
812 * In a document without any DTD, a document with only an internal DTD
813 * subset which contains no parameter entity references, or a document
814 * with "standalone='yes'", ... ... The declaration of a parameter
815 * entity must precede any reference to it...
817 * [ VC: Entity Declared ]
818 * In a document with an external subset or external parameter entities
819 * with "standalone='no'", ... ... The declaration of a parameter entity
820 * must precede any reference to it...
823 * Parameter-entity references may only appear in the DTD.
824 * NOTE: misleading but this is handled.
826 * A PEReference may have been detected in the current input stream
827 * the handling is done according to
828 * http://www.w3.org/TR/REC-xml#entproc
830 * - Included in literal in entity values
831 * - Included as Parameter Entity reference within DTDs
/*
 * Handles a parameter-entity reference found at the current input position.
 * Returns at once unless the current byte is a percent sign.
 *
 * The switch on ctxt->instate decides whether the reference is acted upon:
 *  - errors are reported for a reference at EOF (XML_ERR_PEREF_AT_EOF), in
 *    the PROLOG, START and MISC states (XML_ERR_PEREF_IN_PROLOG) and in the
 *    EPILOG state (XML_ERR_PEREF_IN_EPILOG);
 *  - the ENTITY_DECL, CONTENT, ATTRIBUTE_VALUE, SYSTEM_LITERAL and
 *    PUBLIC_LITERAL states ignore it;
 *  - ENTITY_VALUE is left to xmlStringDecodeEntities, as the existing note
 *    explains;
 *  - one group of states checks for the internal subset (external == 0 and
 *    a single input) and for a blank or 0 byte after the percent sign; the
 *    statements guarded by these tests are not visible in this excerpt.
 *
 * When processing continues, the name is read with xmlParseName and looked
 * up through the SAX getParameterEntity callback:
 *  - entity not found: a well-formedness error when standalone == 1, or
 *    when there is no external subset and no earlier PE reference;
 *    otherwise a validity error when validating with a vctxt error
 *    callback, else a SAX warning (both only while SAX is enabled);
 *  - entity found and the current input is not a blanks wrapper: a blanks
 *    wrapper input is created and pushed;
 *  - otherwise, for internal or external parameter entities: the entity
 *    input is pushed, xmlDetectCharEncoding is run on 4 bytes when
 *    entity->length >= 4 and the encoding is switched if one is detected,
 *    and for an external entity starting with <?xml plus a blank the text
 *    declaration is parsed;
 *  - any other entity type is reported as not being a parameter entity.
 * A missing semicolon after the name raises XML_ERR_PEREF_SEMICOL_MISSING.
 */
834 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
836 xmlEntityPtr entity = NULL;
837 xmlParserInputPtr input;
839 if (RAW != '%') return;
840 switch(ctxt->instate) {
841 case XML_PARSER_CDATA_SECTION:
843 case XML_PARSER_COMMENT:
845 case XML_PARSER_START_TAG:
847 case XML_PARSER_END_TAG:
850 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
852 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
853 ctxt->wellFormed = 0;
854 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
856 case XML_PARSER_PROLOG:
857 case XML_PARSER_START:
858 case XML_PARSER_MISC:
859 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
861 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
862 ctxt->wellFormed = 0;
863 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
865 case XML_PARSER_ENTITY_DECL:
866 case XML_PARSER_CONTENT:
867 case XML_PARSER_ATTRIBUTE_VALUE:
869 case XML_PARSER_SYSTEM_LITERAL:
870 case XML_PARSER_PUBLIC_LITERAL:
871 /* we just ignore it there */
873 case XML_PARSER_EPILOG:
874 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
876 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
877 ctxt->wellFormed = 0;
878 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
880 case XML_PARSER_ENTITY_VALUE:
882 * NOTE: in the case of entity values, we don't do the
883 * substitution here since we need the literal
884 * entity value to be able to save the internal
885 * subset of the document.
886 * This will be handled by xmlStringDecodeEntities
891 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
892 * In the internal DTD subset, parameter-entity references
893 * can occur only where markup declarations can occur, not
894 * within markup declarations.
895 * In that case this is handled in xmlParseMarkupDecl
897 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
899 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
902 case XML_PARSER_IGNORE:
907 name = xmlParseName(ctxt);
908 if (xmlParserDebugEntities)
909 xmlGenericError(xmlGenericErrorContext,
910 "PEReference: %s\n", name);
912 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
914 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
915 ctxt->wellFormed = 0;
916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
920 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
921 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
922 if (entity == NULL) {
925 * [ WFC: Entity Declared ]
926 * In a document without any DTD, a document with only an
927 * internal DTD subset which contains no parameter entity
928 * references, or a document with "standalone='yes'", ...
929 * ... The declaration of a parameter entity must precede
930 * any reference to it...
932 if ((ctxt->standalone == 1) ||
933 ((ctxt->hasExternalSubset == 0) &&
934 (ctxt->hasPErefs == 0))) {
935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
936 ctxt->sax->error(ctxt->userData,
937 "PEReference: %%%s; not found\n", name);
938 ctxt->wellFormed = 0;
939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
942 * [ VC: Entity Declared ]
943 * In a document with an external subset or external
944 * parameter entities with "standalone='no'", ...
945 * ... The declaration of a parameter entity must precede
946 * any reference to it...
948 if ((!ctxt->disableSAX) &&
949 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
950 ctxt->vctxt.error(ctxt->vctxt.userData,
951 "PEReference: %%%s; not found\n", name);
952 } else if ((!ctxt->disableSAX) &&
953 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
954 ctxt->sax->warning(ctxt->userData,
955 "PEReference: %%%s; not found\n", name);
958 } else if (ctxt->input->free != deallocblankswrapper) {
959 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
960 xmlPushInput(ctxt, input);
962 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
963 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
968 * handle the extra spaces added before and after
969 * c.f. http://www.w3.org/TR/REC-xml#as-PE
970 * this is done independently.
972 input = xmlNewEntityInputStream(ctxt, entity);
973 xmlPushInput(ctxt, input);
976 * Get the 4 first bytes and decode the charset
977 * if enc != XML_CHAR_ENCODING_NONE
978 * plug some encoding conversion routines.
981 if (entity->length >= 4) {
986 enc = xmlDetectCharEncoding(start, 4);
987 if (enc != XML_CHAR_ENCODING_NONE) {
988 xmlSwitchEncoding(ctxt, enc);
992 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
993 (RAW == '<') && (NXT(1) == '?') &&
994 (NXT(2) == 'x') && (NXT(3) == 'm') &&
995 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
996 xmlParseTextDecl(ctxt);
999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1000 ctxt->sax->error(ctxt->userData,
1001 "xmlParserHandlePEReference: %s is not a parameter entity\n",
1003 ctxt->wellFormed = 0;
1004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1008 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1010 ctxt->sax->error(ctxt->userData,
1011 "xmlParserHandlePEReference: expecting ';'\n");
1012 ctxt->wellFormed = 0;
1013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1020 * Macro used to grow the current buffer.
/*
 * growBuffer(buffer): doubles the companion variable buffer##_size and
 * reallocates buffer to that many xmlChars. The caller must therefore have
 * both the pointer and a variable named after it with the _size suffix in
 * scope. On failure a message is sent to xmlGenericError; the rest of the
 * failure path is not visible in this excerpt.
 * NOTE(review): the xmlRealloc result is assigned straight back to buffer,
 * so the previous allocation is unreachable if the call fails.
 */
1022 #define growBuffer(buffer) { \
1023 buffer##_size *= 2; \
1024 buffer = (xmlChar *) \
1025 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
1026 if (buffer == NULL) { \
1027 xmlGenericError(xmlGenericErrorContext, "realloc failed"); \
1033 * xmlStringDecodeEntities:
1034 * @ctxt: the parser context
1035 * @str: the input string
1036 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1037 * @end: an end marker xmlChar, 0 if none
1038 * @end2: an end marker xmlChar, 0 if none
1039 * @end3: an end marker xmlChar, 0 if none
1041 * Takes an entity string's content and performs the appropriate substitutions.
1043 * [67] Reference ::= EntityRef | CharRef
1045 * [69] PEReference ::= '%' Name ';'
1047 * Returns A newly allocated string with the substitution done. The caller
1048 * must deallocate it !
/*
 * Copies str into a new buffer while expanding references.
 *
 * Guard: when ctxt->depth is greater than 40 an XML_ERR_ENTITY_LOOP error is
 * reported. The work buffer starts at XML_PARSER_BIG_BUFFER_SIZE xmlChars.
 *
 * The main loop reads one character at a time with CUR_SCHAR and stops at a
 * 0 or at any of end, end2, end3. For each character:
 *  - ampersand followed by hash: decoded with xmlParseStringCharRef and
 *    copied, independently of the what flags;
 *  - ampersand, when what has XML_SUBSTITUTE_REF: parsed with
 *    xmlParseStringEntityRef. A predefined entity contributes the first
 *    byte of its content; any other entity with content is expanded by a
 *    recursive call and the result appended; an entity without content is
 *    written back as a reference (ampersand, name, semicolon);
 *  - percent sign, when what has XML_SUBSTITUTE_PEREF: parsed with
 *    xmlParseStringPEReference and expanded by a recursive call;
 *  - anything else: copied with COPY_BUF.
 * The fill level is compared with buffer_size - XML_PARSER_BUFFER_SIZE at
 * several points; the statements guarded by those tests are not visible in
 * this excerpt. The result is 0-terminated at the end.
 * NOTE(review): in the branch that writes the entity name back, a single
 * size test involving the name length i is visible before the name is
 * copied; confirm that whatever growth it triggers is always large enough
 * for long names.
 */
1051 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1052 xmlChar end, xmlChar end2, xmlChar end3) {
1053 xmlChar *buffer = NULL;
1054 int buffer_size = 0;
1056 xmlChar *current = NULL;
1064 if (ctxt->depth > 40) {
1065 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1067 ctxt->sax->error(ctxt->userData,
1068 "Detected entity reference loop\n");
1069 ctxt->wellFormed = 0;
1070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1075 * allocate a translation buffer.
1077 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1078 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
1079 if (buffer == NULL) {
1080 xmlGenericError(xmlGenericErrorContext,
1081 "xmlStringDecodeEntities: malloc failed");
1086 * OK loop until we reach one of the ending char or a size limit.
1087 * we are operating on already parsed values.
1089 c = CUR_SCHAR(str, l);
1090 while ((c != 0) && (c != end) && /* non input consuming loop */
1091 (c != end2) && (c != end3)) {
1094 if ((c == '&') && (str[1] == '#')) {
1095 int val = xmlParseStringCharRef(ctxt, &str);
1097 COPY_BUF(0,buffer,nbchars,val);
1099 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1100 if (xmlParserDebugEntities)
1101 xmlGenericError(xmlGenericErrorContext,
1102 "String decoding Entity Reference: %.30s\n",
1104 ent = xmlParseStringEntityRef(ctxt, &str);
1105 if ((ent != NULL) &&
1106 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1107 if (ent->content != NULL) {
1108 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1111 ctxt->sax->error(ctxt->userData,
1112 "internal error entity has no content\n");
1114 } else if ((ent != NULL) && (ent->content != NULL)) {
1118 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1123 while (*current != 0) { /* non input consuming loop */
1124 buffer[nbchars++] = *current++;
1126 buffer_size - XML_PARSER_BUFFER_SIZE) {
1132 } else if (ent != NULL) {
1133 int i = xmlStrlen(ent->name);
1134 const xmlChar *cur = ent->name;
1136 buffer[nbchars++] = '&';
1137 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1141 buffer[nbchars++] = *cur++;
1142 buffer[nbchars++] = ';';
1144 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1145 if (xmlParserDebugEntities)
1146 xmlGenericError(xmlGenericErrorContext,
1147 "String decoding PE Reference: %.30s\n", str);
1148 ent = xmlParseStringPEReference(ctxt, &str);
1153 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1158 while (*current != 0) { /* non input consuming loop */
1159 buffer[nbchars++] = *current++;
1161 buffer_size - XML_PARSER_BUFFER_SIZE) {
1169 COPY_BUF(l,buffer,nbchars,c);
1171 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1175 c = CUR_SCHAR(str, l);
1177 buffer[nbchars++] = 0;
1182 /************************************************************************
1184 * Commodity functions to handle xmlChars *
1186 ************************************************************************/
1190 * @cur: the input xmlChar *
1191 * @len: the len of @cur
1193 * a strndup for array of xmlChar's
1195 * Returns a new xmlChar * or NULL
/*
 * Returns NULL when cur is NULL or len is negative. Otherwise allocates
 * len + 1 xmlChars with xmlMallocAtomic and copies len of them from cur;
 * the extra slot is presumably for a terminating 0 written on a line not
 * visible in this excerpt. An allocation failure is reported through
 * xmlGenericError.
 */
1198 xmlStrndup(const xmlChar *cur, int len) {
1201 if ((cur == NULL) || (len < 0)) return(NULL);
1202 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
1204 xmlGenericError(xmlGenericErrorContext,
1205 "malloc of %ld byte failed\n",
1206 (len + 1) * (long)sizeof(xmlChar));
1209 memcpy(ret, cur, len * sizeof(xmlChar));
1216 * @cur: the input xmlChar *
1218 * a strdup for array of xmlChar's. Since they are supposed to be
1219 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1220 * a termination mark of '0'.
1222 * Returns a new xmlChar * or NULL
/*
 * Returns NULL for a NULL input. Otherwise scans to the first 0 byte and
 * hands the string and its measured length to xmlStrndup.
 * NOTE(review): the length is a pointer difference passed to an int
 * parameter, so it is narrowed for strings longer than INT_MAX.
 */
1225 xmlStrdup(const xmlChar *cur) {
1226 const xmlChar *p = cur;
1228 if (cur == NULL) return(NULL);
1229 while (*p != 0) p++; /* non input consuming */
1230 return(xmlStrndup(cur, p - cur));
1235 * @cur: the input char *
1236 * @len: the len of @cur
1238 * a strndup for char's to xmlChar's
1240 * Returns a new xmlChar * or NULL
/*
 * Returns NULL when cur is NULL or len is negative. Otherwise allocates
 * len + 1 xmlChars and copies len bytes from cur one at a time, casting
 * each char to xmlChar; the extra slot is presumably for a terminating 0
 * written on a line not visible in this excerpt. An allocation failure is
 * reported through xmlGenericError.
 */
1244 xmlCharStrndup(const char *cur, int len) {
1248 if ((cur == NULL) || (len < 0)) return(NULL);
1249 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
1251 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1252 (len + 1) * (long)sizeof(xmlChar));
1255 for (i = 0;i < len;i++)
1256 ret[i] = (xmlChar) cur[i];
1263 * @cur: the input char *
1265 * a strdup for char's to xmlChar's
1267 * Returns a new xmlChar * or NULL
/*
 * Returns NULL for a NULL input. Otherwise scans to the terminating 0 and
 * hands the string and its measured length to xmlCharStrndup.
 * NOTE(review): the length is a pointer difference passed to an int
 * parameter, so it is narrowed for strings longer than INT_MAX.
 */
1271 xmlCharStrdup(const char *cur) {
1272 const char *p = cur;
1274 if (cur == NULL) return(NULL);
1275 while (*p != '\0') p++; /* non input consuming */
1276 return(xmlCharStrndup(cur, p - cur));
1281 * @str1: the first xmlChar *
1282 * @str2: the second xmlChar *
1284 * a strcmp for xmlChar's
1286 * Returns the integer result of the comparison
/*
 * Pointer-level shortcuts come first: identical pointers (including two
 * NULLs) compare equal, a NULL str1 yields -1 and a NULL str2 yields 1.
 * Otherwise the strings are walked in step and the first non-zero
 * difference between corresponding xmlChars is returned; the walk ends
 * after the terminating 0 of str2 has been compared.
 */
1290 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1293 if (str1 == str2) return(0);
1294 if (str1 == NULL) return(-1);
1295 if (str2 == NULL) return(1);
1297 tmp = *str1++ - *str2;
1298 if (tmp != 0) return(tmp);
1299 } while (*str2++ != 0);
1305 * @str1: the first xmlChar *
1306 * @str2: the second xmlChar *
1308 * Check whether both strings are equal or have the same content.
1309 * Should be a bit more readable and faster than xmlStrcmp()
1311 * Returns 1 if they are equal, 0 if they are different
/*
 * Identical pointers (including two NULLs) are equal and yield 1. If only
 * one of the two is NULL the result is 0. Otherwise the strings are walked
 * in step and 0 is returned at the first differing xmlChar; the loop
 * framing and the final return are not visible in this excerpt.
 */
1315 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1316 if (str1 == str2) return(1);
1317 if (str1 == NULL) return(0);
1318 if (str2 == NULL) return(0);
1320 if (*str1++ != *str2) return(0);
1327 * @str1: the first xmlChar *
1328 * @str2: the second xmlChar *
1329 * @len: the max comparison length
1331 * a strncmp for xmlChar's
1333 * Returns the integer result of the comparison
/*
 * A len of 0 or less compares equal, and this test comes before the NULL
 * checks. Then identical pointers compare equal, a NULL str1 yields -1 and
 * a NULL str2 yields 1. Otherwise the strings are walked in step; the
 * current difference is returned as soon as it is non-zero or len
 * characters have been compared, and the walk also ends after the
 * terminating 0 of str2.
 */
1337 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1340 if (len <= 0) return(0);
1341 if (str1 == str2) return(0);
1342 if (str1 == NULL) return(-1);
1343 if (str2 == NULL) return(1);
1345 tmp = *str1++ - *str2;
1346 if (tmp != 0 || --len == 0) return(tmp);
1347 } while (*str2++ != 0);
1351 static const xmlChar casemap[256] = {
1352 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1353 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1354 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1355 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1356 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1357 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1358 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1359 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1360 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1361 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1362 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1363 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1364 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1365 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1366 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1367 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1368 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1369 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1370 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1371 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1372 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1373 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1374 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1375 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1376 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1377 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1378 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1379 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1380 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1381 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1382 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1383 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1388 * @str1: the first xmlChar *
1389 * @str2: the second xmlChar *
1391 * a strcasecmp for xmlChar's
1393 * Returns the integer result of the comparison
1397 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1400 if (str1 == str2) return(0);
1401 if (str1 == NULL) return(-1);
1402 if (str2 == NULL) return(1);
1404 tmp = casemap[*str1++] - casemap[*str2];
1405 if (tmp != 0) return(tmp);
1406 } while (*str2++ != 0);
1412 * @str1: the first xmlChar *
1413 * @str2: the second xmlChar *
1414 * @len: the max comparison length
1416 * a strncasecmp for xmlChar's
1418 * Returns the integer result of the comparison
1422 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1425 if (len <= 0) return(0);
1426 if (str1 == str2) return(0);
1427 if (str1 == NULL) return(-1);
1428 if (str2 == NULL) return(1);
1430 tmp = casemap[*str1++] - casemap[*str2];
1431 if (tmp != 0 || --len == 0) return(tmp);
1432 } while (*str2++ != 0);
1438 * @str: the xmlChar * array
1439 * @val: the xmlChar to search
1441 * a strchr for xmlChar's
1443 * Returns the xmlChar * for the first occurrence or NULL.
1447 xmlStrchr(const xmlChar *str, xmlChar val) {
1448 if (str == NULL) return(NULL);
1449 while (*str != 0) { /* non input consuming */
1450 if (*str == val) return((xmlChar *) str);
1458 * @str: the xmlChar * array (haystack)
1459 * @val: the xmlChar to search (needle)
1461 * a strstr for xmlChar's
1463 * Returns the xmlChar * for the first occurrence or NULL.
1467 xmlStrstr(const xmlChar *str, const xmlChar *val) {
1470 if (str == NULL) return(NULL);
1471 if (val == NULL) return(NULL);
1474 if (n == 0) return(str);
1475 while (*str != 0) { /* non input consuming */
1477 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1486 * @str: the xmlChar * array (haystack)
1487 * @val: the xmlChar to search (needle)
1489 * a case-ignoring strstr for xmlChar's
1491 * Returns the xmlChar * for the first occurrence or NULL.
1495 xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1498 if (str == NULL) return(NULL);
1499 if (val == NULL) return(NULL);
1502 if (n == 0) return(str);
1503 while (*str != 0) { /* non input consuming */
1504 if (casemap[*str] == casemap[*val])
1505 if (!xmlStrncasecmp(str, val, n)) return(str);
1513 * @str: the xmlChar * array (haystack)
1514 * @start: the index of the first char (zero based)
1515 * @len: the length of the substring
1517 * Extract a substring of a given string
1519 * Returns the xmlChar * for the first occurrence or NULL.
1523 xmlStrsub(const xmlChar *str, int start, int len) {
1526 if (str == NULL) return(NULL);
1527 if (start < 0) return(NULL);
1528 if (len < 0) return(NULL);
1530 for (i = 0;i < start;i++) {
1531 if (*str == 0) return(NULL);
1534 if (*str == 0) return(NULL);
1535 return(xmlStrndup(str, len));
1540 * @str: the xmlChar * array
1542 * length of a xmlChar's string
1544 * Returns the number of xmlChar contained in the ARRAY.
1548 xmlStrlen(const xmlChar *str) {
1551 if (str == NULL) return(0);
1552 while (*str != 0) { /* non input consuming */
1561 * @cur: the original xmlChar * array
1562 * @add: the xmlChar * array added
1563 * @len: the length of @add
1565 * a strncat for array of xmlChar's, it will extend @cur with the len
1566 * first bytes of @add.
1568 * Returns a new xmlChar *, the original @cur is reallocated if needed
1569 * and should not be freed
1573 xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1577 if ((add == NULL) || (len == 0))
1580 return(xmlStrndup(add, len));
1582 size = xmlStrlen(cur);
1583 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1585 xmlGenericError(xmlGenericErrorContext,
1586 "xmlStrncat: realloc of %ld byte failed\n",
1587 (size + len + 1) * (long)sizeof(xmlChar));
1590 memcpy(&ret[size], add, len * sizeof(xmlChar));
1591 ret[size + len] = 0;
1597 * @cur: the original xmlChar * array
1598 * @add: the xmlChar * array added
1600 * a strcat for array of xmlChar's. Since they are supposed to be
1601 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1602 * a termination mark of '0'.
1604 * Returns a new xmlChar * containing the concatenated string.
1607 xmlStrcat(xmlChar *cur, const xmlChar *add) {
1608 const xmlChar *p = add;
1610 if (add == NULL) return(cur);
1612 return(xmlStrdup(add));
1614 while (*p != 0) p++; /* non input consuming */
1615 return(xmlStrncat(cur, add, p - add));
1618 /************************************************************************
1620 * Commodity functions, cleanup needed ? *
1622 ************************************************************************/
1626 * @ctxt: an XML parser context
1628 * @len: the size of @str
1630 * Is this a sequence of blank chars that one can ignore ?
1632 * Returns 1 if ignorable 0 otherwise.
/*
 * areBlanks: decide whether the @len characters at @str may be reported as
 * ignorable whitespace.
 * The answer is 0 as soon as one character is not IS_BLANK or when there is
 * no current node. When a document is attached, xmlIsMixedElement() is
 * asked about the current element: a result of 0 answers 1 (ignorable), a
 * result of 1 answers 0, any other result falls through to the heuristic.
 * The heuristic answers 0 unless the next input character is '<', and also
 * answers 0 for whitespace directly followed by "</" in a node that has no
 * children yet; the remaining tests look at the last and first child.
 */
1635 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1637 xmlNodePtr lastChild;
1640 * Don't spend time trying to differentiate them, the same callback is
1643 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
1647 * Check for xml:space value.
1649 if (*(ctxt->space) == 1)
1653 * Check that the string is made of blanks
1655 for (i = 0;i < len;i++)
1656 if (!(IS_BLANK(str[i]))) return(0);
1659 * Look if the element is mixed content in the DTD if available
1661 if (ctxt->node == NULL) return(0);
1662 if (ctxt->myDoc != NULL) {
1663 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1664 if (ret == 0) return(1);
1665 if (ret == 1) return(0);
1669 * Otherwise, heuristic :-\
1671 if (RAW != '<') return(0);
1672 if ((ctxt->node->children == NULL) &&
1673 (RAW == '<') && (NXT(1) == '/')) return(0);
1675 lastChild = xmlGetLastChild(ctxt->node);
1676 if (lastChild == NULL) {
1677 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1678 (ctxt->node->content != NULL)) return(0);
1679 } else if (xmlNodeIsText(lastChild))
1681 else if ((ctxt->node->children != NULL) &&
1682 (xmlNodeIsText(ctxt->node->children)))
1687 /************************************************************************
1689 * Extra stuff for namespace support *
1690 * Relates to http://www.w3.org/TR/WD-xml-names *
1692 ************************************************************************/
1696 * @ctxt: an XML parser context
1697 * @name: the qualified name string to split
1698 * @prefix: a xmlChar **
1700 * parse an UTF8 encoded XML qualified name string
1702 * [NS 5] QName ::= (Prefix ':')? LocalPart
1704 * [NS 6] Prefix ::= NCName
1706 * [NS 7] LocalPart ::= NCName
1708 * Returns the local part, and prefix is updated
1709 * to get the Prefix if any.
/*
 * xmlSplitQName: split the qualified name @name around its first ':'.
 * A NULL @name yields NULL; when XML_XML_NAMESPACE is not defined a name
 * starting with "xml:" is returned as a plain copy.
 * Both halves are gathered the same way: bytes go into the on-stack buf
 * while fewer than max (XML_MAX_NAMELEN) have been read; longer names are
 * moved to a heap buffer that is regrown whenever fewer than 10 bytes of
 * headroom remain. Short results are returned as xmlStrndup() copies.
 * NOTE(review): the two out-of-memory paths of the first half read
 * ctxt->sax without the (ctxt != NULL) test used by the later error
 * paths - confirm callers never pass a NULL ctxt.
 * NOTE(review): each xmlRealloc() result is assigned straight back to
 * buffer, so the previous block is unreachable if xmlRealloc() fails.
 */
1713 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1714 xmlChar buf[XML_MAX_NAMELEN + 5];
1715 xmlChar *buffer = NULL;
1717 int max = XML_MAX_NAMELEN;
1718 xmlChar *ret = NULL;
1719 const xmlChar *cur = name;
1724 if (cur == NULL) return(NULL);
1726 #ifndef XML_XML_NAMESPACE
1727 /* xml: prefix is not really a namespace */
1728 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1729 (cur[2] == 'l') && (cur[3] == ':'))
1730 return(xmlStrdup(name));
1733 /* nasty but well=formed */
1735 return(xmlStrdup(name));
1738 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1744 * Okay someone managed to make a huge name, so he's ready to pay
1745 * for the processing speed.
1749 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
1750 if (buffer == NULL) {
1751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1752 ctxt->sax->error(ctxt->userData,
1753 "xmlSplitQName: out of memory\n");
1756 memcpy(buffer, buf, len);
1757 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1758 if (len + 10 > max) {
1760 buffer = (xmlChar *) xmlRealloc(buffer,
1761 max * sizeof(xmlChar));
1762 if (buffer == NULL) {
1763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1764 ctxt->sax->error(ctxt->userData,
1765 "xmlSplitQName: out of memory\n");
1775 /* nasty but well=formed
1776 if ((c == ':') && (*cur == 0)) {
1777 return(xmlStrdup(name));
1781 ret = xmlStrndup(buf, len);
1785 max = XML_MAX_NAMELEN;
1793 return(xmlStrndup(BAD_CAST "", 0));
1798 * Check that the first character is proper to start
1801 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1802 ((c >= 0x41) && (c <= 0x5A)) ||
1803 (c == '_') || (c == ':'))) {
1805 int first = CUR_SCHAR(cur, l);
1807 if (!IS_LETTER(first) && (first != '_')) {
1808 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1809 (ctxt->sax->error != NULL))
1810 ctxt->sax->error(ctxt->userData,
1811 "Name %s is not XML Namespace compliant\n",
1817 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1823 * Okay someone managed to make a huge name, so he's ready to pay
1824 * for the processing speed.
1828 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
1829 if (buffer == NULL) {
1830 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1831 (ctxt->sax->error != NULL))
1832 ctxt->sax->error(ctxt->userData,
1833 "xmlSplitQName: out of memory\n");
1836 memcpy(buffer, buf, len);
1837 while (c != 0) { /* tested bigname2.xml */
1838 if (len + 10 > max) {
1840 buffer = (xmlChar *) xmlRealloc(buffer,
1841 max * sizeof(xmlChar));
1842 if (buffer == NULL) {
1843 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1844 (ctxt->sax->error != NULL))
1845 ctxt->sax->error(ctxt->userData,
1846 "xmlSplitQName: out of memory\n");
1857 ret = xmlStrndup(buf, len);
1866 /************************************************************************
1868 * The parser itself *
1869 * Relates to http://www.w3.org/TR/REC-xml *
1871 ************************************************************************/
1873 static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
1876 * @ctxt: an XML parser context
1878 * parse an XML name.
1880 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1881 * CombiningChar | Extender
1883 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1885 * [6] Names ::= Name (S Name)*
1887 * Returns the Name parsed or NULL
/*
 * xmlParseName: parse a Name at the current input position.
 * Fast path: a name starting with an ASCII letter, '_' or ':' is scanned
 * directly in the input buffer over ASCII letters, digits, '_', '-', ':'
 * and '.'. If the scan stops on a non-NUL ASCII byte, the name is
 * duplicated with xmlStrndup() and the input cursor, nbChars and col
 * advance by its length. Everything else is delegated to
 * xmlParseNameComplex().
 */
1891 xmlParseName(xmlParserCtxtPtr ctxt) {
1899 * Accelerator for simple ASCII names
1901 in = ctxt->input->cur;
1902 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1903 ((*in >= 0x41) && (*in <= 0x5A)) ||
1904 (*in == '_') || (*in == ':')) {
1906 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1907 ((*in >= 0x41) && (*in <= 0x5A)) ||
1908 ((*in >= 0x30) && (*in <= 0x39)) ||
1909 (*in == '_') || (*in == '-') ||
1910 (*in == ':') || (*in == '.'))
1912 if ((*in > 0) && (*in < 0x80)) {
1913 count = in - ctxt->input->cur;
1914 ret = xmlStrndup(ctxt->input->cur, count);
1915 ctxt->input->cur = in;
1916 ctxt->nbChars += count;
1917 ctxt->input->col += count;
/* out-of-memory report (presumably when xmlStrndup() returned NULL): the
 * parser is then put in the EOF state with SAX callbacks disabled */
1919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1920 ctxt->sax->error(ctxt->userData,
1921 "XML parser: out of memory\n");
1922 ctxt->errNo = XML_ERR_NO_MEMORY;
1923 ctxt->instate = XML_PARSER_EOF;
1924 ctxt->disableSAX = 1;
1929 return(xmlParseNameComplex(ctxt));
1933 * xmlParseNameAndCompare:
1934 * @ctxt: an XML parser context
1936 * parse an XML name and compares for match
1937 * (specialized for endtag parsing)
1940 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1941 * and the name for mismatch
/*
 * xmlParseNameAndCompare: check that the name at the current input
 * position is @other.
 * The input is first compared byte by byte with @other; when all of
 * @other matched and the next input byte is '>' or a blank, the cursor is
 * moved past the name and (xmlChar*) 1 is returned. Otherwise the name is
 * parsed with xmlParseName() and compared with xmlStrEqual().
 */
1945 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1946 const xmlChar *cmp = other;
1952 in = ctxt->input->cur;
1953 while (*in != 0 && *in == *cmp) {
1957 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1959 ctxt->input->cur = in;
1960 return (xmlChar*) 1;
1962 /* failure (or end of input buffer), check with full function */
1963 ret = xmlParseName (ctxt);
1964 if (ret != 0 && xmlStrEqual (ret, other)) {
1966 return (xmlChar*) 1;
/*
 * xmlParseNameComplex: general Name parser behind xmlParseName().
 * Characters are accepted while they are letters, digits, '.', '-', '_',
 * ':', combining characters or extenders, and are copied with COPY_BUF
 * into the on-stack buf. Once len reaches XML_MAX_NAMELEN the bytes
 * gathered so far are copied to a heap buffer, which is regrown whenever
 * fewer than 10 bytes of headroom remain. Short names are returned as an
 * xmlStrndup() copy of buf.
 * NOTE(review): the xmlRealloc() result is assigned straight back to
 * buffer, so the previous block is unreachable if xmlRealloc() fails.
 */
1972 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1973 xmlChar buf[XML_MAX_NAMELEN + 5];
1979 * Handler for more complex cases
1983 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1984 (!IS_LETTER(c) && (c != '_') &&
1989 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1990 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1991 (c == '.') || (c == '-') ||
1992 (c == '_') || (c == ':') ||
1993 (IS_COMBINING(c)) ||
1994 (IS_EXTENDER(c)))) {
1995 if (count++ > 100) {
1999 COPY_BUF(l,buf,len,c);
2002 if (len >= XML_MAX_NAMELEN) {
2004 * Okay someone managed to make a huge name, so he's ready to pay
2005 * for the processing speed.
2010 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2011 if (buffer == NULL) {
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
2014 "xmlParseNameComplex: out of memory\n");
2017 memcpy(buffer, buf, len);
2018 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
2019 (c == '.') || (c == '-') ||
2020 (c == '_') || (c == ':') ||
2021 (IS_COMBINING(c)) ||
2023 if (count++ > 100) {
2027 if (len + 10 > max) {
2029 buffer = (xmlChar *) xmlRealloc(buffer,
2030 max * sizeof(xmlChar));
2031 if (buffer == NULL) {
2032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2033 ctxt->sax->error(ctxt->userData,
2034 "xmlParseNameComplex: out of memory\n");
2038 COPY_BUF(l,buffer,len,c);
2046 return(xmlStrndup(buf, len));
2050 * xmlParseStringName:
2051 * @ctxt: an XML parser context
2052 * @str: a pointer to the string pointer (IN/OUT)
2054 * parse an XML name.
2056 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2057 * CombiningChar | Extender
2059 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2061 * [6] Names ::= Name (S Name)*
2063 * Returns the Name parsed or NULL. The @str pointer
2064 * is updated to the current location in the string.
/*
 * xmlParseStringName: parse a Name out of the string *str instead of the
 * parser input; characters are decoded with CUR_SCHAR(cur, l).
 * Name characters (letters, digits, '.', '-', '_', ':', combining
 * characters, ...) are copied with COPY_BUF into the on-stack buf. Once
 * len reaches XML_MAX_NAMELEN the bytes gathered so far are copied to a
 * heap buffer, which is regrown whenever fewer than 10 bytes of headroom
 * remain. Short names are returned as an xmlStrndup() copy of buf.
 * NOTE(review): the xmlRealloc() result is assigned straight back to
 * buffer, so the previous block is unreachable if xmlRealloc() fails.
 */
2068 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2069 xmlChar buf[XML_MAX_NAMELEN + 5];
2070 const xmlChar *cur = *str;
2074 c = CUR_SCHAR(cur, l);
2075 if (!IS_LETTER(c) && (c != '_') &&
2080 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2081 (c == '.') || (c == '-') ||
2082 (c == '_') || (c == ':') ||
2083 (IS_COMBINING(c)) ||
2085 COPY_BUF(l,buf,len,c);
2087 c = CUR_SCHAR(cur, l);
2088 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2090 * Okay someone managed to make a huge name, so he's ready to pay
2091 * for the processing speed.
2096 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2097 if (buffer == NULL) {
2098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2099 ctxt->sax->error(ctxt->userData,
2100 "xmlParseStringName: out of memory\n");
2103 memcpy(buffer, buf, len);
2104 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2105 (c == '.') || (c == '-') ||
2106 (c == '_') || (c == ':') ||
2107 (IS_COMBINING(c)) ||
2109 if (len + 10 > max) {
2111 buffer = (xmlChar *) xmlRealloc(buffer,
2112 max * sizeof(xmlChar));
2113 if (buffer == NULL) {
2114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2115 ctxt->sax->error(ctxt->userData,
2116 "xmlParseStringName: out of memory\n");
2120 COPY_BUF(l,buffer,len,c);
2122 c = CUR_SCHAR(cur, l);
2130 return(xmlStrndup(buf, len));
2135 * @ctxt: an XML parser context
2137 * parse an XML Nmtoken.
2139 * [7] Nmtoken ::= (NameChar)+
2141 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2143 * Returns the Nmtoken parsed or NULL
/*
 * xmlParseNmtoken: parse an Nmtoken at the current input position.
 * Name characters (letters, digits, '.', '-', '_', ':', combining
 * characters, ...) are copied with COPY_BUF into the on-stack buf. Once
 * len reaches XML_MAX_NAMELEN the bytes gathered so far are copied to a
 * heap buffer, which is regrown whenever fewer than 10 bytes of headroom
 * remain. Short tokens are returned as an xmlStrndup() copy of buf.
 * NOTE(review): the xmlRealloc() result is assigned straight back to
 * buffer, so the previous block is unreachable if xmlRealloc() fails.
 */
2147 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2148 xmlChar buf[XML_MAX_NAMELEN + 5];
2156 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2157 (c == '.') || (c == '-') ||
2158 (c == '_') || (c == ':') ||
2159 (IS_COMBINING(c)) ||
2161 if (count++ > 100) {
2165 COPY_BUF(l,buf,len,c);
2168 if (len >= XML_MAX_NAMELEN) {
2170 * Okay someone managed to make a huge token, so he's ready to pay
2171 * for the processing speed.
2176 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2177 if (buffer == NULL) {
2178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2179 ctxt->sax->error(ctxt->userData,
2180 "xmlParseNmtoken: out of memory\n");
2183 memcpy(buffer, buf, len);
2184 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2185 (c == '.') || (c == '-') ||
2186 (c == '_') || (c == ':') ||
2187 (IS_COMBINING(c)) ||
2189 if (count++ > 100) {
2193 if (len + 10 > max) {
2195 buffer = (xmlChar *) xmlRealloc(buffer,
2196 max * sizeof(xmlChar));
2197 if (buffer == NULL) {
2198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2199 ctxt->sax->error(ctxt->userData,
2200 "xmlParseNmtoken: out of memory\n");
2204 COPY_BUF(l,buffer,len,c);
2214 return(xmlStrndup(buf, len));
2218 * xmlParseEntityValue:
2219 * @ctxt: an XML parser context
2220 * @orig: if non-NULL store a copy of the original entity value
2222 * parse a value for ENTITY declarations
2224 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2225 * "'" ([^%&'] | PEReference | Reference)* "'"
2227 * Returns the EntityValue parsed with reference substituted or NULL
/*
 * xmlParseEntityValue: parse the quoted value of an ENTITY declaration.
 * Steps visible below:
 *  - the opening quote (double or single) selects the stop character;
 *    anything else is reported as XML_ERR_ENTITY_NOT_STARTED;
 *  - the literal is copied with COPY_BUF into a buffer that starts at
 *    XML_PARSER_BUFFER_SIZE bytes and is regrown when len + 5 >= size;
 *    the loop only ends on the stop character when ctxt->input is the
 *    input the literal started in;
 *  - the buffer is then scanned for '%' and for '&' not followed by '#':
 *    each must introduce a name terminated by ';', otherwise
 *    XML_ERR_ENTITY_CHAR_ERROR is raised; a '%' reference while
 *    inSubset == 1 and inputNr == 1 raises XML_ERR_ENTITY_PE_INTERNAL;
 *  - the returned value comes from xmlStringDecodeEntities() called with
 *    XML_SUBSTITUTE_PEREF.
 * Each well-formedness error clears ctxt->wellFormed and, unless recovery
 * is enabled, disables the SAX callbacks.
 */
2231 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2232 xmlChar *buf = NULL;
2234 int size = XML_PARSER_BUFFER_SIZE;
2237 xmlChar *ret = NULL;
2238 const xmlChar *cur = NULL;
2239 xmlParserInputPtr input;
2241 if (RAW == '"') stop = '"';
2242 else if (RAW == '\'') stop = '\'';
2244 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2246 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2247 ctxt->wellFormed = 0;
2248 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2251 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2253 xmlGenericError(xmlGenericErrorContext,
2254 "malloc of %d byte failed\n", size);
2259 * The content of the entity definition is copied in a buffer.
2262 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2263 input = ctxt->input;
2268 * NOTE: 4.4.5 Included in Literal
2269 * When a parameter entity reference appears in a literal entity
2270 * value, ... a single or double quote character in the replacement
2271 * text is always treated as a normal data character and will not
2272 * terminate the literal.
2273 * In practice it means we stop the loop only when back at parsing
2274 * the initial entity and the quote is found
2276 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2277 (ctxt->input != input))) {
2278 if (len + 5 >= size) {
2280 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2282 xmlGenericError(xmlGenericErrorContext,
2283 "realloc of %d byte failed\n", size);
2287 COPY_BUF(l,buf,len,c);
2290 * Pop-up of finished entities.
2292 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2305 * Raise problem w.r.t. '&' and '%' being used in non-entities
2306 * reference constructs. Note Charref will be handled in
2307 * xmlStringDecodeEntities()
2310 while (*cur != 0) { /* non input consuming */
2311 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2316 name = xmlParseStringName(ctxt, &cur);
2317 if ((name == NULL) || (*cur != ';')) {
2318 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2320 ctxt->sax->error(ctxt->userData,
2321 "EntityValue: '%c' forbidden except for entities references\n",
2323 ctxt->wellFormed = 0;
2324 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2326 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2327 (ctxt->inputNr == 1)) {
2328 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330 ctxt->sax->error(ctxt->userData,
2331 "EntityValue: PEReferences forbidden in internal subset\n",
2333 ctxt->wellFormed = 0;
2334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2343 * Then PEReference entities are substituted.
2346 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2348 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2349 ctxt->wellFormed = 0;
2350 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2355 * NOTE: 4.4.7 Bypassed
2356 * When a general entity reference appears in the EntityValue in
2357 * an entity declaration, it is bypassed and left as is.
2358 * so XML_SUBSTITUTE_REF is not set here.
2360 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2373 * @ctxt: an XML parser context
2375 * parse a value for an attribute
2376 * Note: the parser won't do substitution of entities here, this
2377 * will be handled later in xmlStringGetNodeList
2379 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2380 * "'" ([^<&'] | Reference)* "'"
2382 * 3.3.3 Attribute-Value Normalization:
2383 * Before the value of an attribute is passed to the application or
2384 * checked for validity, the XML processor must normalize it as follows:
2385 * - a character reference is processed by appending the referenced
2386 * character to the attribute value
2387 * - an entity reference is processed by recursively processing the
2388 * replacement text of the entity
2389 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2390 * appending #x20 to the normalized value, except that only a single
2391 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2392 * parsed entity or the literal entity value of an internal parsed entity
2393 * - other characters are processed by appending them to the normalized value
2394 * If the declared value is not CDATA, then the XML processor must further
2395 * process the normalized attribute value by discarding any leading and
2396 * trailing space (#x20) characters, and by replacing sequences of space
2397 * (#x20) characters by a single space (#x20) character.
2398 * All attributes for which no declaration has been read should be treated
2399 * by a non-validating parser as if declared CDATA.
2401 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2405 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
/*
 * xmlParseAttValue: parse an attribute value at the current position.
 * A value that does not start with a double or single quote is reported
 * as XML_ERR_ATTRIBUTE_NOT_STARTED. Fast path: the value is scanned in
 * place while bytes are in the range 0x20-0x7f and are neither the
 * closing quote, '&' nor '<'; the text between the quotes is then copied
 * with xmlStrndup(). xmlParseAttValueComplex() takes over otherwise
 * (presumably when the scan did not stop on the closing quote).
 */
2408 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2410 const xmlChar *in = NULL;
2411 xmlChar *ret = NULL;
2414 in = (xmlChar *) CUR_PTR;
2415 if (*in != '"' && *in != '\'') {
2416 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2418 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2419 ctxt->wellFormed = 0;
2420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2427 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2428 *in != '&' && *in != '<'
2433 return xmlParseAttValueComplex(ctxt);
2436 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2442 * xmlParseAttValueComplex:
2443 * @ctxt: an XML parser context
2445 * parse a value for an attribute, this is the fallback function
2446 * of xmlParseAttValue() when the attribute parsing requires handling
2447 * of non-ASCII characters.
2449 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * xmlParseAttValueComplex: general attribute value parser, the fallback
 * of xmlParseAttValue().
 * The value is built in a heap buffer that starts at
 * XML_PARSER_BUFFER_SIZE bytes; the recurring (len > buf_size - 10) tests
 * guard the remaining headroom before more bytes are appended.
 * Branches visible below:
 *  - "&#" character references are decoded with xmlParseCharRef();
 *  - other '&' references go through xmlParseEntityRef(); when
 *    ctxt->replaceEntities is set, the content of entities that are not
 *    predefined is expanded with xmlStringDecodeEntities(), while for
 *    predefined ones the first byte of the content is appended;
 *    without replacement the entity name is copied to the output;
 *  - the whitespace characters 0x20, 0xD, 0xA and 0x9 are each stored
 *    as 0x20;
 *  - any other character is copied as is.
 * After the loop an unescaped '<' raises XML_ERR_LT_IN_ATTRIBUTE and a
 * missing closing quote raises XML_ERR_ATTRIBUTE_NOT_FINISHED.
 * NOTE(review): "buffer" below is a function-level static of 6 bytes whose
 * initialiser is the single character "&"; presumably it is meant to hold
 * a complete character reference for '&' - verify against the upstream
 * source.
 */
2452 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2454 xmlChar *buf = NULL;
2458 xmlChar *current = NULL;
2463 if (NXT(0) == '"') {
2464 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2467 } else if (NXT(0) == '\'') {
2469 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2472 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2474 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2475 ctxt->wellFormed = 0;
2476 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2481 * allocate a translation buffer.
2483 buf_size = XML_PARSER_BUFFER_SIZE;
2484 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
2486 xmlGenericError(xmlGenericErrorContext,
2487 "xmlParseAttValue: malloc failed");
2492 * OK loop until we reach one of the ending char or a size limit.
2495 while ((NXT(0) != limit) && /* checked */
2499 if (NXT(1) == '#') {
2500 int val = xmlParseCharRef(ctxt);
2502 if (ctxt->replaceEntities) {
2503 if (len > buf_size - 10) {
2509 * The reparsing will be done in xmlStringGetNodeList()
2510 * called by the attribute() function in SAX.c
2512 static xmlChar buffer[6] = "&";
2514 if (len > buf_size - 10) {
2517 current = &buffer[0];
2518 while (*current != 0) { /* non input consuming */
2519 buf[len++] = *current++;
2523 if (len > buf_size - 10) {
2526 len += xmlCopyChar(0, &buf[len], val);
2529 ent = xmlParseEntityRef(ctxt);
2530 if ((ent != NULL) &&
2531 (ctxt->replaceEntities != 0)) {
2534 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2535 rep = xmlStringDecodeEntities(ctxt, ent->content,
2536 XML_SUBSTITUTE_REF, 0, 0, 0);
2539 while (*current != 0) { /* non input consuming */
2540 buf[len++] = *current++;
2541 if (len > buf_size - 10) {
2548 if (len > buf_size - 10) {
2551 if (ent->content != NULL)
2552 buf[len++] = ent->content[0];
2554 } else if (ent != NULL) {
2555 int i = xmlStrlen(ent->name);
2556 const xmlChar *cur = ent->name;
2559 * This may look absurd but is needed to detect
2562 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2563 (ent->content != NULL)) {
2565 rep = xmlStringDecodeEntities(ctxt, ent->content,
2566 XML_SUBSTITUTE_REF, 0, 0, 0);
2572 * Just output the reference
2575 if (len > buf_size - i - 10) {
2579 buf[len++] = *cur++;
2584 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2585 COPY_BUF(l,buf,len,0x20);
2586 if (len > buf_size - 10) {
2590 COPY_BUF(l,buf,len,c);
2591 if (len > buf_size - 10) {
2602 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "Unescaped '<' not allowed in attributes values\n");
2606 ctxt->wellFormed = 0;
2607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2608 } else if (RAW != limit) {
2609 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2611 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2612 ctxt->wellFormed = 0;
2613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2620 * xmlParseSystemLiteral:
2621 * @ctxt: an XML parser context
2623 * parse an XML Literal
2625 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2627 * Returns the SystemLiteral parsed or NULL
/*
 * xmlParseSystemLiteral: parse a quoted SystemLiteral.
 * A literal that does not start with a double or single quote is reported
 * as XML_ERR_LITERAL_NOT_STARTED. Characters are copied with COPY_BUF into
 * a buffer that starts at XML_PARSER_BUFFER_SIZE bytes and is regrown when
 * len + 5 >= size, until the stop quote or a character failing IS_CHAR is
 * met; the latter is reported as XML_ERR_LITERAL_NOT_FINISHED.
 * ctxt->instate is switched to XML_PARSER_SYSTEM_LITERAL while scanning;
 * the saved state is put back after the loop and on the realloc failure
 * path.
 * NOTE(review): the xmlRealloc() result is assigned straight back to buf,
 * so the previous block is unreachable if xmlRealloc() fails.
 */
2631 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2632 xmlChar *buf = NULL;
2634 int size = XML_PARSER_BUFFER_SIZE;
2637 int state = ctxt->instate;
2644 } else if (RAW == '\'') {
2648 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2650 ctxt->sax->error(ctxt->userData,
2651 "SystemLiteral \" or ' expected\n");
2652 ctxt->wellFormed = 0;
2653 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2657 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2659 xmlGenericError(xmlGenericErrorContext,
2660 "malloc of %d byte failed\n", size);
2663 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2665 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2666 if (len + 5 >= size) {
2668 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2670 xmlGenericError(xmlGenericErrorContext,
2671 "realloc of %d byte failed\n", size);
2672 ctxt->instate = (xmlParserInputState) state;
2681 COPY_BUF(l,buf,len,cur);
2691 ctxt->instate = (xmlParserInputState) state;
2692 if (!IS_CHAR(cur)) {
2693 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2695 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2696 ctxt->wellFormed = 0;
2697 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2705 * xmlParsePubidLiteral:
2706 * @ctxt: an XML parser context
2708 * parse an XML public literal
2710 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2712 * Returns the PubidLiteral parsed or NULL.
2716 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2717 xmlChar *buf = NULL;
2719 int size = XML_PARSER_BUFFER_SIZE;
2723 xmlParserInputState oldstate = ctxt->instate;
2729 } else if (RAW == '\'') {
2733 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2735 ctxt->sax->error(ctxt->userData,
2736 "SystemLiteral \" or ' expected\n");
2737 ctxt->wellFormed = 0;
2738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2741 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2743 xmlGenericError(xmlGenericErrorContext,
2744 "malloc of %d byte failed\n", size);
2747 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
2749 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2750 if (len + 1 >= size) {
2752 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2754 xmlGenericError(xmlGenericErrorContext,
2755 "realloc of %d byte failed\n", size);
2775 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2777 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2778 ctxt->wellFormed = 0;
2779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2783 ctxt->instate = oldstate;
2787 void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
2790 * @ctxt: an XML parser context
2791 * @cdata: int indicating whether we are within a CDATA section
2793 * parse a CharData section.
2794 * if we are within a CDATA section ']]>' marks an end of section.
2796 * The right angle bracket (>) may be represented using the string "&gt;",
2797 * and must, for compatibility, be escaped using "&gt;" or a character
2798 * reference when it appears in the string "]]>" in content, when that
2799 * string is not marking the end of a CDATA section.
2801 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
/*
 * xmlParseCharData: parse character data and hand it to the SAX callbacks.
 * Fast path: runs of plain ASCII text (bytes >= 0x20 or TAB, up to 0x7F,
 * other than '<', ']' and '&') and newlines are located directly in the
 * input buffer and passed to the callbacks from there. A run starting
 * with a blank is submitted to areBlanks() to choose between
 * ignorableWhitespace() and characters(); other runs go to characters().
 * A "]]>" sequence is reported as XML_ERR_MISPLACED_CDATA_END.
 * The function ends by restoring the saved line and col values and
 * calling xmlParseCharDataComplex().
 */
2805 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
2808 int line = ctxt->input->line;
2809 int col = ctxt->input->col;
2814 * Accelerated common case where input don't need to be
2815 * modified before passing it to the handler.
2818 in = ctxt->input->cur;
2821 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2822 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2825 ctxt->input->line++;
2827 while (*in == 0xA) {
2828 ctxt->input->line++;
2834 if ((in[1] == ']') && (in[2] == '>')) {
2835 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2837 ctxt->sax->error(ctxt->userData,
2838 "Sequence ']]>' not allowed in content\n");
2839 ctxt->input->cur = in;
2840 ctxt->wellFormed = 0;
2841 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2847 nbchar = in - ctxt->input->cur;
2849 if (IS_BLANK(*ctxt->input->cur)) {
2850 const xmlChar *tmp = ctxt->input->cur;
2851 ctxt->input->cur = in;
2852 if (areBlanks(ctxt, tmp, nbchar)) {
2853 if (ctxt->sax->ignorableWhitespace != NULL)
2854 ctxt->sax->ignorableWhitespace(ctxt->userData,
2857 if (ctxt->sax->characters != NULL)
2858 ctxt->sax->characters(ctxt->userData,
2861 line = ctxt->input->line;
2862 col = ctxt->input->col;
2864 if (ctxt->sax->characters != NULL)
2865 ctxt->sax->characters(ctxt->userData,
2866 ctxt->input->cur, nbchar);
2867 line = ctxt->input->line;
2868 col = ctxt->input->col;
2871 ctxt->input->cur = in;
2875 ctxt->input->cur = in;
2877 ctxt->input->line++;
2878 continue; /* while */
2890 in = ctxt->input->cur;
2891 } while ((*in >= 0x20) && (*in <= 0x7F));
2894 ctxt->input->line = line;
2895 ctxt->input->col = col;
2896 xmlParseCharDataComplex(ctxt, cdata);
2900 * xmlParseCharDataComplex:
2901 * @ctxt: an XML parser context
2902 * @cdata: int indicating whether we are within a CDATA section
2904 * parse a CharData section. This is the fallback function
2905 * of xmlParseCharData() when the parsing requires handling
2906 * of non-ASCII characters.
2909 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
/*
 * Slow path for character data: characters are gathered into a local
 * buffer and delivered to the SAX callbacks in chunks.
 */
/* The buffer is 5 bytes larger than the flush threshold tested further down. */
2910 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
/* Consume characters until '<' or a character that fails IS_CHAR. */
2918 while ((cur != '<') && /* checked */
2920 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
/* "]]" at the current position: candidate for the "]]>" sequence reported below. */
2921 if ((cur == ']') && (NXT(1) == ']') &&
2925 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928 "Sequence ']]>' not allowed in content\n");
2929 /* Should this be relaxed ??? I see a "must here */
2930 ctxt->wellFormed = 0;
2931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* COPY_BUF presumably appends cur to buf and advances nbchar - macro defined elsewhere, confirm. */
2934 COPY_BUF(l,buf,nbchar,cur);
/* Threshold reached: hand the buffered chunk to the handler. */
2935 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2937 * OK the segment is to be consumed as chars.
2939 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2940 if (areBlanks(ctxt, buf, nbchar)) {
2941 if (ctxt->sax->ignorableWhitespace != NULL)
2942 ctxt->sax->ignorableWhitespace(ctxt->userData,
2945 if (ctxt->sax->characters != NULL)
2946 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2961 * OK the segment is to be consumed as chars.
/* Same dispatch as above: blanks go to ignorableWhitespace, anything else to characters. */
2963 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2964 if (areBlanks(ctxt, buf, nbchar)) {
2965 if (ctxt->sax->ignorableWhitespace != NULL)
2966 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2968 if (ctxt->sax->characters != NULL)
2969 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2976 * xmlParseExternalID:
2977 * @ctxt: an XML parser context
2978 * @publicID: a xmlChar** receiving PubidLiteral
2979 * @strict: indicate whether we should restrict parsing to only
2980 * production [75], see NOTE below
2982 * Parse an External ID or a Public ID
2984 * NOTE: Productions [75] and [83] interact badly since [75] can generate
2985 * 'PUBLIC' S PubidLiteral S SystemLiteral
2987 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2988 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2990 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2992 * Returns the SystemLiteral; in the PUBLIC case publicID also receives
2993 * the PubidLiteral. If strict is off it is possible to return NULL
2994 * and have publicID set.
2998 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
/*
 * Recognises the 'SYSTEM' and 'PUBLIC' keywords at the current position.
 * The system literal is collected in URI; the public literal is stored
 * through publicID.
 */
2999 xmlChar *URI = NULL;
/* SYSTEM form: keyword, mandatory blank, then the system literal. */
3004 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3005 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3006 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3008 if (!IS_BLANK(CUR)) {
3009 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3011 ctxt->sax->error(ctxt->userData,
3012 "Space required after 'SYSTEM'\n");
3013 ctxt->wellFormed = 0;
3014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3017 URI = xmlParseSystemLiteral(ctxt);
/* Error for a SYSTEM identifier without URI (per the message text). */
3019 ctxt->errNo = XML_ERR_URI_REQUIRED;
3020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021 ctxt->sax->error(ctxt->userData,
3022 "xmlParseExternalID: SYSTEM, no URI\n");
3023 ctxt->wellFormed = 0;
3024 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* PUBLIC form: keyword, mandatory blank, then the public identifier literal. */
3026 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3027 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3028 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3030 if (!IS_BLANK(CUR)) {
3031 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3033 ctxt->sax->error(ctxt->userData,
3034 "Space required after 'PUBLIC'\n");
3035 ctxt->wellFormed = 0;
3036 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The public identifier is written through the caller's pointer. */
3039 *publicID = xmlParsePubidLiteral(ctxt);
3040 if (*publicID == NULL) {
3041 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3043 ctxt->sax->error(ctxt->userData,
3044 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3045 ctxt->wellFormed = 0;
3046 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3050 * We don't handle [83] so "S SystemLiteral" is required.
/* A blank must follow the public identifier in this case. */
3052 if (!IS_BLANK(CUR)) {
3053 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3055 ctxt->sax->error(ctxt->userData,
3056 "Space required after the Public Identifier\n");
3057 ctxt->wellFormed = 0;
3058 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3062 * We handle [83] so we return immediately, if
3063 * "S SystemLiteral" is not detected. From a purely parsing
3064 * point of view that's a nice mess.
/*
 * Look ahead through ptr: without a blank, or without a quote after the
 * blanks, NULL is returned and only *publicID carries a result.
 */
3070 if (!IS_BLANK(*ptr)) return(NULL);
3072 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3073 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
/* A system literal follows the public identifier. */
3076 URI = xmlParseSystemLiteral(ctxt);
3078 ctxt->errNo = XML_ERR_URI_REQUIRED;
3079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3080 ctxt->sax->error(ctxt->userData,
3081 "xmlParseExternalID: PUBLIC, no URI\n");
3082 ctxt->wellFormed = 0;
3083 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3091 * @ctxt: an XML parser context
3093 * Skip an XML (SGML) comment <!-- .... -->
3094 * The spec says that "For compatibility, the string "--" (double-hyphen)
3095 * must not occur within comments. "
3097 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3100 xmlParseComment(xmlParserCtxtPtr ctxt) {
/*
 * Parses a comment starting at "<!--", collects its text in a heap buffer
 * and passes it to the SAX comment callback.
 */
3101 xmlChar *buf = NULL;
3103 int size = XML_PARSER_BUFFER_SIZE;
3107 xmlParserInputState state;
/* Remember the starting input to detect a comment that spans entities. */
3108 xmlParserInputPtr input = ctxt->input;
3112 * Check that there is a comment right here.
3114 if ((RAW != '<') || (NXT(1) != '!') ||
3115 (NXT(2) != '-') || (NXT(3) != '-')) return;
/* Save the parser state and switch to XML_PARSER_COMMENT while parsing. */
3117 state = ctxt->instate;
3118 ctxt->instate = XML_PARSER_COMMENT;
/* Initial buffer of XML_PARSER_BUFFER_SIZE characters. */
3121 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3123 xmlGenericError(xmlGenericErrorContext,
3124 "malloc of %d byte failed\n", size);
3125 ctxt->instate = state;
/* Loop over valid characters; q and r are compared with '-' to spot the terminator. */
3134 while (IS_CHAR(cur) && /* checked */
3136 (r != '-') || (q != '-'))) {
/* Two consecutive hyphens inside the comment body are an error. */
3137 if ((r == '-') && (q == '-')) {
3138 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3140 ctxt->sax->error(ctxt->userData,
3141 "Comment must not contain '--' (double-hyphen)`\n");
3142 ctxt->wellFormed = 0;
3143 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Grow the buffer once len + 5 reaches its size. */
3145 if (len + 5 >= size) {
/*
 * NOTE(review): the result of xmlRealloc() is assigned straight to buf;
 * if it returns NULL the previous allocation is no longer referenced -
 * confirm the failure path does not leak.
 */
3147 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3149 xmlGenericError(xmlGenericErrorContext,
3150 "realloc of %d byte failed\n", size);
3151 ctxt->instate = state;
/* Store the oldest pending character q in the buffer. */
3155 COPY_BUF(ql,buf,len,q);
/* The loop stopped on an invalid character: the comment was never closed. */
3175 if (!IS_CHAR(cur)) {
3176 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3178 ctxt->sax->error(ctxt->userData,
3179 "Comment not terminated \n<!--%.50s\n", buf);
3180 ctxt->wellFormed = 0;
3181 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The comment must end in the same input it started in. */
3184 if (input != ctxt->input) {
3185 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3187 ctxt->sax->error(ctxt->userData,
3188 "Comment doesn't start and stop in the same entity\n");
3189 ctxt->wellFormed = 0;
3190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Deliver the comment text unless SAX callbacks are disabled. */
3193 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3194 (!ctxt->disableSAX))
3195 ctxt->sax->comment(ctxt->userData, buf);
/* Put back the parser state saved on entry. */
3198 ctxt->instate = state;
3203 * @ctxt: an XML parser context
3205 * parse the name of a PI
3207 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3209 * Returns the PITarget name or NULL
3213 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
/*
 * Reads a name with xmlParseName() and screens names whose first three
 * letters spell "xml" in any letter case.
 */
3216 name = xmlParseName(ctxt);
3217 if ((name != NULL) &&
3218 ((name[0] == 'x') || (name[0] == 'X')) &&
3219 ((name[1] == 'm') || (name[1] == 'M')) &&
3220 ((name[2] == 'l') || (name[2] == 'L'))) {
/* Exactly lowercase "xml": an XML declaration found away from the document start. */
3222 if ((name[0] == 'x') && (name[1] == 'm') &&
3223 (name[2] == 'l') && (name[3] == 0)) {
3224 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3226 ctxt->sax->error(ctxt->userData,
3227 "XML declaration allowed only at the start of the document\n");
3228 ctxt->wellFormed = 0;
3229 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Any other three-letter case variant of "xml" is rejected as a PI name. */
3231 } else if (name[3] == 0) {
3232 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3234 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3235 ctxt->wellFormed = 0;
3236 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Walk the NULL-terminated xmlW3CPIs table looking for an exact match. */
3240 if (xmlW3CPIs[i] == NULL) break;
3241 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
/* Longer names using the 'xml' prefix only trigger a warning. */
3244 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3245 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3246 ctxt->sax->warning(ctxt->userData,
3247 "xmlParsePITarget: invalid name prefix 'xml'\n");
3253 #ifdef LIBXML_CATALOG_ENABLED
3255 * xmlParseCatalogPI:
3256 * @ctxt: an XML parser context
3257 * @catalog: the PI value string
3259 * parse an XML Catalog Processing Instruction.
3261 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3263 * Occurs only if allowed by the user and if happening in the Misc
3264 * part of the document before any doctype information.
3265 * This will add the given catalog to the parsing context in order
3266 * to be used if there is a resolution need further down in the document
3270 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
/*
 * Scans the PI value for the word "catalog" followed by a quoted URL and
 * registers that URL with xmlCatalogAddLocal(); a syntax problem is
 * reported through the XML_WAR_CATALOG_PI warning.
 */
3271 xmlChar *URL = NULL;
3272 const xmlChar *tmp, *base;
/* Skip leading blanks, then require the literal "catalog". */
3276 while (IS_BLANK(*tmp)) tmp++;
3277 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3280 while (IS_BLANK(*tmp)) tmp++;
3285 while (IS_BLANK(*tmp)) tmp++;
/* The value must open with a single or a double quote. */
3287 if ((marker != '\'') && (marker != '"'))
/* Advance to the matching quote or to the end of the string. */
3291 while ((*tmp != 0) && (*tmp != marker)) tmp++;
/* Duplicate the text lying between base and tmp. */
3294 URL = xmlStrndup(base, tmp - base);
3296 while (IS_BLANK(*tmp)) tmp++;
/* Attach the URL to the catalogs held by this parser context. */
3301 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
/* Warning path: the message echoes the whole PI value. */
3307 ctxt->errNo = XML_WAR_CATALOG_PI;
3308 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3309 ctxt->sax->warning(ctxt->userData,
3310 "Catalog PI syntax error: %s\n", catalog);
3318 * @ctxt: an XML parser context
3320 * parse an XML Processing Instruction.
3322 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3324 * The processing is transferred to SAX once parsed.
3328 xmlParsePI(xmlParserCtxtPtr ctxt) {
/*
 * Parses a processing instruction starting at "<?": reads the target with
 * xmlParsePITarget(), collects the optional content in a heap buffer and
 * calls the SAX processingInstruction callback.
 */
3329 xmlChar *buf = NULL;
3331 int size = XML_PARSER_BUFFER_SIZE;
3334 xmlParserInputState state;
3337 if ((RAW == '<') && (NXT(1) == '?')) {
/* Remember the starting input and parser state. */
3338 xmlParserInputPtr input = ctxt->input;
3339 state = ctxt->instate;
3340 ctxt->instate = XML_PARSER_PI;
3342 * this is a Processing Instruction.
3348 * Parse the target name and check for special support like
3351 target = xmlParsePITarget(ctxt);
3352 if (target != NULL) {
/* "?>" right after the target: a PI without content. */
3353 if ((RAW == '?') && (NXT(1) == '>')) {
3354 if (input != ctxt->input) {
3355 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357 ctxt->sax->error(ctxt->userData,
3358 "PI declaration doesn't start and stop in the same entity\n");
3359 ctxt->wellFormed = 0;
3360 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3367 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3368 (ctxt->sax->processingInstruction != NULL))
3369 ctxt->sax->processingInstruction(ctxt->userData,
3371 ctxt->instate = state;
/* Content follows: allocate the initial collection buffer. */
3375 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3377 xmlGenericError(xmlGenericErrorContext,
3378 "malloc of %d byte failed\n", size);
3379 ctxt->instate = state;
/* A blank must separate the target from the content. */
3383 if (!IS_BLANK(cur)) {
3384 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3386 ctxt->sax->error(ctxt->userData,
3387 "xmlParsePI: PI %s space expected\n", target);
3388 ctxt->wellFormed = 0;
3389 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Collect valid characters until the "?>" terminator. */
3393 while (IS_CHAR(cur) && /* checked */
3394 ((cur != '?') || (NXT(1) != '>'))) {
3395 if (len + 5 >= size) {
/*
 * NOTE(review): the result of xmlRealloc() is assigned straight to buf;
 * if it returns NULL the previous allocation is no longer referenced -
 * confirm the failure path does not leak.
 */
3397 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3399 xmlGenericError(xmlGenericErrorContext,
3400 "realloc of %d byte failed\n", size);
3401 ctxt->instate = state;
3410 COPY_BUF(l,buf,len,cur);
/* Error reported for a PI that never reaches its terminator (per the message text). */
3421 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3423 ctxt->sax->error(ctxt->userData,
3424 "xmlParsePI: PI %s never end ...\n", target);
3425 ctxt->wellFormed = 0;
3426 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The PI must end in the same input it started in. */
3428 if (input != ctxt->input) {
3429 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3431 ctxt->sax->error(ctxt->userData,
3432 "PI declaration doesn't start and stop in the same entity\n");
3433 ctxt->wellFormed = 0;
3434 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3438 #ifdef LIBXML_CATALOG_ENABLED
/*
 * A catalog PI seen while the saved state was MISC or START is forwarded
 * to xmlParseCatalogPI() when the catalog defaults allow document catalogs.
 */
3439 if (((state == XML_PARSER_MISC) ||
3440 (state == XML_PARSER_START)) &&
3441 (xmlStrEqual(target, XML_CATALOG_PI))) {
3442 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3443 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3444 (allow == XML_CATA_ALLOW_ALL))
3445 xmlParseCatalogPI(ctxt, buf);
/* Deliver the PI unless SAX callbacks are disabled. */
3453 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3454 (ctxt->sax->processingInstruction != NULL))
3455 ctxt->sax->processingInstruction(ctxt->userData,
/* No target name could be parsed. */
3461 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3463 ctxt->sax->error(ctxt->userData,
3464 "xmlParsePI : no target name\n");
3465 ctxt->wellFormed = 0;
3466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Put back the parser state saved on entry. */
3468 ctxt->instate = state;
3473 * xmlParseNotationDecl:
3474 * @ctxt: an XML parser context
3476 * parse a notation declaration
3478 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3480 * Hence there are actually 3 choices:
3481 * 'PUBLIC' S PubidLiteral
3482 * 'PUBLIC' S PubidLiteral S SystemLiteral
3483 * and 'SYSTEM' S SystemLiteral
3485 * See the NOTE on xmlParseExternalID().
3489 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses a "<!NOTATION" declaration: name, external or public identifier,
 * then the closing '>', and reports it through the SAX notationDecl
 * callback.
 */
3494 if ((RAW == '<') && (NXT(1) == '!') &&
3495 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3496 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3497 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3498 (NXT(8) == 'O') && (NXT(9) == 'N')) {
/* Remember the starting input to detect a declaration that spans entities. */
3499 xmlParserInputPtr input = ctxt->input;
3502 if (!IS_BLANK(CUR)) {
3503 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3505 ctxt->sax->error(ctxt->userData,
3506 "Space required after '<!NOTATION'\n");
3507 ctxt->wellFormed = 0;
3508 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The notation name. */
3513 name = xmlParseName(ctxt);
3515 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3517 ctxt->sax->error(ctxt->userData,
3518 "NOTATION: Name expected here\n");
3519 ctxt->wellFormed = 0;
3520 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3523 if (!IS_BLANK(CUR)) {
3524 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527 "Space required after the NOTATION name'\n");
3528 ctxt->wellFormed = 0;
3529 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* strict is 0 here, so a PUBLIC identifier without a system literal is accepted. */
3537 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
/* The declaration must end in the same input it started in. */
3541 if (input != ctxt->input) {
3542 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3544 ctxt->sax->error(ctxt->userData,
3545 "Notation declaration doesn't start and stop in the same entity\n");
3546 ctxt->wellFormed = 0;
3547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Report the notation unless SAX callbacks are disabled. */
3550 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3551 (ctxt->sax->notationDecl != NULL))
3552 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
/* Error for a declaration lacking its closing '>' (per the message text). */
3554 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3556 ctxt->sax->error(ctxt->userData,
3557 "'>' required to close NOTATION declaration\n");
3558 ctxt->wellFormed = 0;
3559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Both identifier strings are released here. */
3562 if (Systemid != NULL) xmlFree(Systemid);
3563 if (Pubid != NULL) xmlFree(Pubid);
3568 * xmlParseEntityDecl:
3569 * @ctxt: an XML parser context
3571 * parse <!ENTITY declarations
3573 * [70] EntityDecl ::= GEDecl | PEDecl
3575 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3577 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3579 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3581 * [74] PEDef ::= EntityValue | ExternalID
3583 * [76] NDataDecl ::= S 'NDATA' S Name
3585 * [ VC: Notation Declared ]
3586 * The Name must match the declared name of a notation.
3590 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses a "<!ENTITY" declaration. Internal entities carry a quoted value;
 * external ones carry an external identifier, optionally followed by an
 * NDATA notation name. Each form is reported through the matching SAX
 * callback (entityDecl or unparsedEntityDecl).
 */
3591 xmlChar *name = NULL;
3592 xmlChar *value = NULL;
3593 xmlChar *URI = NULL, *literal = NULL;
3594 xmlChar *ndata = NULL;
3595 int isParameter = 0;
3596 xmlChar *orig = NULL;
3600 if ((RAW == '<') && (NXT(1) == '!') &&
3601 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3602 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3603 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
/* Remember the starting input to detect a declaration that spans entities. */
3604 xmlParserInputPtr input = ctxt->input;
/* Blanks are required after the keyword. */
3607 skipped = SKIP_BLANKS;
3609 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3611 ctxt->sax->error(ctxt->userData,
3612 "Space required after '<!ENTITY'\n");
3613 ctxt->wellFormed = 0;
3614 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Parameter-entity form: blanks are required after the '%' as well. */
3619 skipped = SKIP_BLANKS;
3621 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Space required after '%'\n");
3625 ctxt->wellFormed = 0;
3626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The entity name. */
3631 name = xmlParseName(ctxt);
3633 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3635 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3636 ctxt->wellFormed = 0;
3637 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3640 skipped = SKIP_BLANKS;
3642 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3644 ctxt->sax->error(ctxt->userData,
3645 "Space required after the entity name\n");
3646 ctxt->wellFormed = 0;
3647 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3650 ctxt->instate = XML_PARSER_ENTITY_DECL;
3652 * handle the various case of definitions...
/* Quoted value, reported as an internal parameter entity. */
3655 if ((RAW == '"') || (RAW == '\'')) {
3656 value = xmlParseEntityValue(ctxt, &orig);
3658 if ((ctxt->sax != NULL) &&
3659 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3660 ctxt->sax->entityDecl(ctxt->userData, name,
3661 XML_INTERNAL_PARAMETER_ENTITY,
/* External identifier parsed in strict mode (third argument is 1). */
3665 URI = xmlParseExternalID(ctxt, &literal, 1);
3666 if ((URI == NULL) && (literal == NULL)) {
3667 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3669 ctxt->sax->error(ctxt->userData,
3670 "Entity value required\n");
3671 ctxt->wellFormed = 0;
3672 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/*
 * The system identifier is checked as a URI; a fragment part is refused.
 * NOTE(review): these two error reports are additionally gated on
 * !ctxt->disableSAX, unlike the other error calls in this function -
 * confirm that is intended.
 */
3677 uri = xmlParseURI((const char *) URI);
3679 ctxt->errNo = XML_ERR_INVALID_URI;
3680 if ((ctxt->sax != NULL) &&
3681 (!ctxt->disableSAX) &&
3682 (ctxt->sax->error != NULL))
3683 ctxt->sax->error(ctxt->userData,
3684 "Invalid URI: %s\n", URI);
3686 * This really ought to be a well formedness error
3687 * but the XML Core WG decided otherwise c.f. issue
3688 * E26 of the XML erratas.
3691 if (uri->fragment != NULL) {
3692 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3693 if ((ctxt->sax != NULL) &&
3694 (!ctxt->disableSAX) &&
3695 (ctxt->sax->error != NULL))
3696 ctxt->sax->error(ctxt->userData,
3697 "Fragment not allowed: %s\n", URI);
3699 * Okay this is foolish to block those but not
3702 ctxt->wellFormed = 0;
/* Report the external parameter entity. */
3704 if ((ctxt->sax != NULL) &&
3705 (!ctxt->disableSAX) &&
3706 (ctxt->sax->entityDecl != NULL))
3707 ctxt->sax->entityDecl(ctxt->userData, name,
3708 XML_EXTERNAL_PARAMETER_ENTITY,
3709 literal, URI, NULL);
/* Quoted value, reported as an internal general entity. */
3716 if ((RAW == '"') || (RAW == '\'')) {
3717 value = xmlParseEntityValue(ctxt, &orig);
3718 if ((ctxt->sax != NULL) &&
3719 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3720 ctxt->sax->entityDecl(ctxt->userData, name,
3721 XML_INTERNAL_GENERAL_ENTITY,
3724 * For expat compatibility in SAX mode.
/*
 * Without a document, or with one whose version equals SAX_COMPAT_MODE,
 * a document and an internal subset named "fake" are created on demand
 * and entityDecl() is called directly.
 */
3726 if ((ctxt->myDoc == NULL) ||
3727 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3728 if (ctxt->myDoc == NULL) {
3729 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3731 if (ctxt->myDoc->intSubset == NULL)
3732 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3733 BAD_CAST "fake", NULL, NULL);
3735 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
/* External identifier of a general entity, again in strict mode. */
3739 URI = xmlParseExternalID(ctxt, &literal, 1);
3740 if ((URI == NULL) && (literal == NULL)) {
3741 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3743 ctxt->sax->error(ctxt->userData,
3744 "Entity value required\n");
3745 ctxt->wellFormed = 0;
3746 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Same URI and fragment checks as for the parameter-entity case. */
3751 uri = xmlParseURI((const char *)URI);
3753 ctxt->errNo = XML_ERR_INVALID_URI;
3754 if ((ctxt->sax != NULL) &&
3755 (!ctxt->disableSAX) &&
3756 (ctxt->sax->error != NULL))
3757 ctxt->sax->error(ctxt->userData,
3758 "Invalid URI: %s\n", URI);
3760 * This really ought to be a well formedness error
3761 * but the XML Core WG decided otherwise c.f. issue
3762 * E26 of the XML erratas.
3765 if (uri->fragment != NULL) {
3766 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3767 if ((ctxt->sax != NULL) &&
3768 (!ctxt->disableSAX) &&
3769 (ctxt->sax->error != NULL))
3770 ctxt->sax->error(ctxt->userData,
3771 "Fragment not allowed: %s\n", URI);
3773 * Okay this is foolish to block those but not
3776 ctxt->wellFormed = 0;
/* Anything other than '>' at this point must start with a blank. */
3781 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3782 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3784 ctxt->sax->error(ctxt->userData,
3785 "Space required before 'NDATA'\n");
3786 ctxt->wellFormed = 0;
3787 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* NDATA keyword: the entity is unparsed and names a notation. */
3790 if ((RAW == 'N') && (NXT(1) == 'D') &&
3791 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3794 if (!IS_BLANK(CUR)) {
3795 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3797 ctxt->sax->error(ctxt->userData,
3798 "Space required after 'NDATA'\n");
3799 ctxt->wellFormed = 0;
3800 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3803 ndata = xmlParseName(ctxt);
3804 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3805 (ctxt->sax->unparsedEntityDecl != NULL))
3806 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3807 literal, URI, ndata);
/* Reported as an external parsed general entity. */
3809 if ((ctxt->sax != NULL) &&
3810 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3811 ctxt->sax->entityDecl(ctxt->userData, name,
3812 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3813 literal, URI, NULL);
3815 * For expat compatibility in SAX mode.
3816 * assuming the entity repalcement was asked for
/* Same compatibility handling as above, only when replaceEntities is set. */
3818 if ((ctxt->replaceEntities != 0) &&
3819 ((ctxt->myDoc == NULL) ||
3820 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3821 if (ctxt->myDoc == NULL) {
3822 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3825 if (ctxt->myDoc->intSubset == NULL)
3826 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3827 BAD_CAST "fake", NULL, NULL);
3828 entityDecl(ctxt, name,
3829 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3830 literal, URI, NULL);
/* Error for a declaration that is not terminated (per the message text). */
3837 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3839 ctxt->sax->error(ctxt->userData,
3840 "xmlParseEntityDecl: entity %s not terminated\n", name);
3841 ctxt->wellFormed = 0;
3842 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The declaration must end in the same input it started in. */
3844 if (input != ctxt->input) {
3845 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3847 ctxt->sax->error(ctxt->userData,
3848 "Entity declaration doesn't start and stop in the same entity\n");
3849 ctxt->wellFormed = 0;
3850 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3856 * Ugly mechanism to save the raw entity value.
3858 xmlEntityPtr cur = NULL;
/* Look the entity up again through the SAX getParameterEntity callback... */
3861 if ((ctxt->sax != NULL) &&
3862 (ctxt->sax->getParameterEntity != NULL))
3863 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
/* ...or through getEntity, falling back to getEntity() when userData is the context itself. */
3865 if ((ctxt->sax != NULL) &&
3866 (ctxt->sax->getEntity != NULL))
3867 cur = ctxt->sax->getEntity(ctxt->userData, name);
3868 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3869 cur = getEntity(ctxt, name);
3873 if (cur->orig != NULL)
/* Release the strings still owned by this function. */
3880 if (name != NULL) xmlFree(name);
3881 if (value != NULL) xmlFree(value);
3882 if (URI != NULL) xmlFree(URI);
3883 if (literal != NULL) xmlFree(literal);
3884 if (ndata != NULL) xmlFree(ndata);
3889 * xmlParseDefaultDecl:
3890 * @ctxt: an XML parser context
3891 * @value: Receive a possible fixed default value for the attribute
3893 * Parse an attribute default declaration
3895 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3897 * [ VC: Required Attribute ]
3898 * if the default declaration is the keyword #REQUIRED, then the
3899 * attribute must be specified for all elements of the type in the
3900 * attribute-list declaration.
3902 * [ VC: Attribute Default Legal ]
3903 * The declared default value must meet the lexical constraints of
3904 * the declared attribute type c.f. xmlValidateAttributeDecl()
3906 * [ VC: Fixed Attribute Default ]
3907 * if an attribute has a default value declared with the #FIXED
3908 * keyword, instances of that attribute must match the default value.
3910 * [ WFC: No < in Attribute Values ]
3911 * handled in xmlParseAttValue()
3913 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3914 * or XML_ATTRIBUTE_FIXED.
3918 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
 * Recognises the '#REQUIRED', '#IMPLIED' and '#FIXED' keywords of an
 * attribute default declaration; unless one of the first two matched,
 * an attribute value is parsed with xmlParseAttValue().
 */
3923 if ((RAW == '#') && (NXT(1) == 'R') &&
3924 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3925 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3926 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3929 return(XML_ATTRIBUTE_REQUIRED);
3931 if ((RAW == '#') && (NXT(1) == 'I') &&
3932 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3933 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3934 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3936 return(XML_ATTRIBUTE_IMPLIED);
/* Default result when no keyword precedes the value. */
3938 val = XML_ATTRIBUTE_NONE;
/* '#FIXED' switches the result and must be followed by a blank. */
3939 if ((RAW == '#') && (NXT(1) == 'F') &&
3940 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3941 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3943 val = XML_ATTRIBUTE_FIXED;
3944 if (!IS_BLANK(CUR)) {
3945 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3947 ctxt->sax->error(ctxt->userData,
3948 "Space required after '#FIXED'\n");
3949 ctxt->wellFormed = 0;
3950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Parse the default value, then set the parser state to XML_PARSER_DTD. */
3954 ret = xmlParseAttValue(ctxt);
3955 ctxt->instate = XML_PARSER_DTD;
/*
 * NOTE(review): ctxt->errNo does not appear to be set for this error,
 * unlike the '#FIXED' case above - confirm.
 */
3957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3958 ctxt->sax->error(ctxt->userData,
3959 "Attribute default value declaration error\n");
3960 ctxt->wellFormed = 0;
3961 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3968 * xmlParseNotationType:
3969 * @ctxt: an XML parser context
3971 * parse a Notation attribute type.
3973 * Note: the leading 'NOTATION' S part has already been parsed...
3975 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3977 * [ VC: Notation Attributes ]
3978 * Values of this type must match one of the notation names included
3979 * in the declaration; all notation names in the declaration must be declared.
3981 * Returns: the notation attribute tree built while parsing
3985 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
/*
 * Parses the parenthesised, '|'-separated list of notation names and
 * builds an enumeration list: ret is its head, last its most recent node.
 */
3987 xmlEnumerationPtr ret = NULL, last = NULL, cur;
/* Error for a list that does not open with '(' (per the message text). */
3990 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3992 ctxt->sax->error(ctxt->userData,
3993 "'(' required to start 'NOTATION'\n");
3994 ctxt->wellFormed = 0;
3995 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* One notation name per iteration. */
4002 name = xmlParseName(ctxt);
4004 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4006 ctxt->sax->error(ctxt->userData,
4007 "Name expected in NOTATION declaration\n");
4008 ctxt->wellFormed = 0;
4009 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Wrap the name in a new node; a failed creation returns the list built so far. */
4012 cur = xmlCreateEnumeration(name);
4014 if (cur == NULL) return(ret);
4015 if (last == NULL) ret = last = cur;
/* Another name follows as long as a '|' separator is found. */
4021 } while (RAW == '|');
/* Error for a list that is not closed with ')' (per the message text). */
4023 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4025 ctxt->sax->error(ctxt->userData,
4026 "')' required to finish NOTATION declaration\n");
4027 ctxt->wellFormed = 0;
4028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/*
 * NOTE(review): only `last` is freed here while `ret` is the head of the
 * list - confirm that no node reachable from `ret` still points to the
 * freed node afterwards.
 */
4029 if ((last != NULL) && (last != ret))
4030 xmlFreeEnumeration(last);
4038 * xmlParseEnumerationType:
4039 * @ctxt: an XML parser context
4041 * parse an Enumeration attribute type.
4043 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4045 * [ VC: Enumeration ]
4046 * Values of this type must match one of the Nmtoken tokens in
4049 * Returns: the enumeration attribute tree built while parsing
4053 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
/*
 * Parses the parenthesised, '|'-separated list of Nmtokens and builds an
 * enumeration list: ret is its head, last its most recent node.
 */
4055 xmlEnumerationPtr ret = NULL, last = NULL, cur;
/* Error for a list that does not open with '(' (per the message text). */
4058 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4060 ctxt->sax->error(ctxt->userData,
4061 "'(' required to start ATTLIST enumeration\n");
4062 ctxt->wellFormed = 0;
4063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* One Nmtoken per iteration. */
4070 name = xmlParseNmtoken(ctxt);
4072 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4074 ctxt->sax->error(ctxt->userData,
4075 "NmToken expected in ATTLIST enumeration\n");
4076 ctxt->wellFormed = 0;
4077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Wrap the token in a new node; a failed creation returns the list built so far. */
4080 cur = xmlCreateEnumeration(name);
4082 if (cur == NULL) return(ret);
4083 if (last == NULL) ret = last = cur;
/* Another token follows as long as a '|' separator is found. */
4089 } while (RAW == '|');
/* Error for a list that is not closed with ')' (per the message text). */
4091 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4093 ctxt->sax->error(ctxt->userData,
4094 "')' required to finish ATTLIST enumeration\n");
4095 ctxt->wellFormed = 0;
4096 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4104 * xmlParseEnumeratedType:
4105 * @ctxt: an XML parser context
4106 * @tree: the enumeration tree built while parsing
4108 * parse an Enumerated attribute type.
4110 * [57] EnumeratedType ::= NotationType | Enumeration
4112 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4115 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4119 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
/*
 * Dispatches on the 'NOTATION' keyword: with it the list is parsed by
 * xmlParseNotationType(), without it by xmlParseEnumerationType().
 * The list is stored through tree; 0 is returned when it is NULL.
 */
4120 if ((RAW == 'N') && (NXT(1) == 'O') &&
4121 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4122 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4123 (NXT(6) == 'O') && (NXT(7) == 'N')) {
/* A blank is required after the keyword. */
4125 if (!IS_BLANK(CUR)) {
4126 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4128 ctxt->sax->error(ctxt->userData,
4129 "Space required after 'NOTATION'\n");
4130 ctxt->wellFormed = 0;
4131 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4135 *tree = xmlParseNotationType(ctxt);
4136 if (*tree == NULL) return(0);
4137 return(XML_ATTRIBUTE_NOTATION);
/* No 'NOTATION' keyword: plain enumeration. */
4139 *tree = xmlParseEnumerationType(ctxt);
4140 if (*tree == NULL) return(0);
4141 return(XML_ATTRIBUTE_ENUMERATION);
4145 * xmlParseAttributeType:
4146 * @ctxt: an XML parser context
4147 * @tree: the enumeration tree built while parsing
4149 * parse the Attribute list def for an element
4151 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4153 * [55] StringType ::= 'CDATA'
4155 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4156 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4158 * Validity constraints for attribute values syntax are checked in
4159 * xmlValidateAttributeValue()
4162 * Values of type ID must match the Name production. A name must not
4163 * appear more than once in an XML document as a value of this type;
4164 * i.e., ID values must uniquely identify the elements which bear them.
4166 * [ VC: One ID per Element Type ]
4167 * No element type may have more than one ID attribute specified.
4169 * [ VC: ID Attribute Default ]
4170 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4173 * Values of type IDREF must match the Name production, and values
4174 * of type IDREFS must match Names; each IDREF Name must match the value
4175 * of an ID attribute on some element in the XML document; i.e. IDREF
4176 * values must match the value of some ID attribute.
4178 * [ VC: Entity Name ]
4179 * Values of type ENTITY must match the Name production, values
4180 * of type ENTITIES must match Names; each Entity Name must match the
4181 * name of an unparsed entity declared in the DTD.
4183 * [ VC: Name Token ]
4184 * Values of type NMTOKEN must match the Nmtoken production; values
4185 * of type NMTOKENS must match Nmtokens.
4187 * Returns the attribute type
/*
 * Implementation notes: the keyword at the current position is compared
 * character by character against CDATA, IDREFS, IDREF, ID, ENTITY,
 * ENTITIES, NMTOKENS and NMTOKEN, in that order, and the matching
 * XML_ATTRIBUTE_* constant is returned. Keywords that extend a shorter
 * one (IDREFS and IDREF before ID, NMTOKENS before NMTOKEN) are tested
 * first, so the shorter test cannot match their prefix. Anything else is
 * delegated to xmlParseEnumeratedType(), which is the only path that
 * uses @tree.
 */
4190 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4192 if ((RAW == 'C') && (NXT(1) == 'D') &&
4193 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4196 return(XML_ATTRIBUTE_CDATA);
4197 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4198 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4199 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4201 return(XML_ATTRIBUTE_IDREFS);
4202 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4203 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4206 return(XML_ATTRIBUTE_IDREF);
/* Plain ID: only reached when neither IDREFS nor IDREF matched above. */
4207 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4209 return(XML_ATTRIBUTE_ID);
4210 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4211 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4212 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4214 return(XML_ATTRIBUTE_ENTITY);
4215 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4216 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4217 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4218 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4220 return(XML_ATTRIBUTE_ENTITIES);
4221 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4222 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4223 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4224 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4226 return(XML_ATTRIBUTE_NMTOKENS);
4227 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4228 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4229 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4232 return(XML_ATTRIBUTE_NMTOKEN);
/* No keyword type matched: hand over to the enumerated-type parser. */
4234 return(xmlParseEnumeratedType(ctxt, tree));
4238 * xmlParseAttributeListDecl:
4239 * @ctxt: an XML parser context
4241 * parse the Attribute list declaration for an element
4243 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4245 * [53] AttDef ::= S Name S AttType S DefaultDecl
/*
 * Implementation notes (parts of this function are not visible in this
 * view):
 *  - after the opening keyword the element name is read with
 *    xmlParseName(); a missing name raises XML_ERR_NAME_REQUIRED.
 *  - the loop runs until '>' and, per pass, reads one attribute name, its
 *    type (xmlParseAttributeType) and its default (xmlParseDefaultDecl),
 *    requiring a blank between the parts.
 *  - the error branches release defaultValue and, once the type has been
 *    parsed, the enumeration tree.
 *  - a successfully parsed definition is passed to the SAX attributeDecl
 *    callback unless ctxt->disableSAX is set.
 */
4249 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4252 xmlEnumerationPtr tree;
4254 if ((RAW == '<') && (NXT(1) == '!') &&
4255 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4256 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4257 (NXT(6) == 'I') && (NXT(7) == 'S') &&
/* Input in which the declaration starts; compared with ctxt->input at the end. */
4259 xmlParserInputPtr input = ctxt->input;
4262 if (!IS_BLANK(CUR)) {
4263 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4265 ctxt->sax->error(ctxt->userData,
4266 "Space required after '<!ATTLIST'\n");
4267 ctxt->wellFormed = 0;
4268 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4271 elemName = xmlParseName(ctxt);
4272 if (elemName == NULL) {
4273 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4275 ctxt->sax->error(ctxt->userData,
4276 "ATTLIST: no name for Element\n");
4277 ctxt->wellFormed = 0;
4278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* One attribute definition per pass, until the closing '>'. */
4283 while (RAW != '>') {
/* Position at the start of the pass, used below to detect a pass that consumed nothing. */
4284 const xmlChar *check = CUR_PTR;
4287 xmlChar *defaultValue = NULL;
4291 attrName = xmlParseName(ctxt);
4292 if (attrName == NULL) {
4293 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4295 ctxt->sax->error(ctxt->userData,
4296 "ATTLIST: no name for Attribute\n");
4297 ctxt->wellFormed = 0;
4298 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4302 if (!IS_BLANK(CUR)) {
4303 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4305 ctxt->sax->error(ctxt->userData,
4306 "Space required after the attribute name\n");
4307 ctxt->wellFormed = 0;
4308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4309 if (attrName != NULL)
4311 if (defaultValue != NULL)
4312 xmlFree(defaultValue);
/* Attribute type; for enumerated types the value list is returned in tree. */
4317 type = xmlParseAttributeType(ctxt, &tree);
4319 if (attrName != NULL)
4321 if (defaultValue != NULL)
4322 xmlFree(defaultValue);
4327 if (!IS_BLANK(CUR)) {
4328 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4330 ctxt->sax->error(ctxt->userData,
4331 "Space required after the attribute type\n");
4332 ctxt->wellFormed = 0;
4333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4334 if (attrName != NULL)
4336 if (defaultValue != NULL)
4337 xmlFree(defaultValue);
4339 xmlFreeEnumeration(tree);
/* Default declaration; the literal default, if any, is returned in defaultValue. */
4344 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4346 if (attrName != NULL)
4348 if (defaultValue != NULL)
4349 xmlFree(defaultValue);
4351 xmlFreeEnumeration(tree);
4357 if (!IS_BLANK(CUR)) {
4358 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4360 ctxt->sax->error(ctxt->userData,
4361 "Space required after the attribute default value\n");
4362 ctxt->wellFormed = 0;
4363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4364 if (attrName != NULL)
4366 if (defaultValue != NULL)
4367 xmlFree(defaultValue);
4369 xmlFreeEnumeration(tree);
/* The position did not move during this pass: reported as an internal error. */
4374 if (check == CUR_PTR) {
4375 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4377 ctxt->sax->error(ctxt->userData,
4378 "xmlParseAttributeListDecl: detected internal error\n");
4379 if (attrName != NULL)
4381 if (defaultValue != NULL)
4382 xmlFree(defaultValue);
4384 xmlFreeEnumeration(tree);
/* Publish the definition through SAX unless SAX output has been disabled. */
4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4388 (ctxt->sax->attributeDecl != NULL))
4389 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4390 type, def, defaultValue, tree);
4391 if (attrName != NULL)
4393 if (defaultValue != NULL)
4394 xmlFree(defaultValue);
/* The declaration has to end in the input it started in. */
4398 if (input != ctxt->input) {
4399 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4401 ctxt->sax->error(ctxt->userData,
4402 "Attribute list declaration doesn't start and stop in the same entity\n");
4403 ctxt->wellFormed = 0;
4404 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4414 * xmlParseElementMixedContentDecl:
4415 * @ctxt: an XML parser context
4416 * @inputchk: the input used for the current entity, needed for boundary checks
4418 * parse the declaration for a Mixed Element content
4419 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4421 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4422 * '(' S? '#PCDATA' S? ')'
4424 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4426 * [ VC: No Duplicate Types ]
4427 * The same name must not appear more than once in a single
4428 * mixed-content declaration.
4430 * returns: the list of the xmlElementContentPtr describing the element choices
/*
 * Implementation notes (parts of this function are not visible in this
 * view):
 *  - the keyword test starts with '#PCDAT'; when it fails the last error
 *    branch reports XML_ERR_PCDATA_REQUIRED.
 *  - one path builds a lone XML_ELEMENT_CONTENT_PCDATA node whose ocur is
 *    set to XML_ELEMENT_CONTENT_MULT.
 *  - the other path builds a chain of XML_ELEMENT_CONTENT_OR nodes, one
 *    per '|' separator, each name read with xmlParseName().
 *  - a missing name frees the chain and raises XML_ERR_NAME_REQUIRED; a
 *    list that is not closed as expected frees the tree and raises
 *    XML_ERR_MIXED_NOT_STARTED.
 *  - the entity-boundary checks use ctxt->vctxt.error and are only made
 *    when ctxt->validate is set.
 */
4432 xmlElementContentPtr
4433 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
4434 xmlElementContentPtr ret = NULL, cur = NULL, n;
4435 xmlChar *elem = NULL;
4438 if ((RAW == '#') && (NXT(1) == 'P') &&
4439 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4440 (NXT(4) == 'A') && (NXT(5) == 'T') &&
/* Validation only: the group must close in the input passed as inputchk. */
4446 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4447 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4448 if (ctxt->vctxt.error != NULL)
4449 ctxt->vctxt.error(ctxt->vctxt.userData,
4450 "Element content declaration doesn't start and stop in the same entity\n");
4454 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4456 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4461 if ((RAW == '(') || (RAW == '|')) {
4462 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4463 if (ret == NULL) return(NULL);
/* One pass per '|' separator; each pass adds an OR node to the chain. */
4465 while (RAW == '|') {
4468 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4469 if (ret == NULL) return(NULL);
4475 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4476 if (n == NULL) return(NULL);
4477 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4487 elem = xmlParseName(ctxt);
4489 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4491 ctxt->sax->error(ctxt->userData,
4492 "xmlParseElementMixedContentDecl : Name expected\n");
4493 ctxt->wellFormed = 0;
4494 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4495 xmlFreeElementContent(cur);
/* End of the list: the pending name becomes c2 of the current node. */
4501 if ((RAW == ')') && (NXT(1) == '*')) {
4503 cur->c2 = xmlNewElementContent(elem,
4504 XML_ELEMENT_CONTENT_ELEMENT);
4505 if (cur->c2 != NULL)
4506 cur->c2->parent = cur;
4509 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4510 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4511 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4512 if (ctxt->vctxt.error != NULL)
4513 ctxt->vctxt.error(ctxt->vctxt.userData,
4514 "Element content declaration doesn't start and stop in the same entity\n");
/* List not terminated as expected: drop the pending name and the tree. */
4519 if (elem != NULL) xmlFree(elem);
4520 xmlFreeElementContent(ret);
4521 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4523 ctxt->sax->error(ctxt->userData,
4524 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4525 ctxt->wellFormed = 0;
4526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4531 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4533 ctxt->sax->error(ctxt->userData,
4534 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4535 ctxt->wellFormed = 0;
4536 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4542 * xmlParseElementChildrenContentDecl:
4543 * @ctxt: an XML parser context
4544 * @inputchk: the input used for the current entity, needed for boundary checks
4546 * parse the declaration for a children Element content
4547 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4550 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4552 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4554 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4556 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4558 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4559 * TODO Parameter-entity replacement text must be properly nested
4560 * with parenthesized groups. That is to say, if either of the
4561 * opening or closing parentheses in a choice, seq, or Mixed
4562 * construct is contained in the replacement text for a parameter
4563 * entity, both must be contained in the same replacement text. For
4564 * interoperability, if a parameter-entity reference appears in a
4565 * choice, seq, or Mixed construct, its replacement text should not
4566 * be empty, and neither the first nor last non-blank character of
4567 * the replacement text should be a connector (| or ,).
4569 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Implementation notes (parts of this function are not visible in this
 * view):
 *  - the first item is either a nested group, parsed by a recursive call,
 *    or a name turned into an XML_ELEMENT_CONTENT_ELEMENT node; its ocur
 *    is then set to OPT, MULT, PLUS or ONCE.
 *  - the loop runs until ')' and reads one separator and one item per
 *    pass. `type` keeps the first separator seen, so a different
 *    separator later in the same group raises XML_ERR_SEPARATOR_REQUIRED.
 *  - SEQ nodes are created for one separator kind and OR nodes for '|'.
 *  - the error branches free `last` (when distinct from `ret`) and `ret`.
 *  - after the loop the occurrence marker of the whole group is stored in
 *    ret->ocur; for '*' and '+' the OR chain is walked and children
 *    marked OPT or MULT are reset to ONCE.
 */
4572 xmlElementContentPtr
4573 xmlParseElementChildrenContentDecl
4574 (xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
4575 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
/* Input at the start of the nested group, passed down as its inputchk. */
4582 xmlParserInputPtr input = ctxt->input;
4584 /* Recurse on first child */
4587 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
4591 elem = xmlParseName(ctxt);
4593 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595 ctxt->sax->error(ctxt->userData,
4596 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4597 ctxt->wellFormed = 0;
4598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4601 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4604 ctxt->sax->error(ctxt->userData,
4605 "xmlParseElementChildrenContentDecl : out of memory\n");
4606 ctxt->errNo = XML_ERR_NO_MEMORY;
4607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4613 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4615 } else if (RAW == '*') {
4616 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4618 } else if (RAW == '+') {
4619 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4622 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
/* Separator/item pairs up to the closing ')'. */
4629 while (RAW != ')') {
4631 * Each loop we parse one separator and one element.
4634 if (type == 0) type = CUR;
4637 * Detect "Name | Name , Name" error
4639 else if (type != CUR) {
4640 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4645 ctxt->wellFormed = 0;
4646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4647 if ((last != NULL) && (last != ret))
4648 xmlFreeElementContent(last);
4650 xmlFreeElementContent(ret);
4655 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4657 if ((last != NULL) && (last != ret))
4658 xmlFreeElementContent(last);
4659 xmlFreeElementContent(ret);
4677 } else if (RAW == '|') {
4678 if (type == 0) type = CUR;
4681 * Detect "Name , Name | Name" error
4683 else if (type != CUR) {
4684 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4686 ctxt->sax->error(ctxt->userData,
4687 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4689 ctxt->wellFormed = 0;
4690 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4691 if ((last != NULL) && (last != ret))
4692 xmlFreeElementContent(last);
4694 xmlFreeElementContent(ret);
4699 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4701 if ((last != NULL) && (last != ret))
4702 xmlFreeElementContent(last);
4704 xmlFreeElementContent(ret);
4723 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4725 ctxt->sax->error(ctxt->userData,
4726 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4727 ctxt->wellFormed = 0;
4728 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4730 xmlFreeElementContent(ret);
4737 xmlParserInputPtr input = ctxt->input;
4738 /* Recurse on second child */
4741 last = xmlParseElementChildrenContentDecl(ctxt, input);
4744 elem = xmlParseName(ctxt);
4746 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4748 ctxt->sax->error(ctxt->userData,
4749 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4750 ctxt->wellFormed = 0;
4751 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4753 xmlFreeElementContent(ret);
4756 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4759 last->ocur = XML_ELEMENT_CONTENT_OPT;
4761 } else if (RAW == '*') {
4762 last->ocur = XML_ELEMENT_CONTENT_MULT;
4764 } else if (RAW == '+') {
4765 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4768 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4774 if ((cur != NULL) && (last != NULL)) {
/* Validation only: the group must close in the input passed as inputchk. */
4779 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4780 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4781 if (ctxt->vctxt.error != NULL)
4782 ctxt->vctxt.error(ctxt->vctxt.userData,
4783 "Element content declaration doesn't start and stop in the same entity\n");
4789 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4791 } else if (RAW == '*') {
4793 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4796 * Some normalization:
4797 * (a | b* | c?)* == (a | b | c)*
4799 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4800 if ((cur->c1 != NULL) &&
4801 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4802 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4803 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4804 if ((cur->c2 != NULL) &&
4805 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4806 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4807 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4812 } else if (RAW == '+') {
4816 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4818 * Some normalization:
4819 * (a | b*)+ == (a | b)*
4820 * (a | b?)+ == (a | b)*
4822 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4823 if ((cur->c1 != NULL) &&
4824 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4825 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4826 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4829 if ((cur->c2 != NULL) &&
4830 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4831 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4832 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4838 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4846 * xmlParseElementContentDecl:
4847 * @ctxt: an XML parser context
4848 * @name: the name of the element being defined.
4849 * @result: the Element Content pointer will be stored here if any
4851 * parse the declaration for an Element content either Mixed or Children,
4852 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4854 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4856 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/*
 * Implementation notes (parts of this function are not visible in this
 * view): chooses between the two content-model parsers. When the keyword
 * test starting with '#PCDAT' matches, xmlParseElementMixedContentDecl()
 * is used and the result type is XML_ELEMENT_TYPE_MIXED; otherwise
 * xmlParseElementChildrenContentDecl() is used and the type is
 * XML_ELEMENT_TYPE_ELEMENT. The error branch reports
 * XML_ERR_ELEMCONTENT_NOT_STARTED with @name in the message.
 */
4860 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4861 xmlElementContentPtr *result) {
4863 xmlElementContentPtr tree = NULL;
/* Starting input, handed to the sub-parsers as their inputchk argument. */
4864 xmlParserInputPtr input = ctxt->input;
4870 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
4873 "xmlParseElementContentDecl : %s '(' expected\n", name);
4874 ctxt->wellFormed = 0;
4875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4881 if ((RAW == '#') && (NXT(1) == 'P') &&
4882 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4883 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4885 tree = xmlParseElementMixedContentDecl(ctxt, input);
4886 res = XML_ELEMENT_TYPE_MIXED;
4888 tree = xmlParseElementChildrenContentDecl(ctxt, input);
4889 res = XML_ELEMENT_TYPE_ELEMENT;
4897 * xmlParseElementDecl:
4898 * @ctxt: an XML parser context
4900 * parse an Element declaration.
4902 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4904 * [ VC: Unique Element Type Declaration ]
4905 * No element type may be declared more than once
4907 * Returns the type of the element, or -1 in case of error
/*
 * Implementation notes (parts of this function are not visible in this
 * view):
 *  - after the opening keyword a blank and the element name are required
 *    (XML_ERR_SPACE_REQUIRED / XML_ERR_NAME_REQUIRED).
 *  - the content spec is classified as XML_ELEMENT_TYPE_EMPTY,
 *    XML_ELEMENT_TYPE_ANY, or a parenthesised model parsed by
 *    xmlParseElementContentDecl().
 *  - any other start is an error; a '%' met with ctxt->external == 0 and
 *    a single input gets the more specific XML_ERR_PEREF_IN_INT_SUBSET.
 *  - the result is passed to the SAX elementDecl callback unless
 *    ctxt->disableSAX is set, and a non-NULL content tree is then freed.
 */
4910 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4913 xmlElementContentPtr content = NULL;
4916 if ((RAW == '<') && (NXT(1) == '!') &&
4917 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4918 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4919 (NXT(6) == 'E') && (NXT(7) == 'N') &&
/* Input in which the declaration starts; compared with ctxt->input at the end. */
4921 xmlParserInputPtr input = ctxt->input;
4924 if (!IS_BLANK(CUR)) {
4925 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4927 ctxt->sax->error(ctxt->userData,
4928 "Space required after 'ELEMENT'\n");
4929 ctxt->wellFormed = 0;
4930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4933 name = xmlParseName(ctxt);
4935 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4937 ctxt->sax->error(ctxt->userData,
4938 "xmlParseElementDecl: no name for Element\n");
4939 ctxt->wellFormed = 0;
4940 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4943 while ((RAW == 0) && (ctxt->inputNr > 1))
4945 if (!IS_BLANK(CUR)) {
4946 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4948 ctxt->sax->error(ctxt->userData,
4949 "Space required after the element name\n");
4950 ctxt->wellFormed = 0;
4951 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4954 if ((RAW == 'E') && (NXT(1) == 'M') &&
4955 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4959 * Element must always be empty.
4961 ret = XML_ELEMENT_TYPE_EMPTY;
4962 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4966 * Element is a generic container.
4968 ret = XML_ELEMENT_TYPE_ANY;
4969 } else if (RAW == '(') {
4970 ret = xmlParseElementContentDecl(ctxt, name, &content);
4973 * [ WFC: PEs in Internal Subset ] error handling.
4975 if ((RAW == '%') && (ctxt->external == 0) &&
4976 (ctxt->inputNr == 1)) {
4977 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4979 ctxt->sax->error(ctxt->userData,
4980 "PEReference: forbidden within markup decl in internal subset\n");
4982 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4984 ctxt->sax->error(ctxt->userData,
4985 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4987 ctxt->wellFormed = 0;
4988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4989 if (name != NULL) xmlFree(name);
4995 * Pop-up of finished entities.
4997 while ((RAW == 0) && (ctxt->inputNr > 1))
5002 ctxt->errNo = XML_ERR_GT_REQUIRED;
5003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5004 ctxt->sax->error(ctxt->userData,
5005 "xmlParseElementDecl: expected '>' at the end\n");
5006 ctxt->wellFormed = 0;
5007 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The declaration has to end in the input it started in. */
5009 if (input != ctxt->input) {
5010 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5012 ctxt->sax->error(ctxt->userData,
5013 "Element declaration doesn't start and stop in the same entity\n");
5014 ctxt->wellFormed = 0;
5015 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Publish the declaration through SAX unless SAX output has been disabled. */
5019 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5020 (ctxt->sax->elementDecl != NULL))
5021 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5024 if (content != NULL) {
5025 xmlFreeElementContent(content);
5035 * xmlParseConditionalSections
5036 * @ctxt: an XML parser context
5038 * [61] conditionalSect ::= includeSect | ignoreSect
5039 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5040 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5041 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5042 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * Implementation notes (parts of this function are not visible in this
 * view):
 *  - INCLUDE: the body is parsed in a loop that recurses on a nested
 *    '<![', sends '%' to xmlParsePEReference() and everything else that
 *    is not blank to xmlParseMarkupDecl(). A pass that moves neither
 *    CUR_PTR nor input->consumed raises XML_ERR_EXT_SUBSET_NOT_FINISHED.
 *  - IGNORE: ctxt->disableSAX and ctxt->instate are saved, the state is
 *    switched to XML_PARSER_IGNORE, the content is scanned with a depth
 *    counter, and both fields are restored afterwards.
 *  - a missing '[' or an unknown keyword raises XML_ERR_CONDSEC_INVALID;
 *    the last branch raises XML_ERR_CONDSEC_NOT_FINISHED.
 *  - when xmlParserDebugEntities is set, entering and leaving a section
 *    is traced through xmlGenericError().
 */
5046 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5049 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5050 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5055 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5057 ctxt->sax->error(ctxt->userData,
5058 "XML conditional section '[' expected\n");
5059 ctxt->wellFormed = 0;
5060 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5064 if (xmlParserDebugEntities) {
5065 if ((ctxt->input != NULL) && (ctxt->input->filename))
5066 xmlGenericError(xmlGenericErrorContext,
5067 "%s(%d): ", ctxt->input->filename,
5069 xmlGenericError(xmlGenericErrorContext,
5070 "Entering INCLUDE Conditional Section\n");
5073 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
/* Progress markers for this pass, compared again at the bottom of the loop. */
5075 const xmlChar *check = CUR_PTR;
5076 unsigned int cons = ctxt->input->consumed;
5078 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5079 xmlParseConditionalSections(ctxt);
5080 } else if (IS_BLANK(CUR)) {
5082 } else if (RAW == '%') {
5083 xmlParsePEReference(ctxt);
5085 xmlParseMarkupDecl(ctxt);
5088 * Pop-up of finished entities.
5090 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Neither the position nor the consumed count moved during this pass. */
5093 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5094 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5096 ctxt->sax->error(ctxt->userData,
5097 "Content error in the external subset\n");
5098 ctxt->wellFormed = 0;
5099 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5103 if (xmlParserDebugEntities) {
5104 if ((ctxt->input != NULL) && (ctxt->input->filename))
5105 xmlGenericError(xmlGenericErrorContext,
5106 "%s(%d): ", ctxt->input->filename,
5108 xmlGenericError(xmlGenericErrorContext,
5109 "Leaving INCLUDE Conditional Section\n");
5112 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5113 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5115 xmlParserInputState instate;
5121 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5123 ctxt->sax->error(ctxt->userData,
5124 "XML conditional section '[' expected\n");
5125 ctxt->wellFormed = 0;
5126 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5130 if (xmlParserDebugEntities) {
5131 if ((ctxt->input != NULL) && (ctxt->input->filename))
5132 xmlGenericError(xmlGenericErrorContext,
5133 "%s(%d): ", ctxt->input->filename,
5135 xmlGenericError(xmlGenericErrorContext,
5136 "Entering IGNORE Conditional Section\n");
5140 * Parse up to the end of the conditional section
5141 * But disable SAX event generating DTD building in the meantime
5143 state = ctxt->disableSAX;
5144 instate = ctxt->instate;
5145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5146 ctxt->instate = XML_PARSER_IGNORE;
/* A ']]>' lowers depth; the scan stops once depth is negative or input ends. */
5148 while ((depth >= 0) && (RAW != 0)) {
5149 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5154 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5155 if (--depth >= 0) SKIP(3);
/* Restore the SAX and parser state saved before the scan. */
5162 ctxt->disableSAX = state;
5163 ctxt->instate = instate;
5165 if (xmlParserDebugEntities) {
5166 if ((ctxt->input != NULL) && (ctxt->input->filename))
5167 xmlGenericError(xmlGenericErrorContext,
5168 "%s(%d): ", ctxt->input->filename,
5170 xmlGenericError(xmlGenericErrorContext,
5171 "Leaving IGNORE Conditional Section\n");
5175 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5177 ctxt->sax->error(ctxt->userData,
5178 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5179 ctxt->wellFormed = 0;
5180 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5187 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5189 ctxt->sax->error(ctxt->userData,
5190 "XML conditional section not closed\n");
5191 ctxt->wellFormed = 0;
5192 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5199 * xmlParseMarkupDecl:
5200 * @ctxt: an XML parser context
5202 * parse Markup declarations
5204 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5205 * NotationDecl | PI | Comment
5207 * [ VC: Proper Declaration/PE Nesting ]
5208 * Parameter-entity replacement text must be properly nested with
5209 * markup declarations. That is to say, if either the first character
5210 * or the last character of a markup declaration (markupdecl above) is
5211 * contained in the replacement text for a parameter-entity reference,
5212 * both must be contained in the same replacement text.
5214 * [ WFC: PEs in Internal Subset ]
5215 * In the internal DTD subset, parameter-entity references can occur
5216 * only where markup declarations can occur, not within markup declarations.
5217 * (This does not apply to references that occur in external parameter
5218 * entities or to the external subset.)
/*
 * Implementation notes (the conditions guarding the first calls are not
 * visible in this view): calls the element, attribute-list, entity,
 * notation and comment parsers. A PE reference is parsed here only when
 * ctxt->external is 0 and there is a single input; a '<![' is handed to
 * xmlParseConditionalSections() only when ctxt->external is 0 and inputs
 * are stacked. ctxt->instate is set to XML_PARSER_DTD at the end.
 */
5221 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5223 xmlParseElementDecl(ctxt);
5224 xmlParseAttributeListDecl(ctxt);
5225 xmlParseEntityDecl(ctxt);
5226 xmlParseNotationDecl(ctxt);
5228 xmlParseComment(ctxt);
5230 * This is only for internal subset. On external entities,
5231 * the replacement is done before parsing stage
5233 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5234 xmlParsePEReference(ctxt);
5237 * Conditional sections are allowed from entities included
5238 * by PE References in the internal subset.
5240 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5241 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5242 xmlParseConditionalSections(ctxt);
5246 ctxt->instate = XML_PARSER_DTD;
5251 * @ctxt: an XML parser context
5253 * parse an XML declaration header for external entities
5255 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5257 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
/*
 * Implementation notes (parts of this function are not visible in this
 * view):
 *  - '<?xml' followed by a blank is required, otherwise
 *    XML_ERR_XMLDECL_NOT_STARTED is raised.
 *  - the version is read with xmlParseVersionInfo(); when absent a copy
 *    of XML_DEFAULT_VERSION is used. It is stored in
 *    ctxt->input->version.
 *  - the encoding declaration is parsed next, with a dedicated check for
 *    XML_ERR_UNSUPPORTED_ENCODING.
 *  - the declaration must close with '?>'; a bare '>' and any other
 *    ending both raise XML_ERR_XMLDECL_NOT_FINISHED.
 */
5261 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5265 * We know that '<?xml' is here.
5267 if ((RAW == '<') && (NXT(1) == '?') &&
5268 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5269 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5272 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5274 ctxt->sax->error(ctxt->userData,
5275 "Text declaration '<?xml' required\n");
5276 ctxt->wellFormed = 0;
5277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5282 if (!IS_BLANK(CUR)) {
5283 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5285 ctxt->sax->error(ctxt->userData,
5286 "Space needed after '<?xml'\n");
5287 ctxt->wellFormed = 0;
5288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5293 * We may have the VersionInfo here.
5295 version = xmlParseVersionInfo(ctxt);
/* No version given: fall back to a copy of the default version string. */
5296 if (version == NULL)
5297 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5299 if (!IS_BLANK(CUR)) {
5300 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5302 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5303 ctxt->wellFormed = 0;
5304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The version string is stored on the current input. */
5307 ctxt->input->version = version;
5310 * We must have the encoding declaration
5312 xmlParseEncodingDecl(ctxt);
5313 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5315 * The XML REC instructs us to stop parsing right here
5321 if ((RAW == '?') && (NXT(1) == '>')) {
5323 } else if (RAW == '>') {
5324 /* Deprecated old WD ... */
5325 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "XML declaration must end-up with '?>'\n");
5329 ctxt->wellFormed = 0;
5330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5333 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5335 ctxt->sax->error(ctxt->userData,
5336 "parsing XML declaration: '?>' expected\n");
5337 ctxt->wellFormed = 0;
5338 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5339 MOVETO_ENDTAG(CUR_PTR);
5345 * xmlParseExternalSubset:
5346 * @ctxt: an XML parser context
5347 * @ExternalID: the external identifier
5348 * @SystemID: the system identifier (or URL)
5350 * parse Markup declarations from an external subset
5352 * [30] extSubset ::= textDecl? extSubsetDecl
5354 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/*
 * Implementation notes (parts of this function are not visible in this
 * view):
 *  - an optional text declaration is handled first; when it leaves
 *    XML_ERR_UNSUPPORTED_ENCODING in ctxt->errNo the parser state is set
 *    to XML_PARSER_EOF.
 *  - ctxt->myDoc is created with version "1.0" when missing, and an
 *    internal subset carrying @ExternalID and @SystemID is created when
 *    the document has none.
 *  - the main loop runs while the input starts with '<?', '<!', '%' or a
 *    blank, and dispatches to xmlParseConditionalSections(),
 *    xmlParsePEReference() or xmlParseMarkupDecl().
 *  - a pass that moves neither CUR_PTR nor input->consumed raises
 *    XML_ERR_EXT_SUBSET_NOT_FINISHED, as does the final error branch.
 */
5357 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5358 const xmlChar *SystemID) {
5360 if ((RAW == '<') && (NXT(1) == '?') &&
5361 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5363 xmlParseTextDecl(ctxt);
5364 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5366 * The XML REC instructs us to stop parsing right here
5368 ctxt->instate = XML_PARSER_EOF;
/* Make sure a document and an internal subset exist. */
5372 if (ctxt->myDoc == NULL) {
5373 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5375 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5376 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5378 ctxt->instate = XML_PARSER_DTD;
5380 while (((RAW == '<') && (NXT(1) == '?')) ||
5381 ((RAW == '<') && (NXT(1) == '!')) ||
5382 (RAW == '%') || IS_BLANK(CUR)) {
/* Progress markers for this pass, compared again at the bottom of the loop. */
5383 const xmlChar *check = CUR_PTR;
5384 unsigned int cons = ctxt->input->consumed;
5387 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5388 xmlParseConditionalSections(ctxt);
5389 } else if (IS_BLANK(CUR)) {
5391 } else if (RAW == '%') {
5392 xmlParsePEReference(ctxt);
5394 xmlParseMarkupDecl(ctxt);
5397 * Pop-up of finished entities.
5399 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Neither the position nor the consumed count moved during this pass. */
5402 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5403 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5405 ctxt->sax->error(ctxt->userData,
5406 "Content error in the external subset\n");
5407 ctxt->wellFormed = 0;
5408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5414 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5416 ctxt->sax->error(ctxt->userData,
5417 "Extra content at the end of the document\n");
5418 ctxt->wellFormed = 0;
5419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5425 * xmlParseReference:
5426 * @ctxt: an XML parser context
5428 * parse and handle entity references in content, depending on the SAX
4429 * interface, this may end up in a call to characters() if this is a
4430 * CharRef, a predefined entity, if there is no reference() callback,
4431 * or if the parser was asked to switch to that mode.
5433 * [67] Reference ::= EntityRef | CharRef
5436 xmlParseReference(xmlParserCtxtPtr ctxt) {
/*
 * Handles one reference at the current input position and returns at
 * once unless the current character is '&'.  When the next character is
 * '#', the value comes from xmlParseCharRef(); otherwise the entity is
 * looked up with xmlParseEntityRef() and nothing more is done when that
 * lookup returns NULL.
 */
5439 if (RAW != '&') return;
5441 if (NXT(1) == '#') {
5445 int value = xmlParseCharRef(ctxt);
/*
 * Non-UTF-8 buffers: a value up to 0xFF is delivered through
 * characters(); a larger one is formatted as "#x<hex>" or "#<decimal>"
 * and handed to reference() instead.
 */
5447 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5449 * So we are using non-UTF-8 buffers
5450 * Check that the char fit on 8bits, if not
5451 * generate a CharRef.
5453 if (value <= 0xFF) {
5456 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5457 (!ctxt->disableSAX))
5458 ctxt->sax->characters(ctxt->userData, out, 1);
5460 if ((hex == 'x') || (hex == 'X'))
5461 snprintf((char *)out, sizeof(out), "#x%X", value);
5463 snprintf((char *)out, sizeof(out), "#%d", value);
5464 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5465 (!ctxt->disableSAX))
5466 ctxt->sax->reference(ctxt->userData, out);
5470 * Just encode the value in UTF-8
5472 COPY_BUF(0 ,out, i, value);
5474 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5475 (!ctxt->disableSAX))
5476 ctxt->sax->characters(ctxt->userData, out, i);
/* Named entity reference: resolve it; a NULL result ends the processing. */
5479 ent = xmlParseEntityRef(ctxt);
5480 if (ent == NULL) return;
5481 if (!ctxt->wellFormed)
5483 if ((ent->name != NULL) &&
5484 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5485 xmlNodePtr list = NULL;
5490 * The first reference to the entity trigger a parsing phase
5491 * where the ent->children is filled with the result from
5494 if (ent->children == NULL) {
5496 value = ent->content;
5499 * Check that this entity is well formed
5501 if ((value != NULL) &&
5502 (value[1] == 0) && (value[0] == '<') &&
5503 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5505 * DONE: get definite answer on this !!!
5506 * Lots of entity decls are used to declare a single
5509 * Which seems to be valid since
5510 * 2.4: The ampersand character (&) and the left angle
5511 * bracket (<) may appear in their literal form only
5512 * when used ... They are also legal within the literal
5513 * entity value of an internal entity declaration;i
5514 * see "4.3.2 Well-Formed Parsed Entities".
5515 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5516 * Looking at the OASIS test suite and James Clark
5517 * tests, this is broken. However the XML REC uses
5518 * it. Is the XML REC not well-formed ????
5519 * This is a hack to avoid this problem
5521 * ANSWER: since lt gt amp .. are already defined,
5522 * this is a redefinition and hence the fact that the
5523 * content is not well balanced is not a Wf error, this
5524 * is lousy but acceptable.
5526 list = xmlNewDocText(ctxt->myDoc, value);
/*
 * Keep the text node as the entity's children only for an internal
 * general entity that has none yet.
 */
5528 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5529 (ent->children == NULL)) {
5530 ent->children = list;
5533 list->parent = (xmlNodePtr) ent;
5535 xmlFreeNodeList(list);
5537 } else if (list != NULL) {
5538 xmlFreeNodeList(list);
5542 * 4.3.2: An internal general parsed entity is well-formed
5543 * if its replacement text matches the production labeled
5549 * This is a bit hackish but this seems the best
5550 * way to make sure both SAX and DOM entity support
5553 if (ctxt->userData == ctxt)
5556 user_data = ctxt->userData;
5558 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5560 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5561 value, user_data, &list);
5563 } else if (ent->etype ==
5564 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5566 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
5567 ctxt->sax, user_data, ctxt->depth,
5568 ent->URI, ent->ExternalID, &list);
5572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5573 ctxt->sax->error(ctxt->userData,
5574 "Internal: invalid entity type\n");
/*
 * A loop reported by the sub-parse is recorded as XML_ERR_ENTITY_LOOP
 * and makes the document not well-formed.
 */
5576 if (ret == XML_ERR_ENTITY_LOOP) {
5577 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5579 ctxt->sax->error(ctxt->userData,
5580 "Detected entity reference loop\n");
5581 ctxt->wellFormed = 0;
5582 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5584 } else if ((ret == 0) && (list != NULL)) {
/*
 * Successful expansion with a non-empty result: hand the parsed node
 * list to the entity if it does not own children yet.
 */
5585 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5586 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
5587 (ent->children == NULL)) {
5588 ent->children = list;
5589 if (ctxt->replaceEntities) {
5591 * Prune it directly in the generated document
5592 * except for single text nodes.
5594 if ((list->type == XML_TEXT_NODE) &&
5595 (list->next == NULL)) {
5596 list->parent = (xmlNodePtr) ent;
5601 while (list != NULL) {
5602 list->parent = (xmlNodePtr) ctxt->node;
5603 list->doc = ctxt->myDoc;
5604 if (list->next == NULL)
5608 list = ent->children;
5609 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5610 xmlAddEntityReference(ent, list, NULL);
5614 while (list != NULL) {
5615 list->parent = (xmlNodePtr) ent;
5616 if (list->next == NULL)
5622 xmlFreeNodeList(list);
5625 } else if (ret > 0) {
5627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5628 ctxt->sax->error(ctxt->userData,
5629 "Entity value required\n");
5630 ctxt->wellFormed = 0;
5631 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5632 } else if (list != NULL) {
5633 xmlFreeNodeList(list);
/*
 * Without entity substitution, only announce the reference by name
 * through the SAX reference() callback.
 */
5638 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5639 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5643 ctxt->sax->reference(ctxt->userData, ent->name);
5645 } else if (ctxt->replaceEntities) {
5646 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5648 * Seems we are generating the DOM content, do
5649 * a simple tree copy for all references except the first
5650 * In the first occurrence list contains the replacement
5652 if ((list == NULL) && (ent->owner == 0)) {
5653 xmlNodePtr nw = NULL, cur, firstChild = NULL;
5654 cur = ent->children;
5655 while (cur != NULL) {
5656 nw = xmlCopyNode(cur, 1);
5658 nw->_private = cur->_private;
5659 if (firstChild == NULL){
5662 xmlAddChild(ctxt->node, nw);
5664 if (cur == ent->last)
5668 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5669 xmlAddEntityReference(ent, firstChild, nw);
5670 } else if (list == NULL) {
5671 xmlNodePtr nw = NULL, cur, next, last,
5674 * Copy the entity child list and make it the new
5675 * entity child list. The goal is to make sure any
5676 * ID or REF referenced will be the one from the
5677 * document content and not the entity copy.
5679 cur = ent->children;
5680 ent->children = NULL;
5683 while (cur != NULL) {
5687 nw = xmlCopyNode(cur, 1);
5689 nw->_private = cur->_private;
5690 if (firstChild == NULL){
5693 xmlAddChild((xmlNodePtr) ent, nw);
5694 xmlAddChild(ctxt->node, cur);
5701 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5702 xmlAddEntityReference(ent, firstChild, nw);
5705 * the name change is to avoid coalescing of the
5706 * node with a possible previous text one which
5707 * would make ent->children a dangling pointer
5709 if (ent->children->type == XML_TEXT_NODE)
5710 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5711 if ((ent->last != ent->children) &&
5712 (ent->last->type == XML_TEXT_NODE))
5713 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5714 xmlAddChildList(ctxt->node, ent->children);
5718 * This is to avoid a nasty side effect, see
5719 * characters() in SAX.c
5726 * Probably running in SAX mode
5728 xmlParserInputPtr input;
5730 input = xmlNewEntityInputStream(ctxt, ent);
5731 xmlPushInput(ctxt, input);
/*
 * An external parsed entity may open with a text declaration ("<?xml"
 * followed by a blank); parse it before the entity content.
 */
5732 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5733 (RAW == '<') && (NXT(1) == '?') &&
5734 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5735 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5736 xmlParseTextDecl(ctxt);
5737 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5739 * The XML REC instructs us to stop parsing right here
5741 ctxt->instate = XML_PARSER_EOF;
5744 if (input->standalone == 1) {
5745 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5747 ctxt->sax->error(ctxt->userData,
5748 "external parsed entities cannot be standalone\n");
5749 ctxt->wellFormed = 0;
5750 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5758 if (val == NULL) return;
5760 * inline the entity.
5762 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5763 (!ctxt->disableSAX))
5764 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5770 * xmlParseEntityRef:
5771 * @ctxt: an XML parser context
5773 * parse an entity reference
5775 * [68] EntityRef ::= '&' Name ';'
5777 * [ WFC: Entity Declared ]
5778 * In a document without any DTD, a document with only an internal DTD
5779 * subset which contains no parameter entity references, or a document
5780 * with "standalone='yes'", the Name given in the entity reference
5781 * must match that in an entity declaration, except that well-formed
5782 * documents need not declare any of the following entities: amp, lt,
5783 * gt, apos, quot. The declaration of a parameter entity must precede
5784 * any reference to it. Similarly, the declaration of a general entity
5785 * must precede any reference to it which appears in a default value in an
5786 * attribute-list declaration. Note that if entities are declared in the
5787 * external subset or in external parameter entities, a non-validating
5788 * processor is not obligated to read and process their declarations;
5789 * for such documents, the rule that an entity must be declared is a
5790 * well-formedness constraint only if standalone='yes'.
5792 * [ WFC: Parsed Entity ]
5793 * An entity reference must not contain the name of an unparsed entity
5795 * Returns the xmlEntityPtr if found, or NULL otherwise.
5798 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
/*
 * Reads the entity Name with xmlParseName(), resolves it (SAX getEntity
 * first, then the fallbacks below) and runs the well-formedness checks
 * on the result.  Most error branches share one pattern: set
 * ctxt->errNo, call ctxt->sax->error when a handler is installed, clear
 * ctxt->wellFormed, and set ctxt->disableSAX unless ctxt->recovery is
 * non-zero.
 */
5800 xmlEntityPtr ent = NULL;
5806 name = xmlParseName(ctxt);
5808 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5810 ctxt->sax->error(ctxt->userData,
5811 "xmlParseEntityRef: no name\n");
5812 ctxt->wellFormed = 0;
5813 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5818 * Ask first SAX for entity resolution, otherwise try the
5821 if (ctxt->sax != NULL) {
5822 if (ctxt->sax->getEntity != NULL)
5823 ent = ctxt->sax->getEntity(ctxt->userData, name);
/*
 * Fallbacks, each tried only while the document is still well-formed
 * and no entity was found: the predefined entities, then getEntity()
 * when the user data is the parser context itself.
 */
5824 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
5825 ent = xmlGetPredefinedEntity(name);
5826 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5827 (ctxt->userData==ctxt)) {
5828 ent = getEntity(ctxt, name);
5832 * [ WFC: Entity Declared ]
5833 * In a document without any DTD, a document with only an
5834 * internal DTD subset which contains no parameter entity
5835 * references, or a document with "standalone='yes'", the
5836 * Name given in the entity reference must match that in an
5837 * entity declaration, except that well-formed documents
5838 * need not declare any of the following entities: amp, lt,
5840 * The declaration of a parameter entity must precede any
5842 * Similarly, the declaration of a general entity must
5843 * precede any reference to it which appears in a default
5844 * value in an attribute-list declaration. Note that if
5845 * entities are declared in the external subset or in
5846 * external parameter entities, a non-validating processor
5847 * is not obligated to read and process their declarations;
5848 * for such documents, the rule that an entity must be
5849 * declared is a well-formedness constraint only if
5853 if ((ctxt->standalone == 1) ||
5854 ((ctxt->hasExternalSubset == 0) &&
5855 (ctxt->hasPErefs == 0))) {
/* In this configuration a missing declaration is a well-formedness error. */
5856 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5858 ctxt->sax->error(ctxt->userData,
5859 "Entity '%s' not defined\n", name);
5860 ctxt->wellFormed = 0;
5862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/*
 * Otherwise the XML_WAR_UNDECLARED_ENTITY code is recorded; note that
 * the report still goes through the SAX error callback, not warning.
 * NOTE(review): xmlParseStringEntityRef uses warning() here - confirm
 * the difference is intended.
 */
5864 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866 ctxt->sax->error(ctxt->userData,
5867 "Entity '%s' not defined\n", name);
5873 * [ WFC: Parsed Entity ]
5874 * An entity reference must not contain the name of an
5877 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5878 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5880 ctxt->sax->error(ctxt->userData,
5881 "Entity reference to unparsed entity %s\n", name);
5882 ctxt->wellFormed = 0;
5883 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5887 * [ WFC: No External Entity References ]
5888 * Attribute values cannot contain direct or indirect
5889 * entity references to external entities.
5891 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5892 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5893 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5895 ctxt->sax->error(ctxt->userData,
5896 "Attribute references external entity '%s'\n", name);
5897 ctxt->wellFormed = 0;
5898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5901 * [ WFC: No < in Attribute Values ]
5902 * The replacement text of any entity referred to directly or
5903 * indirectly in an attribute value (other than "<") must
5906 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5908 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5909 (ent->content != NULL) &&
5910 (xmlStrchr(ent->content, '<'))) {
5911 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5913 ctxt->sax->error(ctxt->userData,
5914 "'<' in entity '%s' is not allowed in attributes values\n", name);
5915 ctxt->wellFormed = 0;
5916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5920 * Internal check, no parameter entities here ...
5923 switch (ent->etype) {
5924 case XML_INTERNAL_PARAMETER_ENTITY:
5925 case XML_EXTERNAL_PARAMETER_ENTITY:
5926 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5928 ctxt->sax->error(ctxt->userData,
5929 "Attempt to reference the parameter entity '%s'\n", name);
5930 ctxt->wellFormed = 0;
5931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5939 * [ WFC: No Recursion ]
5940 * A parsed entity must not contain a recursive reference
5941 * to itself, either directly or indirectly.
5942 * Done somewhere else
5946 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5948 ctxt->sax->error(ctxt->userData,
5949 "xmlParseEntityRef: expecting ';'\n");
5950 ctxt->wellFormed = 0;
5951 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5960 * xmlParseStringEntityRef:
5961 * @ctxt: an XML parser context
5962 * @str: a pointer to an index in the string
5964 * parse an entity reference, but this version parses it from
5967 * [68] EntityRef ::= '&' Name ';'
5969 * [ WFC: Entity Declared ]
5970 * In a document without any DTD, a document with only an internal DTD
5971 * subset which contains no parameter entity references, or a document
5972 * with "standalone='yes'", the Name given in the entity reference
5973 * must match that in an entity declaration, except that well-formed
5974 * documents need not declare any of the following entities: amp, lt,
5975 * gt, apos, quot. The declaration of a parameter entity must precede
5976 * any reference to it. Similarly, the declaration of a general entity
5977 * must precede any reference to it which appears in a default value in an
5978 * attribute-list declaration. Note that if entities are declared in the
5979 * external subset or in external parameter entities, a non-validating
5980 * processor is not obligated to read and process their declarations;
5981 * for such documents, the rule that an entity must be declared is a
5982 * well-formedness constraint only if standalone='yes'.
5984 * [ WFC: Parsed Entity ]
5985 * An entity reference must not contain the name of an unparsed entity
5987 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5988 * is updated to the current location in the string.
5991 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
/*
 * String-based variant of the entity reference parser: the Name is read
 * with xmlParseStringName() from a local cursor rather than from the
 * parser input, and a NULL str or *str is checked for before reading.
 * The entity checks mirror the ones applied to references read from the
 * input stream.
 */
5995 xmlEntityPtr ent = NULL;
5997 if ((str == NULL) || (*str == NULL))
6004 name = xmlParseStringName(ctxt, &ptr);
6006 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6008 ctxt->sax->error(ctxt->userData,
6009 "xmlParseStringEntityRef: no name\n");
6010 ctxt->wellFormed = 0;
6011 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6016 * Ask first SAX for entity resolution, otherwise try the
6019 if (ctxt->sax != NULL) {
6020 if (ctxt->sax->getEntity != NULL)
6021 ent = ctxt->sax->getEntity(ctxt->userData, name);
6023 ent = xmlGetPredefinedEntity(name);
/*
 * Last resort when the user data is the parser context itself; this
 * test does not look at ctxt->wellFormed.
 */
6024 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6025 ent = getEntity(ctxt, name);
6029 * [ WFC: Entity Declared ]
6030 * In a document without any DTD, a document with only an
6031 * internal DTD subset which contains no parameter entity
6032 * references, or a document with "standalone='yes'", the
6033 * Name given in the entity reference must match that in an
6034 * entity declaration, except that well-formed documents
6035 * need not declare any of the following entities: amp, lt,
6037 * The declaration of a parameter entity must precede any
6039 * Similarly, the declaration of a general entity must
6040 * precede any reference to it which appears in a default
6041 * value in an attribute-list declaration. Note that if
6042 * entities are declared in the external subset or in
6043 * external parameter entities, a non-validating processor
6044 * is not obligated to read and process their declarations;
6045 * for such documents, the rule that an entity must be
6046 * declared is a well-formedness constraint only if
6050 if ((ctxt->standalone == 1) ||
6051 ((ctxt->hasExternalSubset == 0) &&
6052 (ctxt->hasPErefs == 0))) {
/* In this configuration a missing declaration is a well-formedness error. */
6053 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6055 ctxt->sax->error(ctxt->userData,
6056 "Entity '%s' not defined\n", name);
6057 ctxt->wellFormed = 0;
6058 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Otherwise only a SAX warning is emitted, under XML_WAR_UNDECLARED_ENTITY. */
6060 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6061 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6062 ctxt->sax->warning(ctxt->userData,
6063 "Entity '%s' not defined\n", name);
6068 * [ WFC: Parsed Entity ]
6069 * An entity reference must not contain the name of an
6072 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6073 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6075 ctxt->sax->error(ctxt->userData,
6076 "Entity reference to unparsed entity %s\n", name);
6077 ctxt->wellFormed = 0;
6078 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6082 * [ WFC: No External Entity References ]
6083 * Attribute values cannot contain direct or indirect
6084 * entity references to external entities.
6086 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6087 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6088 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6090 ctxt->sax->error(ctxt->userData,
6091 "Attribute references external entity '%s'\n", name);
6092 ctxt->wellFormed = 0;
6093 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6096 * [ WFC: No < in Attribute Values ]
6097 * The replacement text of any entity referred to directly or
6098 * indirectly in an attribute value (other than "<") must
6101 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6103 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6104 (ent->content != NULL) &&
6105 (xmlStrchr(ent->content, '<'))) {
6106 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6108 ctxt->sax->error(ctxt->userData,
6109 "'<' in entity '%s' is not allowed in attributes values\n", name);
6110 ctxt->wellFormed = 0;
6111 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6115 * Internal check, no parameter entities here ...
6118 switch (ent->etype) {
6119 case XML_INTERNAL_PARAMETER_ENTITY:
6120 case XML_EXTERNAL_PARAMETER_ENTITY:
6121 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6123 ctxt->sax->error(ctxt->userData,
6124 "Attempt to reference the parameter entity '%s'\n", name);
6125 ctxt->wellFormed = 0;
6126 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6134 * [ WFC: No Recursion ]
6135 * A parsed entity must not contain a recursive reference
6136 * to itself, either directly or indirectly.
6137 * Done somewhere else
6141 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6143 ctxt->sax->error(ctxt->userData,
6144 "xmlParseStringEntityRef: expecting ';'\n");
6145 ctxt->wellFormed = 0;
6146 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6156 * xmlParsePEReference:
6157 * @ctxt: an XML parser context
6159 * parse PEReference declarations
6160 * The entity content is handled directly by pushing its content as
6161 * a new input stream.
6163 * [69] PEReference ::= '%' Name ';'
6165 * [ WFC: No Recursion ]
6166 * A parsed entity must not contain a recursive
6167 * reference to itself, either directly or indirectly.
6169 * [ WFC: Entity Declared ]
6170 * In a document without any DTD, a document with only an internal DTD
6171 * subset which contains no parameter entity references, or a document
6172 * with "standalone='yes'", ... ... The declaration of a parameter
6173 * entity must precede any reference to it...
6175 * [ VC: Entity Declared ]
6176 * In a document with an external subset or external parameter entities
6177 * with "standalone='no'", ... ... The declaration of a parameter entity
6178 * must precede any reference to it...
6181 * Parameter-entity references may only appear in the DTD.
6182 * NOTE: misleading but this is handled.
6185 xmlParsePEReference(xmlParserCtxtPtr ctxt) {
/*
 * Reads the Name of a parameter-entity reference with xmlParseName(),
 * resolves it through the SAX getParameterEntity callback and, when the
 * result really is a parameter entity, pushes a new input stream for it
 * with xmlPushInput().
 */
6187 xmlEntityPtr entity = NULL;
6188 xmlParserInputPtr input;
6192 name = xmlParseName(ctxt);
6194 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6196 ctxt->sax->error(ctxt->userData,
6197 "xmlParsePEReference: no name\n");
6198 ctxt->wellFormed = 0;
6199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6203 if ((ctxt->sax != NULL) &&
6204 (ctxt->sax->getParameterEntity != NULL))
6205 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6207 if (entity == NULL) {
6209 * [ WFC: Entity Declared ]
6210 * In a document without any DTD, a document with only an
6211 * internal DTD subset which contains no parameter entity
6212 * references, or a document with "standalone='yes'", ...
6213 * ... The declaration of a parameter entity must precede
6214 * any reference to it...
6216 if ((ctxt->standalone == 1) ||
6217 ((ctxt->hasExternalSubset == 0) &&
6218 (ctxt->hasPErefs == 0))) {
6219 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
/*
 * Unlike the "no name" and "expecting ';'" reports in this function,
 * the "not found" reports are skipped once SAX is disabled.
 */
6220 if ((!ctxt->disableSAX) &&
6221 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6222 ctxt->sax->error(ctxt->userData,
6223 "PEReference: %%%s; not found\n", name);
6224 ctxt->wellFormed = 0;
6225 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6228 * [ VC: Entity Declared ]
6229 * In a document with an external subset or external
6230 * parameter entities with "standalone='no'", ...
6231 * ... The declaration of a parameter entity must precede
6232 * any reference to it...
6234 if ((!ctxt->disableSAX) &&
6235 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6236 ctxt->sax->warning(ctxt->userData,
6237 "PEReference: %%%s; not found\n", name);
6242 * Internal checking in case the entity quest barfed
6244 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6245 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6246 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6247 ctxt->sax->warning(ctxt->userData,
6248 "Internal: %%%s; is not a parameter entity\n", name);
6249 } else if (ctxt->input->free != deallocblankswrapper) {
/*
 * The current input is not a blanks wrapper yet (its free hook is not
 * deallocblankswrapper): push a blanks-wrapper stream for the entity.
 */
6250 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6251 xmlPushInput(ctxt, input);
6255 * handle the extra spaces added before and after
6256 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6258 input = xmlNewEntityInputStream(ctxt, entity);
6259 xmlPushInput(ctxt, input);
/*
 * An external parameter entity may open with a text declaration
 * ("<?xml" followed by a blank); parse it before the entity content.
 */
6260 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6261 (RAW == '<') && (NXT(1) == '?') &&
6262 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6263 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6264 xmlParseTextDecl(ctxt);
6265 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6267 * The XML REC instructs us to stop parsing
6270 ctxt->instate = XML_PARSER_EOF;
/*
 * Record that a PE reference was seen; the "Entity Declared" test above
 * only treats a missing entity as a well-formedness error in a
 * non-standalone document while hasPErefs is still 0.
 */
6277 ctxt->hasPErefs = 1;
6279 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6281 ctxt->sax->error(ctxt->userData,
6282 "xmlParsePEReference: expecting ';'\n");
6283 ctxt->wellFormed = 0;
6284 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6292 * xmlParseStringPEReference:
6293 * @ctxt: an XML parser context
6294 * @str: a pointer to an index in the string
6296 * parse PEReference declarations
6298 * [69] PEReference ::= '%' Name ';'
6300 * [ WFC: No Recursion ]
6301 * A parsed entity must not contain a recursive
6302 * reference to itself, either directly or indirectly.
6304 * [ WFC: Entity Declared ]
6305 * In a document without any DTD, a document with only an internal DTD
6306 * subset which contains no parameter entity references, or a document
6307 * with "standalone='yes'", ... ... The declaration of a parameter
6308 * entity must precede any reference to it...
6310 * [ VC: Entity Declared ]
6311 * In a document with an external subset or external parameter entities
6312 * with "standalone='no'", ... ... The declaration of a parameter entity
6313 * must precede any reference to it...
6316 * Parameter-entity references may only appear in the DTD.
6317 * NOTE: misleading but this is handled.
6319 * Returns the string of the entity content.
6320 * str is updated to the current value of the index
6323 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
/*
 * String-based variant of the parameter-entity reference parser: returns
 * NULL when str or *str is NULL, reads the Name with
 * xmlParseStringName() from a local cursor and resolves it through the
 * SAX getParameterEntity callback.
 */
6327 xmlEntityPtr entity = NULL;
6329 if ((str == NULL) || (*str == NULL)) return(NULL);
6335 name = xmlParseStringName(ctxt, &ptr);
6337 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6339 ctxt->sax->error(ctxt->userData,
6340 "xmlParseStringPEReference: no name\n");
6341 ctxt->wellFormed = 0;
6342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6348 if ((ctxt->sax != NULL) &&
6349 (ctxt->sax->getParameterEntity != NULL))
6350 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6352 if (entity == NULL) {
6354 * [ WFC: Entity Declared ]
6355 * In a document without any DTD, a document with only an
6356 * internal DTD subset which contains no parameter entity
6357 * references, or a document with "standalone='yes'", ...
6358 * ... The declaration of a parameter entity must precede
6359 * any reference to it...
6361 if ((ctxt->standalone == 1) ||
6362 ((ctxt->hasExternalSubset == 0) &&
6363 (ctxt->hasPErefs == 0))) {
/* In this configuration a missing declaration is a well-formedness error. */
6364 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6366 ctxt->sax->error(ctxt->userData,
6367 "PEReference: %%%s; not found\n", name);
6368 ctxt->wellFormed = 0;
6369 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6372 * [ VC: Entity Declared ]
6373 * In a document with an external subset or external
6374 * parameter entities with "standalone='no'", ...
6375 * ... The declaration of a parameter entity must
6376 * precede any reference to it...
6378 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6379 ctxt->sax->warning(ctxt->userData,
6380 "PEReference: %%%s; not found\n", name);
6385 * Internal checking in case the entity quest barfed
6387 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6388 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6389 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6390 ctxt->sax->warning(ctxt->userData,
6391 "Internal: %%%s; is not a parameter entity\n", name);
/*
 * Record that a PE reference was seen; the "Entity Declared" test above
 * consults this flag on later lookups.
 */
6394 ctxt->hasPErefs = 1;
6396 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6398 ctxt->sax->error(ctxt->userData,
6399 "xmlParseStringPEReference: expecting ';'\n");
6400 ctxt->wellFormed = 0;
6401 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6411 * xmlParseDocTypeDecl:
6412 * @ctxt: an XML parser context
6414 * parse a DOCTYPE declaration
6416 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6417 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6419 * [ VC: Root Element Type ]
6420 * The Name in the document type declaration must match the element
6421 * type of the root element.
6425 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses the DOCTYPE name and the optional external identifier, stores
 * them in the context (intSubName, extSubURI, extSubSystem) and passes
 * them to the SAX internalSubset callback when SAX is enabled.
 */
6426 xmlChar *name = NULL;
6427 xmlChar *ExternalID = NULL;
6428 xmlChar *URI = NULL;
6431 * We know that '<!DOCTYPE' has been detected.
6438 * Parse the DOCTYPE name.
6440 name = xmlParseName(ctxt);
6442 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6444 ctxt->sax->error(ctxt->userData,
6445 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6446 ctxt->wellFormed = 0;
6447 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6449 ctxt->intSubName = name;
6454 * Check for SystemID and ExternalID
6456 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
/* Either identifier being present means an external subset is declared. */
6458 if ((URI != NULL) || (ExternalID != NULL)) {
6459 ctxt->hasExternalSubset = 1;
/*
 * Mind the field names: the URI returned by xmlParseExternalID() goes
 * to extSubURI and its ExternalID out-parameter goes to extSubSystem.
 */
6461 ctxt->extSubURI = URI;
6462 ctxt->extSubSystem = ExternalID;
6467 * Create and update the internal subset.
6469 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6470 (!ctxt->disableSAX))
6471 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6474 * Is there any internal subset declarations ?
6475 * they are handled separately in xmlParseInternalSubset()
6481 * We should be at the end of the DOCTYPE declaration.
6484 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6486 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
6487 ctxt->wellFormed = 0;
6488 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6494 * xmlParseInternalSubset:
6495 * @ctxt: an XML parser context
6497 * parse the internal subset declaration
6499 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6503 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
/*
 * Parses the bracketed internal subset of a DOCTYPE: switches the parser
 * to the XML_PARSER_DTD state and loops over markup declarations and PE
 * references until the current character is ']'.
 */
6505 * Is there any DTD definition ?
6508 ctxt->instate = XML_PARSER_DTD;
6511 * Parse the succession of Markup declarations and
6513 * Subsequence (markupdecl | PEReference | S)*
6515 while (RAW != ']') {
/* Snapshot the read position so a pass that makes no progress is detected. */
6516 const xmlChar *check = CUR_PTR;
6517 unsigned int cons = ctxt->input->consumed;
6520 xmlParseMarkupDecl(ctxt);
6521 xmlParsePEReference(ctxt);
6524 * Pop-up of finished entities.
6526 while ((RAW == 0) && (ctxt->inputNr > 1))
/*
 * Neither the cursor nor the consumed counter moved during this pass:
 * report an internal error.
 */
6529 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6530 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6532 ctxt->sax->error(ctxt->userData,
6533 "xmlParseInternalSubset: error detected in Markup declaration\n");
6534 ctxt->wellFormed = 0;
6535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6546 * We should be at the end of the DOCTYPE declaration.
6549 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6551 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
6552 ctxt->wellFormed = 0;
6553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6559 * xmlParseAttribute:
6560 * @ctxt: an XML parser context
6561 * @value: a xmlChar ** used to store the value of the attribute
6563 * parse an attribute
6565 * [41] Attribute ::= Name Eq AttValue
6567 * [ WFC: No External Entity References ]
6568 * Attribute values cannot contain direct or indirect entity references
6569 * to external entities.
6571 * [ WFC: No < in Attribute Values ]
6572 * The replacement text of any entity referred to directly or indirectly in
6573 * an attribute value (other than "<") must not contain a <.
6575 * [ VC: Attribute Value Type ]
6576 * The attribute must have been declared; the value must be of the type
6579 * [25] Eq ::= S? '=' S?
6583 * [NS 11] Attribute ::= QName Eq AttValue
6585 * Also the case QName == xmlns:??? is handled independently as a namespace
6588 * Returns the attribute name, and the value in *value.
6592 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
 * Parses one attribute: the name with xmlParseName(), then the value
 * with xmlParseAttValue().  Afterwards the xml:lang value is checked
 * (pedantic mode only, reported as a warning) and the xml:space value
 * is checked against "default" and "preserve".
 */
6593 xmlChar *name, *val;
6597 name = xmlParseName(ctxt);
6599 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6601 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6602 ctxt->wellFormed = 0;
6603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6614 val = xmlParseAttValue(ctxt);
/* Attribute value parsing is over: go back to the content state. */
6615 ctxt->instate = XML_PARSER_CONTENT;
6617 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "Specification mandate value for attribute %s\n", name);
6621 ctxt->wellFormed = 0;
6622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6628 * Check that xml:lang conforms to the specification
6629 * No more registered as an error, just generate a warning now
6630 * since this was deprecated in XML second edition
6632 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6633 if (!xmlCheckLanguageID(val)) {
6634 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6635 ctxt->sax->warning(ctxt->userData,
6636 "Malformed value for xml:lang : %s\n", val);
6641 * Check that xml:space conforms to the specification
6643 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6644 if (xmlStrEqual(val, BAD_CAST "default"))
6646 else if (xmlStrEqual(val, BAD_CAST "preserve"))
/*
 * Any other xml:space value is a well-formedness error; note that it
 * reuses the XML_ERR_ATTRIBUTE_WITHOUT_VALUE code.
 */
6649 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6651 ctxt->sax->error(ctxt->userData,
6652 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
6654 ctxt->wellFormed = 0;
6655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6665 * @ctxt: an XML parser context
6667 * parse a start of tag either for rule element or
6668 * EmptyElement. In both cases we don't parse the tag closing chars.
6670 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6672 * [ WFC: Unique Att Spec ]
6673 * No attribute name may appear more than once in the same start-tag or
6674 * empty-element tag.
6676 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6678 * [ WFC: Unique Att Spec ]
6679 * No attribute name may appear more than once in the same start-tag or
6680 * empty-element tag.
6684 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6686 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6688 * Returns the element name parsed
/*
 * Review notes for xmlParseStartTag, based on the statements visible below:
 * - returns NULL immediately when the current character is not the
 *   opening angle bracket; the element name is read with xmlParseName()
 *   and a failure there is reported as XML_ERR_NAME_REQUIRED.
 * - the attribute loop runs until the closing bracket, the slash-bracket
 *   pair of an empty element, or a character rejected by IS_CHAR.
 * - each iteration calls xmlParseAttribute(); a name already present in
 *   atts raises XML_ERR_ATTRIBUTE_REDEFINED.
 * - atts is allocated with xmlMalloc() and grown with xmlRealloc() once
 *   nbatts + 4 exceeds maxatts; an allocation failure sets
 *   XML_ERR_NO_MEMORY, moves ctxt->instate to XML_PARSER_EOF and disables
 *   SAX.
 * - after the loop the startElement SAX callback receives name and atts
 *   (skipped when ctxt->disableSAX is set), then every atts entry and the
 *   array itself are released with xmlFree().
 */
6692 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6696 const xmlChar **atts = NULL;
6701 if (RAW != '<') return(NULL);
6704 name = xmlParseName(ctxt);
6706 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6708 ctxt->sax->error(ctxt->userData,
6709 "xmlParseStartTag: invalid element name\n");
6710 ctxt->wellFormed = 0;
6711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6716 * Now parse the attributes, it ends up with the ending
6723 while ((RAW != '>') &&
6724 ((RAW != '/') || (NXT(1) != '>')) &&
6725 (IS_CHAR((unsigned int) RAW))) {
/* Snapshot of the input position; compared further down to detect an
 * iteration that consumed nothing. */
6726 const xmlChar *q = CUR_PTR;
6727 unsigned int cons = ctxt->input->consumed;
6729 attname = xmlParseAttribute(ctxt, &attvalue);
6730 if ((attname != NULL) && (attvalue != NULL)) {
6732 * [ WFC: Unique Att Spec ]
6733 * No attribute name may appear more than once in the same
6734 * start-tag or empty-element tag.
/* Names are stored at even indexes of atts and values at odd indexes
 * (see the two stores below), hence the step of 2. */
6736 for (i = 0; i < nbatts;i += 2) {
6737 if (xmlStrEqual(atts[i], attname)) {
6738 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6740 ctxt->sax->error(ctxt->userData,
6741 "Attribute %s redefined\n",
6743 ctxt->wellFormed = 0;
6744 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6752 * Add the pair to atts
6756 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6758 xmlGenericError(xmlGenericErrorContext,
6759 "malloc of %ld byte failed\n",
6760 maxatts * (long)sizeof(xmlChar *));
6761 if (attname != NULL)
6763 if (attvalue != NULL)
6765 ctxt->errNo = XML_ERR_NO_MEMORY;
6766 ctxt->instate = XML_PARSER_EOF;
6767 ctxt->disableSAX = 1;
6770 } else if (nbatts + 4 > maxatts) {
6774 n = (const xmlChar **) xmlRealloc((void *) atts,
6775 maxatts * sizeof(xmlChar *));
6777 xmlGenericError(xmlGenericErrorContext,
6778 "realloc of %ld byte failed\n",
6779 maxatts * (long)sizeof(xmlChar *));
6780 if (attname != NULL)
6782 if (attvalue != NULL)
6784 ctxt->errNo = XML_ERR_NO_MEMORY;
6785 ctxt->instate = XML_PARSER_EOF;
6786 ctxt->disableSAX = 1;
/* Append the name/value pair and keep two NULL slots after it. */
6791 atts[nbatts++] = attname;
6792 atts[nbatts++] = attvalue;
6793 atts[nbatts] = NULL;
6794 atts[nbatts + 1] = NULL;
6796 if (attname != NULL)
6798 if (attvalue != NULL)
6805 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6807 if (!IS_BLANK(RAW)) {
6808 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6810 ctxt->sax->error(ctxt->userData,
6811 "attributes construct error\n");
6812 ctxt->wellFormed = 0;
6813 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Nothing was consumed and no attribute was produced in this iteration:
 * reported as an internal error. */
6816 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6817 (attname == NULL) && (attvalue == NULL)) {
6818 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6820 ctxt->sax->error(ctxt->userData,
6821 "xmlParseStartTag: problem parsing attributes\n");
6822 ctxt->wellFormed = 0;
6823 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6830 * SAX: Start of Element !
6832 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6833 (!ctxt->disableSAX))
6834 ctxt->sax->startElement(ctxt->userData, name, atts);
6837 for (i = 0;i < nbatts;i++)
6838 if (atts[i] != NULL)
6839 xmlFree((xmlChar *) atts[i]);
6840 xmlFree((void *) atts);
6846 * xmlParseEndTagInternal:
6847 * @ctxt: an XML parser context
6849 * parse an end of tag
6851 * [42] ETag ::= '</' Name S? '>'
6855 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * Review notes for xmlParseEndTagInternal, based on the statements
 * visible below:
 * - the input must start with the less-than / slash pair, otherwise
 *   XML_ERR_LTSLASH_REQUIRED is reported.
 * - the tag name is read with xmlParseNameAndCompare() against ctxt->name.
 *   NOTE(review): the result is compared with (xmlChar*)1, which looks
 *   like a sentinel for a successful match - confirm against
 *   xmlParseNameAndCompare().
 * - a missing closing bracket is reported as XML_ERR_GT_REQUIRED and a
 *   name mismatch as XML_ERR_TAG_NAME_MISMATCH; the line argument is only
 *   used inside the mismatch messages.
 * - the endElement SAX callback is invoked with ctxt->name (skipped when
 *   ctxt->disableSAX is set) and the name stack is popped with namePop().
 */
6859 xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
6864 if ((RAW != '<') || (NXT(1) != '/')) {
6865 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6867 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6868 ctxt->wellFormed = 0;
6869 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6874 name = xmlParseNameAndCompare(ctxt,ctxt->name);
6877 * We should definitely be at the ending "S? '>'" part
6881 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
6882 ctxt->errNo = XML_ERR_GT_REQUIRED;
6883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6884 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6885 ctxt->wellFormed = 0;
6886 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6891 * [ WFC: Element Type Match ]
6892 * The Name in an element's end-tag must match the element type in the
6896 if (name != (xmlChar*)1) {
6897 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6900 ctxt->sax->error(ctxt->userData,
6901 "Opening and ending tag mismatch: %s line %d and %s\n",
6902 ctxt->name, line, name);
6904 ctxt->sax->error(ctxt->userData,
6905 "Ending tag error for: %s line %d\n", ctxt->name, line);
6909 ctxt->wellFormed = 0;
6910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6918 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6919 (!ctxt->disableSAX))
6920 ctxt->sax->endElement(ctxt->userData, ctxt->name);
6922 oldname = namePop(ctxt);
6924 if (oldname != NULL) {
6926 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6935 * @ctxt: an XML parser context
6937 * parse an end of tag
6939 * [42] ETag ::= '</' Name S? '>'
6943 * [NS 9] ETag ::= '</' QName S? '>'
/*
 * Thin wrapper: forwards to xmlParseEndTagInternal() with 0 as the line
 * argument.
 */
6947 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6948 xmlParseEndTagInternal(ctxt, 0);
6953 * @ctxt: an XML parser context
6955 * Parse escaped pure raw content.
6957 * [18] CDSect ::= CDStart CData CDEnd
6959 * [19] CDStart ::= '<![CDATA['
6961 * [20] CData ::= (Char* - (Char* ']]>' Char*))
6963 * [21] CDEnd ::= ']]>'
/*
 * Review notes for xmlParseCDSect, based on the statements visible below:
 * - the CDATA opening marker is matched character by character through
 *   NXT(0) .. NXT(7) (further conditions may sit on lines not visible
 *   here), then ctxt->instate becomes XML_PARSER_CDATA_SECTION.
 * - three separate paths report XML_ERR_CDATA_NOT_FINISHED; each also
 *   sets ctxt->instate to XML_PARSER_CONTENT.
 * - the text is accumulated in buf, which starts at
 *   XML_PARSER_BUFFER_SIZE bytes (xmlMallocAtomic) and is grown with
 *   xmlRealloc() once len + 5 reaches size.
 * - the copy loop runs while cur passes IS_CHAR and the triple
 *   (r, s, cur) is not the ]]> terminator; r is appended with COPY_BUF.
 * - the collected buffer goes to the cdataBlock SAX callback, or to
 *   characters when cdataBlock is not set, unless SAX is disabled.
 */
6966 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6967 xmlChar *buf = NULL;
6969 int size = XML_PARSER_BUFFER_SIZE;
6975 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6976 (NXT(2) == '[') && (NXT(3) == 'C') &&
6977 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6978 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6984 ctxt->instate = XML_PARSER_CDATA_SECTION;
6987 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6989 ctxt->sax->error(ctxt->userData,
6990 "CData section not finished\n");
6991 ctxt->wellFormed = 0;
6992 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6993 ctxt->instate = XML_PARSER_CONTENT;
6999 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
7000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7001 ctxt->sax->error(ctxt->userData,
7002 "CData section not finished\n");
7003 ctxt->wellFormed = 0;
7004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7005 ctxt->instate = XML_PARSER_CONTENT;
7010 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
7012 xmlGenericError(xmlGenericErrorContext,
7013 "malloc of %d byte failed\n", size);
7016 while (IS_CHAR(cur) &&
7017 ((r != ']') || (s != ']') || (cur != '>'))) {
7018 if (len + 5 >= size) {
/* NOTE(review): the xmlRealloc() result is stored straight into buf, so
 * the previous block is no longer reachable if the call fails - confirm
 * whether the failure path (not fully visible here) can leak it. */
7020 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7022 xmlGenericError(xmlGenericErrorContext,
7023 "realloc of %d byte failed\n", size);
7027 COPY_BUF(rl,buf,len,r);
7041 ctxt->instate = XML_PARSER_CONTENT;
7043 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
7044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7045 ctxt->sax->error(ctxt->userData,
7046 "CData section not finished\n%.50s\n", buf);
7047 ctxt->wellFormed = 0;
7048 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7055 * OK the buffer is to be consumed as cdata.
7057 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7058 if (ctxt->sax->cdataBlock != NULL)
7059 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
7060 else if (ctxt->sax->characters != NULL)
7061 ctxt->sax->characters(ctxt->userData, buf, len);
7068 * @ctxt: an XML parser context
7072 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
/*
 * Review notes for xmlParseContent, based on the statements visible below:
 * - loops while RAW is non-zero and the input is not at the start of an
 *   end tag (less-than followed by slash).
 * - dispatch order on the current position: processing instruction,
 *   CDATA section (xmlParseCDSect), comment (xmlParseComment, after which
 *   ctxt->instate is set to XML_PARSER_CONTENT), sub-element
 *   (xmlParseElement), reference (xmlParseReference), and finally
 *   character data (xmlParseCharData).
 * - a loop over (RAW == 0) && (ctxt->inputNr > 1) handles finished
 *   entities; its body is on a line not visible here.
 * - when an iteration leaves both ctxt->input->consumed and CUR_PTR
 *   unchanged, XML_ERR_INTERNAL_ERROR is reported and ctxt->instate is
 *   set to XML_PARSER_EOF.
 */
7076 xmlParseContent(xmlParserCtxtPtr ctxt) {
7078 while ((RAW != 0) &&
7079 ((RAW != '<') || (NXT(1) != '/'))) {
/* Snapshot of the input position, compared at the bottom of the loop to
 * detect an iteration that made no progress. */
7080 const xmlChar *test = CUR_PTR;
7081 unsigned int cons = ctxt->input->consumed;
7082 const xmlChar *cur = ctxt->input->cur;
7085 * First case : a Processing Instruction.
7087 if ((*cur == '<') && (cur[1] == '?')) {
7092 * Second case : a CDSection
7094 else if ((*cur == '<') && (NXT(1) == '!') &&
7095 (NXT(2) == '[') && (NXT(3) == 'C') &&
7096 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7097 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7099 xmlParseCDSect(ctxt);
7103 * Third case : a comment
7105 else if ((*cur == '<') && (NXT(1) == '!') &&
7106 (NXT(2) == '-') && (NXT(3) == '-')) {
7107 xmlParseComment(ctxt);
7108 ctxt->instate = XML_PARSER_CONTENT;
7112 * Fourth case : a sub-element.
7114 else if (*cur == '<') {
7115 xmlParseElement(ctxt);
7119 * Fifth case : a reference. If if has not been resolved,
7120 * parsing returns it's Name, create the node
7123 else if (*cur == '&') {
7124 xmlParseReference(ctxt);
7128 * Last case, text. Note that References are handled directly.
7131 xmlParseCharData(ctxt, 0);
7136 * Pop-up of finished entities.
7138 while ((RAW == 0) && (ctxt->inputNr > 1))
7142 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
7143 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7145 ctxt->sax->error(ctxt->userData,
7146 "detected an error in element content\n");
7147 ctxt->wellFormed = 0;
7148 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7149 ctxt->instate = XML_PARSER_EOF;
7157 * @ctxt: an XML parser context
7159 * parse an XML element, this is highly recursive
7161 * [39] element ::= EmptyElemTag | STag content ETag
7163 * [ WFC: Element Type Match ]
7164 * The Name in an element's end-tag must match the element type in the
7167 * [ VC: Element Valid ]
7168 * An element is valid if there is a declaration matching elementdecl
7169 * where the Name matches the element type and one of the following holds:
7170 * - The declaration matches EMPTY and the element has no content.
7171 * - The declaration matches children and the sequence of child elements
7172 * belongs to the language generated by the regular expression in the
7173 * content model, with optional white space (characters matching the
7174 * nonterminal S) between each pair of child elements.
7175 * - The declaration matches Mixed and the content consists of character
7176 * data and child elements whose types match names in the content model.
7177 * - The declaration matches ANY, and the types of any child elements have
/*
 * Review notes for xmlParseElement, based on the statements visible below:
 * - when ctxt->record_info is set, the start offset and line are stored
 *   in node_info; the end position is stored and the record is handed to
 *   xmlParserAddNodeInfo() on each of the three exit paths, provided ret
 *   is non-NULL (ret is assigned on lines not visible here).
 * - a value is pushed with spacePush(): -1 when ctxt->spaceNr is 0,
 *   otherwise the current *ctxt->space.
 * - the start tag is read with xmlParseStartTag() and the returned name
 *   is pushed with namePush().
 * - with validation enabled and the document still well formed,
 *   xmlValidateRoot() runs when ctxt->node is the first child of
 *   ctxt->myDoc.
 * - empty element path: endElement SAX callback, then namePop().
 * - a start tag that is not closed is reported as XML_ERR_GT_REQUIRED.
 * - otherwise xmlParseContent() runs; if the current character then fails
 *   IS_CHAR, XML_ERR_TAG_NOT_FINISHED is reported; else the end tag is
 *   read with xmlParseEndTagInternal().
 */
7182 xmlParseElement(xmlParserCtxtPtr ctxt) {
7185 xmlParserNodeInfo node_info;
7189 /* Capture start position */
7190 if (ctxt->record_info) {
7191 node_info.begin_pos = ctxt->input->consumed +
7192 (CUR_PTR - ctxt->input->base);
7193 node_info.begin_line = ctxt->input->line;
7196 if (ctxt->spaceNr == 0)
7197 spacePush(ctxt, -1);
7199 spacePush(ctxt, *ctxt->space);
/* Line of the start tag; used in the error messages below and passed on
 * to xmlParseEndTagInternal(). */
7201 line = ctxt->input->line;
7202 name = xmlParseStartTag(ctxt);
7207 namePush(ctxt, name);
7211 * [ VC: Root Element Type ]
7212 * The Name in the document type declaration must match the element
7213 * type of the root element.
7215 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7216 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7217 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7220 * Check for an Empty Element.
7222 if ((RAW == '/') && (NXT(1) == '>')) {
7224 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7225 (!ctxt->disableSAX))
7226 ctxt->sax->endElement(ctxt->userData, name);
7227 oldname = namePop(ctxt);
7229 if (oldname != NULL) {
7231 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7235 if ( ret != NULL && ctxt->record_info ) {
7236 node_info.end_pos = ctxt->input->consumed +
7237 (CUR_PTR - ctxt->input->base);
7238 node_info.end_line = ctxt->input->line;
7239 node_info.node = ret;
7240 xmlParserAddNodeInfo(ctxt, &node_info);
7247 ctxt->errNo = XML_ERR_GT_REQUIRED;
7248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7249 ctxt->sax->error(ctxt->userData,
7250 "Couldn't find end of Start Tag %s line %d\n",
7252 ctxt->wellFormed = 0;
7253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7256 * end of parsing of this node.
7259 oldname = namePop(ctxt);
7261 if (oldname != NULL) {
7263 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7269 * Capture end position and add node
7271 if ( ret != NULL && ctxt->record_info ) {
7272 node_info.end_pos = ctxt->input->consumed +
7273 (CUR_PTR - ctxt->input->base);
7274 node_info.end_line = ctxt->input->line;
7275 node_info.node = ret;
7276 xmlParserAddNodeInfo(ctxt, &node_info);
7282 * Parse the content of the element:
7284 xmlParseContent(ctxt);
7285 if (!IS_CHAR((unsigned int) RAW)) {
7286 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
7287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7288 ctxt->sax->error(ctxt->userData,
7289 "Premature end of data in tag %s line %d\n", name, line);
7290 ctxt->wellFormed = 0;
7291 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7294 * end of parsing of this node.
7297 oldname = namePop(ctxt);
7299 if (oldname != NULL) {
7301 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7309 * parse the end of tag: '</' should be here.
7311 xmlParseEndTagInternal(ctxt, line);
7314 * Capture end position and add node
7316 if ( ret != NULL && ctxt->record_info ) {
7317 node_info.end_pos = ctxt->input->consumed +
7318 (CUR_PTR - ctxt->input->base);
7319 node_info.end_line = ctxt->input->line;
7320 node_info.node = ret;
7321 xmlParserAddNodeInfo(ctxt, &node_info);
7326 * xmlParseVersionNum:
7327 * @ctxt: an XML parser context
7329 * parse the XML version value.
7331 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7333 * Returns the string giving the XML version number, or NULL
/*
 * Review notes for xmlParseVersionNum, based on the statements visible
 * below:
 * - buf is allocated with xmlMallocAtomic() and grown with xmlRealloc()
 *   once len + 1 reaches size.
 * - the loop accepts ASCII letters, digits, underscore, dot, colon and
 *   hyphen, matching production [26] quoted in the header.
 */
7336 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7337 xmlChar *buf = NULL;
7342 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
7344 xmlGenericError(xmlGenericErrorContext,
7345 "malloc of %d byte failed\n", size);
7349 while (((cur >= 'a') && (cur <= 'z')) ||
7350 ((cur >= 'A') && (cur <= 'Z')) ||
7351 ((cur >= '0') && (cur <= '9')) ||
7352 (cur == '_') || (cur == '.') ||
7353 (cur == ':') || (cur == '-')) {
7354 if (len + 1 >= size) {
/* NOTE(review): the xmlRealloc() result overwrites buf directly, so the
 * old block is unreachable if the call fails - confirm the failure path
 * (not fully visible here) does not leak it. */
7356 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7358 xmlGenericError(xmlGenericErrorContext,
7359 "realloc of %d byte failed\n", size);
7372 * xmlParseVersionInfo:
7373 * @ctxt: an XML parser context
7375 * parse the XML version.
7377 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7379 * [25] Eq ::= S? '=' S?
7381 * Returns the version string, e.g. "1.0"
/*
 * Review notes for xmlParseVersionInfo, based on the statements visible
 * below:
 * - the version keyword is matched character by character through RAW
 *   and NXT(1) .. NXT(5) (the remaining checks are on a line not visible
 *   here).
 * - a missing equals sign is reported as XML_ERR_EQUAL_REQUIRED.
 * - there is one branch per quote style; both call xmlParseVersionNum()
 *   and both report XML_ERR_STRING_NOT_CLOSED, printing up to 50
 *   characters starting at q.
 * - when neither quote character is present, XML_ERR_STRING_NOT_STARTED
 *   is reported.
 * - version starts as NULL.
 */
7385 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7386 xmlChar *version = NULL;
7389 if ((RAW == 'v') && (NXT(1) == 'e') &&
7390 (NXT(2) == 'r') && (NXT(3) == 's') &&
7391 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7396 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7398 ctxt->sax->error(ctxt->userData,
7399 "xmlParseVersionInfo : expected '='\n");
7400 ctxt->wellFormed = 0;
7401 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7409 version = xmlParseVersionNum(ctxt);
7411 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7413 ctxt->sax->error(ctxt->userData,
7414 "String not closed\n%.50s\n", q);
7415 ctxt->wellFormed = 0;
7416 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7419 } else if (RAW == '\''){
7422 version = xmlParseVersionNum(ctxt);
7424 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7426 ctxt->sax->error(ctxt->userData,
7427 "String not closed\n%.50s\n", q);
7428 ctxt->wellFormed = 0;
7429 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7433 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7435 ctxt->sax->error(ctxt->userData,
7436 "xmlParseVersionInfo : expected ' or \"\n");
7437 ctxt->wellFormed = 0;
7438 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7446 * @ctxt: an XML parser context
7448 * parse the XML encoding name
7450 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7452 * Returns the encoding name value or NULL
/*
 * Review notes for xmlParseEncName, based on the statements visible below:
 * - the first character must be an ASCII letter; otherwise
 *   XML_ERR_ENCODING_NAME is reported.
 * - following characters may be ASCII letters, digits, dot or underscore
 *   (one more alternative sits on a line not visible here; production
 *   [81] in the header also lists the hyphen).
 * - buf is allocated with xmlMallocAtomic() and grown with xmlRealloc()
 *   once len + 1 reaches size.
 */
7455 xmlParseEncName(xmlParserCtxtPtr ctxt) {
7456 xmlChar *buf = NULL;
7462 if (((cur >= 'a') && (cur <= 'z')) ||
7463 ((cur >= 'A') && (cur <= 'Z'))) {
7464 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
7466 xmlGenericError(xmlGenericErrorContext,
7467 "malloc of %d byte failed\n", size);
7474 while (((cur >= 'a') && (cur <= 'z')) ||
7475 ((cur >= 'A') && (cur <= 'Z')) ||
7476 ((cur >= '0') && (cur <= '9')) ||
7477 (cur == '.') || (cur == '_') ||
7479 if (len + 1 >= size) {
/* NOTE(review): the xmlRealloc() result overwrites buf directly, so the
 * old block is unreachable if the call fails - confirm the failure path
 * (not fully visible here) does not leak it. */
7481 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7483 xmlGenericError(xmlGenericErrorContext,
7484 "realloc of %d byte failed\n", size);
7499 ctxt->errNo = XML_ERR_ENCODING_NAME;
7500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7501 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7502 ctxt->wellFormed = 0;
7503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7509 * xmlParseEncodingDecl:
7510 * @ctxt: an XML parser context
7512 * parse the XML encoding declaration
7514 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7516 * this sets up the conversion filters.
7518 * Returns the encoding value or NULL
/*
 * Review notes for xmlParseEncodingDecl, based on the statements visible
 * below:
 * - the encoding keyword is matched through RAW and NXT(1) .. NXT(7); a
 *   missing equals sign is reported as XML_ERR_EQUAL_REQUIRED.
 * - there is one branch per quote style; both call xmlParseEncName() and
 *   report XML_ERR_STRING_NOT_CLOSED. With no quote at all,
 *   XML_ERR_STRING_NOT_STARTED is reported.
 * - the parsed name is then handled in three ways (case-insensitive
 *   comparison through xmlStrcasecmp):
 *     UTF-16 or UTF16: stored in ctxt->encoding, previous value freed;
 *     UTF-8 or UTF8:   stored in ctxt->encoding, previous value freed;
 *     anything else:   stored in ctxt->input->encoding (previous value
 *                      freed), then a handler is looked up with
 *                      xmlFindCharEncodingHandler() and installed with
 *                      xmlSwitchToEncoding(); a missing handler is
 *                      reported as XML_ERR_UNSUPPORTED_ENCODING.
 */
7522 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7523 xmlChar *encoding = NULL;
7527 if ((RAW == 'e') && (NXT(1) == 'n') &&
7528 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7529 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7530 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7534 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData,
7537 "xmlParseEncodingDecl : expected '='\n");
7538 ctxt->wellFormed = 0;
7539 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7547 encoding = xmlParseEncName(ctxt);
7549 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7551 ctxt->sax->error(ctxt->userData,
7552 "String not closed\n%.50s\n", q);
7553 ctxt->wellFormed = 0;
7554 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7557 } else if (RAW == '\''){
7560 encoding = xmlParseEncName(ctxt);
7562 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7564 ctxt->sax->error(ctxt->userData,
7565 "String not closed\n%.50s\n", q);
7566 ctxt->wellFormed = 0;
7567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7571 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7573 ctxt->sax->error(ctxt->userData,
7574 "xmlParseEncodingDecl : expected ' or \"\n");
7575 ctxt->wellFormed = 0;
7576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7579 * UTF-16 encoding stwich has already taken place at this stage,
7580 * more over the little-endian/big-endian selection is already done
7582 if ((encoding != NULL) &&
7583 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
7584 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
7585 if (ctxt->encoding != NULL)
7586 xmlFree((xmlChar *) ctxt->encoding);
7587 ctxt->encoding = encoding;
7590 * UTF-8 encoding is handled natively
7592 else if ((encoding != NULL) &&
7593 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
7594 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
7595 if (ctxt->encoding != NULL)
7596 xmlFree((xmlChar *) ctxt->encoding);
7597 ctxt->encoding = encoding;
/* Any other encoding name: kept on the input stream rather than on the
 * parser context, and a conversion handler is looked up for it. */
7599 else if (encoding != NULL) {
7600 xmlCharEncodingHandlerPtr handler;
7602 if (ctxt->input->encoding != NULL)
7603 xmlFree((xmlChar *) ctxt->input->encoding);
7604 ctxt->input->encoding = encoding;
7606 handler = xmlFindCharEncodingHandler((const char *) encoding);
7607 if (handler != NULL) {
7608 xmlSwitchToEncoding(ctxt, handler);
7610 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7612 ctxt->sax->error(ctxt->userData,
7613 "Unsupported encoding %s\n", encoding);
7623 * @ctxt: an XML parser context
7625 * parse the XML standalone declaration
7627 * [32] SDDecl ::= S 'standalone' Eq
7628 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7630 * [ VC: Standalone Document Declaration ]
7631 * TODO The standalone document declaration must have the value "no"
7632 * if any external markup declarations contain declarations of:
7633 * - attributes with default values, if elements to which these
7634 * attributes apply appear in the document without specifications
7635 * of values for these attributes, or
7636 * - entities (other than amp, lt, gt, apos, quot), if references
7637 * to those entities appear in the document, or
7638 * - attributes with values subject to normalization, where the
7639 * attribute appears in the document with a value which will change
7640 * as a result of normalization, or
7641 * - element types with element content, if white space occurs directly
7642 * within any instance of those types.
7644 * Returns 1 if standalone, 0 otherwise
/*
 * Review notes for xmlParseSDDecl, based on the statements visible below:
 * - the standalone keyword is matched through RAW and NXT(1) .. NXT(9); a
 *   missing equals sign is reported as XML_ERR_EQUAL_REQUIRED.
 * - there is one branch per quote style; each looks for the literal no
 *   or yes, reports XML_ERR_STANDALONE_VALUE for anything else and
 *   XML_ERR_STRING_NOT_CLOSED when the closing quote is missing. With no
 *   quote at all, XML_ERR_STRING_NOT_STARTED is reported.
 * - NOTE(review): standalone is initialised to -1 while the header only
 *   documents 1 and 0 as results; presumably -1 is what the caller gets
 *   when no declaration is present - confirm against the return
 *   statement, which is not visible here.
 */
7648 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7649 int standalone = -1;
7652 if ((RAW == 's') && (NXT(1) == 't') &&
7653 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7654 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7655 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7656 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7660 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7662 ctxt->sax->error(ctxt->userData,
7663 "XML standalone declaration : expected '='\n");
7664 ctxt->wellFormed = 0;
7665 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7672 if ((RAW == 'n') && (NXT(1) == 'o')) {
7675 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7680 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7682 ctxt->sax->error(ctxt->userData,
7683 "standalone accepts only 'yes' or 'no'\n");
7684 ctxt->wellFormed = 0;
7685 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7688 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7690 ctxt->sax->error(ctxt->userData, "String not closed\n");
7691 ctxt->wellFormed = 0;
7692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7695 } else if (RAW == '"'){
7697 if ((RAW == 'n') && (NXT(1) == 'o')) {
7700 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7705 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7707 ctxt->sax->error(ctxt->userData,
7708 "standalone accepts only 'yes' or 'no'\n");
7709 ctxt->wellFormed = 0;
7710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7713 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7715 ctxt->sax->error(ctxt->userData, "String not closed\n");
7716 ctxt->wellFormed = 0;
7717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7721 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7723 ctxt->sax->error(ctxt->userData,
7724 "Standalone value not found\n");
7725 ctxt->wellFormed = 0;
7726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7734 * @ctxt: an XML parser context
7736 * parse an XML declaration header
7738 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
/*
 * Review notes for xmlParseXMLDecl, based on the statements visible below:
 * - a blank is required after the opening marker (XML_ERR_SPACE_REQUIRED
 *   otherwise).
 * - the version is read with xmlParseVersionInfo(); a version different
 *   from XML_DEFAULT_VERSION only triggers a SAX warning. The parsed
 *   string replaces ctxt->version (the previous value is freed).
 * - before the optional encoding declaration a blank is required unless
 *   the declaration ends right there; xmlParseEncodingDecl() is then
 *   called and ctxt->errNo is checked for XML_ERR_UNSUPPORTED_ENCODING.
 * - the blank check before the standalone declaration is only applied
 *   when ctxt->input->encoding is non-NULL; the result of
 *   xmlParseSDDecl() is stored in ctxt->input->standalone.
 * - a declaration not closed by question mark plus bracket is reported as
 *   XML_ERR_XMLDECL_NOT_FINISHED; the last path also calls MOVETO_ENDTAG.
 */
7742 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7746 * We know that '<?xml' is here.
7750 if (!IS_BLANK(RAW)) {
7751 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7753 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7754 ctxt->wellFormed = 0;
7755 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7760 * We must have the VersionInfo here.
7762 version = xmlParseVersionInfo(ctxt);
/* NOTE(review): unlike the other error paths in this function, the
 * missing-version branch below does not assign ctxt->errNo before
 * reporting - confirm whether an error code should be set here. */
7763 if (version == NULL) {
7764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7765 ctxt->sax->error(ctxt->userData,
7766 "Malformed declaration expecting version\n");
7767 ctxt->wellFormed = 0;
7768 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7770 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7772 * TODO: Blueberry should be detected here
7774 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7775 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7778 if (ctxt->version != NULL)
7779 xmlFree((void *) ctxt->version);
7780 ctxt->version = version;
7784 * We may have the encoding declaration
7786 if (!IS_BLANK(RAW)) {
7787 if ((RAW == '?') && (NXT(1) == '>')) {
7791 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7793 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7794 ctxt->wellFormed = 0;
7795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7797 xmlParseEncodingDecl(ctxt);
7798 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7800 * The XML REC instructs us to stop parsing right here
7806 * We may have the standalone status.
7808 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7809 if ((RAW == '?') && (NXT(1) == '>')) {
7813 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7815 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7816 ctxt->wellFormed = 0;
7817 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7820 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7823 if ((RAW == '?') && (NXT(1) == '>')) {
7825 } else if (RAW == '>') {
7826 /* Deprecated old WD ... */
7827 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7829 ctxt->sax->error(ctxt->userData,
7830 "XML declaration must end-up with '?>'\n");
7831 ctxt->wellFormed = 0;
7832 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7835 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7837 ctxt->sax->error(ctxt->userData,
7838 "parsing XML declaration: '?>' expected\n");
7839 ctxt->wellFormed = 0;
7840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7841 MOVETO_ENDTAG(CUR_PTR);
7848 * @ctxt: an XML parser context
7850 * parse an XML Misc* optional field.
7852 * [27] Misc ::= Comment | PI | S
/*
 * Review notes for xmlParseMisc, based on the statements visible below:
 * loops while the input is at a processing instruction opener, at a
 * comment opener, or meets a third condition on a line not visible here.
 * Inside the loop the processing instruction case is tested first, then
 * the IS_BLANK(CUR) case; the remaining case calls xmlParseComment().
 */
7856 xmlParseMisc(xmlParserCtxtPtr ctxt) {
7857 while (((RAW == '<') && (NXT(1) == '?')) ||
7858 ((RAW == '<') && (NXT(1) == '!') &&
7859 (NXT(2) == '-') && (NXT(3) == '-')) ||
7861 if ((RAW == '<') && (NXT(1) == '?')) {
7863 } else if (IS_BLANK(CUR)) {
7866 xmlParseComment(ctxt);
7872 * @ctxt: an XML parser context
7874 * parse an XML document (and build a tree if using the standard SAX
7877 * [1] document ::= prolog element Misc*
7879 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7881 * Returns 0, or -1 in case of error. The parser context is augmented
7882 * as a result of the parsing.
/*
 * Review notes for xmlParseDocument, based on the statements visible
 * below:
 * - the setDocumentLocator SAX callback is invoked first.
 * - the character encoding is detected with xmlDetectCharEncoding() on 4
 *   bytes and applied with xmlSwitchEncoding() when one is recognised.
 * - XML_ERR_DOCUMENT_EMPTY is reported in two places: early on, and later
 *   when no start tag is found.
 * - an XML declaration is recognised by its opening marker followed by a
 *   blank and parsed with xmlParseXMLDecl(); ctxt->errNo is then checked
 *   for XML_ERR_UNSUPPORTED_ENCODING and ctxt->standalone is copied from
 *   ctxt->input->standalone. Otherwise ctxt->version is set to a copy of
 *   XML_DEFAULT_VERSION.
 * - startDocument is called unless SAX is disabled.
 * - a DOCTYPE is parsed with xmlParseDocTypeDecl(); ctxt->instate then
 *   becomes XML_PARSER_DTD and xmlParseInternalSubset() runs; afterwards
 *   the externalSubset callback is invoked and ctxt->instate becomes
 *   XML_PARSER_PROLOG.
 * - the root element is parsed with xmlParseElement() between
 *   XML_PARSER_CONTENT and XML_PARSER_EPILOG; trailing content is
 *   reported as XML_ERR_DOCUMENT_END and ctxt->instate ends as
 *   XML_PARSER_EOF.
 * - ctxt->myDoc is freed when its version equals SAX_COMPAT_MODE.
 * - NOTE(review): endDocument is invoked without testing
 *   ctxt->disableSAX, unlike startDocument - confirm this is intended.
 */
7886 xmlParseDocument(xmlParserCtxtPtr ctxt) {
7888 xmlCharEncoding enc;
7895 * SAX: beginning of the document processing.
7897 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7898 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
/* NOTE(review): this compares the ctxt->encoding pointer with an
 * encoding enum constant cast to a pointer; presumably it is meant as a
 * NULL test - confirm. */
7900 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
7902 * Get the 4 first bytes and decode the charset
7903 * if enc != XML_CHAR_ENCODING_NONE
7904 * plug some encoding conversion routines.
7910 enc = xmlDetectCharEncoding(start, 4);
7911 if (enc != XML_CHAR_ENCODING_NONE) {
7912 xmlSwitchEncoding(ctxt, enc);
7918 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7920 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7921 ctxt->wellFormed = 0;
7922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7926 * Check for the XMLDecl in the Prolog.
7929 if ((RAW == '<') && (NXT(1) == '?') &&
7930 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7931 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7934 * Note that we will switch encoding on the fly.
7936 xmlParseXMLDecl(ctxt);
7937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7939 * The XML REC instructs us to stop parsing right here
7943 ctxt->standalone = ctxt->input->standalone;
7946 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7948 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7949 ctxt->sax->startDocument(ctxt->userData);
7952 * The Misc part of the Prolog
7958 * Then possibly doc type declaration(s) and more Misc
7959 * (doctypedecl Misc*)?
7962 if ((RAW == '<') && (NXT(1) == '!') &&
7963 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7964 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7965 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7969 xmlParseDocTypeDecl(ctxt);
7971 ctxt->instate = XML_PARSER_DTD;
7972 xmlParseInternalSubset(ctxt);
7976 * Create and update the external subset.
7979 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7980 (!ctxt->disableSAX))
7981 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7982 ctxt->extSubSystem, ctxt->extSubURI);
7986 ctxt->instate = XML_PARSER_PROLOG;
7991 * Time to start parsing the tree itself
7995 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7997 ctxt->sax->error(ctxt->userData,
7998 "Start tag expected, '<' not found\n");
7999 ctxt->wellFormed = 0;
8000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8001 ctxt->instate = XML_PARSER_EOF;
8003 ctxt->instate = XML_PARSER_CONTENT;
8004 xmlParseElement(ctxt);
8005 ctxt->instate = XML_PARSER_EPILOG;
8009 * The Misc part at the end
8014 ctxt->errNo = XML_ERR_DOCUMENT_END;
8015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8016 ctxt->sax->error(ctxt->userData,
8017 "Extra content at the end of the document\n");
8018 ctxt->wellFormed = 0;
8019 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8021 ctxt->instate = XML_PARSER_EOF;
8025 * SAX: end of the document processing.
8027 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8028 ctxt->sax->endDocument(ctxt->userData);
8031 * Remove locally kept entity definitions if the tree was not built
8033 if ((ctxt->myDoc != NULL) &&
8034 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8035 xmlFreeDoc(ctxt->myDoc);
8039 if (! ctxt->wellFormed) {
8047 * xmlParseExtParsedEnt:
8048 * @ctxt: an XML parser context
8050 * parse a general parsed entity
8051 * An external general parsed entity is well-formed if it matches the
8052 * production labeled extParsedEnt.
8054 * [78] extParsedEnt ::= TextDecl? content
8056 * Returns 0, or -1 in case of error. The parser context is augmented
8057 * as a result of the parsing.
8061 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { /* parses production [78] extParsedEnt on ctxt */
8063 xmlCharEncoding enc;
8065 xmlDefaultSAXHandlerInit();
8070 * SAX: beginning of the document processing.
8072 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8073 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8076 * Get the 4 first bytes and decode the charset
8077 * if enc != XML_CHAR_ENCODING_NONE
8078 * plug some encoding conversion routines.
8084 enc = xmlDetectCharEncoding(start, 4); /* guess the encoding from 4 bytes */
8085 if (enc != XML_CHAR_ENCODING_NONE) {
8086 xmlSwitchEncoding(ctxt, enc);
8091 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8093 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8094 ctxt->wellFormed = 0;
8095 if (ctxt->recovery == 0) ctxt->disableSAX = 1; /* outside recovery mode, silence further SAX callbacks */
8099 * Check for the XMLDecl in the Prolog.
8102 if ((RAW == '<') && (NXT(1) == '?') &&
8103 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8104 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8107 * Note that we will switch encoding on the fly.
8109 xmlParseXMLDecl(ctxt);
8110 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8112 * The XML REC instructs us to stop parsing right here
8118 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); /* NOTE(review): presumably the no-declaration branch -- confirm */
8120 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8121 ctxt->sax->startDocument(ctxt->userData);
8124 * Doing validity checking on chunk doesn't make sense
8126 ctxt->instate = XML_PARSER_CONTENT;
8128 ctxt->loadsubset = 0;
8131 xmlParseContent(ctxt);
8133 if ((RAW == '<') && (NXT(1) == '/')) { /* a "</" is still pending after the content */
8134 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8136 ctxt->sax->error(ctxt->userData,
8137 "chunk is not well balanced\n");
8138 ctxt->wellFormed = 0;
8139 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8140 } else if (RAW != 0) { /* some other byte is still unread */
8141 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8143 ctxt->sax->error(ctxt->userData,
8144 "extra content at the end of well balanced chunk\n");
8145 ctxt->wellFormed = 0;
8146 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8150 * SAX: end of the document processing.
8152 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8153 ctxt->sax->endDocument(ctxt->userData);
8155 if (! ctxt->wellFormed) return(-1); /* any recorded well-formedness error fails the call */
8159 /************************************************************************
8161 * Progressive parsing interfaces *
8163 ************************************************************************/
8166 * xmlParseLookupSequence:
8167 * @ctxt: an XML parser context
8168 * @first: the first char to lookup
8169 * @next: the next char to lookup or zero
8170 * @third: the third char to lookup or zero
8172 * Try to find if a sequence (first, next, third) or just (first next) or
8173 * (first) is available in the input stream.
8174 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8175 * to avoid rescanning sequences of bytes, it DOES change the state of the
8176 * parser, do not use liberally.
8178 * Returns the offset of the sequence relative to the current parsing
8179 * point if the full sequence is available, -1 otherwise.
8182 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8183 xmlChar next, xmlChar third) { /* scans the input for first[next[third]] */
8185 xmlParserInputPtr in;
8189 if (in == NULL) return(-1);
8190 base = in->cur - in->base; /* offset of the cursor inside the input */
8191 if (base < 0) return(-1);
8192 if (ctxt->checkIndex > base) /* a previous failed scan already went further */
8193 base = ctxt->checkIndex;
8194 if (in->buf == NULL) {
8198 buf = in->buf->buffer->content;
8199 len = in->buf->buffer->use;
8201 /* take into account the sequence length */
8202 if (third) len -= 2;
8203 else if (next) len --;
8204 for (;base < len;base++) {
8205 if (buf[base] == first) {
8207 if ((buf[base + 1] != next) ||
8208 (buf[base + 2] != third)) continue;
8209 } else if (next != 0) {
8210 if (buf[base + 1] != next) continue;
8212 ctxt->checkIndex = 0; /* match found: drop the saved scan position */
8215 xmlGenericError(xmlGenericErrorContext,
8216 "PP: lookup '%c' found at %d\n",
8218 else if (third == 0)
8219 xmlGenericError(xmlGenericErrorContext,
8220 "PP: lookup '%c%c' found at %d\n",
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: lookup '%c%c%c' found at %d\n",
8225 first, next, third, base);
8227 return(base - (in->cur - in->base)); /* distance from the cursor to the match */
8230 ctxt->checkIndex = base; /* no match: save how far the scan went */
8233 xmlGenericError(xmlGenericErrorContext,
8234 "PP: lookup '%c' failed\n", first);
8235 else if (third == 0)
8236 xmlGenericError(xmlGenericErrorContext,
8237 "PP: lookup '%c%c' failed\n", first, next);
8239 xmlGenericError(xmlGenericErrorContext,
8240 "PP: lookup '%c%c%c' failed\n", first, next, third);
8247 * @ctxt: an XML parser context
8248 * @lastlt: pointer to store the last '<' from the input
8249 * @lastgt: pointer to store the last '>' from the input
8251 * Lookup the last < and > in the current chunk
8254 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8255 const xmlChar **lastgt) { /* finds the last '<' and '>' of the current input */
8258 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8259 xmlGenericError(xmlGenericErrorContext,
8260 "Internal error: xmlParseGetLasts\n");
8263 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) { /* progressive mode with exactly one input */
8264 tmp = ctxt->input->end; /* start from the end of the input */
8266 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8267 (*tmp != '>')) tmp--; /* walk back to the last '<' or '>' */
8268 if (tmp < ctxt->input->base) { /* neither character is present */
8271 } else if (*tmp == '<') {
8274 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; /* keep walking back to a '>' */
8275 if (tmp < ctxt->input->base)
8282 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; /* keep walking back to a '<' */
8283 if (tmp < ctxt->input->base)
8295 * xmlParseTryOrFinish:
8296 * @ctxt: an XML parser context
8297 * @terminate: last chunk indicator
8299 * Try to progress on parsing
8301 * Returns zero if no parsing was possible
8304 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { /* advances the push parser according to ctxt->instate */
8308 const xmlChar *lastlt, *lastgt;
8311 switch (ctxt->instate) { /* first switch: only emits a "PP: try <state>" trace */
8312 case XML_PARSER_EOF:
8313 xmlGenericError(xmlGenericErrorContext,
8314 "PP: try EOF\n"); break;
8315 case XML_PARSER_START:
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: try START\n"); break;
8318 case XML_PARSER_MISC:
8319 xmlGenericError(xmlGenericErrorContext,
8320 "PP: try MISC\n");break;
8321 case XML_PARSER_COMMENT:
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: try COMMENT\n");break;
8324 case XML_PARSER_PROLOG:
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: try PROLOG\n");break;
8327 case XML_PARSER_START_TAG:
8328 xmlGenericError(xmlGenericErrorContext,
8329 "PP: try START_TAG\n");break;
8330 case XML_PARSER_CONTENT:
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: try CONTENT\n");break;
8333 case XML_PARSER_CDATA_SECTION:
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: try CDATA_SECTION\n");break;
8336 case XML_PARSER_END_TAG:
8337 xmlGenericError(xmlGenericErrorContext,
8338 "PP: try END_TAG\n");break;
8339 case XML_PARSER_ENTITY_DECL:
8340 xmlGenericError(xmlGenericErrorContext,
8341 "PP: try ENTITY_DECL\n");break;
8342 case XML_PARSER_ENTITY_VALUE:
8343 xmlGenericError(xmlGenericErrorContext,
8344 "PP: try ENTITY_VALUE\n");break;
8345 case XML_PARSER_ATTRIBUTE_VALUE:
8346 xmlGenericError(xmlGenericErrorContext,
8347 "PP: try ATTRIBUTE_VALUE\n");break;
8348 case XML_PARSER_DTD:
8349 xmlGenericError(xmlGenericErrorContext,
8350 "PP: try DTD\n");break;
8351 case XML_PARSER_EPILOG:
8352 xmlGenericError(xmlGenericErrorContext,
8353 "PP: try EPILOG\n");break;
8355 xmlGenericError(xmlGenericErrorContext,
8356 "PP: try PI\n");break;
8357 case XML_PARSER_IGNORE:
8358 xmlGenericError(xmlGenericErrorContext,
8359 "PP: try IGNORE\n");break;
8363 if (ctxt->input->cur - ctxt->input->base > 4096) { /* cursor is more than 4096 bytes into the buffer */
8365 ctxt->checkIndex = 0; /* forget the saved lookup position */
8367 xmlParseGetLasts(ctxt, &lastlt, &lastgt); /* locate the last '<' and '>' of the current chunk */
8370 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8375 * Pop-up of finished entities.
8377 while ((RAW == 0) && (ctxt->inputNr > 1))
8380 if (ctxt->input ==NULL) break;
8381 if (ctxt->input->buf == NULL)
8382 avail = ctxt->input->length -
8383 (ctxt->input->cur - ctxt->input->base); /* unread bytes when there is no input buffer */
8386 * If we are operating on converted input, try to flush
8387 * remainng chars to avoid them stalling in the non-converted
8390 if ((ctxt->input->buf->raw != NULL) &&
8391 (ctxt->input->buf->raw->use > 0)) {
8392 int base = ctxt->input->base -
8393 ctxt->input->buf->buffer->content;
8394 int current = ctxt->input->cur - ctxt->input->base;
8396 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); /* zero-length push to move pending raw bytes through the converter */
8397 ctxt->input->base = ctxt->input->buf->buffer->content + base; /* rebuild base and cur from the saved offsets */
8398 ctxt->input->cur = ctxt->input->base + current;
8400 &ctxt->input->buf->buffer->content[
8401 ctxt->input->buf->buffer->use];
8403 avail = ctxt->input->buf->buffer->use -
8404 (ctxt->input->cur - ctxt->input->base); /* unread bytes in the input buffer */
8408 switch (ctxt->instate) { /* second switch: the actual per-state work */
8409 case XML_PARSER_EOF:
8411 * Document parsing is done !
8414 case XML_PARSER_START:
8415 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8417 xmlCharEncoding enc;
8420 * Very first chars read from the document flow.
8426 * Get the 4 first bytes and decode the charset
8427 * if enc != XML_CHAR_ENCODING_NONE
8428 * plug some encoding conversion routines.
8434 enc = xmlDetectCharEncoding(start, 4);
8435 if (enc != XML_CHAR_ENCODING_NONE) {
8436 xmlSwitchEncoding(ctxt, enc);
8441 cur = ctxt->input->cur[0];
8442 next = ctxt->input->cur[1];
8444 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8445 ctxt->sax->setDocumentLocator(ctxt->userData,
8446 &xmlDefaultSAXLocator);
8447 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8449 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8450 ctxt->wellFormed = 0;
8451 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8452 ctxt->instate = XML_PARSER_EOF;
8454 xmlGenericError(xmlGenericErrorContext,
8455 "PP: entering EOF\n");
8457 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8458 ctxt->sax->endDocument(ctxt->userData);
8461 if ((cur == '<') && (next == '?')) {
8462 /* PI or XML decl */
8463 if (avail < 5) return(ret); /* too few bytes to inspect the declaration yet */
8465 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8467 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8468 ctxt->sax->setDocumentLocator(ctxt->userData,
8469 &xmlDefaultSAXLocator);
8470 if ((ctxt->input->cur[2] == 'x') &&
8471 (ctxt->input->cur[3] == 'm') &&
8472 (ctxt->input->cur[4] == 'l') &&
8473 (IS_BLANK(ctxt->input->cur[5]))) {
8476 xmlGenericError(xmlGenericErrorContext,
8477 "PP: Parsing XML Decl\n");
8479 xmlParseXMLDecl(ctxt);
8480 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8482 * The XML REC instructs us to stop parsing right
8485 ctxt->instate = XML_PARSER_EOF;
8488 ctxt->standalone = ctxt->input->standalone;
8489 if ((ctxt->encoding == NULL) &&
8490 (ctxt->input->encoding != NULL))
8491 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8492 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8493 (!ctxt->disableSAX))
8494 ctxt->sax->startDocument(ctxt->userData);
8495 ctxt->instate = XML_PARSER_MISC;
8497 xmlGenericError(xmlGenericErrorContext,
8498 "PP: entering MISC\n");
8501 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8502 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8503 (!ctxt->disableSAX))
8504 ctxt->sax->startDocument(ctxt->userData);
8505 ctxt->instate = XML_PARSER_MISC;
8507 xmlGenericError(xmlGenericErrorContext,
8508 "PP: entering MISC\n");
8512 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8513 ctxt->sax->setDocumentLocator(ctxt->userData,
8514 &xmlDefaultSAXLocator);
8515 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8516 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8517 (!ctxt->disableSAX))
8518 ctxt->sax->startDocument(ctxt->userData);
8519 ctxt->instate = XML_PARSER_MISC;
8521 xmlGenericError(xmlGenericErrorContext,
8522 "PP: entering MISC\n");
8526 case XML_PARSER_START_TAG: {
8527 xmlChar *name, *oldname;
8529 if ((avail < 2) && (ctxt->inputNr == 1))
8531 cur = ctxt->input->cur[0];
8533 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8535 ctxt->sax->error(ctxt->userData,
8536 "Start tag expect, '<' not found\n");
8537 ctxt->wellFormed = 0;
8538 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8539 ctxt->instate = XML_PARSER_EOF;
8541 xmlGenericError(xmlGenericErrorContext,
8542 "PP: entering EOF\n");
8544 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8545 ctxt->sax->endDocument(ctxt->userData);
8549 if (ctxt->progressive) {
8550 if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) /* no '>' at or after the cursor yet */
8552 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8556 if (ctxt->spaceNr == 0)
8557 spacePush(ctxt, -1);
8559 spacePush(ctxt, *ctxt->space);
8560 name = xmlParseStartTag(ctxt);
8563 ctxt->instate = XML_PARSER_EOF;
8565 xmlGenericError(xmlGenericErrorContext,
8566 "PP: entering EOF\n");
8568 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8569 ctxt->sax->endDocument(ctxt->userData);
8572 namePush(ctxt, name);
8575 * [ VC: Root Element Type ]
8576 * The Name in the document type declaration must match
8577 * the element type of the root element.
8579 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8580 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8581 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8584 * Check for an Empty Element.
8586 if ((RAW == '/') && (NXT(1) == '>')) {
8588 if ((ctxt->sax != NULL) &&
8589 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8590 ctxt->sax->endElement(ctxt->userData, name);
8591 oldname = namePop(ctxt);
8593 if (oldname != NULL) {
8595 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8599 if (ctxt->name == NULL) { /* no current element name left: go to the epilog */
8600 ctxt->instate = XML_PARSER_EPILOG;
8602 xmlGenericError(xmlGenericErrorContext,
8603 "PP: entering EPILOG\n");
8606 ctxt->instate = XML_PARSER_CONTENT;
8608 xmlGenericError(xmlGenericErrorContext,
8609 "PP: entering CONTENT\n");
8617 ctxt->errNo = XML_ERR_GT_REQUIRED;
8618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8619 ctxt->sax->error(ctxt->userData,
8620 "Couldn't find end of Start Tag %s\n",
8622 ctxt->wellFormed = 0;
8623 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8626 * end of parsing of this node.
8629 oldname = namePop(ctxt);
8631 if (oldname != NULL) {
8633 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8638 ctxt->instate = XML_PARSER_CONTENT;
8640 xmlGenericError(xmlGenericErrorContext,
8641 "PP: entering CONTENT\n");
8645 case XML_PARSER_CONTENT: {
8646 const xmlChar *test;
8648 if ((avail < 2) && (ctxt->inputNr == 1))
8650 cur = ctxt->input->cur[0];
8651 next = ctxt->input->cur[1];
8654 cons = ctxt->input->consumed; /* snapshot used below to detect lack of progress */
8655 if ((cur == '<') && (next == '/')) {
8656 ctxt->instate = XML_PARSER_END_TAG;
8658 xmlGenericError(xmlGenericErrorContext,
8659 "PP: entering END_TAG\n");
8662 } else if ((cur == '<') && (next == '?')) {
8664 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8667 xmlGenericError(xmlGenericErrorContext,
8668 "PP: Parsing PI\n");
8671 } else if ((cur == '<') && (next != '!')) {
8672 ctxt->instate = XML_PARSER_START_TAG;
8674 xmlGenericError(xmlGenericErrorContext,
8675 "PP: entering START_TAG\n");
8678 } else if ((cur == '<') && (next == '!') &&
8679 (ctxt->input->cur[2] == '-') &&
8680 (ctxt->input->cur[3] == '-')) {
8682 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8685 xmlGenericError(xmlGenericErrorContext,
8686 "PP: Parsing Comment\n");
8688 xmlParseComment(ctxt);
8689 ctxt->instate = XML_PARSER_CONTENT;
8690 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8691 (ctxt->input->cur[2] == '[') &&
8692 (ctxt->input->cur[3] == 'C') &&
8693 (ctxt->input->cur[4] == 'D') &&
8694 (ctxt->input->cur[5] == 'A') &&
8695 (ctxt->input->cur[6] == 'T') &&
8696 (ctxt->input->cur[7] == 'A') &&
8697 (ctxt->input->cur[8] == '[')) {
8699 ctxt->instate = XML_PARSER_CDATA_SECTION;
8701 xmlGenericError(xmlGenericErrorContext,
8702 "PP: entering CDATA_SECTION\n");
8705 } else if ((cur == '<') && (next == '!') &&
8708 } else if (cur == '&') {
8710 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8713 xmlGenericError(xmlGenericErrorContext,
8714 "PP: Parsing Reference\n");
8716 xmlParseReference(ctxt);
8718 /* TODO Avoid the extra copy, handle directly !!! */
8720 * Goal of the following test is:
8721 * - minimize calls to the SAX 'character' callback
8722 * when they are mergeable
8723 * - handle an problem for isBlank when we only parse
8724 * a sequence of blank chars and the next one is
8725 * not available to check against '<' presence.
8726 * - tries to homogenize the differences in SAX
8727 * callbacks between the push and pull versions
8730 if ((ctxt->inputNr == 1) &&
8731 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8733 if (ctxt->progressive) {
8734 if ((lastlt == NULL) ||
8735 (ctxt->input->cur > lastlt)) /* no '<' at or after the cursor yet */
8737 } else if (xmlParseLookupSequence(ctxt,
8743 ctxt->checkIndex = 0;
8745 xmlGenericError(xmlGenericErrorContext,
8746 "PP: Parsing char data\n");
8748 xmlParseCharData(ctxt, 0);
8751 * Pop-up of finished entities.
8753 while ((RAW == 0) && (ctxt->inputNr > 1))
8755 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { /* neither the consumed count nor the cursor moved */
8756 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8758 ctxt->sax->error(ctxt->userData,
8759 "detected an error in element content\n");
8760 ctxt->wellFormed = 0;
8761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8762 ctxt->instate = XML_PARSER_EOF;
8767 case XML_PARSER_END_TAG:
8771 if (ctxt->progressive) {
8772 if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) /* no '>' at or after the cursor yet */
8774 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8778 xmlParseEndTag(ctxt);
8779 if (ctxt->name == NULL) { /* no current element name left: go to the epilog */
8780 ctxt->instate = XML_PARSER_EPILOG;
8782 xmlGenericError(xmlGenericErrorContext,
8783 "PP: entering EPILOG\n");
8786 ctxt->instate = XML_PARSER_CONTENT;
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: entering CONTENT\n");
8793 case XML_PARSER_CDATA_SECTION: {
8795 * The Push mode need to have the SAX callback for
8796 * cdataBlock merge back contiguous callbacks.
8800 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); /* look for the "]]>" terminator */
8802 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8804 if (ctxt->sax->cdataBlock != NULL)
8805 ctxt->sax->cdataBlock(ctxt->userData,
8807 XML_PARSER_BIG_BUFFER_SIZE);
8808 else if (ctxt->sax->characters != NULL)
8809 ctxt->sax->characters(ctxt->userData,
8811 XML_PARSER_BIG_BUFFER_SIZE);
8813 SKIP(XML_PARSER_BIG_BUFFER_SIZE); /* presumably advances past the bytes just delivered -- confirm against the SKIP macro */
8814 ctxt->checkIndex = 0;
8818 if ((ctxt->sax != NULL) && (base > 0) &&
8819 (!ctxt->disableSAX)) {
8820 if (ctxt->sax->cdataBlock != NULL)
8821 ctxt->sax->cdataBlock(ctxt->userData,
8822 ctxt->input->cur, base);
8823 else if (ctxt->sax->characters != NULL)
8824 ctxt->sax->characters(ctxt->userData,
8825 ctxt->input->cur, base);
8828 ctxt->checkIndex = 0;
8829 ctxt->instate = XML_PARSER_CONTENT;
8831 xmlGenericError(xmlGenericErrorContext,
8832 "PP: entering CONTENT\n");
8837 case XML_PARSER_MISC:
8839 if (ctxt->input->buf == NULL)
8840 avail = ctxt->input->length -
8841 (ctxt->input->cur - ctxt->input->base);
8843 avail = ctxt->input->buf->buffer->use -
8844 (ctxt->input->cur - ctxt->input->base);
8847 cur = ctxt->input->cur[0];
8848 next = ctxt->input->cur[1];
8849 if ((cur == '<') && (next == '?')) {
8851 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8854 xmlGenericError(xmlGenericErrorContext,
8855 "PP: Parsing PI\n");
8858 } else if ((cur == '<') && (next == '!') &&
8859 (ctxt->input->cur[2] == '-') &&
8860 (ctxt->input->cur[3] == '-')) {
8862 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8865 xmlGenericError(xmlGenericErrorContext,
8866 "PP: Parsing Comment\n");
8868 xmlParseComment(ctxt);
8869 ctxt->instate = XML_PARSER_MISC;
8870 } else if ((cur == '<') && (next == '!') &&
8871 (ctxt->input->cur[2] == 'D') &&
8872 (ctxt->input->cur[3] == 'O') &&
8873 (ctxt->input->cur[4] == 'C') &&
8874 (ctxt->input->cur[5] == 'T') &&
8875 (ctxt->input->cur[6] == 'Y') &&
8876 (ctxt->input->cur[7] == 'P') &&
8877 (ctxt->input->cur[8] == 'E')) {
8879 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8882 xmlGenericError(xmlGenericErrorContext,
8883 "PP: Parsing internal subset\n");
8886 xmlParseDocTypeDecl(ctxt);
8888 ctxt->instate = XML_PARSER_DTD;
8890 xmlGenericError(xmlGenericErrorContext,
8891 "PP: entering DTD\n");
8895 * Create and update the external subset.
8898 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8899 (ctxt->sax->externalSubset != NULL))
8900 ctxt->sax->externalSubset(ctxt->userData,
8901 ctxt->intSubName, ctxt->extSubSystem,
8904 ctxt->instate = XML_PARSER_PROLOG;
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: entering PROLOG\n");
8910 } else if ((cur == '<') && (next == '!') &&
8914 ctxt->instate = XML_PARSER_START_TAG;
8915 ctxt->progressive = 1; /* turns on the progressive checks made in the tag and content states */
8916 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
8918 xmlGenericError(xmlGenericErrorContext,
8919 "PP: entering START_TAG\n");
8923 case XML_PARSER_PROLOG:
8925 if (ctxt->input->buf == NULL)
8926 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8928 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8931 cur = ctxt->input->cur[0];
8932 next = ctxt->input->cur[1];
8933 if ((cur == '<') && (next == '?')) {
8935 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8938 xmlGenericError(xmlGenericErrorContext,
8939 "PP: Parsing PI\n");
8942 } else if ((cur == '<') && (next == '!') &&
8943 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8945 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8948 xmlGenericError(xmlGenericErrorContext,
8949 "PP: Parsing Comment\n");
8951 xmlParseComment(ctxt);
8952 ctxt->instate = XML_PARSER_PROLOG;
8953 } else if ((cur == '<') && (next == '!') &&
8957 ctxt->instate = XML_PARSER_START_TAG;
8958 ctxt->progressive = 1; /* turns on the progressive checks made in the tag and content states */
8959 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
8961 xmlGenericError(xmlGenericErrorContext,
8962 "PP: entering START_TAG\n");
8966 case XML_PARSER_EPILOG:
8968 if (ctxt->input->buf == NULL)
8969 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8971 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8974 cur = ctxt->input->cur[0];
8975 next = ctxt->input->cur[1];
8976 if ((cur == '<') && (next == '?')) {
8978 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8981 xmlGenericError(xmlGenericErrorContext,
8982 "PP: Parsing PI\n");
8985 ctxt->instate = XML_PARSER_EPILOG;
8986 } else if ((cur == '<') && (next == '!') &&
8987 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8989 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8992 xmlGenericError(xmlGenericErrorContext,
8993 "PP: Parsing Comment\n");
8995 xmlParseComment(ctxt);
8996 ctxt->instate = XML_PARSER_EPILOG;
8997 } else if ((cur == '<') && (next == '!') &&
9001 ctxt->errNo = XML_ERR_DOCUMENT_END;
9002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9003 ctxt->sax->error(ctxt->userData,
9004 "Extra content at the end of the document\n");
9005 ctxt->wellFormed = 0;
9006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9007 ctxt->instate = XML_PARSER_EOF;
9009 xmlGenericError(xmlGenericErrorContext,
9010 "PP: entering EOF\n");
9012 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9013 ctxt->sax->endDocument(ctxt->userData);
9017 case XML_PARSER_DTD: {
9019 * Sorry but progressive parsing of the internal subset
9020 * is not expected to be supported. We first check that
9021 * the full content of the internal subset is available and
9022 * the parsing is launched only at that point.
9023 * Internal subset ends up with "']' S? '>'" in an unescaped
9024 * section and not in a ']]>' sequence which are conditional
9025 * sections (whoever argued to keep that crap in XML deserve
9026 * a place in hell !).
9032 base = ctxt->input->cur - ctxt->input->base;
9033 if (base < 0) return(0);
9034 if (ctxt->checkIndex > base) /* resume from the position saved by an earlier incomplete scan */
9035 base = ctxt->checkIndex;
9036 buf = ctxt->input->buf->buffer->content;
9037 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9040 if (buf[base] == quote)
9044 if (buf[base] == '"') {
9048 if (buf[base] == '\'') {
9052 if (buf[base] == ']') {
9053 if ((unsigned int) base +1 >=
9054 ctxt->input->buf->buffer->use)
9056 if (buf[base + 1] == ']') {
9057 /* conditional crap, skip both ']' ! */
9062 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9064 if (buf[base + i] == '>')
9065 goto found_end_int_subset;
9071 * We didn't found the end of the Internal subset
9074 ctxt->checkIndex = base; /* remember how far the subset was scanned */
9077 xmlGenericError(xmlGenericErrorContext,
9078 "PP: lookup of int subset end filed\n");
9082 found_end_int_subset:
9083 xmlParseInternalSubset(ctxt);
9085 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9086 (ctxt->sax->externalSubset != NULL))
9087 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9088 ctxt->extSubSystem, ctxt->extSubURI);
9090 ctxt->instate = XML_PARSER_PROLOG;
9091 ctxt->checkIndex = 0;
9093 xmlGenericError(xmlGenericErrorContext,
9094 "PP: entering PROLOG\n");
9098 case XML_PARSER_COMMENT:
9099 xmlGenericError(xmlGenericErrorContext,
9100 "PP: internal error, state == COMMENT\n");
9101 ctxt->instate = XML_PARSER_CONTENT;
9103 xmlGenericError(xmlGenericErrorContext,
9104 "PP: entering CONTENT\n");
9107 case XML_PARSER_IGNORE:
9108 xmlGenericError(xmlGenericErrorContext,
9109 "PP: internal error, state == IGNORE");
9110 ctxt->instate = XML_PARSER_DTD;
9112 xmlGenericError(xmlGenericErrorContext,
9113 "PP: entering DTD\n");
9117 xmlGenericError(xmlGenericErrorContext,
9118 "PP: internal error, state == PI\n");
9119 ctxt->instate = XML_PARSER_CONTENT;
9121 xmlGenericError(xmlGenericErrorContext,
9122 "PP: entering CONTENT\n");
9125 case XML_PARSER_ENTITY_DECL:
9126 xmlGenericError(xmlGenericErrorContext,
9127 "PP: internal error, state == ENTITY_DECL\n");
9128 ctxt->instate = XML_PARSER_DTD;
9130 xmlGenericError(xmlGenericErrorContext,
9131 "PP: entering DTD\n");
9134 case XML_PARSER_ENTITY_VALUE:
9135 xmlGenericError(xmlGenericErrorContext,
9136 "PP: internal error, state == ENTITY_VALUE\n");
9137 ctxt->instate = XML_PARSER_CONTENT; /* NOTE(review): the trace below reports "entering DTD" although CONTENT is set -- confirm intent */
9139 xmlGenericError(xmlGenericErrorContext,
9140 "PP: entering DTD\n");
9143 case XML_PARSER_ATTRIBUTE_VALUE:
9144 xmlGenericError(xmlGenericErrorContext,
9145 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9146 ctxt->instate = XML_PARSER_START_TAG;
9148 xmlGenericError(xmlGenericErrorContext,
9149 "PP: entering START_TAG\n");
9152 case XML_PARSER_SYSTEM_LITERAL:
9153 xmlGenericError(xmlGenericErrorContext,
9154 "PP: internal error, state == SYSTEM_LITERAL\n");
9155 ctxt->instate = XML_PARSER_START_TAG;
9157 xmlGenericError(xmlGenericErrorContext,
9158 "PP: entering START_TAG\n");
9161 case XML_PARSER_PUBLIC_LITERAL:
9162 xmlGenericError(xmlGenericErrorContext,
9163 "PP: internal error, state == PUBLIC_LITERAL\n");
9164 ctxt->instate = XML_PARSER_START_TAG;
9166 xmlGenericError(xmlGenericErrorContext,
9167 "PP: entering START_TAG\n");
9174 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9181 * @ctxt: an XML parser context
9182 * @chunk: a char array
9183 * @size: the size in bytes of the chunk
9184 * @terminate: last chunk indicator
9186 * Parse a Chunk of memory
9188 * Returns zero if no error, the xmlParserErrors otherwise.
9191 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, /* push-mode entry: feed one chunk, then try to parse */
9193 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) /* an earlier error disabled SAX: just report it again */
9194 return(ctxt->errNo);
9195 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9196 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9197 int base = ctxt->input->base - ctxt->input->buf->buffer->content; /* offsets saved so the pointers can be rebuilt after the push */
9198 int cur = ctxt->input->cur - ctxt->input->base;
9200 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9201 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9202 ctxt->input->cur = ctxt->input->base + cur;
9204 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
9206 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9210 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) /* parse on the last chunk or once more than 80 bytes are buffered */
9211 xmlParseTryOrFinish(ctxt, terminate);
9213 } else if (ctxt->instate != XML_PARSER_EOF) {
9214 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9215 xmlParserInputBufferPtr in = ctxt->input->buf;
9216 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9217 (in->raw != NULL)) {
9220 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); /* no new data: run the encoder from in->raw into in->buffer */
9222 xmlGenericError(xmlGenericErrorContext,
9223 "xmlParseChunk: encoder error\n");
9224 return(XML_ERR_INVALID_ENCODING);
9229 xmlParseTryOrFinish(ctxt, terminate);
9230 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9231 return(ctxt->errNo);
9234 * Check for termination
9237 if (ctxt->input->buf == NULL)
9238 avail = ctxt->input->length -
9239 (ctxt->input->cur - ctxt->input->base);
9241 avail = ctxt->input->buf->buffer->use -
9242 (ctxt->input->cur - ctxt->input->base);
9244 if ((ctxt->instate != XML_PARSER_EOF) &&
9245 (ctxt->instate != XML_PARSER_EPILOG)) { /* parser is in neither the EOF nor the EPILOG state */
9246 ctxt->errNo = XML_ERR_DOCUMENT_END;
9247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9248 ctxt->sax->error(ctxt->userData,
9249 "Extra content at the end of the document\n");
9250 ctxt->wellFormed = 0;
9251 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9253 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { /* unread bytes remain while in the epilog */
9254 ctxt->errNo = XML_ERR_DOCUMENT_END;
9255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9256 ctxt->sax->error(ctxt->userData,
9257 "Extra content at the end of the document\n");
9258 ctxt->wellFormed = 0;
9259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9262 if (ctxt->instate != XML_PARSER_EOF) { /* endDocument is only sent here if EOF was not already reached */
9263 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9264 ctxt->sax->endDocument(ctxt->userData);
9266 ctxt->instate = XML_PARSER_EOF;
9268 return((xmlParserErrors) ctxt->errNo);
9271 /************************************************************************
9273 * I/O front end functions to the parser *
9275 ************************************************************************/
9279 * @ctxt: an XML parser context
9281 * Blocks further parser processing
9284 xmlStopParser(xmlParserCtxtPtr ctxt) { /* forces ctxt into its end-of-parse state */
9285 ctxt->instate = XML_PARSER_EOF; /* the state machine treats EOF as "parsing is done" */
9286 if (ctxt->input != NULL)
9287 ctxt->input->cur = BAD_CAST""; /* cursor now points at an empty string, so the next byte read is NUL */
9291 * xmlCreatePushParserCtxt:
9292 * @sax: a SAX handler
9293 * @user_data: The user data returned on SAX callbacks
9294 * @chunk: a pointer to an array of chars
9295 * @size: number of chars in the array
9296 * @filename: an optional file name or URI
9298 * Create a parser context for using the XML parser in push mode.
9299 * If @chunk is non-NULL and @size is positive, the data is used to detect
9300 * the encoding. The remaining characters will be parsed so they
9301 * don't need to be fed in again through xmlParseChunk.
9302 * To allow content encoding detection, @size should be >= 4
9303 * The value of @filename is used for fetching external entities
9304 * and error/warning reports.
9306 * Returns the new parser context or NULL
9310 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9311 const char *chunk, int size, const char *filename) { /* builds a push-mode context, optionally primed with chunk */
9312 xmlParserCtxtPtr ctxt;
9313 xmlParserInputPtr inputStream;
9314 xmlParserInputBufferPtr buf;
9315 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9318 * plug some encoding conversion routines
9320 if ((chunk != NULL) && (size >= 4)) /* detection is only attempted with at least 4 bytes */
9321 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9323 buf = xmlAllocParserInputBuffer(enc);
9324 if (buf == NULL) return(NULL);
9326 ctxt = xmlNewParserCtxt();
9328 xmlGenericError(xmlGenericErrorContext,
9329 "xml parser: out of memory\n");
9330 xmlFreeParserInputBuffer(buf); /* release the buffer on this failure path */
9334 if (ctxt->sax != &xmlDefaultSAXHandler)
9336 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); /* fresh handler table that the caller's one is copied into below */
9337 if (ctxt->sax == NULL) {
9338 xmlGenericError(xmlGenericErrorContext,
9339 "xml parser: out of memory\n");
9340 xmlFreeParserInputBuffer(buf);
9341 xmlFreeParserCtxt(ctxt);
9344 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); /* copy the caller-supplied handlers */
9345 if (user_data != NULL)
9346 ctxt->userData = user_data;
9348 if (filename == NULL) {
9349 ctxt->directory = NULL;
9351 ctxt->directory = xmlParserGetDirectory(filename); /* directory derived from the file name */
9354 inputStream = xmlNewInputStream(ctxt);
9355 if (inputStream == NULL) {
9356 xmlFreeParserCtxt(ctxt);
9357 xmlFreeParserInputBuffer(buf);
9361 if (filename == NULL)
9362 inputStream->filename = NULL;
9364 inputStream->filename = (char *)
9365 xmlCanonicPath((const xmlChar *) filename);
9366 inputStream->buf = buf; /* attach the push buffer to the input stream */
9367 inputStream->base = inputStream->buf->buffer->content;
9368 inputStream->cur = inputStream->buf->buffer->content;
9370 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
9372 inputPush(ctxt, inputStream);
9374 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9375 (ctxt->input->buf != NULL)) {
9376 int base = ctxt->input->base - ctxt->input->buf->buffer->content; /* offsets saved so the pointers can be rebuilt after the push */
9377 int cur = ctxt->input->cur - ctxt->input->base;
9379 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); /* feed the initial chunk */
9381 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9382 ctxt->input->cur = ctxt->input->base + cur;
9384 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
9386 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9390 if (enc != XML_CHAR_ENCODING_NONE) {
9391 xmlSwitchEncoding(ctxt, enc); /* switch the context to the detected encoding */
9398 * xmlCreateIOParserCtxt:
9399 * @sax: a SAX handler
9400 * @user_data: The user data returned on SAX callbacks
9401 * @ioread: an I/O read function
9402 * @ioclose: an I/O close function
9403 * @ioctx: an I/O handler
9404 * @enc: the charset encoding if known
9406 * Create a parser context for using the XML parser with an existing I/O stream
9409 * Returns the new parser context or NULL
/*
 * xmlCreateIOParserCtxt: create a parser context reading through the
 * caller-supplied I/O callbacks. An input buffer is built from
 * @ioread/@ioclose/@ioctx, a context is created, a heap copy of @sax is
 * installed, and an I/O input stream over the buffer is pushed.
 * NOTE(review): the NULL check after xmlNewParserCtxt(), the guard on
 * @sax, the error returns and the final return are not present in this
 * listing.
 */
9412 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9413 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9414 void *ioctx, xmlCharEncoding enc) {
9415 xmlParserCtxtPtr ctxt;
9416 xmlParserInputPtr inputStream;
9417 xmlParserInputBufferPtr buf;
9419 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9420 if (buf == NULL) return(NULL);
9422 ctxt = xmlNewParserCtxt();
/* The statement guarded by this test is not visible in the listing. */
9428 if (ctxt->sax != &xmlDefaultSAXHandler)
9430 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9431 if (ctxt->sax == NULL) {
/* Copy the caller's handler table into the context-owned block. */
9436 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9437 if (user_data != NULL)
9438 ctxt->userData = user_data;
9441 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9442 if (inputStream == NULL) {
9443 xmlFreeParserCtxt(ctxt);
9446 inputPush(ctxt, inputStream);
9451 /************************************************************************
9453 * Front ends when parsing a DTD *
9455 ************************************************************************/
9459 * @sax: the SAX handler block or NULL
9460 * @input: an Input Buffer
9461 * @enc: the charset encoding if known
9463 * Load and parse a DTD
9465 * Returns the resulting xmlDtdPtr or NULL in case of error.
9466 * @input will be freed at parsing end.
/*
 * xmlIOParseDTD: parse a DTD read from @input and return it.
 * A temporary context and a temporary document are created; the DTD is
 * parsed as the external subset of that document, detached from it when
 * the parse was well formed, and the temporaries are freed.
 * NOTE(review): a number of lines (guards, returns, the declaration of
 * `start` and `tmp`, loop bodies) are not present in this listing.
 */
9470 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9471 xmlCharEncoding enc) {
9472 xmlDtdPtr ret = NULL;
9473 xmlParserCtxtPtr ctxt;
9474 xmlParserInputPtr pinput = NULL;
9480 ctxt = xmlNewParserCtxt();
9486 * Set-up the SAX context
9489 if (ctxt->sax != NULL)
9492 ctxt->userData = NULL;
9496 * generate a parser input from the I/O handler
9499 pinput = xmlNewIOInputStream(ctxt, input, enc);
9500 if (pinput == NULL) {
/*
 * ctxt->sax is cleared before freeing the context when the caller passed
 * its own handler, presumably so xmlFreeParserCtxt does not free it.
 */
9501 if (sax != NULL) ctxt->sax = NULL;
9502 xmlFreeParserCtxt(ctxt);
9507 * plug some encoding conversion routines here.
9509 xmlPushInput(ctxt, pinput);
/* Reset the pushed input: no filename, base/cur at the current position. */
9511 pinput->filename = NULL;
9514 pinput->base = ctxt->input->cur;
9515 pinput->cur = ctxt->input->cur;
9516 pinput->free = NULL;
9519 * let's parse that entity knowing it's an external subset.
/*
 * NOTE(review): in the visible lines the result of xmlNewDoc() is
 * dereferenced on the very next line without a NULL check -- confirm.
 */
9522 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9523 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9524 BAD_CAST "none", BAD_CAST "none");
/* Auto-detect only when the caller did not state an encoding. */
9526 if (enc == XML_CHAR_ENCODING_NONE) {
9528 * Get the 4 first bytes and decode the charset
9529 * if enc != XML_CHAR_ENCODING_NONE
9530 * plug some encoding conversion routines.
9536 enc = xmlDetectCharEncoding(start, 4);
9537 if (enc != XML_CHAR_ENCODING_NONE) {
9538 xmlSwitchEncoding(ctxt, enc);
9542 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
/*
 * On a well-formed parse the DTD is taken out of the temporary document
 * (extSubset set to NULL) so that xmlFreeDoc below does not free it.
 */
9544 if (ctxt->myDoc != NULL) {
9545 if (ctxt->wellFormed) {
9546 ret = ctxt->myDoc->extSubset;
9547 ctxt->myDoc->extSubset = NULL;
/* Walks the DTD's children; the loop body is not visible here. */
9552 tmp = ret->children;
9553 while (tmp != NULL) {
9561 xmlFreeDoc(ctxt->myDoc);
9564 if (sax != NULL) ctxt->sax = NULL;
9565 xmlFreeParserCtxt(ctxt);
9572 * @sax: the SAX handler block
9573 * @ExternalID: a NAME* containing the External ID of the DTD
9574 * @SystemID: a NAME* containing the URL to the DTD
9576 * Load and parse an external subset.
9578 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * xmlSAXParseDTD: load the DTD identified by @ExternalID / @SystemID
 * through the SAX resolveEntity callback and parse it as an external
 * subset of a temporary document. Returns NULL immediately when both
 * identifiers are NULL.
 * NOTE(review): several guards, returns and the declaration of `tmp`
 * are not present in this listing.
 */
9582 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9583 const xmlChar *SystemID) {
9584 xmlDtdPtr ret = NULL;
9585 xmlParserCtxtPtr ctxt;
9586 xmlParserInputPtr input = NULL;
9587 xmlCharEncoding enc;
9589 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9591 ctxt = xmlNewParserCtxt();
9597 * Set-up the SAX context
9600 if (ctxt->sax != NULL)
9603 ctxt->userData = ctxt;
9607 * Ask the Entity resolver to load the damn thing
/* input stays NULL when no resolveEntity callback is installed. */
9610 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9611 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
9612 if (input == NULL) {
9613 if (sax != NULL) ctxt->sax = NULL;
9614 xmlFreeParserCtxt(ctxt);
9619 * plug some encoding conversion routines here.
9621 xmlPushInput(ctxt, input);
/*
 * NOTE(review): unlike the other entry points in this file, the switch
 * below is not guarded by enc != XML_CHAR_ENCODING_NONE in the visible
 * lines -- confirm xmlSwitchEncoding tolerates that value.
 */
9622 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9623 xmlSwitchEncoding(ctxt, enc);
/* Fall back to the canonical form of SystemID as the input's file name. */
9625 if (input->filename == NULL)
9626 input->filename = (char *) xmlCanonicPath(SystemID);
9629 input->base = ctxt->input->cur;
9630 input->cur = ctxt->input->cur;
9634 * let's parse that entity knowing it's an external subset.
/*
 * NOTE(review): the xmlNewDoc() result is dereferenced on the next
 * visible line without a NULL check -- confirm.
 */
9637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9638 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9639 ExternalID, SystemID);
9640 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
/* Detach the DTD from the temporary document before the latter is freed. */
9642 if (ctxt->myDoc != NULL) {
9643 if (ctxt->wellFormed) {
9644 ret = ctxt->myDoc->extSubset;
9645 ctxt->myDoc->extSubset = NULL;
/* Walks the DTD's children; the loop body is not visible here. */
9650 tmp = ret->children;
9651 while (tmp != NULL) {
9659 xmlFreeDoc(ctxt->myDoc);
9662 if (sax != NULL) ctxt->sax = NULL;
9663 xmlFreeParserCtxt(ctxt);
9670 * @ExternalID: a NAME* containing the External ID of the DTD
9671 * @SystemID: a NAME* containing the URL to the DTD
9673 * Load and parse an external subset.
9675 * Returns the resulting xmlDtdPtr or NULL in case of error.
/* xmlParseDTD: convenience wrapper, calls xmlSAXParseDTD() with a NULL SAX handler. */
9679 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9680 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9683 /************************************************************************
9685 * Front ends when parsing an Entity *
9687 ************************************************************************/
9690 * xmlParseCtxtExternalEntity:
9691 * @ctx: the existing parsing context
9692 * @URL: the URL for the entity to load
9693 * @ID: the System ID for the entity to load
9694 * @lst: the return value for the set of parsed nodes
9696 * Parse an external general entity within an existing parsing context
9697 * An external general parsed entity is well-formed if it matches the
9698 * production labeled extParsedEnt.
9700 * [78] extParsedEnt ::= TextDecl? content
9702 * Returns 0 if the entity is well formed, -1 in case of args problem and
9703 * the parser error code otherwise
/*
 * xmlParseCtxtExternalEntity: parse the external entity @URL / @ID in a
 * fresh sub-context that shares the SAX handler, _private pointer and
 * several option flags of @ctx. Content is parsed under a "pseudoroot"
 * element of a temporary document; the parsed nodes are then unlinked
 * from that pseudo root.
 * NOTE(review): many lines (declarations of newDoc/ret/cur/start, some
 * returns, loop bodies, closing braces) are not present in this listing.
 */
9707 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
9708 const xmlChar *ID, xmlNodePtr *lst) {
9709 xmlParserCtxtPtr ctxt;
9711 xmlSAXHandlerPtr oldsax = NULL;
9714 xmlCharEncoding enc;
/* Entity nesting deeper than 40 is reported as an entity loop. */
9716 if (ctx->depth > 40) {
9717 return(XML_ERR_ENTITY_LOOP);
9722 if ((URL == NULL) && (ID == NULL))
9724 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9728 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9729 if (ctxt == NULL) return(-1);
9730 ctxt->userData = ctxt;
9731 ctxt->_private = ctx->_private;
/* The sub-context uses the parent's SAX handler block directly. */
9733 ctxt->sax = ctx->sax;
9734 newDoc = xmlNewDoc(BAD_CAST "1.0");
9735 if (newDoc == NULL) {
9736 xmlFreeParserCtxt(ctxt);
/*
 * The temporary document borrows the parent's DTD subsets; they are set
 * back to NULL on the exit paths below, presumably so that freeing
 * newDoc leaves them alone.
 */
9739 if (ctx->myDoc != NULL) {
9740 newDoc->intSubset = ctx->myDoc->intSubset;
9741 newDoc->extSubset = ctx->myDoc->extSubset;
9743 if (ctx->myDoc->URL != NULL) {
9744 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9746 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9747 if (newDoc->children == NULL) {
9749 xmlFreeParserCtxt(ctxt);
9750 newDoc->intSubset = NULL;
9751 newDoc->extSubset = NULL;
/* The pseudo root becomes the current node for the content parse. */
9755 nodePush(ctxt, newDoc->children);
9756 if (ctx->myDoc == NULL) {
9757 ctxt->myDoc = newDoc;
9759 ctxt->myDoc = ctx->myDoc;
9760 newDoc->children->doc = ctx->myDoc;
9764 * Get the 4 first bytes and decode the charset
9765 * if enc != XML_CHAR_ENCODING_NONE
9766 * plug some encoding conversion routines.
9773 enc = xmlDetectCharEncoding(start, 4);
9774 if (enc != XML_CHAR_ENCODING_NONE) {
9775 xmlSwitchEncoding(ctxt, enc);
9779 * Parse a possible text declaration first
/* Matches the literal "<?xml" followed by a blank character. */
9781 if ((RAW == '<') && (NXT(1) == '?') &&
9782 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9783 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9784 xmlParseTextDecl(ctxt);
9788 * Doing validity checking on chunk doesn't make sense
/* Inherit parsing options from the parent; depth is one level deeper. */
9790 ctxt->instate = XML_PARSER_CONTENT;
9791 ctxt->validate = ctx->validate;
9792 ctxt->valid = ctx->valid;
9793 ctxt->loadsubset = ctx->loadsubset;
9794 ctxt->depth = ctx->depth + 1;
9795 ctxt->replaceEntities = ctx->replaceEntities;
9796 if (ctxt->validate) {
9797 ctxt->vctxt.error = ctx->vctxt.error;
9798 ctxt->vctxt.warning = ctx->vctxt.warning;
9800 ctxt->vctxt.error = NULL;
9801 ctxt->vctxt.warning = NULL;
9803 ctxt->vctxt.nodeTab = NULL;
9804 ctxt->vctxt.nodeNr = 0;
9805 ctxt->vctxt.nodeMax = 0;
9806 ctxt->vctxt.node = NULL;
9808 xmlParseContent(ctxt);
/* Validation state is copied back to the parent after the parse. */
9810 ctx->validate = ctxt->validate;
9811 ctx->valid = ctxt->valid;
/* A leftover "</" means an unmatched end tag; any other leftover is extra content. */
9812 if ((RAW == '<') && (NXT(1) == '/')) {
9813 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9815 ctxt->sax->error(ctxt->userData,
9816 "chunk is not well balanced\n");
9817 ctxt->wellFormed = 0;
9818 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9819 } else if (RAW != 0) {
9820 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9822 ctxt->sax->error(ctxt->userData,
9823 "extra content at the end of well balanced chunk\n");
9824 ctxt->wellFormed = 0;
9825 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The current node must be back at the pseudo root once parsing ends. */
9827 if (ctxt->node != newDoc->children) {
9828 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9830 ctxt->sax->error(ctxt->userData,
9831 "chunk is not well balanced\n");
9832 ctxt->wellFormed = 0;
9833 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9836 if (!ctxt->wellFormed) {
9837 if (ctxt->errNo == 0)
9846 * Return the newly created nodeset after unlinking it from
9847 * they pseudo parent.
/* The loop body is not visible; afterwards the pseudo root is emptied. */
9849 cur = newDoc->children->children;
9851 while (cur != NULL) {
9855 newDoc->children->children = NULL;
9860 xmlFreeParserCtxt(ctxt);
9861 newDoc->intSubset = NULL;
9862 newDoc->extSubset = NULL;
9869 * xmlParseExternalEntityPrivate:
9870 * @doc: the document the chunk pertains to
9871 * @oldctxt: the previous parser context if available
9872 * @sax: the SAX handler block (possibly NULL)
9873 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9874 * @depth: Used for loop detection, use 0
9875 * @URL: the URL for the entity to load
9876 * @ID: the System ID for the entity to load
9877 * @list: the return value for the set of parsed nodes
9879 * Private version of xmlParseExternalEntity()
9881 * Returns 0 if the entity is well formed, -1 in case of args problem and
9882 * the parser error code otherwise
/*
 * xmlParseExternalEntityPrivate: parse the external entity @URL / @ID
 * against @doc in a fresh sub-context. When @oldctxt is given, its
 * _private pointer, option flags and node_seq record are inherited.
 * Content is parsed under a "pseudoroot" element of a temporary
 * document that borrows @doc's DTD subsets; the parsed nodes are then
 * unlinked and the temporaries freed.
 * NOTE(review): many lines (declarations of newDoc/ret/cur/start, the
 * depth test, some returns, loop bodies, closing braces) are not
 * present in this listing.
 */
9886 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9887 xmlSAXHandlerPtr sax,
9888 void *user_data, int depth, const xmlChar *URL,
9889 const xmlChar *ID, xmlNodePtr *list) {
9890 xmlParserCtxtPtr ctxt;
9892 xmlSAXHandlerPtr oldsax = NULL;
9895 xmlCharEncoding enc;
/* The condition guarding this return is not visible in the listing. */
9898 return(XML_ERR_ENTITY_LOOP);
9905 if ((URL == NULL) && (ID == NULL))
9907 if (doc == NULL) /* @@ relax but check for dereferences */
9911 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9912 if (ctxt == NULL) return(-1);
9913 ctxt->userData = ctxt;
/* Inherit settings and share the node_seq record with the old context. */
9914 if (oldctxt != NULL) {
9915 ctxt->_private = oldctxt->_private;
9916 ctxt->loadsubset = oldctxt->loadsubset;
9917 ctxt->validate = oldctxt->validate;
9918 ctxt->external = oldctxt->external;
9919 ctxt->record_info = oldctxt->record_info;
9920 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9921 ctxt->node_seq.length = oldctxt->node_seq.length;
9922 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
9925 * Doing validity checking on chunk without context
9926 * doesn't make sense
9928 ctxt->_private = NULL;
9931 ctxt->loadsubset = 0;
9936 if (user_data != NULL)
9937 ctxt->userData = user_data;
9939 newDoc = xmlNewDoc(BAD_CAST "1.0");
9940 if (newDoc == NULL) {
/*
 * node_seq is zeroed before the context is freed on every exit path,
 * presumably because the buffer may be shared with oldctxt and must not
 * be released by xmlFreeParserCtxt.
 */
9941 ctxt->node_seq.maximum = 0;
9942 ctxt->node_seq.length = 0;
9943 ctxt->node_seq.buffer = NULL;
9944 xmlFreeParserCtxt(ctxt);
/* Borrow the DTD subsets of @doc; reset to NULL before newDoc is freed. */
9948 newDoc->intSubset = doc->intSubset;
9949 newDoc->extSubset = doc->extSubset;
9951 if (doc->URL != NULL) {
9952 newDoc->URL = xmlStrdup(doc->URL);
9954 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9955 if (newDoc->children == NULL) {
9958 ctxt->node_seq.maximum = 0;
9959 ctxt->node_seq.length = 0;
9960 ctxt->node_seq.buffer = NULL;
9961 xmlFreeParserCtxt(ctxt);
9962 newDoc->intSubset = NULL;
9963 newDoc->extSubset = NULL;
9967 nodePush(ctxt, newDoc->children);
9969 ctxt->myDoc = newDoc;
9972 newDoc->children->doc = doc;
9976 * Get the 4 first bytes and decode the charset
9977 * if enc != XML_CHAR_ENCODING_NONE
9978 * plug some encoding conversion routines.
9985 enc = xmlDetectCharEncoding(start, 4);
9986 if (enc != XML_CHAR_ENCODING_NONE) {
9987 xmlSwitchEncoding(ctxt, enc);
9991 * Parse a possible text declaration first
/* Matches the literal "<?xml" followed by a blank character. */
9993 if ((RAW == '<') && (NXT(1) == '?') &&
9994 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9995 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9996 xmlParseTextDecl(ctxt);
9999 ctxt->instate = XML_PARSER_CONTENT;
10000 ctxt->depth = depth;
10002 xmlParseContent(ctxt);
/* A leftover "</" means an unmatched end tag; any other leftover is extra content. */
10004 if ((RAW == '<') && (NXT(1) == '/')) {
10005 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10007 ctxt->sax->error(ctxt->userData,
10008 "chunk is not well balanced\n");
10009 ctxt->wellFormed = 0;
10010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10011 } else if (RAW != 0) {
10012 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10014 ctxt->sax->error(ctxt->userData,
10015 "extra content at the end of well balanced chunk\n");
10016 ctxt->wellFormed = 0;
10017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The current node must be back at the pseudo root once parsing ends. */
10019 if (ctxt->node != newDoc->children) {
10020 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10022 ctxt->sax->error(ctxt->userData,
10023 "chunk is not well balanced\n");
10024 ctxt->wellFormed = 0;
10025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10028 if (!ctxt->wellFormed) {
10029 if (ctxt->errNo == 0)
10034 if (list != NULL) {
10038 * Return the newly created nodeset after unlinking it from
10039 * they pseudo parent.
/* Each parsed node is detached from the pseudo root. */
10041 cur = newDoc->children->children;
10043 while (cur != NULL) {
10044 cur->parent = NULL;
10047 newDoc->children->children = NULL;
10052 ctxt->sax = oldsax;
/*
 * NOTE(review): oldctxt is dereferenced here with no visible NULL guard,
 * although it is tested against NULL earlier in this function and
 * xmlParseExternalEntity() passes NULL for it -- confirm whether a guard
 * exists on a line missing from this listing.
 */
10053 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10054 oldctxt->node_seq.length = ctxt->node_seq.length;
10055 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
10056 ctxt->node_seq.maximum = 0;
10057 ctxt->node_seq.length = 0;
10058 ctxt->node_seq.buffer = NULL;
10059 xmlFreeParserCtxt(ctxt);
/* Give the borrowed DTD subsets back before freeing the temporary doc. */
10060 newDoc->intSubset = NULL;
10061 newDoc->extSubset = NULL;
10062 xmlFreeDoc(newDoc);
10068 * xmlParseExternalEntity:
10069 * @doc: the document the chunk pertains to
10070 * @sax: the SAX handler block (possibly NULL)
10071 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10072 * @depth: Used for loop detection, use 0
10073 * @URL: the URL for the entity to load
10074 * @ID: the System ID for the entity to load
10075 * @lst: the return value for the set of parsed nodes
10077 * Parse an external general entity
10078 * An external general parsed entity is well-formed if it matches the
10079 * production labeled extParsedEnt.
10081 * [78] extParsedEnt ::= TextDecl? content
10083 * Returns 0 if the entity is well formed, -1 in case of args problem and
10084 * the parser error code otherwise
/*
 * xmlParseExternalEntity: public wrapper around
 * xmlParseExternalEntityPrivate(), passing NULL as the old parser
 * context. (The tail of the call is on a line missing from this listing.)
 */
10088 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
10089 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
10090 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
10095 * xmlParseBalancedChunkMemory:
10096 * @doc: the document the chunk pertains to
10097 * @sax: the SAX handler block (possibly NULL)
10098 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10099 * @depth: Used for loop detection, use 0
10100 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10101 * @lst: the return value for the set of parsed nodes
10103 * Parse a well-balanced chunk of an XML document
10104 * called by the parser
10105 * The allowed sequence for the Well Balanced Chunk is the one defined by
10106 * the content production in the XML grammar:
10108 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10110 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10111 * the parser error code otherwise
/*
 * xmlParseBalancedChunkMemory: wrapper that calls
 * xmlParseBalancedChunkMemoryRecover() with recover set to 0.
 */
10115 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10116 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
10117 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10118 depth, string, lst, 0 );
10122 * xmlParseBalancedChunkMemoryInternal:
10123 * @oldctxt: the existing parsing context
10124 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10125 * @user_data: the user data field for the parser context
10126 * @lst: the return value for the set of parsed nodes
10129 * Parse a well-balanced chunk of an XML document
10130 * called by the parser
10131 * The allowed sequence for the Well Balanced Chunk is the one defined by
10132 * the content production in the XML grammar:
10134 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10136 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10137 * the parser error code otherwise
10139 * This internal variant has no recover mode: the node list is only
10140 * returned through @lst when the parsed chunk is well balanced.
/*
 * xmlParseBalancedChunkMemoryInternal: parse the zero-terminated
 * @string as content inside a memory sub-context that reuses the SAX
 * handler and _private pointer of @oldctxt. The content is parsed under
 * a temporary "pseudoroot" node that replaces the document's children
 * for the duration of the parse; the original children are put back
 * afterwards.
 * NOTE(review): declarations of size/ret/cur, some returns, else
 * branches and closing braces are not present in this listing.
 */
10143 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10144 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10145 xmlParserCtxtPtr ctxt;
10146 xmlDocPtr newDoc = NULL;
10147 xmlSAXHandlerPtr oldsax = NULL;
10148 xmlNodePtr content = NULL;
/* Nesting deeper than 40 is reported as an entity loop. */
10152 if (oldctxt->depth > 40) {
10153 return(XML_ERR_ENTITY_LOOP);
10159 if (string == NULL)
10162 size = xmlStrlen(string);
10164 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10165 if (ctxt == NULL) return(-1);
10166 if (user_data != NULL)
10167 ctxt->userData = user_data;
10169 ctxt->userData = ctxt;
/* Remember the context's own handler so it can be restored before freeing. */
10171 oldsax = ctxt->sax;
10172 ctxt->sax = oldctxt->sax;
10173 ctxt->_private = oldctxt->_private;
/* A temporary document is created only when the old context has none. */
10174 if (oldctxt->myDoc == NULL) {
10175 newDoc = xmlNewDoc(BAD_CAST "1.0");
10176 if (newDoc == NULL) {
10177 ctxt->sax = oldsax;
10178 xmlFreeParserCtxt(ctxt);
10181 ctxt->myDoc = newDoc;
10183 ctxt->myDoc = oldctxt->myDoc;
/* Save the existing children; they are restored near the end. */
10184 content = ctxt->myDoc->children;
10186 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
10187 BAD_CAST "pseudoroot", NULL);
10188 if (ctxt->myDoc->children == NULL) {
10189 ctxt->sax = oldsax;
10190 xmlFreeParserCtxt(ctxt);
10191 if (newDoc != NULL)
10192 xmlFreeDoc(newDoc);
10195 nodePush(ctxt, ctxt->myDoc->children);
10196 ctxt->instate = XML_PARSER_CONTENT;
10197 ctxt->depth = oldctxt->depth + 1;
/* Validation is off in the sub-context; it is run on the results below. */
10199 ctxt->validate = 0;
10200 ctxt->loadsubset = oldctxt->loadsubset;
10201 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10203 * ID/IDREF registration will be done in xmlValidateElement below
10205 ctxt->loadsubset |= XML_SKIP_IDS;
10208 xmlParseContent(ctxt);
/* A leftover "</" means an unmatched end tag; any other leftover is extra content. */
10209 if ((RAW == '<') && (NXT(1) == '/')) {
10210 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10212 ctxt->sax->error(ctxt->userData,
10213 "chunk is not well balanced\n");
10214 ctxt->wellFormed = 0;
10215 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10216 } else if (RAW != 0) {
10217 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10219 ctxt->sax->error(ctxt->userData,
10220 "extra content at the end of well balanced chunk\n");
10221 ctxt->wellFormed = 0;
10222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The current node must be back at the pseudo root once parsing ends. */
10224 if (ctxt->node != ctxt->myDoc->children) {
10225 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10227 ctxt->sax->error(ctxt->userData,
10228 "chunk is not well balanced\n");
10229 ctxt->wellFormed = 0;
10230 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10233 if (!ctxt->wellFormed) {
10234 if (ctxt->errNo == 0)
/* Nodes are handed back only when a list was requested and ret is 0. */
10242 if ((lst != NULL) && (ret == 0)) {
10246 * Return the newly created nodeset after unlinking it from
10247 * they pseudo parent.
10249 cur = ctxt->myDoc->children->children;
10251 while (cur != NULL) {
/* Validate each new node against the old context's document if enabled. */
10252 if (oldctxt->validate && oldctxt->wellFormed &&
10253 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10254 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10255 oldctxt->myDoc, cur);
10257 cur->parent = NULL;
10260 ctxt->myDoc->children->children = NULL;
/* Drop the pseudo root and put the document's original children back. */
10262 if (ctxt->myDoc != NULL) {
10263 xmlFreeNode(ctxt->myDoc->children);
10264 ctxt->myDoc->children = content;
/* Restore the context's own handler before the context is freed. */
10267 ctxt->sax = oldsax;
10268 xmlFreeParserCtxt(ctxt);
10269 if (newDoc != NULL)
10270 xmlFreeDoc(newDoc);
10276 * xmlParseBalancedChunkMemoryRecover:
10277 * @doc: the document the chunk pertains to
10278 * @sax: the SAX handler block (possibly NULL)
10279 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10280 * @depth: Used for loop detection, use 0
10281 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10282 * @lst: the return value for the set of parsed nodes
10283 * @recover: return nodes even if the data is broken (use 0)
10286 * Parse a well-balanced chunk of an XML document
10287 * called by the parser
10288 * The allowed sequence for the Well Balanced Chunk is the one defined by
10289 * the content production in the XML grammar:
10291 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10293 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10294 * the parser error code otherwise
10296 * In case recover is set to 1, the nodelist will not be empty even if
10297 * the parsed chunk is not well balanced.
/*
 * xmlParseBalancedChunkMemoryRecover: parse the zero-terminated @string
 * as content in a memory sub-context, under a "pseudoroot" element of a
 * temporary document that borrows @doc's DTD subsets. The parsed nodes
 * are returned through @lst when the result code is 0 or @recover is 1.
 * NOTE(review): the last parameter line, declarations of
 * newDoc/size/ret/cur, the depth test, the handling of @sax, some
 * returns and closing braces are not present in this listing.
 */
10300 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10301 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10303 xmlParserCtxtPtr ctxt;
10305 xmlSAXHandlerPtr oldsax = NULL;
10306 xmlNodePtr content;
/* The condition guarding this return is not visible in the listing. */
10311 return(XML_ERR_ENTITY_LOOP);
10317 if (string == NULL)
10320 size = xmlStrlen(string);
10322 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10323 if (ctxt == NULL) return(-1);
10324 ctxt->userData = ctxt;
/* Remember the context's own handler so it can be restored before freeing. */
10326 oldsax = ctxt->sax;
10328 if (user_data != NULL)
10329 ctxt->userData = user_data;
10331 newDoc = xmlNewDoc(BAD_CAST "1.0");
10332 if (newDoc == NULL) {
10333 xmlFreeParserCtxt(ctxt);
/*
 * Borrow @doc's DTD subsets (reset to NULL before newDoc is freed).
 * NOTE(review): @doc is tested against NULL further down; whether these
 * two dereferences are guarded is not visible in this listing.
 */
10337 newDoc->intSubset = doc->intSubset;
10338 newDoc->extSubset = doc->extSubset;
10340 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10341 if (newDoc->children == NULL) {
10343 ctxt->sax = oldsax;
10344 xmlFreeParserCtxt(ctxt);
10345 newDoc->intSubset = NULL;
10346 newDoc->extSubset = NULL;
10347 xmlFreeDoc(newDoc);
10350 nodePush(ctxt, newDoc->children);
/* Both visible assignments set myDoc to the temporary document. */
10352 ctxt->myDoc = newDoc;
10354 ctxt->myDoc = newDoc;
10355 newDoc->children->doc = doc;
10357 ctxt->instate = XML_PARSER_CONTENT;
10358 ctxt->depth = depth;
10361 * Doing validity checking on chunk doesn't make sense
10363 ctxt->validate = 0;
10364 ctxt->loadsubset = 0;
/* @doc's children are hidden during the parse and restored right after. */
10366 if ( doc != NULL ){
10367 content = doc->children;
10368 doc->children = NULL;
10369 xmlParseContent(ctxt);
10370 doc->children = content;
10373 xmlParseContent(ctxt);
/* A leftover "</" means an unmatched end tag; any other leftover is extra content. */
10375 if ((RAW == '<') && (NXT(1) == '/')) {
10376 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10378 ctxt->sax->error(ctxt->userData,
10379 "chunk is not well balanced\n");
10380 ctxt->wellFormed = 0;
10381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10382 } else if (RAW != 0) {
10383 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10385 ctxt->sax->error(ctxt->userData,
10386 "extra content at the end of well balanced chunk\n");
10387 ctxt->wellFormed = 0;
10388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The current node must be back at the pseudo root once parsing ends. */
10390 if (ctxt->node != newDoc->children) {
10391 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10393 ctxt->sax->error(ctxt->userData,
10394 "chunk is not well balanced\n");
10395 ctxt->wellFormed = 0;
10396 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10399 if (!ctxt->wellFormed) {
10400 if (ctxt->errNo == 0)
/* In recover mode the nodes are handed back even when ret is non-zero. */
10408 if (lst != NULL && (ret == 0 || recover == 1)) {
10412 * Return the newly created nodeset after unlinking it from
10413 * they pseudo parent.
10415 cur = newDoc->children->children;
10417 while (cur != NULL) {
10418 cur->parent = NULL;
10421 newDoc->children->children = NULL;
10425 ctxt->sax = oldsax;
10426 xmlFreeParserCtxt(ctxt);
/* Give the borrowed DTD subsets back before freeing the temporary doc. */
10427 newDoc->intSubset = NULL;
10428 newDoc->extSubset = NULL;
10429 xmlFreeDoc(newDoc);
10435 * xmlSAXParseEntity:
10436 * @sax: the SAX handler block
10437 * @filename: the filename
10439 * parse an XML external entity out of context and build a tree.
10440 * It uses the given SAX function block to handle the parsing callback.
10441 * If sax is NULL, fallback to the default DOM tree building routines.
10443 * [78] extParsedEnt ::= TextDecl? content
10445 * This corresponds to a "Well Balanced" chunk
10447 * Returns the resulting document tree
/*
 * xmlSAXParseEntity: parse @filename as an external parsed entity using
 * a file parser context, then free that context.
 * NOTE(review): the declaration of the result variable, the statements
 * that install @sax, the else branch and the returns are not present in
 * this listing.
 */
10451 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10453 xmlParserCtxtPtr ctxt;
10455 ctxt = xmlCreateFileParserCtxt(filename);
10456 if (ctxt == NULL) {
/* The context's existing handler block is released here. */
10460 if (ctxt->sax != NULL)
10461 xmlFree(ctxt->sax);
10463 ctxt->userData = NULL;
10466 xmlParseExtParsedEnt(ctxt);
10468 if (ctxt->wellFormed)
/* The built document is freed and the context's pointer cleared. */
10472 xmlFreeDoc(ctxt->myDoc);
10473 ctxt->myDoc = NULL;
10477 xmlFreeParserCtxt(ctxt);
10484 * @filename: the filename
10486 * parse an XML external entity out of context and build a tree.
10488 * [78] extParsedEnt ::= TextDecl? content
10490 * This corresponds to a "Well Balanced" chunk
10492 * Returns the resulting document tree
/* xmlParseEntity: convenience wrapper, calls xmlSAXParseEntity() with a NULL SAX handler. */
10496 xmlParseEntity(const char *filename) {
10497 return(xmlSAXParseEntity(NULL, filename));
10501 * xmlCreateEntityParserCtxt:
10502 * @URL: the entity URL
10503 * @ID: the entity PUBLIC ID
10504 * @base: a possible base for the target URI
10506 * Create a parser context for an external entity
10507 * Automatic support for ZLIB/Compress compressed document is provided
10508 * by default if found at compile-time.
10510 * Returns the new parser context or NULL
/*
 * xmlCreateEntityParserCtxt: create a parser context whose input is the
 * external entity @URL / @ID. A URI is built from @URL and @base; the
 * visible code has two load paths, one using @URL directly and one
 * using the built URI. Whichever is loaded is pushed as input, and the
 * context's directory is filled in when it is still unset.
 * NOTE(review): the declaration of `uri`, the test choosing between
 * the two paths, the release of `uri` and the returns are not present
 * in this listing.
 */
10513 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10514 const xmlChar *base) {
10515 xmlParserCtxtPtr ctxt;
10516 xmlParserInputPtr inputStream;
10517 char *directory = NULL;
10520 ctxt = xmlNewParserCtxt();
10521 if (ctxt == NULL) {
10525 uri = xmlBuildURI(URL, base);
/* Path 1: load using URL as given. */
10528 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10529 if (inputStream == NULL) {
10530 xmlFreeParserCtxt(ctxt);
10534 inputPush(ctxt, inputStream);
/* Derive the directory from URL only if the context has none yet. */
10536 if ((ctxt->directory == NULL) && (directory == NULL))
10537 directory = xmlParserGetDirectory((char *)URL);
10538 if ((ctxt->directory == NULL) && (directory != NULL))
10539 ctxt->directory = directory;
/* Path 2: load using the URI built from URL and base. */
10541 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10542 if (inputStream == NULL) {
10544 xmlFreeParserCtxt(ctxt);
10548 inputPush(ctxt, inputStream);
10550 if ((ctxt->directory == NULL) && (directory == NULL))
10551 directory = xmlParserGetDirectory((char *)uri);
10552 if ((ctxt->directory == NULL) && (directory != NULL))
10553 ctxt->directory = directory;
10560 /************************************************************************
10562 * Front ends when parsing from a file *
10564 ************************************************************************/
10567 * xmlCreateFileParserCtxt:
10568 * @filename: the filename
10570 * Create a parser context for a file content.
10571 * Automatic support for ZLIB/Compress compressed document is provided
10572 * by default if found at compile-time.
10574 * Returns the new parser context or NULL
/*
 * xmlCreateFileParserCtxt: create a parser context whose input is
 * loaded from @filename via xmlLoadExternalEntity(), and record the
 * file's directory in the context when none is set yet.
 * NOTE(review): the opening brace, the returns and closing braces are
 * not present in this listing.
 */
10577 xmlCreateFileParserCtxt(const char *filename)
10579 xmlParserCtxtPtr ctxt;
10580 xmlParserInputPtr inputStream;
10581 char *directory = NULL;
10583 ctxt = xmlNewParserCtxt();
10584 if (ctxt == NULL) {
/* No context exists to report through, so the default handler is used. */
10585 if (xmlDefaultSAXHandler.error != NULL) {
10586 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10592 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
10593 if (inputStream == NULL) {
10594 xmlFreeParserCtxt(ctxt);
10598 inputPush(ctxt, inputStream);
/* The pointer from xmlParserGetDirectory is stored in the context as is. */
10599 if ((ctxt->directory == NULL) && (directory == NULL))
10600 directory = xmlParserGetDirectory(filename);
10601 if ((ctxt->directory == NULL) && (directory != NULL))
10602 ctxt->directory = directory;
10608 * xmlSAXParseFileWithData:
10609 * @sax: the SAX handler block
10610 * @filename: the filename
10611 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10613 * @data: the userdata
10615 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10616 * compressed document is provided by default if found at compile-time.
10617 * It uses the given SAX function block to handle the parsing callback.
10618 * If sax is NULL, fallback to the default DOM tree building routines.
10620 * User data (void *) is stored within the parser context in the
10621 * context's _private member, so it is available nearly everywhere in libxml
10623 * Returns the resulting document tree
/*
 * xmlSAXParseFileWithData: parse @filename as a whole document in a
 * file parser context. @data is stored in the context's _private member
 * and @recovery in its recovery flag. The document is kept as the
 * result when the parse was well formed or @recovery is non-zero.
 * NOTE(review): the declaration of `ret`, the statements that install
 * @sax, the else branch and the returns are not present in this listing.
 */
10627 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10628 int recovery, void *data) {
10630 xmlParserCtxtPtr ctxt;
10631 char *directory = NULL;
10635 ctxt = xmlCreateFileParserCtxt(filename);
10636 if (ctxt == NULL) {
/* The context's existing handler block is released here. */
10640 if (ctxt->sax != NULL)
10641 xmlFree(ctxt->sax);
10645 ctxt->_private=data;
/*
 * Unlike xmlCreateFileParserCtxt, a copy of the directory string is
 * stored. NOTE(review): no release of `directory` itself is visible in
 * this listing -- confirm it is not leaked.
 */
10648 if ((ctxt->directory == NULL) && (directory == NULL))
10649 directory = xmlParserGetDirectory(filename);
10650 if ((ctxt->directory == NULL) && (directory != NULL))
10651 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10653 ctxt->recovery = recovery;
10655 xmlParseDocument(ctxt);
10657 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
/* The built document is freed and the context's pointer cleared. */
10660 xmlFreeDoc(ctxt->myDoc);
10661 ctxt->myDoc = NULL;
10665 xmlFreeParserCtxt(ctxt);
10672 * @sax: the SAX handler block
10673 * @filename: the filename
10674 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10677 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10678 * compressed document is provided by default if found at compile-time.
10679 * It uses the given SAX function block to handle the parsing callback.
10680 * If sax is NULL, fallback to the default DOM tree building routines.
10682 * Returns the resulting document tree
/*
 * xmlSAXParseFile: wrapper that calls xmlSAXParseFileWithData() with
 * NULL user data. (The second line of the signature is missing from
 * this listing.)
 */
10686 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10688 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10693 * @cur: a pointer to an array of xmlChar
10695 * parse an XML in-memory document and build a tree.
10696 * In the case the document is not Well Formed, a tree is built anyway
10698 * Returns the resulting document tree
/* xmlRecoverDoc: calls xmlSAXParseDoc() with a NULL SAX handler and 1 as its last argument. */
10702 xmlRecoverDoc(xmlChar *cur) {
10703 return(xmlSAXParseDoc(NULL, cur, 1));
10708 * @filename: the filename
10710 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10711 * compressed document is provided by default if found at compile-time.
10713 * Returns the resulting document tree if the file was wellformed,
/*
 * xmlParseFile: hands `filename` to xmlSAXParseFile() with a NULL SAX block
 * and the recovery argument set to 0.
 * NOTE(review): the return-type line and closing brace are not visible in
 * this listing.
 */
10718 xmlParseFile(const char *filename) {
10719 return(xmlSAXParseFile(NULL, filename, 0));
10724 * @filename: the filename
10726 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10727 * compressed document is provided by default if found at compile-time.
10728 * In the case the document is not Well Formed, a tree is built anyway
10730 * Returns the resulting document tree
/*
 * xmlRecoverFile: hands `filename` to xmlSAXParseFile() with a NULL SAX
 * block and the recovery argument set to 1.
 * NOTE(review): the return-type line and closing brace are not visible in
 * this listing.
 */
10734 xmlRecoverFile(const char *filename) {
10735 return(xmlSAXParseFile(NULL, filename, 1));
10740 * xmlSetupParserForBuffer:
10741 * @ctxt: an XML parser context
10742 * @buffer: a xmlChar * buffer
10743 * @filename: a file name
10745 * Setup the parser context to parse a new buffer; Clears any prior
10746 * contents from the parser context. The buffer parameter must not be
10747 * NULL, but the filename parameter can be.
/*
 * xmlSetupParserForBuffer: allocates a new input stream for `ctxt`, clears
 * the context with xmlClearParserCtxt(), points the stream at `buffer` and
 * pushes it onto the context with inputPush().
 *
 * The stream's base/cur/end pointers refer directly to `buffer`; the
 * visible code makes no copy, so the buffer presumably has to stay valid
 * for as long as the input is in use -- confirm with callers.
 *
 * NOTE(review): the return type, the opening brace and the remainder of the
 * allocation-failure branch (lines 10758-10762) are not visible in this
 * listing.
 */
10750 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10751 const char* filename)
10753 xmlParserInputPtr input;
10755 input = xmlNewInputStream(ctxt);
10756 if (input == NULL) {
10757 xmlGenericError(xmlGenericErrorContext,
/* The input stream is obtained before the context is cleared. */
10763 xmlClearParserCtxt(ctxt);
/* The file name is optional; when given it is stored via xmlCanonicPath(). */
10764 if (filename != NULL)
10765 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
10766 input->base = buffer;
10767 input->cur = buffer;
/* The end marker is buffer + xmlStrlen(buffer); presumably `buffer` is NUL-terminated. */
10768 input->end = &buffer[xmlStrlen(buffer)];
10769 inputPush(ctxt, input);
10773 * xmlSAXUserParseFile:
10774 * @sax: a SAX handler
10775 * @user_data: The user data returned on SAX callbacks
10776 * @filename: a file name
10778 * parse an XML file and call the given SAX handler routines.
10779 * Automatic support for ZLIB/Compress compressed document is provided
10781 * Returns 0 in case of success or an error number otherwise
/*
 * xmlSAXUserParseFile: creates a file parser context for `filename`
 * (returning -1 when that fails), optionally installs `user_data` as the
 * context's userData, runs xmlParseDocument() and releases the context with
 * xmlFreeParserCtxt().
 *
 * NOTE(review): the return type, the declaration of the result variable,
 * the statements selected by the wellFormed / errNo tests and the final
 * return are not visible in this listing (numbering skips 10792 -> 10794,
 * 10799 -> 10802 -> 10809).
 */
10784 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10785 const char *filename) {
10787 xmlParserCtxtPtr ctxt;
10789 ctxt = xmlCreateFileParserCtxt(filename);
10790 if (ctxt == NULL) return -1;
/* The context's handler is freed unless it is the global xmlDefaultSAXHandler object. */
10791 if (ctxt->sax != &xmlDefaultSAXHandler)
10792 xmlFree(ctxt->sax);
/* A NULL user_data leaves whatever userData the context already holds. */
10794 if (user_data != NULL)
10795 ctxt->userData = user_data;
10797 xmlParseDocument(ctxt);
/* The result is derived from wellFormed and errNo; the assignments themselves are not visible here. */
10799 if (ctxt->wellFormed)
10802 if (ctxt->errNo != 0)
10809 xmlFreeParserCtxt(ctxt);
10814 /************************************************************************
10816 * Front ends when parsing from memory *
10818 ************************************************************************/
10821 * xmlCreateMemoryParserCtxt:
10822 * @buffer: a pointer to a char array
10823 * @size: the size of the array
10825 * Create a parser context for an XML in-memory document.
10827 * Returns the new parser context or NULL
/*
 * xmlCreateMemoryParserCtxt: builds a parser context whose single input
 * reads from the `size` bytes at `buffer`.  A new context is allocated, the
 * memory is wrapped with xmlParserInputBufferCreateMem() using
 * XML_CHAR_ENCODING_NONE, an input stream is created and pushed onto the
 * context.
 *
 * NOTE(review): the return type, the statements guarded by the visible
 * tests, the line attaching `buf` to the input (line 10858) and the final
 * return are not visible in this listing.
 */
10830 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
10831 xmlParserCtxtPtr ctxt;
10832 xmlParserInputPtr input;
10833 xmlParserInputBufferPtr buf;
10835 if (buffer == NULL)
10840 ctxt = xmlNewParserCtxt();
10844 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
/* Presumably the failure path for `buf`: the context created above is released. */
10846 xmlFreeParserCtxt(ctxt);
10850 input = xmlNewInputStream(ctxt);
/* On input-stream failure both the input buffer and the context are released. */
10851 if (input == NULL) {
10852 xmlFreeParserInputBuffer(buf);
10853 xmlFreeParserCtxt(ctxt);
/* An in-memory input has no file name. */
10857 input->filename = NULL;
/* base/cur start at the buffer content; end is `use` bytes past it. */
10859 input->base = input->buf->buffer->content;
10860 input->cur = input->buf->buffer->content;
10861 input->end = &input->buf->buffer->content[input->buf->buffer->use];
10863 inputPush(ctxt, input);
10868 * xmlSAXParseMemoryWithData:
10869 * @sax: the SAX handler block
10870 * @buffer: a pointer to a char array
10871 * @size: the size of the array
10872 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10874 * @data: the userdata
10876 * parse an XML in-memory block and use the given SAX function block
10877 * to handle the parsing callback. If sax is NULL, fallback to the default
10878 * DOM tree building routines.
10880 * User data (void *) is stored within the parser context in the
10881 * context's _private member, so it is available nearly everywhere in libxml
10883 * Returns the resulting document tree
/*
 * xmlSAXParseMemoryWithData: creates a memory parser context over
 * `buffer`/`size` (returning NULL when that fails), stores `data` in the
 * context's _private member, records the recovery flag, runs
 * xmlParseDocument() and releases the context with xmlFreeParserCtxt().
 * ctxt->myDoc becomes the result when the parse was well formed or
 * `recovery` is non-zero.
 *
 * NOTE(review): the return type, the declaration of `ret`, the guards
 * around the handler and `data` handling, the else line and the final
 * return are not visible in this listing.
 */
10887 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10888 int size, int recovery, void *data) {
10890 xmlParserCtxtPtr ctxt;
10892 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10893 if (ctxt == NULL) return(NULL);
/* Release the handler block that came with the freshly created context. */
10895 if (ctxt->sax != NULL)
10896 xmlFree(ctxt->sax);
/* Make the caller's opaque pointer reachable through the context. */
10900 ctxt->_private=data;
10903 ctxt->recovery = recovery;
10905 xmlParseDocument(ctxt);
/* Keep the tree when it is well formed or the caller asked for recovery. */
10907 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
/* Presumably the else branch: discard the tree and clear the context's pointer to it. */
10910 xmlFreeDoc(ctxt->myDoc);
10911 ctxt->myDoc = NULL;
10915 xmlFreeParserCtxt(ctxt);
10921 * xmlSAXParseMemory:
10922 * @sax: the SAX handler block
10923 * @buffer: a pointer to a char array
10924 * @size: the size of the array
10925 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10928 * parse an XML in-memory block and use the given SAX function block
10929 * to handle the parsing callback. If sax is NULL, fallback to the default
10930 * DOM tree building routines.
10932 * Returns the resulting document tree
/*
 * xmlSAXParseMemory: forwards all arguments to xmlSAXParseMemoryWithData(),
 * passing NULL as the user data.
 * NOTE(review): the return-type line and closing brace are not visible in
 * this listing.
 */
10935 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10936 int size, int recovery) {
10937 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
10942 * @buffer: a pointer to a char array
10943 * @size: the size of the array
10945 * parse an XML in-memory block and build a tree.
10947 * Returns the resulting document tree
/*
 * xmlParseMemory: hands buffer/size to xmlSAXParseMemory() with a NULL SAX
 * block and the recovery argument set to 0.
 * NOTE(review): the closing brace is not visible in this listing.
 */
10950 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
10951 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10955 * xmlRecoverMemory:
10956 * @buffer: a pointer to a char array
10957 * @size: the size of the array
10959 * parse an XML in-memory block and build a tree.
10960 * In the case the document is not Well Formed, a tree is built anyway
10962 * Returns the resulting document tree
/*
 * xmlRecoverMemory: hands buffer/size to xmlSAXParseMemory() with a NULL SAX
 * block and the recovery argument set to 1.
 * NOTE(review): the closing brace is not visible in this listing.
 */
10965 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
10966 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10970 * xmlSAXUserParseMemory:
10971 * @sax: a SAX handler
10972 * @user_data: The user data returned on SAX callbacks
10973 * @buffer: an in-memory XML document input
10974 * @size: the length of the XML document in bytes
10976 * A better SAX parsing routine.
10977 * parse an XML in-memory buffer and call the given SAX handler routines.
10979 * Returns 0 in case of success or an error number otherwise
/*
 * xmlSAXUserParseMemory: parses the `size` bytes at `buffer` through a
 * memory parser context.  Returns -1 when `sax` is NULL or when the context
 * cannot be created.  The context's original handler is saved in `oldsax`
 * and put back before the context is released with xmlFreeParserCtxt().
 *
 * NOTE(review): the declaration of the result variable, the line installing
 * `sax` on the context (line 10991), the statements selected by the
 * wellFormed / errNo tests and the final return are not visible in this
 * listing.
 */
10981 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10982 const char *buffer, int size) {
10984 xmlParserCtxtPtr ctxt;
10985 xmlSAXHandlerPtr oldsax = NULL;
10987 if (sax == NULL) return -1;
10988 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10989 if (ctxt == NULL) return -1;
/* Remember the context's own handler so it can be restored below. */
10990 oldsax = ctxt->sax;
/* A NULL user_data leaves whatever userData the context already holds. */
10992 if (user_data != NULL)
10993 ctxt->userData = user_data;
10995 xmlParseDocument(ctxt);
/* The result is derived from wellFormed and errNo; the assignments themselves are not visible here. */
10997 if (ctxt->wellFormed)
11000 if (ctxt->errNo != 0)
/*
 * Restore the original handler before freeing the context -- presumably so
 * that the caller's handler is not released along with it.
 */
11005 ctxt->sax = oldsax;
11006 xmlFreeParserCtxt(ctxt);
11012 * xmlCreateDocParserCtxt:
11013 * @cur: a pointer to an array of xmlChar
11015 * Creates a parser context for an XML in-memory document.
11017 * Returns the new parser context or NULL
/*
 * xmlCreateDocParserCtxt: measures `cur` with xmlStrlen() and delegates to
 * xmlCreateMemoryParserCtxt() with that length.
 * NOTE(review): the return type, the declaration of `len` and whatever
 * occupies lines 11021-11024 (possibly a NULL check on `cur`) are not
 * visible in this listing.
 */
11020 xmlCreateDocParserCtxt(xmlChar *cur) {
11025 len = xmlStrlen(cur);
11026 return(xmlCreateMemoryParserCtxt((char *)cur, len));
11031 * @sax: the SAX handler block
11032 * @cur: a pointer to an array of xmlChar
11033 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11036 * parse an XML in-memory document and build a tree.
11037 * It uses the given SAX function block to handle the parsing callbacks.
11038 * If sax is NULL, fallback to the default DOM tree building routines.
11040 * Returns the resulting document tree
/*
 * xmlSAXParseDoc: returns NULL for a NULL `cur` or when no context can be
 * created; otherwise parses the string through a context obtained from
 * xmlCreateDocParserCtxt() and releases that context with
 * xmlFreeParserCtxt().  ctxt->myDoc becomes the result when the parse was
 * well formed or `recovery` is non-zero.
 *
 * NOTE(review): the return type, the declaration of `ret`, the block that
 * installs `sax` (lines 11053-11056, only the userData reset is visible),
 * the else line and the final return are not visible in this listing.
 * NOTE(review): unlike xmlSAXParseMemoryWithData(), no visible line calls
 * xmlFree(ctxt->sax) before the handler is replaced -- check whether the
 * context's original handler block is leaked here.
 */
11044 xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11046 xmlParserCtxtPtr ctxt;
11048 if (cur == NULL) return(NULL);
11051 ctxt = xmlCreateDocParserCtxt(cur);
11052 if (ctxt == NULL) return(NULL);
11055 ctxt->userData = NULL;
11058 xmlParseDocument(ctxt);
/* Keep the tree when it is well formed or the caller asked for recovery. */
11059 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
/* Presumably the else branch: discard the tree and clear the context's pointer to it. */
11062 xmlFreeDoc(ctxt->myDoc);
11063 ctxt->myDoc = NULL;
11067 xmlFreeParserCtxt(ctxt);
11074 * @cur: a pointer to an array of xmlChar
11076 * parse an XML in-memory document and build a tree.
11078 * Returns the resulting document tree
/*
 * xmlParseDoc: hands `cur` to xmlSAXParseDoc() with a NULL SAX block and the
 * recovery argument set to 0.
 * NOTE(review): the return-type line and closing brace are not visible in
 * this listing.
 */
11082 xmlParseDoc(xmlChar *cur) {
11083 return(xmlSAXParseDoc(NULL, cur, 0));
11086 /************************************************************************
11088 * Specific function to keep track of entities references *
11089 * and used by the XSLT debugger *
11091 ************************************************************************/
/*
 * File-local hook called by xmlAddEntityReference(); it stays NULL until
 * xmlSetEntityReferenceFunc() stores a callback in it.
 */
11093 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11096 * xmlAddEntityReference:
11097 * @ent : A valid entity
11098 * @firstNode : A valid first node for children of entity
11099 * @lastNode : A valid last node of children entity
11101 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
/*
 * xmlAddEntityReference: passes (ent, firstNode, lastNode) to the callback
 * stored in xmlEntityRefFunc; does nothing when no callback is registered.
 * NOTE(review): the return type and the braces are not visible in this
 * listing.
 */
11104 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11105 xmlNodePtr lastNode)
11107 if (xmlEntityRefFunc != NULL) {
11108 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11114 * xmlSetEntityReferenceFunc:
11115 * @func: A valid function
11117 * Set the function to call back when an XML entity reference has been made
/*
 * xmlSetEntityReferenceFunc: stores `func` in the file-local
 * xmlEntityRefFunc hook, replacing any previous value; nothing in the
 * visible code rejects NULL, which xmlAddEntityReference() treats as
 * "no callback".
 * NOTE(review): the return type and the braces are not visible in this
 * listing.
 */
11120 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11122 xmlEntityRefFunc = func;
11125 /************************************************************************
11129 ************************************************************************/
11131 #ifdef LIBXML_XPATH_ENABLED
11132 #include <libxml/xpath.h>
/* Declared here so xmlInitParser() can compare xmlGenericError against it. */
11135 extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser() and back to 0 by xmlCleanupParser(). */
11136 static int xmlParserInitialized = 0;
11141 * Initialization function for the XML parser.
11142 * This is not reentrant. Call once before processing in case of
11143 * use in multithreaded programs.
/*
 * xmlInitParser: runs the library's one-time initialisers (character
 * encoding handlers, predefined entities, default SAX handler, default I/O
 * callbacks and, when compiled in, the HTML helpers) and then sets
 * xmlParserInitialized to 1.
 *
 * NOTE(review): the return type, the statement guarded by the
 * xmlParserInitialized test (presumably an early return), the #endif lines
 * and the body of the LIBXML_XPATH_ENABLED section are not visible in this
 * listing.
 */
11147 xmlInitParser(void) {
11148 if (xmlParserInitialized != 0)
/* Call initGenericErrorDefaultFunc(NULL) when the generic error hook is the default one or unset. */
11151 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11152 (xmlGenericError == NULL))
11153 initGenericErrorDefaultFunc(NULL);
11157 xmlInitCharEncodingHandlers();
11158 xmlInitializePredefinedEntities();
11159 xmlDefaultSAXHandlerInit();
11160 xmlRegisterDefaultInputCallbacks();
11161 xmlRegisterDefaultOutputCallbacks();
11162 #ifdef LIBXML_HTML_ENABLED
11163 htmlInitAutoClose();
11164 htmlDefaultSAXHandlerInit();
11166 #ifdef LIBXML_XPATH_ENABLED
/* Mark initialisation as done; the test at the top of the function reads this flag. */
11169 xmlParserInitialized = 1;
11173 * xmlCleanupParser:
11175 * Cleanup function for the XML parser. It tries to reclaim all
11176 * parsing related global memory allocated for the parser processing.
11177 * It doesn't deallocate any document related memory. Calling this
11178 * function should not prevent reusing the parser.
11179 * One should call xmlCleanupParser() only when the process has
11180 * finished using the library or XML document built with it.
/*
 * xmlCleanupParser: calls the cleanup routines for the character encoding
 * handlers, the predefined entities, the catalog (when
 * LIBXML_CATALOG_ENABLED is defined), the thread support and the globals,
 * then resets xmlParserInitialized to 0.
 *
 * NOTE(review): the return type, the #endif line and the closing brace are
 * not visible in this listing.
 */
11184 xmlCleanupParser(void) {
11185 xmlCleanupCharEncodingHandlers();
11186 xmlCleanupPredefinedEntities();
11187 #ifdef LIBXML_CATALOG_ENABLED
11188 xmlCatalogCleanup();
11190 xmlCleanupThreads();
11191 xmlCleanupGlobals();
/* Clearing the flag means a later xmlInitParser() call no longer sees it set. */
11192 xmlParserInitialized = 0;