2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
44 #include <libxml/xmlmemory.h>
45 #include <libxml/threads.h>
46 #include <libxml/globals.h>
47 #include <libxml/tree.h>
48 #include <libxml/parser.h>
49 #include <libxml/parserInternals.h>
50 #include <libxml/valid.h>
51 #include <libxml/entities.h>
52 #include <libxml/xmlerror.h>
53 #include <libxml/encoding.h>
54 #include <libxml/xmlIO.h>
55 #include <libxml/uri.h>
56 #ifdef LIBXML_CATALOG_ENABLED
57 #include <libxml/catalog.h>
66 #ifdef HAVE_SYS_STAT_H
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
86 #define MAX_DEPTH 1024
88 #define XML_PARSER_BIG_BUFFER_SIZE 300
89 #define XML_PARSER_BUFFER_SIZE 100
91 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
94 * List of XML prefixed PI allowed by W3C specs
97 static const char *xmlW3CPIs[] = {
102 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
103 xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
107 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
109 void *user_data, int depth, const xmlChar *URL,
110 const xmlChar *ID, xmlNodePtr *list);
113 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
117 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
119 /************************************************************************
121 * Parser stacks related functions and macros *
123 ************************************************************************/
125 xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
130 * @ctxt: an XML parser context
131 * @value: the parser input
133 * Pushes a new parser input on top of the input stack
135 * Returns 0 in case of error, the index in the stack otherwise
138 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
140 if (ctxt->inputNr >= ctxt->inputMax) {
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
151 ctxt->inputTab[ctxt->inputNr] = value;
153 return (ctxt->inputNr++);
157 * @ctxt: an XML parser context
159 * Pops the top parser input from the input stack
161 * Returns the input just removed
163 extern xmlParserInputPtr
164 inputPop(xmlParserCtxtPtr ctxt)
166 xmlParserInputPtr ret;
168 if (ctxt->inputNr <= 0)
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
181 * @ctxt: an XML parser context
182 * @value: the element node
184 * Pushes a new element node on top of the node stack
186 * Returns 0 in case of error, the index in the stack otherwise
189 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
214 ctxt->nodeTab[ctxt->nodeNr] = value;
216 return (ctxt->nodeNr++);
220 * @ctxt: an XML parser context
222 * Pops the top element node from the node stack
224 * Returns the node just removed
227 nodePop(xmlParserCtxtPtr ctxt)
231 if (ctxt->nodeNr <= 0)
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
244 * @ctxt: an XML parser context
245 * @value: the element name
247 * Pushes a new element name on top of the name stack
249 * Returns 0 in case of error, the index in the stack otherwise
252 namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
254 if (ctxt->nameNr >= ctxt->nameMax) {
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
265 ctxt->nameTab[ctxt->nameNr] = value;
267 return (ctxt->nameNr++);
271 * @ctxt: an XML parser context
273 * Pops the top element name from the name stack
275 * Returns the name just removed
278 namePop(xmlParserCtxtPtr ctxt)
282 if (ctxt->nameNr <= 0)
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
294 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
295 if (ctxt->spaceNr >= ctxt->spaceMax) {
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
310 static int spacePop(xmlParserCtxtPtr ctxt) {
312 if (ctxt->spaceNr <= 0) return(0);
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
324 * Macros for accessing the content. Those should be used only by the parser,
327 * Dirty macros, i.e. one often needs to make assumptions about the context to
330 * CUR_PTR return the current pointer to the xmlChar to be parsed.
331 * To be used with extreme caution since operations consuming
332 * characters may move the input buffer to a different location !
333 * CUR returns the current xmlChar value, i.e. an 8 bit value if compiled
334 * This should be used internally by the parser
335 * only to compare to ASCII values otherwise it would break when
336 * running with UTF-8 encoding.
337 * RAW same as CUR but in the input buffer, bypass any token
338 * extraction that may have been done
339 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
340 * to compare on ASCII based substring.
341 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
342 * strings within the parser.
344 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
346 * NEXT Skip to the next character, this does the proper decoding
347 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
348 * NEXTL(l) Skip l xmlChar in the input buffer
349 * CUR_CHAR(l) returns the current unicode character (int), set l
350 * to the number of xmlChars used for the encoding [0-5].
351 * CUR_SCHAR same but operate on a string instead of the context
352 * COPY_BUF copy the current unicode char to the target buffer, increment
354 * GROW, SHRINK handling of input buffers
/*
 * Direct accessors on the current input of the parser context. Each one
 * expands in terms of a variable named ctxt that must be in scope at the
 * point of use.
 *   RAW, CUR   the xmlChar found at the current input position
 *   NXT(val)   the xmlChar found val positions after the current one
 *   CUR_PTR    the current input pointer itself (usable as an lvalue)
 */
#define RAW (*(ctxt->input->cur))
#define CUR (*(ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
362 #define SKIP(val) do { \
363 ctxt->nbChars += (val),ctxt->input->cur += (val); \
364 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
365 if ((*ctxt->input->cur == 0) && \
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
370 #define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
373 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
374 xmlParserInputShrink(ctxt->input);
375 if ((*ctxt->input->cur == 0) &&
376 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
380 #define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
383 static void xmlGROW (xmlParserCtxtPtr ctxt) {
384 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
385 if ((*ctxt->input->cur == 0) &&
386 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
/*
 * Shorthands that invoke the blank-skipping and next-character routines on
 * the variable named ctxt in scope at the point of use.
 */
#define SKIP_BLANKS (xmlSkipBlankChars(ctxt))

#define NEXT (xmlNextChar(ctxt))
395 ctxt->input->cur++; \
397 if (*ctxt->input->cur == 0) \
398 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
401 #define NEXTL(l) do { \
402 if (*(ctxt->input->cur) == '\n') { \
403 ctxt->input->line++; ctxt->input->col = 1; \
404 } else ctxt->input->col++; \
405 ctxt->input->cur += l; \
406 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
/*
 * CUR_CHAR(l)        decode the character at the current input position of
 *                    ctxt through xmlCurrentChar(); the address of l is
 *                    handed to that call so it can store the length used.
 * CUR_SCHAR(s, l)    same, but through xmlStringCurrentChar() on string s.
 * COPY_BUF(l,b,i,v)  append character v to buffer b at index i: a plain
 *                    one-unit store when l is 1, otherwise
 *                    xmlCopyCharMultiByte() writes it and i advances by the
 *                    value that call returns.
 *
 * Every argument is parenthesized so that compound expressions keep their
 * meaning after expansion (e.g. COPY_BUF(n & 3, ...) must test
 * (n & 3) == 1, not n & (3 == 1)). Note that i may be evaluated more than
 * once. COPY_BUF deliberately stays a bare if/else without a trailing
 * semicolon so that existing call sites expand exactly as before.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
418 * @ctxt: the XML parser context
420 * Skip all blank characters found at that point in the input stream.
421 * It pops up finished entities in the process if allowable at that point.
423 * Returns the number of space chars skipped
427 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
431 * It's Okay to use CUR/NEXT here since all the blanks are on
434 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
437 * if we are in the document content, go really fast
439 cur = ctxt->input->cur;
440 while (IS_BLANK(*cur)) {
442 ctxt->input->line++; ctxt->input->col = 1;
447 ctxt->input->cur = cur;
448 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
449 cur = ctxt->input->cur;
452 ctxt->input->cur = cur;
457 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
462 while ((cur == 0) && (ctxt->inputNr > 1) &&
463 (ctxt->instate != XML_PARSER_COMMENT)) {
468 * Need to handle support of entities branching here
470 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
471 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
476 /************************************************************************
478 * Commodity functions to handle entities *
480 ************************************************************************/
484 * @ctxt: an XML parser context
486 * xmlPopInput: the current input pointed by ctxt->input came to an end
487 * pop it and return the next char.
489 * Returns the current xmlChar in the parser context
492 xmlPopInput(xmlParserCtxtPtr ctxt) {
493 if (ctxt->inputNr == 1) return(0); /* End of main Input */
494 if (xmlParserDebugEntities)
495 xmlGenericError(xmlGenericErrorContext,
496 "Popping input %d\n", ctxt->inputNr);
497 xmlFreeInputStream(inputPop(ctxt));
498 if ((*ctxt->input->cur == 0) &&
499 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
500 return(xmlPopInput(ctxt));
506 * @ctxt: an XML parser context
507 * @input: an XML parser input fragment (entity, XML fragment ...).
509 * xmlPushInput: switch to a new input stream which is stacked on top
510 * of the previous one(s).
513 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
514 if (input == NULL) return;
516 if (xmlParserDebugEntities) {
517 if ((ctxt->input != NULL) && (ctxt->input->filename))
518 xmlGenericError(xmlGenericErrorContext,
519 "%s(%d): ", ctxt->input->filename,
521 xmlGenericError(xmlGenericErrorContext,
522 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
524 inputPush(ctxt, input);
530 * @ctxt: an XML parser context
532 * parse Reference declarations
534 * [66] CharRef ::= '&#' [0-9]+ ';' |
535 * '&#x' [0-9a-fA-F]+ ';'
537 * [ WFC: Legal Character ]
538 * Characters referred to using character references must match the
539 * production for Char.
541 * Returns the value parsed (as an int), 0 in case of error
544 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
545 unsigned int val = 0;
549 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
551 if ((RAW == '&') && (NXT(1) == '#') &&
555 while (RAW != ';') { /* loop blocked by count */
560 if ((RAW >= '0') && (RAW <= '9'))
561 val = val * 16 + (CUR - '0');
562 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
563 val = val * 16 + (CUR - 'a') + 10;
564 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
565 val = val * 16 + (CUR - 'A') + 10;
567 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
569 ctxt->sax->error(ctxt->userData,
570 "xmlParseCharRef: invalid hexadecimal value\n");
571 ctxt->wellFormed = 0;
572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
580 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
584 } else if ((RAW == '&') && (NXT(1) == '#')) {
587 while (RAW != ';') { /* loop blocked by count */
592 if ((RAW >= '0') && (RAW <= '9'))
593 val = val * 10 + (CUR - '0');
595 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
597 ctxt->sax->error(ctxt->userData,
598 "xmlParseCharRef: invalid decimal value\n");
599 ctxt->wellFormed = 0;
600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
608 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
613 ctxt->errNo = XML_ERR_INVALID_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseCharRef: invalid value\n");
617 ctxt->wellFormed = 0;
618 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
622 * [ WFC: Legal Character ]
623 * Characters referred to using character references must match the
624 * production for Char.
629 ctxt->errNo = XML_ERR_INVALID_CHAR;
630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
631 ctxt->sax->error(ctxt->userData,
632 "xmlParseCharRef: invalid xmlChar value %d\n",
634 ctxt->wellFormed = 0;
635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
641 * xmlParseStringCharRef:
642 * @ctxt: an XML parser context
643 * @str: a pointer to an index in the string
645 * parse Reference declarations, variant parsing from a string rather
646 * than an input flow.
648 * [66] CharRef ::= '&#' [0-9]+ ';' |
649 * '&#x' [0-9a-fA-F]+ ';'
651 * [ WFC: Legal Character ]
652 * Characters referred to using character references must match the
653 * production for Char.
655 * Returns the value parsed (as an int), 0 in case of error, str will be
656 * updated to the current value of the index
659 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
664 if ((str == NULL) || (*str == NULL)) return(0);
667 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
670 while (cur != ';') { /* Non input consuming loop */
671 if ((cur >= '0') && (cur <= '9'))
672 val = val * 16 + (cur - '0');
673 else if ((cur >= 'a') && (cur <= 'f'))
674 val = val * 16 + (cur - 'a') + 10;
675 else if ((cur >= 'A') && (cur <= 'F'))
676 val = val * 16 + (cur - 'A') + 10;
678 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
680 ctxt->sax->error(ctxt->userData,
681 "xmlParseStringCharRef: invalid hexadecimal value\n");
682 ctxt->wellFormed = 0;
683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
692 } else if ((cur == '&') && (ptr[1] == '#')){
695 while (cur != ';') { /* Non input consuming loops */
696 if ((cur >= '0') && (cur <= '9'))
697 val = val * 10 + (cur - '0');
699 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
701 ctxt->sax->error(ctxt->userData,
702 "xmlParseStringCharRef: invalid decimal value\n");
703 ctxt->wellFormed = 0;
704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
714 ctxt->errNo = XML_ERR_INVALID_CHARREF;
715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
716 ctxt->sax->error(ctxt->userData,
717 "xmlParseStringCharRef: invalid value\n");
718 ctxt->wellFormed = 0;
719 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
725 * [ WFC: Legal Character ]
726 * Characters referred to using character references must match the
727 * production for Char.
732 ctxt->errNo = XML_ERR_INVALID_CHAR;
733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
734 ctxt->sax->error(ctxt->userData,
735 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
736 ctxt->wellFormed = 0;
737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
743 * xmlNewBlanksWrapperInputStream:
744 * @ctxt: an XML parser context
745 * @entity: an Entity pointer
747 * Create a new input stream for wrapping
748 * blanks around a PEReference
750 * Returns the new input stream or NULL
753 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
755 static xmlParserInputPtr
756 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
757 xmlParserInputPtr input;
760 if (entity == NULL) {
761 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763 ctxt->sax->error(ctxt->userData,
764 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
768 if (xmlParserDebugEntities)
769 xmlGenericError(xmlGenericErrorContext,
770 "new blanks wrapper for entity: %s\n", entity->name);
771 input = xmlNewInputStream(ctxt);
775 length = xmlStrlen(entity->name) + 5;
776 buffer = xmlMalloc(length);
777 if (buffer == NULL) {
782 buffer [length-3] = ';';
783 buffer [length-2] = ' ';
784 buffer [length-1] = 0;
785 memcpy(buffer + 2, entity->name, length - 5);
786 input->free = deallocblankswrapper;
787 input->base = buffer;
789 input->length = length;
790 input->end = &buffer[length];
795 * xmlParserHandlePEReference:
796 * @ctxt: the parser context
798 * [69] PEReference ::= '%' Name ';'
800 * [ WFC: No Recursion ]
801 * A parsed entity must not contain a recursive
802 * reference to itself, either directly or indirectly.
804 * [ WFC: Entity Declared ]
805 * In a document without any DTD, a document with only an internal DTD
806 * subset which contains no parameter entity references, or a document
807 * with "standalone='yes'", ... ... The declaration of a parameter
808 * entity must precede any reference to it...
810 * [ VC: Entity Declared ]
811 * In a document with an external subset or external parameter entities
812 * with "standalone='no'", ... ... The declaration of a parameter entity
813 * must precede any reference to it...
816 * Parameter-entity references may only appear in the DTD.
817 * NOTE: misleading but this is handled.
819 * A PEReference may have been detected in the current input stream
820 * the handling is done according to
821 * http://www.w3.org/TR/REC-xml#entproc
823 * - Included in literal in entity values
824 * - Included as Parameter Entity reference within DTDs
827 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
829 xmlEntityPtr entity = NULL;
830 xmlParserInputPtr input;
832 if (RAW != '%') return;
833 switch(ctxt->instate) {
834 case XML_PARSER_CDATA_SECTION:
836 case XML_PARSER_COMMENT:
838 case XML_PARSER_START_TAG:
840 case XML_PARSER_END_TAG:
843 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
845 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
846 ctxt->wellFormed = 0;
847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
849 case XML_PARSER_PROLOG:
850 case XML_PARSER_START:
851 case XML_PARSER_MISC:
852 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
854 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
855 ctxt->wellFormed = 0;
856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
858 case XML_PARSER_ENTITY_DECL:
859 case XML_PARSER_CONTENT:
860 case XML_PARSER_ATTRIBUTE_VALUE:
862 case XML_PARSER_SYSTEM_LITERAL:
863 case XML_PARSER_PUBLIC_LITERAL:
864 /* we just ignore it there */
866 case XML_PARSER_EPILOG:
867 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
869 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
870 ctxt->wellFormed = 0;
871 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
873 case XML_PARSER_ENTITY_VALUE:
875 * NOTE: in the case of entity values, we don't do the
876 * substitution here since we need the literal
877 * entity value to be able to save the internal
878 * subset of the document.
879 * This will be handled by xmlStringDecodeEntities
884 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
885 * In the internal DTD subset, parameter-entity references
886 * can occur only where markup declarations can occur, not
887 * within markup declarations.
888 * In that case this is handled in xmlParseMarkupDecl
890 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
892 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
895 case XML_PARSER_IGNORE:
900 name = xmlParseName(ctxt);
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "PEReference: %s\n", name);
905 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
907 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
908 ctxt->wellFormed = 0;
909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
913 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
914 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
915 if (entity == NULL) {
918 * [ WFC: Entity Declared ]
919 * In a document without any DTD, a document with only an
920 * internal DTD subset which contains no parameter entity
921 * references, or a document with "standalone='yes'", ...
922 * ... The declaration of a parameter entity must precede
923 * any reference to it...
925 if ((ctxt->standalone == 1) ||
926 ((ctxt->hasExternalSubset == 0) &&
927 (ctxt->hasPErefs == 0))) {
928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
929 ctxt->sax->error(ctxt->userData,
930 "PEReference: %%%s; not found\n", name);
931 ctxt->wellFormed = 0;
932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
935 * [ VC: Entity Declared ]
936 * In a document with an external subset or external
937 * parameter entities with "standalone='no'", ...
938 * ... The declaration of a parameter entity must precede
939 * any reference to it...
941 if ((!ctxt->disableSAX) &&
942 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
943 ctxt->vctxt.error(ctxt->vctxt.userData,
944 "PEReference: %%%s; not found\n", name);
945 } else if ((!ctxt->disableSAX) &&
946 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
947 ctxt->sax->warning(ctxt->userData,
948 "PEReference: %%%s; not found\n", name);
951 } else if (ctxt->input->free != deallocblankswrapper) {
952 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
953 xmlPushInput(ctxt, input);
955 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
956 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
961 * handle the extra spaces added before and after
962 * c.f. http://www.w3.org/TR/REC-xml#as-PE
963 * this is done independently.
965 input = xmlNewEntityInputStream(ctxt, entity);
966 xmlPushInput(ctxt, input);
969 * Get the 4 first bytes and decode the charset
970 * if enc != XML_CHAR_ENCODING_NONE
971 * plug some encoding conversion routines.
974 if (entity->length >= 4) {
979 enc = xmlDetectCharEncoding(start, 4);
980 if (enc != XML_CHAR_ENCODING_NONE) {
981 xmlSwitchEncoding(ctxt, enc);
985 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
986 (RAW == '<') && (NXT(1) == '?') &&
987 (NXT(2) == 'x') && (NXT(3) == 'm') &&
988 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
989 xmlParseTextDecl(ctxt);
992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
993 ctxt->sax->error(ctxt->userData,
994 "xmlParserHandlePEReference: %s is not a parameter entity\n",
996 ctxt->wellFormed = 0;
997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1001 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1003 ctxt->sax->error(ctxt->userData,
1004 "xmlParserHandlePEReference: expecting ';'\n");
1005 ctxt->wellFormed = 0;
1006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1013 * Macro used to grow the current buffer.
1015 #define growBuffer(buffer) { \
1016 buffer##_size *= 2; \
1017 buffer = (xmlChar *) \
1018 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
1019 if (buffer == NULL) { \
1020 xmlGenericError(xmlGenericErrorContext, "realloc failed"); \
1026 * xmlStringDecodeEntities:
1027 * @ctxt: the parser context
1028 * @str: the input string
1029 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1030 * @end: an end marker xmlChar, 0 if none
1031 * @end2: an end marker xmlChar, 0 if none
1032 * @end3: an end marker xmlChar, 0 if none
1034 * Takes an entity string content and processes it to do the adequate substitutions.
1036 * [67] Reference ::= EntityRef | CharRef
1038 * [69] PEReference ::= '%' Name ';'
1040 * Returns A newly allocated string with the substitution done. The caller
1041 * must deallocate it !
1044 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1045 xmlChar end, xmlChar end2, xmlChar end3) {
1046 xmlChar *buffer = NULL;
1047 int buffer_size = 0;
1049 xmlChar *current = NULL;
1057 if (ctxt->depth > 40) {
1058 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1060 ctxt->sax->error(ctxt->userData,
1061 "Detected entity reference loop\n");
1062 ctxt->wellFormed = 0;
1063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1068 * allocate a translation buffer.
1070 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1071 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1072 if (buffer == NULL) {
1073 xmlGenericError(xmlGenericErrorContext,
1074 "xmlStringDecodeEntities: malloc failed");
1079 * OK loop until we reach one of the ending char or a size limit.
1080 * we are operating on already parsed values.
1082 c = CUR_SCHAR(str, l);
1083 while ((c != 0) && (c != end) && /* non input consuming loop */
1084 (c != end2) && (c != end3)) {
1087 if ((c == '&') && (str[1] == '#')) {
1088 int val = xmlParseStringCharRef(ctxt, &str);
1090 COPY_BUF(0,buffer,nbchars,val);
1092 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1093 if (xmlParserDebugEntities)
1094 xmlGenericError(xmlGenericErrorContext,
1095 "String decoding Entity Reference: %.30s\n",
1097 ent = xmlParseStringEntityRef(ctxt, &str);
1098 if ((ent != NULL) &&
1099 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1100 if (ent->content != NULL) {
1101 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1104 ctxt->sax->error(ctxt->userData,
1105 "internal error entity has no content\n");
1107 } else if ((ent != NULL) && (ent->content != NULL)) {
1111 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1116 while (*current != 0) { /* non input consuming loop */
1117 buffer[nbchars++] = *current++;
1119 buffer_size - XML_PARSER_BUFFER_SIZE) {
1125 } else if (ent != NULL) {
1126 int i = xmlStrlen(ent->name);
1127 const xmlChar *cur = ent->name;
1129 buffer[nbchars++] = '&';
1130 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1134 buffer[nbchars++] = *cur++;
1135 buffer[nbchars++] = ';';
1137 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1138 if (xmlParserDebugEntities)
1139 xmlGenericError(xmlGenericErrorContext,
1140 "String decoding PE Reference: %.30s\n", str);
1141 ent = xmlParseStringPEReference(ctxt, &str);
1146 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1151 while (*current != 0) { /* non input consuming loop */
1152 buffer[nbchars++] = *current++;
1154 buffer_size - XML_PARSER_BUFFER_SIZE) {
1162 COPY_BUF(l,buffer,nbchars,c);
1164 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1168 c = CUR_SCHAR(str, l);
1170 buffer[nbchars++] = 0;
1175 /************************************************************************
1177 * Commodity functions to handle xmlChars *
1179 ************************************************************************/
1183 * @cur: the input xmlChar *
1184 * @len: the len of @cur
1186 * a strndup for array of xmlChar's
1188 * Returns a new xmlChar * or NULL
1191 xmlStrndup(const xmlChar *cur, int len) {
1194 if ((cur == NULL) || (len < 0)) return(NULL);
1195 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1197 xmlGenericError(xmlGenericErrorContext,
1198 "malloc of %ld byte failed\n",
1199 (len + 1) * (long)sizeof(xmlChar));
1202 memcpy(ret, cur, len * sizeof(xmlChar));
1209 * @cur: the input xmlChar *
1211 * a strdup for array of xmlChar's. Since they are supposed to be
1212 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1213 * a termination mark of '0'.
1215 * Returns a new xmlChar * or NULL
1218 xmlStrdup(const xmlChar *cur) {
1219 const xmlChar *p = cur;
1221 if (cur == NULL) return(NULL);
1222 while (*p != 0) p++; /* non input consuming */
1223 return(xmlStrndup(cur, p - cur));
1228 * @cur: the input char *
1229 * @len: the len of @cur
1231 * a strndup for char's to xmlChar's
1233 * Returns a new xmlChar * or NULL
1237 xmlCharStrndup(const char *cur, int len) {
1241 if ((cur == NULL) || (len < 0)) return(NULL);
1242 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1244 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1245 (len + 1) * (long)sizeof(xmlChar));
1248 for (i = 0;i < len;i++)
1249 ret[i] = (xmlChar) cur[i];
1256 * @cur: the input char *
1258 * a strdup for char's to xmlChar's
1260 * Returns a new xmlChar * or NULL
1264 xmlCharStrdup(const char *cur) {
1265 const char *p = cur;
1267 if (cur == NULL) return(NULL);
1268 while (*p != '\0') p++; /* non input consuming */
1269 return(xmlCharStrndup(cur, p - cur));
1274 * @str1: the first xmlChar *
1275 * @str2: the second xmlChar *
1277 * a strcmp for xmlChar's
1279 * Returns the integer result of the comparison
1283 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1286 if (str1 == str2) return(0);
1287 if (str1 == NULL) return(-1);
1288 if (str2 == NULL) return(1);
1290 tmp = *str1++ - *str2;
1291 if (tmp != 0) return(tmp);
1292 } while (*str2++ != 0);
1298 * @str1: the first xmlChar *
1299 * @str2: the second xmlChar *
1301 * Check if both strings are equal or have the same content.
1302 * Should be a bit more readable and faster than xmlStrcmp()
1304 * Returns 1 if they are equal, 0 if they are different
1308 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1309 if (str1 == str2) return(1);
1310 if (str1 == NULL) return(0);
1311 if (str2 == NULL) return(0);
1313 if (*str1++ != *str2) return(0);
1320 * @str1: the first xmlChar *
1321 * @str2: the second xmlChar *
1322 * @len: the max comparison length
1324 * a strncmp for xmlChar's
1326 * Returns the integer result of the comparison
1330 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1333 if (len <= 0) return(0);
1334 if (str1 == str2) return(0);
1335 if (str1 == NULL) return(-1);
1336 if (str2 == NULL) return(1);
1338 tmp = *str1++ - *str2;
1339 if (tmp != 0 || --len == 0) return(tmp);
1340 } while (*str2++ != 0);
1344 static const xmlChar casemap[256] = {
1345 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1346 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1347 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1348 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1349 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1350 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1351 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1352 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1353 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1354 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1355 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1356 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1357 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1358 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1359 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1360 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1361 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1362 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1363 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1364 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1365 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1366 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1367 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1368 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1369 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1370 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1371 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1372 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1373 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1374 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1375 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1376 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1381 * @str1: the first xmlChar *
1382 * @str2: the second xmlChar *
1384 * a strcasecmp for xmlChar's
1386 * Returns the integer result of the comparison
1390 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1393 if (str1 == str2) return(0);
1394 if (str1 == NULL) return(-1);
1395 if (str2 == NULL) return(1);
1397 tmp = casemap[*str1++] - casemap[*str2];
1398 if (tmp != 0) return(tmp);
1399 } while (*str2++ != 0);
1405 * @str1: the first xmlChar *
1406 * @str2: the second xmlChar *
1407 * @len: the max comparison length
1409 * a strncasecmp for xmlChar's
1411 * Returns the integer result of the comparison
1415 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1418 if (len <= 0) return(0);
1419 if (str1 == str2) return(0);
1420 if (str1 == NULL) return(-1);
1421 if (str2 == NULL) return(1);
1423 tmp = casemap[*str1++] - casemap[*str2];
1424 if (tmp != 0 || --len == 0) return(tmp);
1425 } while (*str2++ != 0);
1431 * @str: the xmlChar * array
1432 * @val: the xmlChar to search
1434 * a strchr for xmlChar's
1436 * Returns the xmlChar * for the first occurrence or NULL.
1440 xmlStrchr(const xmlChar *str, xmlChar val) {
1441 if (str == NULL) return(NULL);
1442 while (*str != 0) { /* non input consuming */
1443 if (*str == val) return((xmlChar *) str);
1451 * @str: the xmlChar * array (haystack)
1452 * @val: the xmlChar to search (needle)
1454 * a strstr for xmlChar's
1456 * Returns the xmlChar * for the first occurrence or NULL.
1460 xmlStrstr(const xmlChar *str, const xmlChar *val) {
1463 if (str == NULL) return(NULL);
1464 if (val == NULL) return(NULL);
1467 if (n == 0) return(str);
1468 while (*str != 0) { /* non input consuming */
1470 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1479 * @str: the xmlChar * array (haystack)
1480 * @val: the xmlChar to search (needle)
1482 * a case-ignoring strstr for xmlChar's
1484 * Returns the xmlChar * for the first occurrence or NULL.
1488 xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1491 if (str == NULL) return(NULL);
1492 if (val == NULL) return(NULL);
1495 if (n == 0) return(str);
1496 while (*str != 0) { /* non input consuming */
1497 if (casemap[*str] == casemap[*val])
1498 if (!xmlStrncasecmp(str, val, n)) return(str);
1506 * @str: the xmlChar * array (haystack)
1507 * @start: the index of the first char (zero based)
1508 * @len: the length of the substring
1510 * Extract a substring of a given string
1512 * Returns the xmlChar * for the first occurrence or NULL.
1516 xmlStrsub(const xmlChar *str, int start, int len) {
1519 if (str == NULL) return(NULL);
1520 if (start < 0) return(NULL);
1521 if (len < 0) return(NULL);
1523 for (i = 0;i < start;i++) {
1524 if (*str == 0) return(NULL);
1527 if (*str == 0) return(NULL);
1528 return(xmlStrndup(str, len));
1533 * @str: the xmlChar * array
1535 * length of a xmlChar's string
1537 * Returns the number of xmlChar contained in the ARRAY.
1541 xmlStrlen(const xmlChar *str) {
1544 if (str == NULL) return(0);
1545 while (*str != 0) { /* non input consuming */
1554 * @cur: the original xmlChar * array
1555 * @add: the xmlChar * array added
1556 * @len: the length of @add
1558 * a strncat for array of xmlChar's, it will extend @cur with the len
1559 * first bytes of @add.
1561 * Returns a new xmlChar *, the original @cur is reallocated if needed
1562 * and should not be freed
1566 xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1570 if ((add == NULL) || (len == 0))
1573 return(xmlStrndup(add, len));
1575 size = xmlStrlen(cur);
1576 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1578 xmlGenericError(xmlGenericErrorContext,
1579 "xmlStrncat: realloc of %ld byte failed\n",
1580 (size + len + 1) * (long)sizeof(xmlChar));
1583 memcpy(&ret[size], add, len * sizeof(xmlChar));
1584 ret[size + len] = 0;
1590 * @cur: the original xmlChar * array
1591 * @add: the xmlChar * array added
1593 * a strcat for array of xmlChar's. Since they are supposed to be
1594 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1595 * a termination mark of '0'.
1597 * Returns a new xmlChar * containing the concatenated string.
1600 xmlStrcat(xmlChar *cur, const xmlChar *add) {
1601 const xmlChar *p = add;
1603 if (add == NULL) return(cur);
1605 return(xmlStrdup(add));
1607 while (*p != 0) p++; /* non input consuming */
1608 return(xmlStrncat(cur, add, p - add));
1611 /************************************************************************
1613 * Commodity functions, cleanup needed ? *
1615 ************************************************************************/
1619 * @ctxt: an XML parser context
1621 * @len: the size of @str
1623 * Is this a sequence of blank chars that one can ignore ?
1625 * Returns 1 if ignorable 0 otherwise.
/*
 * areBlanks decides whether the @len characters at @str may be reported
 * as ignorable whitespace. The visible checks run in this order: the
 * ignorableWhitespace and characters SAX callbacks are compared, the
 * value at ctxt->space is tested against 1, every character is checked
 * with IS_BLANK, xmlIsMixedElement() is consulted when ctxt->myDoc is
 * set, and finally a heuristic looks at the next input character and at
 * the last child of the current node.
 * NOTE(review): the statements following several of these tests are not
 * visible in this listing; confirm the value returned by each branch
 * against the complete source.
 */
1628 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1630 xmlNodePtr lastChild;
1633 * Don't spend time trying to differentiate them, the same callback is
1636 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
1640 * Check for xml:space value.
1642 if (*(ctxt->space) == 1)
1646 * Check that the string is made of blanks
1648 for (i = 0;i < len;i++)
1649 if (!(IS_BLANK(str[i]))) return(0);
1652 * Look if the element is mixed content in the DTD if available
1654 if (ctxt->node == NULL) return(0);
1655 if (ctxt->myDoc != NULL) {
1656 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1657 if (ret == 0) return(1);
1658 if (ret == 1) return(0);
1662 * Otherwise, heuristic :-\
1664 if (RAW != '<') return(0);
1665 if ((ctxt->node->children == NULL) &&
1666 (RAW == '<') && (NXT(1) == '/')) return(0);
1668 lastChild = xmlGetLastChild(ctxt->node);
1669 if (lastChild == NULL) {
1670 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1671 (ctxt->node->content != NULL)) return(0);
1672 } else if (xmlNodeIsText(lastChild))
1674 else if ((ctxt->node->children != NULL) &&
1675 (xmlNodeIsText(ctxt->node->children)))
1680 /************************************************************************
1682 * Extra stuff for namespace support *
1683 * Relates to http://www.w3.org/TR/WD-xml-names *
1685 ************************************************************************/
1689 * @ctxt: an XML parser context
1690 * @name: the UTF8 encoded qualified name to split
1691 * @prefix: a xmlChar **
1693 * parse an UTF8 encoded XML qualified name string
1695 * [NS 5] QName ::= (Prefix ':')? LocalPart
1697 * [NS 6] Prefix ::= NCName
1699 * [NS 7] LocalPart ::= NCName
1701 * Returns the local part, and prefix is updated
1702 * to get the Prefix if any.
/*
 * xmlSplitQName scans @name up to its first colon. Characters are first
 * gathered in the on-stack buf; once the part being read exceeds
 * XML_MAX_NAMELEN it is moved to a heap buffer obtained with xmlMalloc()
 * and enlarged with xmlRealloc() whenever fewer than 10 spare bytes
 * remain. If the scan stops on the terminating 0 (no colon) the string
 * collected so far is returned. Otherwise the first character after the
 * colon is validated and the remainder is gathered with the same
 * buffering scheme. A name starting with xml: is duplicated as a whole
 * when XML_XML_NAMESPACE is not defined.
 * Allocation failures are reported through ctxt->sax->error.
 * NOTE(review): the error paths of the first loop dereference ctxt->sax
 * without testing ctxt, while the later ones test ctxt != NULL first;
 * confirm whether a NULL @ctxt is a supported input.
 */
1706 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1707 xmlChar buf[XML_MAX_NAMELEN + 5];
1708 xmlChar *buffer = NULL;
1710 int max = XML_MAX_NAMELEN;
1711 xmlChar *ret = NULL;
1712 const xmlChar *cur = name;
1717 #ifndef XML_XML_NAMESPACE
1718 /* xml: prefix is not really a namespace */
1719 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1720 (cur[2] == 'l') && (cur[3] == ':'))
1721 return(xmlStrdup(name));
1724 /* nasty but valid */
1726 return(xmlStrdup(name));
1729 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1735 * Okay someone managed to make a huge name, so he's ready to pay
1736 * for the processing speed.
1740 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1741 if (buffer == NULL) {
1742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1743 ctxt->sax->error(ctxt->userData,
1744 "xmlSplitQName: out of memory\n");
1747 memcpy(buffer, buf, len);
1748 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1749 if (len + 10 > max) {
1751 buffer = (xmlChar *) xmlRealloc(buffer,
1752 max * sizeof(xmlChar));
1753 if (buffer == NULL) {
1754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1755 ctxt->sax->error(ctxt->userData,
1756 "xmlSplitQName: out of memory\n");
1767 ret = xmlStrndup(buf, len);
1771 max = XML_MAX_NAMELEN;
1777 if (c == 0) return(ret);
1782 * Check that the first character is proper to start
1785 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1786 ((c >= 0x41) && (c <= 0x5A)) ||
1787 (c == '_') || (c == ':'))) {
1789 int first = CUR_SCHAR(cur, l);
1791 if (!IS_LETTER(first) && (first != '_')) {
1792 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1793 (ctxt->sax->error != NULL))
1794 ctxt->sax->error(ctxt->userData,
1795 "Name %s is not XML Namespace compliant\n",
1801 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1807 * Okay someone managed to make a huge name, so he's ready to pay
1808 * for the processing speed.
1812 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1813 if (buffer == NULL) {
1814 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1815 (ctxt->sax->error != NULL))
1816 ctxt->sax->error(ctxt->userData,
1817 "xmlSplitQName: out of memory\n");
1820 memcpy(buffer, buf, len);
1821 while (c != 0) { /* tested bigname2.xml */
1822 if (len + 10 > max) {
1824 buffer = (xmlChar *) xmlRealloc(buffer,
1825 max * sizeof(xmlChar));
1826 if (buffer == NULL) {
1827 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1828 (ctxt->sax->error != NULL))
1829 ctxt->sax->error(ctxt->userData,
1830 "xmlSplitQName: out of memory\n");
1841 ret = xmlStrndup(buf, len);
1850 /************************************************************************
1852 * The parser itself *
1853 * Relates to http://www.w3.org/TR/REC-xml *
1855 ************************************************************************/
1857 static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
1860 * @ctxt: an XML parser context
1862 * parse an XML name.
1864 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1865 * CombiningChar | Extender
1867 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1869 * [6] Names ::= Name (S Name)*
1871 * Returns the Name parsed or NULL
/*
 * xmlParseName first tries a fast path directly on ctxt->input->cur:
 * one ASCII letter, underscore or colon, followed by any run of ASCII
 * letters, digits, underscore, dash, colon or dot. When the byte that
 * stops the run is a non-zero ASCII byte (below 0x80), the name is
 * copied with xmlStrndup() and the input cursor is moved past it.
 * Anything else is handed to xmlParseNameComplex().
 */
1875 xmlParseName(xmlParserCtxtPtr ctxt) {
1883 * Accelerator for simple ASCII names
1885 in = ctxt->input->cur;
1886 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1887 ((*in >= 0x41) && (*in <= 0x5A)) ||
1888 (*in == '_') || (*in == ':')) {
1890 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1891 ((*in >= 0x41) && (*in <= 0x5A)) ||
1892 ((*in >= 0x30) && (*in <= 0x39)) ||
1893 (*in == '_') || (*in == '-') ||
1894 (*in == ':') || (*in == '.'))
1896 if ((*in > 0) && (*in < 0x80)) {
1897 count = in - ctxt->input->cur;
1898 ret = xmlStrndup(ctxt->input->cur, count);
1899 ctxt->input->cur = in;
1903 return(xmlParseNameComplex(ctxt));
1907 * xmlParseNameAndCompare:
1908 * @ctxt: an XML parser context
1910 * parse an XML name and compares for match
1911 * (specialized for endtag parsing)
1914 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1915 * and the name for mismatch
/*
 * xmlParseNameAndCompare walks the input and @other in lockstep for as
 * long as the bytes match. If @other is exhausted and the next input
 * byte is a closing angle bracket or a blank, the input cursor is
 * advanced and the sentinel (xmlChar*) 1 is returned without any
 * allocation. Otherwise the name is parsed with xmlParseName() and
 * compared to @other with xmlStrEqual().
 */
1919 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1920 const xmlChar *cmp = other;
1926 in = ctxt->input->cur;
1927 while (*in != 0 && *in == *cmp) {
1931 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1933 ctxt->input->cur = in;
1934 return (xmlChar*) 1;
1936 /* failure (or end of input buffer), check with full function */
1937 ret = xmlParseName (ctxt);
1938 if (ret != 0 && xmlStrEqual (ret, other)) {
1940 return (xmlChar*) 1;
/*
 * xmlParseNameComplex is the general name parser used when the ASCII
 * fast path of xmlParseName() does not apply. Characters accepted by the
 * letter, digit, punctuation, combining and extender tests are copied
 * with COPY_BUF into the on-stack buf. Once len reaches XML_MAX_NAMELEN
 * the content is moved to a heap buffer obtained with xmlMalloc() and
 * enlarged with xmlRealloc() whenever fewer than 10 spare bytes remain.
 * count is tested against 100 on every character (the action taken is
 * not visible in this listing). Allocation failures are reported through
 * ctxt->sax->error. Short names are returned as an xmlStrndup() copy of
 * buf.
 */
1946 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1947 xmlChar buf[XML_MAX_NAMELEN + 5];
1953 * Handler for more complex cases
1957 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1958 (!IS_LETTER(c) && (c != '_') &&
1963 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1964 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1965 (c == '.') || (c == '-') ||
1966 (c == '_') || (c == ':') ||
1967 (IS_COMBINING(c)) ||
1968 (IS_EXTENDER(c)))) {
1969 if (count++ > 100) {
1973 COPY_BUF(l,buf,len,c);
1976 if (len >= XML_MAX_NAMELEN) {
1978 * Okay someone managed to make a huge name, so he's ready to pay
1979 * for the processing speed.
1984 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "xmlParseNameComplex: out of memory\n");
1991 memcpy(buffer, buf, len);
1992 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1993 (c == '.') || (c == '-') ||
1994 (c == '_') || (c == ':') ||
1995 (IS_COMBINING(c)) ||
1997 if (count++ > 100) {
2001 if (len + 10 > max) {
2003 buffer = (xmlChar *) xmlRealloc(buffer,
2004 max * sizeof(xmlChar));
2005 if (buffer == NULL) {
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData,
2008 "xmlParseNameComplex: out of memory\n");
2012 COPY_BUF(l,buffer,len,c);
2020 return(xmlStrndup(buf, len));
2024 * xmlParseStringName:
2025 * @ctxt: an XML parser context
2026 * @str: a pointer to the string pointer (IN/OUT)
2028 * parse an XML name.
2030 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2031 * CombiningChar | Extender
2033 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2035 * [6] Names ::= Name (S Name)*
2037 * Returns the Name parsed or NULL. The @str pointer
2038 * is updated to the current location in the string.
/*
 * xmlParseStringName parses a name from the string at *str rather than
 * from the parser input: characters are decoded with CUR_SCHAR from a
 * local cursor. Accepted characters are copied with COPY_BUF into the
 * on-stack buf; once len reaches XML_MAX_NAMELEN the content is moved to
 * a heap buffer obtained with xmlMalloc() and enlarged with xmlRealloc()
 * whenever fewer than 10 spare bytes remain. Allocation failures are
 * reported through ctxt->sax->error. Short names are returned as an
 * xmlStrndup() copy of buf.
 */
2042 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2043 xmlChar buf[XML_MAX_NAMELEN + 5];
2044 const xmlChar *cur = *str;
2048 c = CUR_SCHAR(cur, l);
2049 if (!IS_LETTER(c) && (c != '_') &&
2054 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2055 (c == '.') || (c == '-') ||
2056 (c == '_') || (c == ':') ||
2057 (IS_COMBINING(c)) ||
2059 COPY_BUF(l,buf,len,c);
2061 c = CUR_SCHAR(cur, l);
2062 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2064 * Okay someone managed to make a huge name, so he's ready to pay
2065 * for the processing speed.
2070 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2071 if (buffer == NULL) {
2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073 ctxt->sax->error(ctxt->userData,
2074 "xmlParseStringName: out of memory\n");
2077 memcpy(buffer, buf, len);
2078 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2079 (c == '.') || (c == '-') ||
2080 (c == '_') || (c == ':') ||
2081 (IS_COMBINING(c)) ||
2083 if (len + 10 > max) {
2085 buffer = (xmlChar *) xmlRealloc(buffer,
2086 max * sizeof(xmlChar));
2087 if (buffer == NULL) {
2088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2089 ctxt->sax->error(ctxt->userData,
2090 "xmlParseStringName: out of memory\n");
2094 COPY_BUF(l,buffer,len,c);
2096 c = CUR_SCHAR(cur, l);
2104 return(xmlStrndup(buf, len));
2109 * @ctxt: an XML parser context
2111 * parse an XML Nmtoken.
2113 * [7] Nmtoken ::= (NameChar)+
2115 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2117 * Returns the Nmtoken parsed or NULL
/*
 * xmlParseNmtoken gathers a run of name characters. Unlike the name
 * parsers, no separate test of the first character is visible here.
 * Accepted characters are copied with COPY_BUF into the on-stack buf;
 * once len reaches XML_MAX_NAMELEN the content is moved to a heap buffer
 * obtained with xmlMalloc() and enlarged with xmlRealloc() whenever
 * fewer than 10 spare bytes remain. count is tested against 100 on
 * every character (the action taken is not visible in this listing).
 * Allocation failures are reported through ctxt->sax->error. Short
 * tokens are returned as an xmlStrndup() copy of buf.
 */
2121 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2122 xmlChar buf[XML_MAX_NAMELEN + 5];
2130 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2131 (c == '.') || (c == '-') ||
2132 (c == '_') || (c == ':') ||
2133 (IS_COMBINING(c)) ||
2135 if (count++ > 100) {
2139 COPY_BUF(l,buf,len,c);
2142 if (len >= XML_MAX_NAMELEN) {
2144 * Okay someone managed to make a huge token, so he's ready to pay
2145 * for the processing speed.
2150 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
2154 "xmlParseNmtoken: out of memory\n");
2157 memcpy(buffer, buf, len);
2158 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2159 (c == '.') || (c == '-') ||
2160 (c == '_') || (c == ':') ||
2161 (IS_COMBINING(c)) ||
2163 if (count++ > 100) {
2167 if (len + 10 > max) {
2169 buffer = (xmlChar *) xmlRealloc(buffer,
2170 max * sizeof(xmlChar));
2171 if (buffer == NULL) {
2172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2173 ctxt->sax->error(ctxt->userData,
2174 "xmlParseNmtoken: out of memory\n");
2178 COPY_BUF(l,buffer,len,c);
2188 return(xmlStrndup(buf, len));
2192 * xmlParseEntityValue:
2193 * @ctxt: an XML parser context
2194 * @orig: if non-NULL store a copy of the original entity value
2196 * parse a value for ENTITY declarations
2198 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2199 * "'" ([^%&'] | PEReference | Reference)* "'"
2201 * Returns the EntityValue parsed with reference substituted or NULL
/*
 * xmlParseEntityValue reads a quoted entity value. The opening quote
 * (double or single) becomes the stop character; any other character
 * raises XML_ERR_ENTITY_NOT_STARTED. The content is copied into a heap
 * buffer that starts at XML_PARSER_BUFFER_SIZE bytes and is enlarged
 * with xmlRealloc() when fewer than 5 spare bytes remain. The copy loop
 * only ends on the stop character when ctxt->input is again the input
 * the value started in, and finished inputs are popped along the way.
 * The collected text is then scanned: every percent sign, and every
 * ampersand not followed by a hash, must introduce a name (parsed with
 * xmlParseStringName) ending with a semicolon, otherwise
 * XML_ERR_ENTITY_CHAR_ERROR is raised; a percent sign met while
 * ctxt->inSubset is 1 and ctxt->inputNr is 1 raises
 * XML_ERR_ENTITY_PE_INTERNAL. XML_ERR_ENTITY_NOT_FINISHED is also
 * reported here (its guarding condition is not visible in this listing).
 * The buffer finally goes through xmlStringDecodeEntities() with
 * XML_SUBSTITUTE_PEREF only, so general entity references are kept.
 */
2205 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2206 xmlChar *buf = NULL;
2208 int size = XML_PARSER_BUFFER_SIZE;
2211 xmlChar *ret = NULL;
2212 const xmlChar *cur = NULL;
2213 xmlParserInputPtr input;
2215 if (RAW == '"') stop = '"';
2216 else if (RAW == '\'') stop = '\'';
2218 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2220 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2221 ctxt->wellFormed = 0;
2222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2225 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2227 xmlGenericError(xmlGenericErrorContext,
2228 "malloc of %d byte failed\n", size);
2233 * The content of the entity definition is copied in a buffer.
2236 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2237 input = ctxt->input;
2242 * NOTE: 4.4.5 Included in Literal
2243 * When a parameter entity reference appears in a literal entity
2244 * value, ... a single or double quote character in the replacement
2245 * text is always treated as a normal data character and will not
2246 * terminate the literal.
2247 * In practice it means we stop the loop only when back at parsing
2248 * the initial entity and the quote is found
2250 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2251 (ctxt->input != input))) {
2252 if (len + 5 >= size) {
2254 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2256 xmlGenericError(xmlGenericErrorContext,
2257 "realloc of %d byte failed\n", size);
2261 COPY_BUF(l,buf,len,c);
2264 * Pop-up of finished entities.
2266 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2279 * Raise problem w.r.t. '&' and '%' being used in non-entities
2280 * reference constructs. Note Charref will be handled in
2281 * xmlStringDecodeEntities()
2284 while (*cur != 0) { /* non input consuming */
2285 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2290 name = xmlParseStringName(ctxt, &cur);
2291 if ((name == NULL) || (*cur != ';')) {
2292 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2294 ctxt->sax->error(ctxt->userData,
2295 "EntityValue: '%c' forbidden except for entities references\n",
2297 ctxt->wellFormed = 0;
2298 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2300 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2301 (ctxt->inputNr == 1)) {
2302 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2304 ctxt->sax->error(ctxt->userData,
2305 "EntityValue: PEReferences forbidden in internal subset\n",
2307 ctxt->wellFormed = 0;
2308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2317 * Then PEReference entities are substituted.
2320 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2322 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2323 ctxt->wellFormed = 0;
2324 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2329 * NOTE: 4.4.7 Bypassed
2330 * When a general entity reference appears in the EntityValue in
2331 * an entity declaration, it is bypassed and left as is.
2332 * so XML_SUBSTITUTE_REF is not set here.
2334 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2347 * @ctxt: an XML parser context
2349 * parse a value for an attribute
2350 * Note: the parser won't do substitution of entities here, this
2351 * will be handled later in xmlStringGetNodeList
2353 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2354 * "'" ([^<&'] | Reference)* "'"
2356 * 3.3.3 Attribute-Value Normalization:
2357 * Before the value of an attribute is passed to the application or
2358 * checked for validity, the XML processor must normalize it as follows:
2359 * - a character reference is processed by appending the referenced
2360 * character to the attribute value
2361 * - an entity reference is processed by recursively processing the
2362 * replacement text of the entity
2363 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2364 * appending #x20 to the normalized value, except that only a single
2365 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2366 * parsed entity or the literal entity value of an internal parsed entity
2367 * - other characters are processed by appending them to the normalized value
2368 * If the declared value is not CDATA, then the XML processor must further
2369 * process the normalized attribute value by discarding any leading and
2370 * trailing space (#x20) characters, and by replacing sequences of space
2371 * (#x20) characters by a single space (#x20) character.
2372 * All attributes for which no declaration has been read should be treated
2373 * by a non-validating parser as if declared CDATA.
2375 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2379 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
/*
 * xmlParseAttValue is the fast path for attribute values. The current
 * input byte must be a double or single quote, otherwise
 * XML_ERR_ATTRIBUTE_NOT_STARTED is raised. The scan then advances over
 * bytes in the range 0x20-0x7f that are neither the closing quote, an
 * ampersand nor a left angle bracket. One outcome hands the whole value
 * to xmlParseAttValueComplex(); the other copies the scanned bytes with
 * xmlStrndup(). The test choosing between them is not visible in this
 * listing.
 */
2382 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2384 const xmlChar *in = NULL;
2385 xmlChar *ret = NULL;
2388 in = (xmlChar *) CUR_PTR;
2389 if (*in != '"' && *in != '\'') {
2390 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2392 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2393 ctxt->wellFormed = 0;
2394 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2397 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2401 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2402 *in != '&' && *in != '<'
2407 return xmlParseAttValueComplex(ctxt);
2410 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2416 * xmlParseAttValueComplex:
2417 * @ctxt: an XML parser context
2419 * parse a value for an attribute, this is the fallback function
2420 * of xmlParseAttValue() when the attribute parsing requires handling
2421 * of non-ASCII characters.
2423 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
/*
 * xmlParseAttValueComplex is the general attribute value parser. The
 * opening quote selects the closing character; any other character
 * raises XML_ERR_ATTRIBUTE_NOT_STARTED. The value is assembled in a heap
 * buffer of XML_PARSER_BUFFER_SIZE bytes; each branch tests
 * len > buf_size - 10 before or after writing (the growth step itself is
 * not visible in this listing).
 * Inside the loop:
 *  - a character reference is parsed with xmlParseCharRef() and its
 *    value appended with xmlCopyChar();
 *  - an entity reference is parsed with xmlParseEntityRef(); when
 *    ctxt->replaceEntities is set, the content of a non predefined
 *    entity is expanded with xmlStringDecodeEntities() and copied, while
 *    for a predefined entity the first byte of its content is appended;
 *    when replacement is off the entity name is copied to the output and
 *    the content is still run through xmlStringDecodeEntities();
 *  - the whitespace characters 0x20, 0xD, 0xA and 0x9 are appended as
 *    0x20, every other character is appended unchanged.
 * After the loop XML_ERR_LT_IN_ATTRIBUTE or
 * XML_ERR_ATTRIBUTE_NOT_FINISHED is raised when the value did not stop
 * on the closing quote.
 */
2426 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2428 xmlChar *buf = NULL;
2432 xmlChar *current = NULL;
2437 if (NXT(0) == '"') {
2438 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2441 } else if (NXT(0) == '\'') {
2443 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2446 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2449 ctxt->wellFormed = 0;
2450 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2455 * allocate a translation buffer.
2457 buf_size = XML_PARSER_BUFFER_SIZE;
2458 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2460 xmlGenericError(xmlGenericErrorContext,
2461 "xmlParseAttValue: malloc failed");
2466 * OK loop until we reach one of the ending char or a size limit.
2469 while ((NXT(0) != limit) && /* checked */
2473 if (NXT(1) == '#') {
2474 int val = xmlParseCharRef(ctxt);
2476 if (ctxt->replaceEntities) {
2477 if (len > buf_size - 10) {
2483 * The reparsing will be done in xmlStringGetNodeList()
2484 * called by the attribute() function in SAX.c
2486 static xmlChar buffer[6] = "&";
2488 if (len > buf_size - 10) {
2491 current = &buffer[0];
2492 while (*current != 0) { /* non input consuming */
2493 buf[len++] = *current++;
2497 if (len > buf_size - 10) {
2500 len += xmlCopyChar(0, &buf[len], val);
2503 ent = xmlParseEntityRef(ctxt);
2504 if ((ent != NULL) &&
2505 (ctxt->replaceEntities != 0)) {
2508 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2509 rep = xmlStringDecodeEntities(ctxt, ent->content,
2510 XML_SUBSTITUTE_REF, 0, 0, 0);
2513 while (*current != 0) { /* non input consuming */
2514 buf[len++] = *current++;
2515 if (len > buf_size - 10) {
2522 if (len > buf_size - 10) {
2525 if (ent->content != NULL)
2526 buf[len++] = ent->content[0];
2528 } else if (ent != NULL) {
2529 int i = xmlStrlen(ent->name);
2530 const xmlChar *cur = ent->name;
2533 * This may look absurd but is needed to detect
2536 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2537 (ent->content != NULL)) {
2539 rep = xmlStringDecodeEntities(ctxt, ent->content,
2540 XML_SUBSTITUTE_REF, 0, 0, 0);
2546 * Just output the reference
2549 if (len > buf_size - i - 10) {
2553 buf[len++] = *cur++;
2558 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2559 COPY_BUF(l,buf,len,0x20);
2560 if (len > buf_size - 10) {
2564 COPY_BUF(l,buf,len,c);
2565 if (len > buf_size - 10) {
2576 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2578 ctxt->sax->error(ctxt->userData,
2579 "Unescaped '<' not allowed in attributes values\n");
2580 ctxt->wellFormed = 0;
2581 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2582 } else if (RAW != limit) {
2583 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2585 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2586 ctxt->wellFormed = 0;
2587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2594 * xmlParseSystemLiteral:
2595 * @ctxt: an XML parser context
2597 * parse an XML Literal
2599 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2601 * Returns the SystemLiteral parsed or NULL
/*
 * xmlParseSystemLiteral reads a quoted system literal. The parser state
 * found on entry is saved in state and put back in ctxt->instate on the
 * realloc failure path and after the copy loop; in between the state is
 * XML_PARSER_SYSTEM_LITERAL. A missing opening quote raises
 * XML_ERR_LITERAL_NOT_STARTED. Characters are copied with COPY_BUF into
 * a heap buffer that starts at XML_PARSER_BUFFER_SIZE bytes and is
 * enlarged with xmlRealloc() when fewer than 5 spare bytes remain. The
 * loop ends on the closing quote or on the first character rejected by
 * IS_CHAR; the latter raises XML_ERR_LITERAL_NOT_FINISHED.
 */
2605 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2606 xmlChar *buf = NULL;
2608 int size = XML_PARSER_BUFFER_SIZE;
2611 int state = ctxt->instate;
2618 } else if (RAW == '\'') {
2622 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2624 ctxt->sax->error(ctxt->userData,
2625 "SystemLiteral \" or ' expected\n");
2626 ctxt->wellFormed = 0;
2627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2631 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2633 xmlGenericError(xmlGenericErrorContext,
2634 "malloc of %d byte failed\n", size);
2637 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2639 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2640 if (len + 5 >= size) {
2642 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2644 xmlGenericError(xmlGenericErrorContext,
2645 "realloc of %d byte failed\n", size);
2646 ctxt->instate = (xmlParserInputState) state;
2655 COPY_BUF(l,buf,len,cur);
2665 ctxt->instate = (xmlParserInputState) state;
2666 if (!IS_CHAR(cur)) {
2667 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2670 ctxt->wellFormed = 0;
2671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2679 * xmlParsePubidLiteral:
2680 * @ctxt: an XML parser context
2682 * parse an XML public literal
2684 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2686 * Returns the PubidLiteral parsed or NULL.
/*
 * xmlParsePubidLiteral reads a quoted public identifier literal. The
 * parser state found on entry is saved in oldstate and put back in
 * ctxt->instate at the end; in between the state is
 * XML_PARSER_PUBLIC_LITERAL. A missing opening quote raises
 * XML_ERR_LITERAL_NOT_STARTED. The copy loop accepts characters passing
 * IS_PUBIDCHAR until the closing quote, using a heap buffer that starts
 * at XML_PARSER_BUFFER_SIZE bytes and is enlarged with xmlRealloc() when
 * len + 1 reaches size. XML_ERR_LITERAL_NOT_FINISHED is reported with
 * the Unfinished PubidLiteral message (its guarding condition is not
 * visible in this listing).
 * NOTE(review): the not-started error message mentions SystemLiteral
 * although this function parses a PubidLiteral; it looks copied from
 * xmlParseSystemLiteral - confirm before changing the runtime string.
 */
2690 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2691 xmlChar *buf = NULL;
2693 int size = XML_PARSER_BUFFER_SIZE;
2697 xmlParserInputState oldstate = ctxt->instate;
2703 } else if (RAW == '\'') {
2707 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2709 ctxt->sax->error(ctxt->userData,
2710 "SystemLiteral \" or ' expected\n");
2711 ctxt->wellFormed = 0;
2712 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2715 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2717 xmlGenericError(xmlGenericErrorContext,
2718 "malloc of %d byte failed\n", size);
2721 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
2723 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2724 if (len + 1 >= size) {
2726 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2728 xmlGenericError(xmlGenericErrorContext,
2729 "realloc of %d byte failed\n", size);
2749 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2752 ctxt->wellFormed = 0;
2753 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2757 ctxt->instate = oldstate;
2761 void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
2764 * @ctxt: an XML parser context
2765 * @cdata: int indicating whether we are within a CDATA section
2767 * parse a CharData section.
2768 * if we are within a CDATA section ']]>' marks an end of section.
2770 * The right angle bracket (>) may be represented using the string "&gt;",
2771 * and must, for compatibility, be escaped using "&gt;" or a character
2772 * reference when it appears in the string "]]>" in content, when that
2773 * string is not marking the end of a CDATA section.
2775 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
/*
 * xmlParseCharData is the fast path for character data. The line and
 * column of the input are saved on entry. The scan advances directly on
 * ctxt->input->cur over ASCII bytes (0x20-0x7F plus tab) other than the
 * left angle bracket, the closing square bracket and the ampersand, and
 * counts line feeds in ctxt->input->line. A closing square bracket
 * followed by a second one and a right angle bracket raises
 * XML_ERR_MISPLACED_CDATA_END. A scanned run is passed to the SAX layer
 * without copying: when it starts with a blank, areBlanks() selects
 * between the ignorableWhitespace and characters callbacks, otherwise
 * characters is called. The outer loop repeats while the next byte is in
 * the range 0x20-0x7F; after that the saved line and column are put
 * back and the rest is handed to xmlParseCharDataComplex().
 */
2779 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
2782 int line = ctxt->input->line;
2783 int col = ctxt->input->col;
2788 * Accelerated common case where input don't need to be
2789 * modified before passing it to the handler.
2792 in = ctxt->input->cur;
2795 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2796 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2799 ctxt->input->line++;
2801 while (*in == 0xA) {
2802 ctxt->input->line++;
2808 if ((in[1] == ']') && (in[2] == '>')) {
2809 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2811 ctxt->sax->error(ctxt->userData,
2812 "Sequence ']]>' not allowed in content\n");
2813 ctxt->input->cur = in;
2814 ctxt->wellFormed = 0;
2815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2821 nbchar = in - ctxt->input->cur;
2823 if (IS_BLANK(*ctxt->input->cur)) {
2824 const xmlChar *tmp = ctxt->input->cur;
2825 ctxt->input->cur = in;
2826 if (areBlanks(ctxt, tmp, nbchar)) {
2827 if (ctxt->sax->ignorableWhitespace != NULL)
2828 ctxt->sax->ignorableWhitespace(ctxt->userData,
2831 if (ctxt->sax->characters != NULL)
2832 ctxt->sax->characters(ctxt->userData,
2835 line = ctxt->input->line;
2836 col = ctxt->input->col;
2838 if (ctxt->sax->characters != NULL)
2839 ctxt->sax->characters(ctxt->userData,
2840 ctxt->input->cur, nbchar);
2841 line = ctxt->input->line;
2842 col = ctxt->input->col;
2845 ctxt->input->cur = in;
2849 ctxt->input->cur = in;
2851 ctxt->input->line++;
2852 continue; /* while */
2864 in = ctxt->input->cur;
2865 } while ((*in >= 0x20) && (*in <= 0x7F));
2868 ctxt->input->line = line;
2869 ctxt->input->col = col;
2870 xmlParseCharDataComplex(ctxt, cdata);
2874 * xmlParseCharDataComplex:
2875 * @ctxt: an XML parser context
2876 * @cdata: int indicating whether we are within a CDATA section
2878 * parse a CharData section. This is the fallback function
2879 * of xmlParseCharData() when the parsing requires handling
2880 * of non-ASCII characters.
/*
 * xmlParseCharDataComplex is the general character data parser.
 * Characters accepted by IS_CHAR, up to the next left angle bracket,
 * are copied with COPY_BUF into an on-stack buffer of
 * XML_PARSER_BIG_BUFFER_SIZE + 5 bytes. A closing square bracket
 * followed by a second one is checked for the misplaced CDATA end
 * sequence, which raises XML_ERR_MISPLACED_CDATA_END. Whenever nbchar
 * reaches XML_PARSER_BIG_BUFFER_SIZE, and once more after the loop, the
 * buffered text is delivered if a SAX handler is present and SAX is not
 * disabled: areBlanks() selects between the ignorableWhitespace and
 * characters callbacks.
 */
2883 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
2884 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2892 while ((cur != '<') && /* checked */
2894 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2895 if ((cur == ']') && (NXT(1) == ']') &&
2899 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2901 ctxt->sax->error(ctxt->userData,
2902 "Sequence ']]>' not allowed in content\n");
2903 /* Should this be relaxed ??? I see a "must here */
2904 ctxt->wellFormed = 0;
2905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2908 COPY_BUF(l,buf,nbchar,cur);
2909 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2911 * OK the segment is to be consumed as chars.
2913 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2914 if (areBlanks(ctxt, buf, nbchar)) {
2915 if (ctxt->sax->ignorableWhitespace != NULL)
2916 ctxt->sax->ignorableWhitespace(ctxt->userData,
2919 if (ctxt->sax->characters != NULL)
2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2935 * OK the segment is to be consumed as chars.
2937 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2938 if (areBlanks(ctxt, buf, nbchar)) {
2939 if (ctxt->sax->ignorableWhitespace != NULL)
2940 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2942 if (ctxt->sax->characters != NULL)
2943 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2950 * xmlParseExternalID:
2951 * @ctxt: an XML parser context
2952 * @publicID: a xmlChar** receiving PubidLiteral
2953 * @strict: indicate whether we should restrict parsing to only
2954 * production [75], see NOTE below
2956 * Parse an External ID or a Public ID
2958 * NOTE: Productions [75] and [83] interact badly since [75] can generate
2959 * 'PUBLIC' S PubidLiteral S SystemLiteral
2961 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2962 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2964 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2966 * Returns the SystemLiteral; in the second case publicID also
2967 * receives the PubidLiteral. If strict is off,
2968 * it is possible to return NULL and have publicID set.
/*
 * Handles the two keyword forms visible below: 'SYSTEM' followed by a
 * system literal, and 'PUBLIC' followed by a public literal (stored through
 * publicID) and a system literal. Each missing piece is reported through
 * the SAX error callback and clears ctxt->wellFormed.
 */
2972 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
/* Receives the result of xmlParseSystemLiteral(). */
2973 xmlChar *URI = NULL;
/* Form 1: the 'SYSTEM' keyword. */
2978 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2979 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2980 (NXT(4) == 'E') && (NXT(5) == 'M')) {
/* A blank is mandatory right after the keyword. */
2982 if (!IS_BLANK(CUR)) {
2983 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2985 ctxt->sax->error(ctxt->userData,
2986 "Space required after 'SYSTEM'\n");
2987 ctxt->wellFormed = 0;
2988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2991 URI = xmlParseSystemLiteral(ctxt);
/* Error path for a missing system literal (the guarding test is not
 * visible here). */
2993 ctxt->errNo = XML_ERR_URI_REQUIRED;
2994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2995 ctxt->sax->error(ctxt->userData,
2996 "xmlParseExternalID: SYSTEM, no URI\n");
2997 ctxt->wellFormed = 0;
2998 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Form 2: the 'PUBLIC' keyword. */
3000 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3001 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3002 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3004 if (!IS_BLANK(CUR)) {
3005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3007 ctxt->sax->error(ctxt->userData,
3008 "Space required after 'PUBLIC'\n");
3009 ctxt->wellFormed = 0;
3010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The public literal is handed back through the caller's pointer. */
3013 *publicID = xmlParsePubidLiteral(ctxt);
3014 if (*publicID == NULL) {
3015 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3019 ctxt->wellFormed = 0;
3020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3024 * We don't handle [83] so "S SystemLiteral" is required.
3026 if (!IS_BLANK(CUR)) {
3027 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029 ctxt->sax->error(ctxt->userData,
3030 "Space required after the Public Identifier\n");
3031 ctxt->wellFormed = 0;
3032 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3036 * We handle [83] so we return immediately, if
3037 * "S SystemLiteral" is not detected. From a purely parsing
3038 * point of view that's a nice mess.
/* Lookahead through ptr: return NULL unless one or more blanks followed by
 * a quote character are found. NOTE(review): presumably this is the
 * non-strict branch; the test on strict is not visible here. */
3044 if (!IS_BLANK(*ptr)) return(NULL);
3046 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3047 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3050 URI = xmlParseSystemLiteral(ctxt);
3052 ctxt->errNo = XML_ERR_URI_REQUIRED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "xmlParseExternalID: PUBLIC, no URI\n");
3056 ctxt->wellFormed = 0;
3057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3065 * @ctxt: an XML parser context
3067 * Skip an XML (SGML) comment <!-- .... -->
3068 * The spec says that "For compatibility, the string "--" (double-hyphen)
3069 * must not occur within comments. "
3071 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
/*
 * Collects the text of a comment into a heap buffer and, unless SAX
 * delivery is disabled, passes it to the SAX comment() callback. The parser
 * state is set to XML_PARSER_COMMENT while working and restored afterwards.
 */
3074 xmlParseComment(xmlParserCtxtPtr ctxt) {
3075 xmlChar *buf = NULL;
/* Initial capacity of buf. */
3077 int size = XML_PARSER_BUFFER_SIZE;
/* Parser state to restore on exit. */
3081 xmlParserInputState state;
/* Input in use at entry; compared at the end to detect a comment that
 * spans an entity boundary. */
3082 xmlParserInputPtr input = ctxt->input;
3086 * Check that there is a comment right here.
3088 if ((RAW != '<') || (NXT(1) != '!') ||
3089 (NXT(2) != '-') || (NXT(3) != '-')) return;
3091 state = ctxt->instate;
3092 ctxt->instate = XML_PARSER_COMMENT;
3095 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
/* Allocation failure path: log through the generic error handler and put
 * the saved parser state back. */
3097 xmlGenericError(xmlGenericErrorContext,
3098 "malloc of %d byte failed\n", size);
3099 ctxt->instate = state;
/* Main loop over valid characters; q and r are tested for '-' as part of
 * the termination condition (one clause of the condition is not visible
 * here). */
3108 while (IS_CHAR(cur) && /* checked */
3110 (r != '-') || (q != '-'))) {
/* Both q and r are hyphens: report XML_ERR_HYPHEN_IN_COMMENT.
 * NOTE(review): the message below contains a stray '`' before the newline. */
3111 if ((r == '-') && (q == '-')) {
3112 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3114 ctxt->sax->error(ctxt->userData,
3115 "Comment must not contain '--' (double-hyphen)`\n");
3116 ctxt->wellFormed = 0;
3117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Buffer nearly full (len + 5 >= size): reallocate. */
3119 if (len + 5 >= size) {
/* NOTE(review): buf is overwritten with the xmlRealloc() result; if that
 * result is NULL the previous block may be leaked -- confirm against the
 * failure path. */
3121 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3123 xmlGenericError(xmlGenericErrorContext,
3124 "realloc of %d byte failed\n", size);
3125 ctxt->instate = state;
/* Append q to the buffer. */
3129 COPY_BUF(ql,buf,len,q);
/* The loop stopped on a character rejected by IS_CHAR(): the comment was
 * never closed. */
3149 if (!IS_CHAR(cur)) {
3150 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3152 ctxt->sax->error(ctxt->userData,
3153 "Comment not terminated \n<!--%.50s\n", buf);
3154 ctxt->wellFormed = 0;
3155 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The current input differs from the one seen at entry. */
3158 if (input != ctxt->input) {
3159 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161 ctxt->sax->error(ctxt->userData,
3162 "Comment doesn't start and stop in the same entity\n");
3163 ctxt->wellFormed = 0;
3164 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Deliver the collected text to the application. */
3167 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3168 (!ctxt->disableSAX))
3169 ctxt->sax->comment(ctxt->userData, buf);
/* Restore the parser state saved at entry. */
3172 ctxt->instate = state;
3177 * @ctxt: an XML parser context
3179 * parse the name of a PI
3181 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3183 * Returns the PITarget name or NULL
/*
 * Parses a Name with xmlParseName() and diagnoses names whose first three
 * letters spell "xml" in any letter case.
 */
3187 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3190 name = xmlParseName(ctxt);
/* Case-insensitive test of the first three characters against "xml". */
3191 if ((name != NULL) &&
3192 ((name[0] == 'x') || (name[0] == 'X')) &&
3193 ((name[1] == 'm') || (name[1] == 'M')) &&
3194 ((name[2] == 'l') || (name[2] == 'L'))) {
/* Exactly the lowercase string "xml": reported as a misplaced XML
 * declaration. */
3196 if ((name[0] == 'x') && (name[1] == 'm') &&
3197 (name[2] == 'l') && (name[3] == 0)) {
3198 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData,
3201 "XML declaration allowed only at the start of the document\n");
3202 ctxt->wellFormed = 0;
3203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Any other three-letter case variant of "xml" is an invalid PI name. */
3205 } else if (name[3] == 0) {
3206 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3209 ctxt->wellFormed = 0;
3210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Walk the NULL-terminated xmlW3CPIs table looking for an exact match;
 * presumably a match is accepted without the warning below -- the matching
 * branch is not visible here. */
3214 if (xmlW3CPIs[i] == NULL) break;
3215 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
/* Remaining names with the "xml" prefix only produce a warning. */
3218 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3219 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3220 ctxt->sax->warning(ctxt->userData,
3221 "xmlParsePITarget: invalid name prefix 'xml'\n");
3227 #ifdef LIBXML_CATALOG_ENABLED
3229 * xmlParseCatalogPI:
3230 * @ctxt: an XML parser context
3231 * @catalog: the PI value string
3233 * parse an XML Catalog Processing Instruction.
3235 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3237 * Occurs only if allowed by the user and if happening in the Misc
3238 * part of the document before any doctype information
3239 * This will add the given catalog to the parsing context in order
3240 * to be used if there is a resolution need further down in the document
/*
 * Scans the PI value for the keyword "catalog" and a quoted string, copies
 * the text between the quotes into URL and registers it with
 * xmlCatalogAddLocal(). A SAX warning is issued on the error path.
 */
3244 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3245 xmlChar *URL = NULL;
/* tmp is the scanning cursor; base marks where the quoted text begins. */
3246 const xmlChar *tmp, *base;
/* Skip leading blanks, then expect the 7-character keyword "catalog". */
3250 while (IS_BLANK(*tmp)) tmp++;
3251 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3254 while (IS_BLANK(*tmp)) tmp++;
3259 while (IS_BLANK(*tmp)) tmp++;
/* The value must be opened by a single or double quote. */
3261 if ((marker != '\'') && (marker != '"'))
/* Advance to the matching quote or to the end of the string. */
3265 while ((*tmp != 0) && (*tmp != marker)) tmp++;
/* Duplicate the characters between base and tmp. */
3268 URL = xmlStrndup(base, tmp - base);
3270 while (IS_BLANK(*tmp)) tmp++;
/* Add the URL to the catalogs attached to this parser context. */
3275 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
/* Error path: report the original PI value in a warning. */
3281 ctxt->errNo = XML_WAR_CATALOG_PI;
3282 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3283 ctxt->sax->warning(ctxt->userData,
3284 "Catalog PI syntax error: %s\n", catalog);
3292 * @ctxt: an XML parser context
3294 * parse an XML Processing Instruction.
3296 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3298 * The processing is transferred to SAX once parsed.
/*
 * Parses a processing instruction starting at "<?": reads the target with
 * xmlParsePITarget(), collects the following data into a heap buffer, and
 * reports the result through the SAX processingInstruction() callback.
 * The parser state is XML_PARSER_PI while working and is restored on exit.
 */
3302 xmlParsePI(xmlParserCtxtPtr ctxt) {
3303 xmlChar *buf = NULL;
/* Initial capacity of buf. */
3305 int size = XML_PARSER_BUFFER_SIZE;
3308 xmlParserInputState state;
3311 if ((RAW == '<') && (NXT(1) == '?')) {
/* Remember the input and parser state seen at entry. */
3312 xmlParserInputPtr input = ctxt->input;
3313 state = ctxt->instate;
3314 ctxt->instate = XML_PARSER_PI;
3316 * this is a Processing Instruction.
3322 * Parse the target name and check for special support like
3325 target = xmlParsePITarget(ctxt);
3326 if (target != NULL) {
/* "?>" directly after the target: a PI without data. */
3327 if ((RAW == '?') && (NXT(1) == '>')) {
3328 if (input != ctxt->input) {
3329 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData,
3332 "PI declaration doesn't start and stop in the same entity\n");
3333 ctxt->wellFormed = 0;
3334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3341 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3342 (ctxt->sax->processingInstruction != NULL))
3343 ctxt->sax->processingInstruction(ctxt->userData,
3345 ctxt->instate = state;
/* PI with data: allocate the collection buffer. */
3349 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3351 xmlGenericError(xmlGenericErrorContext,
3352 "malloc of %d byte failed\n", size);
3353 ctxt->instate = state;
/* A blank must separate the target from the data. */
3357 if (!IS_BLANK(cur)) {
3358 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3360 ctxt->sax->error(ctxt->userData,
3361 "xmlParsePI: PI %s space expected\n", target);
3362 ctxt->wellFormed = 0;
3363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Collect characters until "?>" or a character rejected by IS_CHAR(). */
3367 while (IS_CHAR(cur) && /* checked */
3368 ((cur != '?') || (NXT(1) != '>'))) {
/* Buffer nearly full (len + 5 >= size): reallocate. */
3369 if (len + 5 >= size) {
/* NOTE(review): buf is overwritten with the xmlRealloc() result; if that
 * result is NULL the previous block may be leaked -- confirm against the
 * failure path. */
3371 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3373 xmlGenericError(xmlGenericErrorContext,
3374 "realloc of %d byte failed\n", size);
3375 ctxt->instate = state;
3384 COPY_BUF(l,buf,len,cur);
/* Error path for an unterminated PI (the guarding test is not visible
 * here). */
3395 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3397 ctxt->sax->error(ctxt->userData,
3398 "xmlParsePI: PI %s never end ...\n", target);
3399 ctxt->wellFormed = 0;
3400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3402 if (input != ctxt->input) {
3403 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "PI declaration doesn't start and stop in the same entity\n");
3407 ctxt->wellFormed = 0;
3408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3412 #ifdef LIBXML_CATALOG_ENABLED
/* Catalog PIs are honoured only when the state saved at entry was MISC or
 * START, the target equals XML_CATALOG_PI, and the catalog defaults allow
 * document-level (or all) catalogs. */
3413 if (((state == XML_PARSER_MISC) ||
3414 (state == XML_PARSER_START)) &&
3415 (xmlStrEqual(target, XML_CATALOG_PI))) {
3416 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3417 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3418 (allow == XML_CATA_ALLOW_ALL))
3419 xmlParseCatalogPI(ctxt, buf);
/* Deliver the PI to the application unless SAX delivery is disabled. */
3427 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3428 (ctxt->sax->processingInstruction != NULL))
3429 ctxt->sax->processingInstruction(ctxt->userData,
/* Error path used when no target name could be parsed. */
3435 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3437 ctxt->sax->error(ctxt->userData,
3438 "xmlParsePI : no target name\n");
3439 ctxt->wellFormed = 0;
3440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Restore the parser state saved at entry. */
3442 ctxt->instate = state;
3447 * xmlParseNotationDecl:
3448 * @ctxt: an XML parser context
3450 * parse a notation declaration
3452 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3454 * Hence there are actually 3 choices:
3455 * 'PUBLIC' S PubidLiteral
3456 * 'PUBLIC' S PubidLiteral S SystemLiteral
3457 * and 'SYSTEM' S SystemLiteral
3459 * See the NOTE on xmlParseExternalID().
/*
 * Parses a declaration starting with "<!NOTATION": a Name, then an external
 * or public identifier obtained from xmlParseExternalID() in non-strict
 * mode, and reports it through the SAX notationDecl() callback. Systemid
 * and Pubid are freed before leaving.
 */
3463 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3468 if ((RAW == '<') && (NXT(1) == '!') &&
3469 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3470 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3471 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3472 (NXT(8) == 'O') && (NXT(9) == 'N')) {
/* Input in use at entry, compared later to detect an entity-boundary
 * crossing. */
3473 xmlParserInputPtr input = ctxt->input;
/* A blank is mandatory after the keyword. */
3476 if (!IS_BLANK(CUR)) {
3477 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3479 ctxt->sax->error(ctxt->userData,
3480 "Space required after '<!NOTATION'\n");
3481 ctxt->wellFormed = 0;
3482 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3487 name = xmlParseName(ctxt);
/* Error path for a missing notation name (the guarding test is not
 * visible here). */
3489 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492 "NOTATION: Name expected here\n");
3493 ctxt->wellFormed = 0;
3494 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* NOTE(review): the message below contains a stray "'" after "name". */
3497 if (!IS_BLANK(CUR)) {
3498 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3500 ctxt->sax->error(ctxt->userData,
3501 "Space required after the NOTATION name'\n");
3502 ctxt->wellFormed = 0;
3503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* strict == 0: a public identifier without a system literal is accepted. */
3511 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3515 if (input != ctxt->input) {
3516 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3518 ctxt->sax->error(ctxt->userData,
3519 "Notation declaration doesn't start and stop in the same entity\n");
3520 ctxt->wellFormed = 0;
3521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Report the notation to the application unless SAX delivery is disabled. */
3524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3525 (ctxt->sax->notationDecl != NULL))
3526 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
/* Error path for a declaration that is not closed by '>'. */
3528 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3530 ctxt->sax->error(ctxt->userData,
3531 "'>' required to close NOTATION declaration\n");
3532 ctxt->wellFormed = 0;
3533 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Release the identifier strings obtained above. */
3536 if (Systemid != NULL) xmlFree(Systemid);
3537 if (Pubid != NULL) xmlFree(Pubid);
3542 * xmlParseEntityDecl:
3543 * @ctxt: an XML parser context
3545 * parse <!ENTITY declarations
3547 * [70] EntityDecl ::= GEDecl | PEDecl
3549 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3551 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3553 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3555 * [74] PEDef ::= EntityValue | ExternalID
3557 * [76] NDataDecl ::= S 'NDATA' S Name
3559 * [ VC: Notation Declared ]
3560 * The Name must match the declared name of a notation.
/*
 * Parses a declaration starting with "<!ENTITY". The visible code covers
 * four cases -- internal and external parameter entities, internal and
 * external general entities (the latter optionally followed by NDATA) --
 * and reports each through the SAX entityDecl() or unparsedEntityDecl()
 * callback. The strings collected on the way are freed at the end.
 */
3564 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3565 xmlChar *name = NULL;
3566 xmlChar *value = NULL;
3567 xmlChar *URI = NULL, *literal = NULL;
3568 xmlChar *ndata = NULL;
3569 int isParameter = 0;
/* Filled in by xmlParseEntityValue() alongside the returned value. */
3570 xmlChar *orig = NULL;
3574 if ((RAW == '<') && (NXT(1) == '!') &&
3575 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3576 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3577 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
/* Input in use at entry, compared later to detect an entity-boundary
 * crossing. */
3578 xmlParserInputPtr input = ctxt->input;
/* skipped holds the SKIP_BLANKS result; presumably zero triggers the
 * "Space required" error below -- the test is not visible here. */
3581 skipped = SKIP_BLANKS;
3583 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3585 ctxt->sax->error(ctxt->userData,
3586 "Space required after '<!ENTITY'\n");
3587 ctxt->wellFormed = 0;
3588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3593 skipped = SKIP_BLANKS;
3595 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3597 ctxt->sax->error(ctxt->userData,
3598 "Space required after '%'\n");
3599 ctxt->wellFormed = 0;
3600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3605 name = xmlParseName(ctxt);
3607 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3609 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3610 ctxt->wellFormed = 0;
3611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3614 skipped = SKIP_BLANKS;
3616 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3618 ctxt->sax->error(ctxt->userData,
3619 "Space required after the entity name\n");
3620 ctxt->wellFormed = 0;
3621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3624 ctxt->instate = XML_PARSER_ENTITY_DECL;
3626 * handle the various case of definitions...
/* Quoted value: declared below as XML_INTERNAL_PARAMETER_ENTITY. */
3629 if ((RAW == '"') || (RAW == '\'')) {
3630 value = xmlParseEntityValue(ctxt, &orig);
3632 if ((ctxt->sax != NULL) &&
3633 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3634 ctxt->sax->entityDecl(ctxt->userData, name,
3635 XML_INTERNAL_PARAMETER_ENTITY,
/* Otherwise an external identifier is required (strict mode). */
3639 URI = xmlParseExternalID(ctxt, &literal, 1);
3640 if ((URI == NULL) && (literal == NULL)) {
3641 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3643 ctxt->sax->error(ctxt->userData,
3644 "Entity value required\n");
3645 ctxt->wellFormed = 0;
3646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Check the system identifier with the URI parser. Unlike most errors in
 * this function, the two URI errors below are only reported while SAX
 * delivery is still enabled. */
3651 uri = xmlParseURI((const char *) URI);
3653 ctxt->errNo = XML_ERR_INVALID_URI;
3654 if ((ctxt->sax != NULL) &&
3655 (!ctxt->disableSAX) &&
3656 (ctxt->sax->error != NULL))
3657 ctxt->sax->error(ctxt->userData,
3658 "Invalid URI: %s\n", URI);
3660 * This really ought to be a well formedness error
3661 * but the XML Core WG decided otherwise c.f. issue
3662 * E26 of the XML erratas.
/* A fragment part in the URI is rejected and clears wellFormed. */
3665 if (uri->fragment != NULL) {
3666 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3667 if ((ctxt->sax != NULL) &&
3668 (!ctxt->disableSAX) &&
3669 (ctxt->sax->error != NULL))
3670 ctxt->sax->error(ctxt->userData,
3671 "Fragment not allowed: %s\n", URI);
3673 * Okay this is foolish to block those but not
3676 ctxt->wellFormed = 0;
3678 if ((ctxt->sax != NULL) &&
3679 (!ctxt->disableSAX) &&
3680 (ctxt->sax->entityDecl != NULL))
3681 ctxt->sax->entityDecl(ctxt->userData, name,
3682 XML_EXTERNAL_PARAMETER_ENTITY,
3683 literal, URI, NULL);
/* Quoted value: declared below as XML_INTERNAL_GENERAL_ENTITY. */
3690 if ((RAW == '"') || (RAW == '\'')) {
3691 value = xmlParseEntityValue(ctxt, &orig);
3692 if ((ctxt->sax != NULL) &&
3693 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3694 ctxt->sax->entityDecl(ctxt->userData, name,
3695 XML_INTERNAL_GENERAL_ENTITY,
3698 * For expat compatibility in SAX mode.
/* With no document, or one whose version equals SAX_COMPAT_MODE, a
 * document and a "fake" internal subset are created as needed and the
 * entity is also recorded by calling entityDecl() directly. */
3700 if ((ctxt->myDoc == NULL) ||
3701 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3702 if (ctxt->myDoc == NULL) {
3703 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3705 if (ctxt->myDoc->intSubset == NULL)
3706 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3707 BAD_CAST "fake", NULL, NULL);
3709 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
/* Otherwise an external identifier is required (strict mode). */
3713 URI = xmlParseExternalID(ctxt, &literal, 1);
3714 if ((URI == NULL) && (literal == NULL)) {
3715 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "Entity value required\n");
3719 ctxt->wellFormed = 0;
3720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3725 uri = xmlParseURI((const char *)URI);
3727 ctxt->errNo = XML_ERR_INVALID_URI;
3728 if ((ctxt->sax != NULL) &&
3729 (!ctxt->disableSAX) &&
3730 (ctxt->sax->error != NULL))
3731 ctxt->sax->error(ctxt->userData,
3732 "Invalid URI: %s\n", URI);
3734 * This really ought to be a well formedness error
3735 * but the XML Core WG decided otherwise c.f. issue
3736 * E26 of the XML erratas.
3739 if (uri->fragment != NULL) {
3740 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3741 if ((ctxt->sax != NULL) &&
3742 (!ctxt->disableSAX) &&
3743 (ctxt->sax->error != NULL))
3744 ctxt->sax->error(ctxt->userData,
3745 "Fragment not allowed: %s\n", URI);
3747 * Okay this is foolish to block those but not
3750 ctxt->wellFormed = 0;
/* Anything other than '>' after the external identifier must start with a
 * blank. */
3755 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3756 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3758 ctxt->sax->error(ctxt->userData,
3759 "Space required before 'NDATA'\n");
3760 ctxt->wellFormed = 0;
3761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* NDATA keyword: the entity is reported through unparsedEntityDecl(). */
3764 if ((RAW == 'N') && (NXT(1) == 'D') &&
3765 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3768 if (!IS_BLANK(CUR)) {
3769 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3771 ctxt->sax->error(ctxt->userData,
3772 "Space required after 'NDATA'\n");
3773 ctxt->wellFormed = 0;
3774 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3777 ndata = xmlParseName(ctxt);
3778 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3779 (ctxt->sax->unparsedEntityDecl != NULL))
3780 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3781 literal, URI, ndata);
/* No NDATA: an external parsed general entity. */
3783 if ((ctxt->sax != NULL) &&
3784 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3785 ctxt->sax->entityDecl(ctxt->userData, name,
3786 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3787 literal, URI, NULL);
3789 * For expat compatibility in SAX mode.
3790 * assuming the entity repalcement was asked for
/* Same compatibility bookkeeping as for internal general entities, but
 * only when ctxt->replaceEntities is set. */
3792 if ((ctxt->replaceEntities != 0) &&
3793 ((ctxt->myDoc == NULL) ||
3794 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3795 if (ctxt->myDoc == NULL) {
3796 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3799 if (ctxt->myDoc->intSubset == NULL)
3800 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3801 BAD_CAST "fake", NULL, NULL);
3802 entityDecl(ctxt, name,
3803 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3804 literal, URI, NULL);
/* Error path for a declaration that is not terminated (the guarding test
 * is not visible here). */
3811 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3813 ctxt->sax->error(ctxt->userData,
3814 "xmlParseEntityDecl: entity %s not terminated\n", name);
3815 ctxt->wellFormed = 0;
3816 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3818 if (input != ctxt->input) {
3819 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821 ctxt->sax->error(ctxt->userData,
3822 "Entity declaration doesn't start and stop in the same entity\n");
3823 ctxt->wellFormed = 0;
3824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3830 * Ugly mechanism to save the raw entity value.
/* Look the entity up again through the SAX getParameterEntity() or
 * getEntity() callback, falling back to getEntity() directly when the SAX
 * user data is the parser context itself. */
3832 xmlEntityPtr cur = NULL;
3835 if ((ctxt->sax != NULL) &&
3836 (ctxt->sax->getParameterEntity != NULL))
3837 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3839 if ((ctxt->sax != NULL) &&
3840 (ctxt->sax->getEntity != NULL))
3841 cur = ctxt->sax->getEntity(ctxt->userData, name);
3842 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3843 cur = getEntity(ctxt, name);
/* NOTE(review): presumably decides whether orig is stored in the entity or
 * discarded; the statements that follow this test are not visible here. */
3847 if (cur->orig != NULL)
/* Release the strings collected while parsing. */
3854 if (name != NULL) xmlFree(name);
3855 if (value != NULL) xmlFree(value);
3856 if (URI != NULL) xmlFree(URI);
3857 if (literal != NULL) xmlFree(literal);
3858 if (ndata != NULL) xmlFree(ndata);
3863 * xmlParseDefaultDecl:
3864 * @ctxt: an XML parser context
3865 * @value: Receive a possible fixed default value for the attribute
3867 * Parse an attribute default declaration
3869 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3871 * [ VC: Required Attribute ]
3872 * if the default declaration is the keyword #REQUIRED, then the
3873 * attribute must be specified for all elements of the type in the
3874 * attribute-list declaration.
3876 * [ VC: Attribute Default Legal ]
3877 * The declared default value must meet the lexical constraints of
3878 * the declared attribute type c.f. xmlValidateAttributeDecl()
3880 * [ VC: Fixed Attribute Default ]
3881 * if an attribute has a default value declared with the #FIXED
3882 * keyword, instances of that attribute must match the default value.
3884 * [ WFC: No < in Attribute Values ]
3885 * handled in xmlParseAttValue()
3887 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3888 * or XML_ATTRIBUTE_FIXED.
/*
 * Recognizes the '#REQUIRED', '#IMPLIED' and '#FIXED' keywords. The first
 * two return immediately with the matching XML_ATTRIBUTE_* constant;
 * otherwise an attribute value is parsed with xmlParseAttValue().
 */
3892 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3897 if ((RAW == '#') && (NXT(1) == 'R') &&
3898 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3899 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3900 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3903 return(XML_ATTRIBUTE_REQUIRED);
3905 if ((RAW == '#') && (NXT(1) == 'I') &&
3906 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3907 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3908 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3910 return(XML_ATTRIBUTE_IMPLIED);
/* Default classification when no '#FIXED' keyword is present. */
3912 val = XML_ATTRIBUTE_NONE;
3913 if ((RAW == '#') && (NXT(1) == 'F') &&
3914 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3915 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3917 val = XML_ATTRIBUTE_FIXED;
/* A blank is mandatory between '#FIXED' and the value. */
3918 if (!IS_BLANK(CUR)) {
3919 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3921 ctxt->sax->error(ctxt->userData,
3922 "Space required after '#FIXED'\n");
3923 ctxt->wellFormed = 0;
3924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Parse the default value, then set the parser state to XML_PARSER_DTD. */
3928 ret = xmlParseAttValue(ctxt);
3929 ctxt->instate = XML_PARSER_DTD;
/* Error path for a missing or invalid default value (the guarding test is
 * not visible here). */
3931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3932 ctxt->sax->error(ctxt->userData,
3933 "Attribute default value declaration error\n");
3934 ctxt->wellFormed = 0;
3935 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3942 * xmlParseNotationType:
3943 * @ctxt: an XML parser context
3945 * parse a Notation attribute type.
3947 * Note: the leading 'NOTATION' S part has already been parsed...
3949 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3951 * [ VC: Notation Attributes ]
3952 * Values of this type must match one of the notation names included
3953 * in the declaration; all notation names in the declaration must be declared.
3955 * Returns: the notation attribute tree built while parsing
/*
 * Builds an enumeration list of notation names: each Name parsed between
 * '|' separators becomes a node created with xmlCreateEnumeration().
 */
3959 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
/* ret is the list head, last the most recently added node, cur the node
 * being created. */
3961 xmlEnumerationPtr ret = NULL, last = NULL, cur;
/* Error path for a missing '(' (the guarding test is not visible here). */
3964 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3966 ctxt->sax->error(ctxt->userData,
3967 "'(' required to start 'NOTATION'\n");
3968 ctxt->wellFormed = 0;
3969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3976 name = xmlParseName(ctxt);
3978 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3980 ctxt->sax->error(ctxt->userData,
3981 "Name expected in NOTATION declaration\n");
3982 ctxt->wellFormed = 0;
3983 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3986 cur = xmlCreateEnumeration(name);
/* Node creation failed: hand back what has been built so far. */
3988 if (cur == NULL) return(ret);
/* The first node becomes both head and tail of the list. */
3989 if (last == NULL) ret = last = cur;
/* Another name follows as long as the current character is '|'. */
3995 } while (RAW == '|');
/* Error path for a missing ')' (the guarding test is not visible here). */
3997 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3999 ctxt->sax->error(ctxt->userData,
4000 "')' required to finish NOTATION declaration\n");
4001 ctxt->wellFormed = 0;
4002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* NOTE(review): only the tail node is freed, and only when it is not the
 * head. If ret is still returned or used afterwards it may reference the
 * freed node -- confirm against the lines not visible here. */
4003 if ((last != NULL) && (last != ret))
4004 xmlFreeEnumeration(last);
4012 * xmlParseEnumerationType:
4013 * @ctxt: an XML parser context
4015 * parse an Enumeration attribute type.
4017 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4019 * [ VC: Enumeration ]
4020 * Values of this type must match one of the Nmtoken tokens in
4023 * Returns: the enumeration attribute tree built while parsing
/*
 * Builds an enumeration list of name tokens: each token returned by
 * xmlParseNmtoken() between '|' separators becomes a node created with
 * xmlCreateEnumeration().
 */
4027 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
/* ret is the list head, last the most recently added node, cur the node
 * being created. */
4029 xmlEnumerationPtr ret = NULL, last = NULL, cur;
/* Error path for a missing '(' (the guarding test is not visible here). */
4032 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "'(' required to start ATTLIST enumeration\n");
4036 ctxt->wellFormed = 0;
4037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4044 name = xmlParseNmtoken(ctxt);
4046 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4048 ctxt->sax->error(ctxt->userData,
4049 "NmToken expected in ATTLIST enumeration\n");
4050 ctxt->wellFormed = 0;
4051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4054 cur = xmlCreateEnumeration(name);
/* Node creation failed: hand back what has been built so far. */
4056 if (cur == NULL) return(ret);
/* The first node becomes both head and tail of the list. */
4057 if (last == NULL) ret = last = cur;
/* Another token follows as long as the current character is '|'. */
4063 } while (RAW == '|');
/* Error path for a missing ')' (the guarding test is not visible here). */
4065 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4067 ctxt->sax->error(ctxt->userData,
4068 "')' required to finish ATTLIST enumeration\n");
4069 ctxt->wellFormed = 0;
4070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4078 * xmlParseEnumeratedType:
4079 * @ctxt: an XML parser context
4080 * @tree: the enumeration tree built while parsing
4082 * parse an Enumerated attribute type.
4084 * [57] EnumeratedType ::= NotationType | Enumeration
4086 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4089 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
/*
 * Dispatches on the 'NOTATION' keyword: with it the list comes from
 * xmlParseNotationType(), otherwise from xmlParseEnumerationType(). The
 * list is stored through tree; 0 is returned when no list was built.
 */
4093 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4094 if ((RAW == 'N') && (NXT(1) == 'O') &&
4095 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4096 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4097 (NXT(6) == 'O') && (NXT(7) == 'N')) {
/* A blank is mandatory after the keyword. */
4099 if (!IS_BLANK(CUR)) {
4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "Space required after 'NOTATION'\n");
4104 ctxt->wellFormed = 0;
4105 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4109 *tree = xmlParseNotationType(ctxt);
4110 if (*tree == NULL) return(0);
4111 return(XML_ATTRIBUTE_NOTATION);
/* No 'NOTATION' keyword: plain enumeration. */
4113 *tree = xmlParseEnumerationType(ctxt);
4114 if (*tree == NULL) return(0);
4115 return(XML_ATTRIBUTE_ENUMERATION);
4119 * xmlParseAttributeType:
4120 * @ctxt: an XML parser context
4121 * @tree: the enumeration tree built while parsing
4123 * parse the Attribute list def for an element
4125 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4127 * [55] StringType ::= 'CDATA'
4129 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4130 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4132 * Validity constraints for attribute values syntax are checked in
4133 * xmlValidateAttributeValue()
4136 * Values of type ID must match the Name production. A name must not
4137 * appear more than once in an XML document as a value of this type;
4138 * i.e., ID values must uniquely identify the elements which bear them.
4140 * [ VC: One ID per Element Type ]
4141 * No element type may have more than one ID attribute specified.
4143 * [ VC: ID Attribute Default ]
4144 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4147 * Values of type IDREF must match the Name production, and values
4148 * of type IDREFS must match Names; each IDREF Name must match the value
4149 * of an ID attribute on some element in the XML document; i.e. IDREF
4150 * values must match the value of some ID attribute.
4152 * [ VC: Entity Name ]
4153 * Values of type ENTITY must match the Name production, values
4154 * of type ENTITIES must match Names; each Entity Name must match the
4155 * name of an unparsed entity declared in the DTD.
4157 * [ VC: Name Token ]
4158 * Values of type NMTOKEN must match the Nmtoken production; values
4159 * of type NMTOKENS must match Nmtokens.
4161 * Returns the attribute type
/*
 * Matches the attribute type keyword at the current position and returns
 * the corresponding XML_ATTRIBUTE_* constant; anything else is delegated to
 * xmlParseEnumeratedType(). Keywords that share a prefix are tested longest
 * first (IDREFS, IDREF, ID and NMTOKENS, NMTOKEN).
 */
4164 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4166 if ((RAW == 'C') && (NXT(1) == 'D') &&
4167 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4170 return(XML_ATTRIBUTE_CDATA);
4171 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4172 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4173 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4175 return(XML_ATTRIBUTE_IDREFS);
4176 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4177 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4180 return(XML_ATTRIBUTE_IDREF);
4181 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4183 return(XML_ATTRIBUTE_ID);
4184 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4185 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4186 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4188 return(XML_ATTRIBUTE_ENTITY);
4189 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4190 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4191 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4192 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4194 return(XML_ATTRIBUTE_ENTITIES);
4195 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4196 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4197 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4198 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4200 return(XML_ATTRIBUTE_NMTOKENS);
4201 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4202 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4203 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4206 return(XML_ATTRIBUTE_NMTOKEN);
/* No keyword matched: try the enumerated forms. */
4208 return(xmlParseEnumeratedType(ctxt, tree));
4212 * xmlParseAttributeListDecl:
4213 * @ctxt: an XML parser context
4215 * parse the Attribute list declaration for an element
4217 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4219 * [53] AttDef ::= S Name S AttType S DefaultDecl
4223 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses one attribute-list declaration and reports each attribute
 * definition it contains through the SAX attributeDecl callback.
 * Most visible error paths share one pattern: set ctxt->errNo, call
 * the SAX error callback when one is installed, clear ctxt->wellFormed,
 * and set ctxt->disableSAX unless ctxt->recovery is set.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous, so several statements (for instance what follows each
 * attrName NULL test in the cleanup code) are not visible here.
 */
4226 xmlEnumerationPtr tree;
4228 if ((RAW == '<') && (NXT(1) == '!') &&
4229 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4230 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4231 (NXT(6) == 'I') && (NXT(7) == 'S') &&
/* Input the declaration starts in; compared with ctxt->input at the end
 * to report XML_ERR_ENTITY_BOUNDARY. */
4233 xmlParserInputPtr input = ctxt->input;
4236 if (!IS_BLANK(CUR)) {
4237 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4239 ctxt->sax->error(ctxt->userData,
4240 "Space required after '<!ATTLIST'\n");
4241 ctxt->wellFormed = 0;
4242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Name of the element the attributes belong to; passed to every
 * attributeDecl callback below. */
4245 elemName = xmlParseName(ctxt);
4246 if (elemName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Element\n");
4251 ctxt->wellFormed = 0;
4252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Loop body handles one attribute definition (name, type, default);
 * the loop ends when the closing > is reached. */
4257 while (RAW != '>') {
/* Read position at the start of the iteration, used further down to
 * detect an iteration that consumed nothing. */
4258 const xmlChar *check = CUR_PTR;
4261 xmlChar *defaultValue = NULL;
4265 attrName = xmlParseName(ctxt);
4266 if (attrName == NULL) {
4267 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4269 ctxt->sax->error(ctxt->userData,
4270 "ATTLIST: no name for Attribute\n");
4271 ctxt->wellFormed = 0;
4272 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4276 if (!IS_BLANK(CUR)) {
4277 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4279 ctxt->sax->error(ctxt->userData,
4280 "Space required after the attribute name\n");
4281 ctxt->wellFormed = 0;
4282 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4283 if (attrName != NULL)
4285 if (defaultValue != NULL)
4286 xmlFree(defaultValue);
/* Attribute type; an enumeration tree, if any, is stored in tree. */
4291 type = xmlParseAttributeType(ctxt, &tree);
4293 if (attrName != NULL)
4295 if (defaultValue != NULL)
4296 xmlFree(defaultValue);
4301 if (!IS_BLANK(CUR)) {
4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "Space required after the attribute type\n");
4306 ctxt->wellFormed = 0;
4307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4308 if (attrName != NULL)
4310 if (defaultValue != NULL)
4311 xmlFree(defaultValue);
4313 xmlFreeEnumeration(tree);
/* Default declaration; the default value string, if any, is stored in
 * defaultValue and released with xmlFree on every visible path. */
4318 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4320 if (attrName != NULL)
4322 if (defaultValue != NULL)
4323 xmlFree(defaultValue);
4325 xmlFreeEnumeration(tree);
4331 if (!IS_BLANK(CUR)) {
4332 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4334 ctxt->sax->error(ctxt->userData,
4335 "Space required after the attribute default value\n");
4336 ctxt->wellFormed = 0;
4337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4338 if (attrName != NULL)
4340 if (defaultValue != NULL)
4341 xmlFree(defaultValue);
4343 xmlFreeEnumeration(tree);
/* No-progress guard: the read pointer did not move during this
 * iteration.
 * NOTE(review): unlike the other error paths, the visible lines of this
 * one do not clear ctxt->wellFormed or touch ctxt->disableSAX - confirm
 * whether that is intended. */
4348 if (check == CUR_PTR) {
4349 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4351 ctxt->sax->error(ctxt->userData,
4352 "xmlParseAttributeListDecl: detected internal error\n");
4353 if (attrName != NULL)
4355 if (defaultValue != NULL)
4356 xmlFree(defaultValue);
4358 xmlFreeEnumeration(tree);
/* Report the definition through SAX unless callbacks are disabled or no
 * attributeDecl handler is installed. tree is handed to the callback
 * and is not freed on this path in the visible lines. */
4361 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4362 (ctxt->sax->attributeDecl != NULL))
4363 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4364 type, def, defaultValue, tree);
4365 if (attrName != NULL)
4367 if (defaultValue != NULL)
4368 xmlFree(defaultValue);
/* The declaration must end in the same input it started in. */
4372 if (input != ctxt->input) {
4373 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4375 ctxt->sax->error(ctxt->userData,
4376 "Attribute list declaration doesn't start and stop in the same entity\n");
4377 ctxt->wellFormed = 0;
4378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4388 * xmlParseElementMixedContentDecl:
4389 * @ctxt: an XML parser context
4390 * @inputchk: the input used for the current entity, needed for boundary checks
4392 * parse the declaration for a Mixed Element content
4393 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4395 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4396 * '(' S? '#PCDATA' S? ')'
4398 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4400 * [ VC: No Duplicate Types ]
4401 * The same name must not appear more than once in a single
4402 * mixed-content declaration.
4404 * returns: the list of the xmlElementContentPtr describing the element choices
4406 xmlElementContentPtr
4407 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
/*
 * Builds the content tree for a Mixed declaration (production [51]).
 * The input must start with #PCDATA, otherwise XML_ERR_PCDATA_REQUIRED
 * is raised. Alternatives separated by | are chained through
 * XML_ELEMENT_CONTENT_OR nodes whose element leaves are created from
 * the names returned by xmlParseName().
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous, so the linking of the OR nodes and the return statements
 * are only partly visible here.
 */
4408 xmlElementContentPtr ret = NULL, cur = NULL, n;
4409 xmlChar *elem = NULL;
4412 if ((RAW == '#') && (NXT(1) == 'P') &&
4413 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4414 (NXT(4) == 'A') && (NXT(5) == 'T') &&
/* Boundary check performed only when validating, and reported through
 * the validation context (ctxt->vctxt) rather than the SAX handler. */
4420 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4421 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4422 if (ctxt->vctxt.error != NULL)
4423 ctxt->vctxt.error(ctxt->vctxt.userData,
4424 "Element content declaration doesn't start and stop in the same entity\n");
/* PCDATA-only form: a single PCDATA node. */
4428 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4430 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4435 if ((RAW == '(') || (RAW == '|')) {
4436 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4437 if (ret == NULL) return(NULL);
/* One iteration per alternative introduced by a | separator. */
4439 while (RAW == '|') {
/* NOTE(review): when either allocation below fails, the function
 * returns NULL without visibly releasing the nodes built so far -
 * confirm whether this leaks. */
4442 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4443 if (ret == NULL) return(NULL);
4449 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4450 if (n == NULL) return(NULL);
4451 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4461 elem = xmlParseName(ctxt);
4463 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4465 ctxt->sax->error(ctxt->userData,
4466 "xmlParseElementMixedContentDecl : Name expected\n");
4467 ctxt->wellFormed = 0;
4468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4469 xmlFreeElementContent(cur);
/* Proper end of a Mixed group with alternatives: a closing parenthesis
 * directly followed by a star. The last parsed name becomes the second
 * child of the current node. */
4475 if ((RAW == ')') && (NXT(1) == '*')) {
4477 cur->c2 = xmlNewElementContent(elem,
4478 XML_ELEMENT_CONTENT_ELEMENT);
4479 if (cur->c2 != NULL)
4480 cur->c2->parent = cur;
4483 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4484 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4485 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4486 if (ctxt->vctxt.error != NULL)
4487 ctxt->vctxt.error(ctxt->vctxt.userData,
4488 "Element content declaration doesn't start and stop in the same entity\n");
/* Malformed group end: release the pending name and the partial tree
 * before reporting XML_ERR_MIXED_NOT_STARTED. */
4493 if (elem != NULL) xmlFree(elem);
4494 xmlFreeElementContent(ret);
4495 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4497 ctxt->sax->error(ctxt->userData,
4498 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4499 ctxt->wellFormed = 0;
4500 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4505 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4507 ctxt->sax->error(ctxt->userData,
4508 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4509 ctxt->wellFormed = 0;
4510 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4516 * xmlParseElementChildrenContentDecl:
4517 * @ctxt: an XML parser context
4518 * @inputchk: the input used for the current entity, needed for boundary checks
4520 * parse the declaration for a children (non-Mixed) Element content
4521 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4524 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4526 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4528 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4530 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4532 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4533 * TODO Parameter-entity replacement text must be properly nested
4534 * with parenthesized groups. That is to say, if either of the
4535 * opening or closing parentheses in a choice, seq, or Mixed
4536 * construct is contained in the replacement text for a parameter
4537 * entity, both must be contained in the same replacement text. For
4538 * interoperability, if a parameter-entity reference appears in a
4539 * choice, seq, or Mixed construct, its replacement text should not
4540 * be empty, and neither the first nor last non-blank character of
4541 * the replacement text should be a connector (| or ,).
4543 * Returns the tree of xmlElementContentPtr describing the element
4546 xmlElementContentPtr
4547 xmlParseElementChildrenContentDecl
4548 (xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
/*
 * Builds the content tree for one parenthesised group of a children
 * content model (productions [47] to [50]). Nested groups are handled
 * by recursive calls; names become XML_ELEMENT_CONTENT_ELEMENT leaves.
 * Separators create XML_ELEMENT_CONTENT_SEQ or XML_ELEMENT_CONTENT_OR
 * operator nodes, and mixing the two kinds inside one group raises
 * XML_ERR_SEPARATOR_REQUIRED.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous; the declarations of elem and type, the tree linking code
 * and the return statements are not visible here.
 */
4549 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
/* The current input is passed as inputchk to the recursive call so the
 * nested group can run its own boundary check against it. */
4556 xmlParserInputPtr input = ctxt->input;
4558 /* Recurse on first child */
4561 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
4565 elem = xmlParseName(ctxt);
4567 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4569 ctxt->sax->error(ctxt->userData,
4570 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4571 ctxt->wellFormed = 0;
4572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4575 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
/* Occurrence indicator of the first child: OPT, MULT or PLUS depending
 * on the character that follows, ONCE otherwise. */
4578 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4580 } else if (RAW == '*') {
4581 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4583 } else if (RAW == '+') {
4584 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4587 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4594 while (RAW != ')') {
4596 * Each loop we parse one separator and one element.
/* The first separator seen is stored in type; every later separator in
 * the same group must be the same character. */
4599 if (type == 0) type = CUR;
4602 * Detect "Name | Name , Name" error
4604 else if (type != CUR) {
4605 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4610 ctxt->wellFormed = 0;
4611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* last is freed separately only when it is a distinct node from ret, so
 * the same node is not passed to xmlFreeElementContent twice. */
4612 if ((last != NULL) && (last != ret))
4613 xmlFreeElementContent(last);
4615 xmlFreeElementContent(ret);
/* Operator node for a sequence group. */
4620 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4622 if ((last != NULL) && (last != ret))
4623 xmlFreeElementContent(last);
4624 xmlFreeElementContent(ret);
4642 } else if (RAW == '|') {
4643 if (type == 0) type = CUR;
4646 * Detect "Name , Name | Name" error
4648 else if (type != CUR) {
4649 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4651 ctxt->sax->error(ctxt->userData,
4652 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4654 ctxt->wellFormed = 0;
4655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4656 if ((last != NULL) && (last != ret))
4657 xmlFreeElementContent(last);
4659 xmlFreeElementContent(ret);
/* Operator node for a choice group. */
4664 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4666 if ((last != NULL) && (last != ret))
4667 xmlFreeElementContent(last);
4669 xmlFreeElementContent(ret);
/* Neither a separator nor the end of the group. */
4688 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4690 ctxt->sax->error(ctxt->userData,
4691 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4692 ctxt->wellFormed = 0;
4693 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4695 xmlFreeElementContent(ret);
4702 xmlParserInputPtr input = ctxt->input;
4703 /* Recurse on second child */
4706 last = xmlParseElementChildrenContentDecl(ctxt, input);
4709 elem = xmlParseName(ctxt);
4711 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4715 ctxt->wellFormed = 0;
4716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4718 xmlFreeElementContent(ret);
4721 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
/* Occurrence indicator of the child just parsed, same scheme as for the
 * first child. */
4724 last->ocur = XML_ELEMENT_CONTENT_OPT;
4726 } else if (RAW == '*') {
4727 last->ocur = XML_ELEMENT_CONTENT_MULT;
4729 } else if (RAW == '+') {
4730 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4733 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4739 if ((cur != NULL) && (last != NULL)) {
/* Boundary check performed only when validating, and reported through
 * the validation context rather than the SAX error handler. */
4744 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4745 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4746 if (ctxt->vctxt.error != NULL)
4747 ctxt->vctxt.error(ctxt->vctxt.userData,
4748 "Element content declaration doesn't start and stop in the same entity\n");
/* Occurrence indicator of the whole group, applied to ret. */
4754 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4756 } else if (RAW == '*') {
4758 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4761 * Some normalization:
4762 * (a | b* | c?)* == (a | b | c)*
/* Walks the chain of OR nodes and resets OPT and MULT children to ONCE.
 * NOTE(review): the statement advancing cur is not visible in this
 * excerpt. */
4764 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4765 if ((cur->c1 != NULL) &&
4766 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4767 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4768 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4769 if ((cur->c2 != NULL) &&
4770 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4771 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4772 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4777 } else if (RAW == '+') {
4781 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4783 * Some normalization:
4784 * (a | b*)+ == (a | b)*
4785 * (a | b?)+ == (a | b)*
4787 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4788 if ((cur->c1 != NULL) &&
4789 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4790 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4791 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4794 if ((cur->c2 != NULL) &&
4795 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4796 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4797 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
/* NOTE(review): presumably applied only when the loop above rewrote at
 * least one child, as the normalization comment suggests; the guarding
 * condition is not visible in this excerpt. */
4803 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4811 * xmlParseElementContentDecl:
4812 * @ctxt: an XML parser context
4813 * @name: the name of the element being defined.
4814 * @result: the Element Content pointer will be stored here if any
4816 * parse the declaration for an Element content either Mixed or Children,
4817 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4819 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4821 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4825 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4826 xmlElementContentPtr *result) {
/*
 * Chooses between the Mixed and the children content parsers: input
 * starting with #PCDATA goes to xmlParseElementMixedContentDecl() and
 * yields XML_ELEMENT_TYPE_MIXED, anything else goes to
 * xmlParseElementChildrenContentDecl() and yields
 * XML_ELEMENT_TYPE_ELEMENT. @name is only used in the error message.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous; the test guarding the error below, the store into @result
 * and the return statement are not visible here.
 */
4828 xmlElementContentPtr tree = NULL;
/* Input at the start of the group, handed to the sub-parsers for their
 * entity boundary check. */
4829 xmlParserInputPtr input = ctxt->input;
4835 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4837 ctxt->sax->error(ctxt->userData,
4838 "xmlParseElementContentDecl : %s '(' expected\n", name);
4839 ctxt->wellFormed = 0;
4840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4846 if ((RAW == '#') && (NXT(1) == 'P') &&
4847 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4848 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4850 tree = xmlParseElementMixedContentDecl(ctxt, input);
4851 res = XML_ELEMENT_TYPE_MIXED;
4853 tree = xmlParseElementChildrenContentDecl(ctxt, input);
4854 res = XML_ELEMENT_TYPE_ELEMENT;
4862 * xmlParseElementDecl:
4863 * @ctxt: an XML parser context
4865 * parse an Element declaration.
4867 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4869 * [ VC: Unique Element Type Declaration ]
4870 * No element type may be declared more than once
4872 * Returns the type of the element, or -1 in case of error
4875 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses one element type declaration and reports it through the SAX
 * elementDecl callback. The content specification is classified as
 * XML_ELEMENT_TYPE_EMPTY, XML_ELEMENT_TYPE_ANY, or whatever
 * xmlParseElementContentDecl() returns for a parenthesised model.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous; the declarations of name and ret, the input-consuming
 * statements and the return statements are not visible here.
 */
4878 xmlElementContentPtr content = NULL;
4881 if ((RAW == '<') && (NXT(1) == '!') &&
4882 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4883 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4884 (NXT(6) == 'E') && (NXT(7) == 'N') &&
/* Input the declaration starts in; compared with ctxt->input near the
 * end to report XML_ERR_ENTITY_BOUNDARY. */
4886 xmlParserInputPtr input = ctxt->input;
4889 if (!IS_BLANK(CUR)) {
4890 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4892 ctxt->sax->error(ctxt->userData,
4893 "Space required after 'ELEMENT'\n");
4894 ctxt->wellFormed = 0;
4895 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4898 name = xmlParseName(ctxt);
4900 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4902 ctxt->sax->error(ctxt->userData,
4903 "xmlParseElementDecl: no name for Element\n");
4904 ctxt->wellFormed = 0;
4905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Runs while the current input is exhausted (RAW is 0) and more than
 * one input is stacked; the loop body is not visible in this excerpt. */
4908 while ((RAW == 0) && (ctxt->inputNr > 1))
4910 if (!IS_BLANK(CUR)) {
4911 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4913 ctxt->sax->error(ctxt->userData,
4914 "Space required after the element name\n");
4915 ctxt->wellFormed = 0;
4916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4919 if ((RAW == 'E') && (NXT(1) == 'M') &&
4920 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4924 * Element must always be empty.
4926 ret = XML_ELEMENT_TYPE_EMPTY;
4927 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4931 * Element is a generic container.
4933 ret = XML_ELEMENT_TYPE_ANY;
4934 } else if (RAW == '(') {
/* Parenthesised content model; the resulting tree is stored in content
 * and released at the end of this function. */
4935 ret = xmlParseElementContentDecl(ctxt, name, &content);
4938 * [ WFC: PEs in Internal Subset ] error handling.
/* A percent sign met directly in the internal subset (not external,
 * single input) gets a dedicated error code; any other unexpected
 * input is reported as XML_ERR_ELEMCONTENT_NOT_STARTED. */
4940 if ((RAW == '%') && (ctxt->external == 0) &&
4941 (ctxt->inputNr == 1)) {
4942 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4944 ctxt->sax->error(ctxt->userData,
4945 "PEReference: forbidden within markup decl in internal subset\n");
4947 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4952 ctxt->wellFormed = 0;
4953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4954 if (name != NULL) xmlFree(name);
4960 * Pop-up of finished entities.
4962 while ((RAW == 0) && (ctxt->inputNr > 1))
4967 ctxt->errNo = XML_ERR_GT_REQUIRED;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4969 ctxt->sax->error(ctxt->userData,
4970 "xmlParseElementDecl: expected '>' at the end\n");
4971 ctxt->wellFormed = 0;
4972 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The declaration must end in the same input it started in. */
4974 if (input != ctxt->input) {
4975 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4977 ctxt->sax->error(ctxt->userData,
4978 "Element declaration doesn't start and stop in the same entity\n");
4979 ctxt->wellFormed = 0;
4980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Report the declaration through SAX unless callbacks are disabled or
 * no elementDecl handler is installed. */
4984 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4985 (ctxt->sax->elementDecl != NULL))
4986 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4989 if (content != NULL) {
4990 xmlFreeElementContent(content);
5000 * xmlParseConditionalSections
5001 * @ctxt: an XML parser context
5003 * [61] conditionalSect ::= includeSect | ignoreSect
5004 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5005 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5006 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5007 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5011 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/*
 * Handles one conditional section. An INCLUDE section has its content
 * parsed like external subset content (nested conditional sections,
 * blanks, parameter entity references, markup declarations). An IGNORE
 * section is skipped while tracking nesting depth. Any other keyword
 * raises XML_ERR_CONDSEC_INVALID.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous; the declarations of state, instate and depth, the
 * input-consuming statements and several conditions are not visible.
 */
5014 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5015 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5020 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5022 ctxt->sax->error(ctxt->userData,
5023 "XML conditional section '[' expected\n");
5024 ctxt->wellFormed = 0;
5025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Optional trace output, enabled by the xmlParserDebugEntities flag. */
5029 if (xmlParserDebugEntities) {
5030 if ((ctxt->input != NULL) && (ctxt->input->filename))
5031 xmlGenericError(xmlGenericErrorContext,
5032 "%s(%d): ", ctxt->input->filename,
5034 xmlGenericError(xmlGenericErrorContext,
5035 "Entering INCLUDE Conditional Section\n");
5038 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
/* Read position and consumed count at the start of the iteration; both
 * unchanged afterwards means nothing was parsed. */
5040 const xmlChar *check = CUR_PTR;
5041 int cons = ctxt->input->consumed;
5043 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5044 xmlParseConditionalSections(ctxt);
5045 } else if (IS_BLANK(CUR)) {
5047 } else if (RAW == '%') {
5048 xmlParsePEReference(ctxt);
5050 xmlParseMarkupDecl(ctxt);
5053 * Pop-up of finished entities.
5055 while ((RAW == 0) && (ctxt->inputNr > 1))
/* No-progress guard for the INCLUDE loop. */
5058 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5059 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5061 ctxt->sax->error(ctxt->userData,
5062 "Content error in the external subset\n");
5063 ctxt->wellFormed = 0;
5064 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5068 if (xmlParserDebugEntities) {
5069 if ((ctxt->input != NULL) && (ctxt->input->filename))
5070 xmlGenericError(xmlGenericErrorContext,
5071 "%s(%d): ", ctxt->input->filename,
5073 xmlGenericError(xmlGenericErrorContext,
5074 "Leaving INCLUDE Conditional Section\n");
5077 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5078 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5086 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "XML conditional section '[' expected\n");
5090 ctxt->wellFormed = 0;
5091 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5095 if (xmlParserDebugEntities) {
5096 if ((ctxt->input != NULL) && (ctxt->input->filename))
5097 xmlGenericError(xmlGenericErrorContext,
5098 "%s(%d): ", ctxt->input->filename,
5100 xmlGenericError(xmlGenericErrorContext,
5101 "Entering IGNORE Conditional Section\n");
5105 * Parse up to the end of the conditional section
5106 * But disable SAX event generating DTD building in the meantime
/* disableSAX and instate are saved here and restored from state and
 * instate once the section has been skipped. */
5108 state = ctxt->disableSAX;
5109 instate = ctxt->instate;
5110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5111 ctxt->instate = XML_PARSER_IGNORE;
/* depth follows the nesting of conditional sections. SKIP(3) runs only
 * while depth stays non-negative, so the terminator that closes the
 * outermost section is not consumed inside this loop. */
5113 while ((depth >= 0) && (RAW != 0)) {
5114 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5119 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5120 if (--depth >= 0) SKIP(3);
5127 ctxt->disableSAX = state;
5128 ctxt->instate = instate;
5130 if (xmlParserDebugEntities) {
5131 if ((ctxt->input != NULL) && (ctxt->input->filename))
5132 xmlGenericError(xmlGenericErrorContext,
5133 "%s(%d): ", ctxt->input->filename,
5135 xmlGenericError(xmlGenericErrorContext,
5136 "Leaving IGNORE Conditional Section\n");
/* Neither INCLUDE nor IGNORE followed the section opener. */
5140 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5142 ctxt->sax->error(ctxt->userData,
5143 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5144 ctxt->wellFormed = 0;
5145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5152 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5154 ctxt->sax->error(ctxt->userData,
5155 "XML conditional section not closed\n");
5156 ctxt->wellFormed = 0;
5157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5164 * xmlParseMarkupDecl:
5165 * @ctxt: an XML parser context
5167 * parse Markup declarations
5169 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5170 * NotationDecl | PI | Comment
5172 * [ VC: Proper Declaration/PE Nesting ]
5173 * Parameter-entity replacement text must be properly nested with
5174 * markup declarations. That is to say, if either the first character
5175 * or the last character of a markup declaration (markupdecl above) is
5176 * contained in the replacement text for a parameter-entity reference,
5177 * both must be contained in the same replacement text.
5179 * [ WFC: PEs in Internal Subset ]
5180 * In the internal DTD subset, parameter-entity references can occur
5181 * only where markup declarations can occur, not within markup declarations.
5182 * (This does not apply to references that occur in external parameter
5183 * entities or to the external subset.)
5186 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
/*
 * Runs the individual markup declaration parsers (element, attribute
 * list, entity, notation, comment), then handles parameter entity
 * references and conditional sections depending on where the parser
 * currently is, and finally sets ctxt->instate to XML_PARSER_DTD.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous; the conditions guarding the parser calls below are not
 * visible here, so whether they run unconditionally cannot be told.
 */
5188 xmlParseElementDecl(ctxt);
5189 xmlParseAttributeListDecl(ctxt);
5190 xmlParseEntityDecl(ctxt);
5191 xmlParseNotationDecl(ctxt);
5193 xmlParseComment(ctxt);
5195 * This is only for internal subset. On external entities,
5196 * the replacement is done before parsing stage
/* Internal subset read directly: not external and only one input on the
 * stack. */
5198 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5199 xmlParsePEReference(ctxt);
5202 * Conditional sections are allowed from entities included
5203 * by PE References in the internal subset.
/* More than one stacked input means the parser is inside an included
 * entity. */
5205 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5206 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5207 xmlParseConditionalSections(ctxt);
5211 ctxt->instate = XML_PARSER_DTD;
5216 * @ctxt: an XML parser context
5218 * parse an XML declaration header for external entities
5220 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5222 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5226 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses the text declaration of an external entity (production [77]):
 * an optional version, then the encoding declaration, then the closing
 * marker. The version string, or a copy of XML_DEFAULT_VERSION when
 * none was parsed, is stored in ctxt->input->version.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous; the declaration of version, the input-consuming
 * statements and the early returns are not visible here.
 */
5230 * We know that '<?xml' is here.
5232 if ((RAW == '<') && (NXT(1) == '?') &&
5233 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5234 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5237 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5239 ctxt->sax->error(ctxt->userData,
5240 "Text declaration '<?xml' required\n");
5241 ctxt->wellFormed = 0;
5242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5247 if (!IS_BLANK(CUR)) {
5248 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5250 ctxt->sax->error(ctxt->userData,
5251 "Space needed after '<?xml'\n");
5252 ctxt->wellFormed = 0;
5253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5258 * We may have the VersionInfo here.
/* A missing version is replaced by a freshly allocated copy of the
 * default version string. */
5260 version = xmlParseVersionInfo(ctxt);
5261 if (version == NULL)
5262 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5264 if (!IS_BLANK(CUR)) {
5265 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5267 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5268 ctxt->wellFormed = 0;
5269 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* The version string is handed over to the current input structure. */
5272 ctxt->input->version = version;
5275 * We must have the encoding declaration
5277 xmlParseEncodingDecl(ctxt);
5278 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5280 * The XML REC instructs us to stop parsing right here
/* End of the declaration: the regular two-character terminator, a lone
 * > (reported as an error), or anything else (reported as an error and
 * followed by MOVETO_ENDTAG). */
5286 if ((RAW == '?') && (NXT(1) == '>')) {
5288 } else if (RAW == '>') {
5289 /* Deprecated old WD ... */
5290 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5292 ctxt->sax->error(ctxt->userData,
5293 "XML declaration must end-up with '?>'\n");
5294 ctxt->wellFormed = 0;
5295 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5298 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5300 ctxt->sax->error(ctxt->userData,
5301 "parsing XML declaration: '?>' expected\n");
5302 ctxt->wellFormed = 0;
5303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5304 MOVETO_ENDTAG(CUR_PTR);
5310 * xmlParseExternalSubset:
5311 * @ctxt: an XML parser context
5312 * @ExternalID: the external identifier
5313 * @SystemID: the system identifier (or URL)
5315 * parse Markup declarations from an external subset
5317 * [30] extSubset ::= textDecl? extSubsetDecl
5319 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5322 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5323 const xmlChar *SystemID) {
/*
 * Parses an external DTD subset: an optional text declaration followed
 * by markup declarations, conditional sections, parameter entity
 * references and blanks. A document and an internal subset node are
 * created first when the context does not have them yet.
 * NOTE(review): the embedded line numbers of this excerpt are not
 * contiguous; some conditions and the input-consuming statements are
 * not visible here.
 */
5325 if ((RAW == '<') && (NXT(1) == '?') &&
5326 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5328 xmlParseTextDecl(ctxt);
5329 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5331 * The XML REC instructs us to stop parsing right here
5333 ctxt->instate = XML_PARSER_EOF;
/* Make sure a document exists, and give it an internal subset node
 * built from ExternalID and SystemID when it has none. */
5337 if (ctxt->myDoc == NULL) {
5338 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5340 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5341 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5343 ctxt->instate = XML_PARSER_DTD;
/* Continue while the input starts with a processing instruction or
 * declaration opener, a percent sign, or a blank. */
5345 while (((RAW == '<') && (NXT(1) == '?')) ||
5346 ((RAW == '<') && (NXT(1) == '!')) ||
5347 (RAW == '%') || IS_BLANK(CUR)) {
/* Read position and consumed count at the start of the iteration; both
 * unchanged afterwards means nothing was parsed. */
5348 const xmlChar *check = CUR_PTR;
5349 int cons = ctxt->input->consumed;
5352 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5353 xmlParseConditionalSections(ctxt);
5354 } else if (IS_BLANK(CUR)) {
5356 } else if (RAW == '%') {
5357 xmlParsePEReference(ctxt);
5359 xmlParseMarkupDecl(ctxt);
5362 * Pop-up of finished entities.
5364 while ((RAW == 0) && (ctxt->inputNr > 1))
/* No-progress guard for the subset loop. */
5367 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5368 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5370 ctxt->sax->error(ctxt->userData,
5371 "Content error in the external subset\n");
5372 ctxt->wellFormed = 0;
5373 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Reported for input left over after the loop; the guarding condition
 * is not visible in this excerpt. */
5379 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5381 ctxt->sax->error(ctxt->userData,
5382 "Extra content at the end of the document\n");
5383 ctxt->wellFormed = 0;
5384 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5390 * xmlParseReference:
5391 * @ctxt: an XML parser context
5393 * parse and handle entity references in content. Depending on the SAX
5394 * interface, this may end up in a call to characters() if this is a
5395 * CharRef, a predefined entity, if there is no reference() callback,
5396 * or if the parser was asked to switch to that mode.
5398 * [67] Reference ::= EntityRef | CharRef
5401 xmlParseReference(xmlParserCtxtPtr ctxt) {
5404 if (RAW != '&') return;
5406 if (NXT(1) == '#') {
5410 int value = xmlParseCharRef(ctxt);
5412 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5414 * So we are using non-UTF-8 buffers
5415 * Check that the char fit on 8bits, if not
5416 * generate a CharRef.
5418 if (value <= 0xFF) {
5421 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5422 (!ctxt->disableSAX))
5423 ctxt->sax->characters(ctxt->userData, out, 1);
5425 if ((hex == 'x') || (hex == 'X'))
5426 snprintf((char *)out, sizeof(out), "#x%X", value);
5428 snprintf((char *)out, sizeof(out), "#%d", value);
5429 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5430 (!ctxt->disableSAX))
5431 ctxt->sax->reference(ctxt->userData, out);
5435 * Just encode the value in UTF-8
5437 COPY_BUF(0 ,out, i, value);
5439 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5440 (!ctxt->disableSAX))
5441 ctxt->sax->characters(ctxt->userData, out, i);
5444 ent = xmlParseEntityRef(ctxt);
5445 if (ent == NULL) return;
5446 if (!ctxt->wellFormed)
5448 if ((ent->name != NULL) &&
5449 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5450 xmlNodePtr list = NULL;
5455 * The first reference to the entity triggers a parsing phase
5456 * where the ent->children is filled with the result from
5459 if (ent->children == NULL) {
5461 value = ent->content;
5464 * Check that this entity is well formed
5466 if ((value != NULL) &&
5467 (value[1] == 0) && (value[0] == '<') &&
5468 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5470 * DONE: get definite answer on this !!!
5471 * Lots of entity decls are used to declare a single
5474 * Which seems to be valid since
5475 * 2.4: The ampersand character (&) and the left angle
5476 * bracket (<) may appear in their literal form only
5477 * when used ... They are also legal within the literal
5478 * entity value of an internal entity declaration;i
5479 * see "4.3.2 Well-Formed Parsed Entities".
5480 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5481 * Looking at the OASIS test suite and James Clark
5482 * tests, this is broken. However the XML REC uses
5483 * it. Is the XML REC not well-formed ????
5484 * This is a hack to avoid this problem
5486 * ANSWER: since lt gt amp .. are already defined,
5487 * this is a redefinition and hence the fact that the
5488 * content is not well balanced is not a Wf error, this
5489 * is lousy but acceptable.
5491 list = xmlNewDocText(ctxt->myDoc, value);
5493 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5494 (ent->children == NULL)) {
5495 ent->children = list;
5498 list->parent = (xmlNodePtr) ent;
5500 xmlFreeNodeList(list);
5502 } else if (list != NULL) {
5503 xmlFreeNodeList(list);
5507 * 4.3.2: An internal general parsed entity is well-formed
5508 * if its replacement text matches the production labeled
5514 * This is a bit hackish but this seems the best
5515 * way to make sure both SAX and DOM entity support
5518 if (ctxt->userData == ctxt)
5521 user_data = ctxt->userData;
5523 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5525 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5526 value, user_data, &list);
5528 } else if (ent->etype ==
5529 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5531 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
5532 ctxt->sax, user_data, ctxt->depth,
5533 ent->URI, ent->ExternalID, &list);
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "Internal: invalid entity type\n");
5541 if (ret == XML_ERR_ENTITY_LOOP) {
5542 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5544 ctxt->sax->error(ctxt->userData,
5545 "Detected entity reference loop\n");
5546 ctxt->wellFormed = 0;
5547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5549 } else if ((ret == 0) && (list != NULL)) {
5550 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5551 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
5552 (ent->children == NULL)) {
5553 ent->children = list;
5554 if (ctxt->replaceEntities) {
5556 * Prune it directly in the generated document
5557 * except for single text nodes.
5559 if ((list->type == XML_TEXT_NODE) &&
5560 (list->next == NULL)) {
5561 list->parent = (xmlNodePtr) ent;
5566 while (list != NULL) {
5567 list->parent = (xmlNodePtr) ctxt->node;
5568 list->doc = ctxt->myDoc;
5569 if (list->next == NULL)
5573 list = ent->children;
5574 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5575 xmlAddEntityReference(ent, list, NULL);
5579 while (list != NULL) {
5580 list->parent = (xmlNodePtr) ent;
5581 if (list->next == NULL)
5587 xmlFreeNodeList(list);
5590 } else if (ret > 0) {
5592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5593 ctxt->sax->error(ctxt->userData,
5594 "Entity value required\n");
5595 ctxt->wellFormed = 0;
5596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5597 } else if (list != NULL) {
5598 xmlFreeNodeList(list);
5603 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5604 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5608 ctxt->sax->reference(ctxt->userData, ent->name);
5610 } else if (ctxt->replaceEntities) {
5611 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5613 * Seems we are generating the DOM content, do
5614 * a simple tree copy for all references except the first
5615 * In the first occurrence list contains the replacement
5618 xmlNodePtr new = NULL, cur, firstChild = NULL;
5619 cur = ent->children;
5620 while (cur != NULL) {
5621 new = xmlCopyNode(cur, 1);
5623 new->_private = cur->_private;
5624 if (firstChild == NULL){
5627 xmlAddChild(ctxt->node, new);
5629 if (cur == ent->last)
5633 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5634 xmlAddEntityReference(ent, firstChild, new);
5637 * the name change is to avoid coalescing of the
5638 * node with a possible previous text one which
5639 * would make ent->children a dangling pointer
5641 if (ent->children->type == XML_TEXT_NODE)
5642 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5643 if ((ent->last != ent->children) &&
5644 (ent->last->type == XML_TEXT_NODE))
5645 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5646 xmlAddChildList(ctxt->node, ent->children);
5650 * This is to avoid a nasty side effect, see
5651 * characters() in SAX.c
5658 * Probably running in SAX mode
5660 xmlParserInputPtr input;
5662 input = xmlNewEntityInputStream(ctxt, ent);
5663 xmlPushInput(ctxt, input);
5664 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5665 (RAW == '<') && (NXT(1) == '?') &&
5666 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5667 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5668 xmlParseTextDecl(ctxt);
5669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5671 * The XML REC instructs us to stop parsing right here
5673 ctxt->instate = XML_PARSER_EOF;
5676 if (input->standalone == 1) {
5677 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5679 ctxt->sax->error(ctxt->userData,
5680 "external parsed entities cannot be standalone\n");
5681 ctxt->wellFormed = 0;
5682 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5690 if (val == NULL) return;
5692 * inline the entity.
5694 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5695 (!ctxt->disableSAX))
5696 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5702 * xmlParseEntityRef:
5703 * @ctxt: an XML parser context
5705 * parse ENTITY references declarations
5707 * [68] EntityRef ::= '&' Name ';'
5709 * [ WFC: Entity Declared ]
5710 * In a document without any DTD, a document with only an internal DTD
5711 * subset which contains no parameter entity references, or a document
5712 * with "standalone='yes'", the Name given in the entity reference
5713 * must match that in an entity declaration, except that well-formed
5714 * documents need not declare any of the following entities: amp, lt,
5715 * gt, apos, quot. The declaration of a parameter entity must precede
5716 * any reference to it. Similarly, the declaration of a general entity
5717 * must precede any reference to it which appears in a default value in an
5718 * attribute-list declaration. Note that if entities are declared in the
5719 * external subset or in external parameter entities, a non-validating
5720 * processor is not obligated to read and process their declarations;
5721 * for such documents, the rule that an entity must be declared is a
5722 * well-formedness constraint only if standalone='yes'.
5724 * [ WFC: Parsed Entity ]
5725 * An entity reference must not contain the name of an unparsed entity
5727 * Returns the xmlEntityPtr if found, or NULL otherwise.
5730 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
/*
 * Parses the Name of a general entity reference from the input, resolves
 * it to an xmlEntityPtr and applies the well-formedness checks listed
 * below.  Every error path sets ctxt->errNo, reports through the SAX
 * error callback when one is installed, clears ctxt->wellFormed and,
 * unless ctxt->recovery is set, sets ctxt->disableSAX.
 * NOTE(review): the embedded line numbers in this listing skip, so guards,
 * braces and the return statements are not shown.
 */
5732 xmlEntityPtr ent = NULL;
5738 name = xmlParseName(ctxt);
/* Error path for a missing name (the guarding test is not shown). */
5740 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5742 ctxt->sax->error(ctxt->userData,
5743 "xmlParseEntityRef: no name\n");
5744 ctxt->wellFormed = 0;
5745 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5750 * Ask first SAX for entity resolution, otherwise try the
5753 if (ctxt->sax != NULL) {
/*
 * Lookup order visible here: the application's getEntity() callback,
 * xmlGetPredefinedEntity(), then getEntity() directly when the user data
 * is the parser context itself.  NOTE(review): embedded line 5756, which
 * precedes the predefined-entity lookup, is not shown.
 */
5754 if (ctxt->sax->getEntity != NULL)
5755 ent = ctxt->sax->getEntity(ctxt->userData, name);
5757 ent = xmlGetPredefinedEntity(name);
5758 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5759 ent = getEntity(ctxt, name);
5763 * [ WFC: Entity Declared ]
5764 * In a document without any DTD, a document with only an
5765 * internal DTD subset which contains no parameter entity
5766 * references, or a document with "standalone='yes'", the
5767 * Name given in the entity reference must match that in an
5768 * entity declaration, except that well-formed documents
5769 * need not declare any of the following entities: amp, lt,
5771 * The declaration of a parameter entity must precede any
5773 * Similarly, the declaration of a general entity must
5774 * precede any reference to it which appears in a default
5775 * value in an attribute-list declaration. Note that if
5776 * entities are declared in the external subset or in
5777 * external parameter entities, a non-validating processor
5778 * is not obligated to read and process their declarations;
5779 * for such documents, the rule that an entity must be
5780 * declared is a well-formedness constraint only if
5784 if ((ctxt->standalone == 1) ||
5785 ((ctxt->hasExternalSubset == 0) &&
5786 (ctxt->hasPErefs == 0))) {
/* Standalone, or no external subset and no PE references: hard error. */
5787 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5789 ctxt->sax->error(ctxt->userData,
5790 "Entity '%s' not defined\n", name);
5791 ctxt->wellFormed = 0;
5793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/*
 * NOTE(review): this path records XML_WAR_UNDECLARED_ENTITY but reports
 * through sax->error, while xmlParseStringEntityRef() uses sax->warning
 * for the same case; embedded lines 5792 and 5799 are not shown, so
 * confirm which behavior is intended before changing it.
 */
5795 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
5798 "Entity '%s' not defined\n", name);
5804 * [ WFC: Parsed Entity ]
5805 * An entity reference must not contain the name of an
5808 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
/* The resolved entity is an unparsed one. */
5809 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5811 ctxt->sax->error(ctxt->userData,
5812 "Entity reference to unparsed entity %s\n", name);
5813 ctxt->wellFormed = 0;
5814 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5818 * [ WFC: No External Entity References ]
5819 * Attribute values cannot contain direct or indirect
5820 * entity references to external entities.
5822 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5823 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
/* External parsed entity referenced while in an attribute value. */
5824 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
5827 "Attribute references external entity '%s'\n", name);
5828 ctxt->wellFormed = 0;
5829 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5832 * [ WFC: No < in Attribute Values ]
5833 * The replacement text of any entity referred to directly or
5834 * indirectly in an attribute value (other than "<") must
5837 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5839 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5840 (ent->content != NULL) &&
5841 (xmlStrchr(ent->content, '<'))) {
/*
 * In an attribute value, entity not named "lt", content contains '<'.
 * NOTE(review): embedded line 5838, part of this condition, is not shown.
 */
5842 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5844 ctxt->sax->error(ctxt->userData,
5845 "'<' in entity '%s' is not allowed in attributes values\n", name);
5846 ctxt->wellFormed = 0;
5847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5851 * Internal check, no parameter entities here ...
5854 switch (ent->etype) {
5855 case XML_INTERNAL_PARAMETER_ENTITY:
5856 case XML_EXTERNAL_PARAMETER_ENTITY:
/* The lookup handed back a parameter entity for a general reference. */
5857 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5859 ctxt->sax->error(ctxt->userData,
5860 "Attempt to reference the parameter entity '%s'\n", name);
5861 ctxt->wellFormed = 0;
5862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5870 * [ WFC: No Recursion ]
5871 * A parsed entity must not contain a recursive reference
5872 * to itself, either directly or indirectly.
5873 * Done somewhere else
5877 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
/* Error path for a missing ';' terminator (the guarding test is not shown). */
5878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5879 ctxt->sax->error(ctxt->userData,
5880 "xmlParseEntityRef: expecting ';'\n");
5881 ctxt->wellFormed = 0;
5882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5891 * xmlParseStringEntityRef:
5892 * @ctxt: an XML parser context
5893 * @str: a pointer to an index in the string
5895 * parse ENTITY references declarations, but this version parses it from
5898 * [68] EntityRef ::= '&' Name ';'
5900 * [ WFC: Entity Declared ]
5901 * In a document without any DTD, a document with only an internal DTD
5902 * subset which contains no parameter entity references, or a document
5903 * with "standalone='yes'", the Name given in the entity reference
5904 * must match that in an entity declaration, except that well-formed
5905 * documents need not declare any of the following entities: amp, lt,
5906 * gt, apos, quot. The declaration of a parameter entity must precede
5907 * any reference to it. Similarly, the declaration of a general entity
5908 * must precede any reference to it which appears in a default value in an
5909 * attribute-list declaration. Note that if entities are declared in the
5910 * external subset or in external parameter entities, a non-validating
5911 * processor is not obligated to read and process their declarations;
5912 * for such documents, the rule that an entity must be declared is a
5913 * well-formedness constraint only if standalone='yes'.
5915 * [ WFC: Parsed Entity ]
5916 * An entity reference must not contain the name of an unparsed entity
5918 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5919 * is updated to the current location in the string.
5922 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
/*
 * String-based counterpart of xmlParseEntityRef(): the entity Name is read
 * with xmlParseStringName() through a local cursor (ptr) instead of from
 * the parser input, then resolved and checked with the same sequence of
 * well-formedness tests.  Every error path sets ctxt->errNo, reports
 * through the SAX error callback when one is installed, clears
 * ctxt->wellFormed and, unless ctxt->recovery is set, sets
 * ctxt->disableSAX.
 * NOTE(review): the embedded line numbers in this listing skip, so guards,
 * braces, the update of *str and the return statements are not shown.
 */
5926 xmlEntityPtr ent = NULL;
5928 if ((str == NULL) || (*str == NULL))
5935 name = xmlParseStringName(ctxt, &ptr);
/* Error path for a missing name (the guarding test is not shown). */
5937 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5939 ctxt->sax->error(ctxt->userData,
5940 "xmlParseStringEntityRef: no name\n");
5941 ctxt->wellFormed = 0;
5942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5947 * Ask first SAX for entity resolution, otherwise try the
5950 if (ctxt->sax != NULL) {
/*
 * Lookup order visible here: the application's getEntity() callback,
 * xmlGetPredefinedEntity(), then getEntity() directly when the user data
 * is the parser context itself.  NOTE(review): embedded line 5953, which
 * precedes the predefined-entity lookup, is not shown.
 */
5951 if (ctxt->sax->getEntity != NULL)
5952 ent = ctxt->sax->getEntity(ctxt->userData, name);
5954 ent = xmlGetPredefinedEntity(name);
5955 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5956 ent = getEntity(ctxt, name);
5960 * [ WFC: Entity Declared ]
5961 * In a document without any DTD, a document with only an
5962 * internal DTD subset which contains no parameter entity
5963 * references, or a document with "standalone='yes'", the
5964 * Name given in the entity reference must match that in an
5965 * entity declaration, except that well-formed documents
5966 * need not declare any of the following entities: amp, lt,
5968 * The declaration of a parameter entity must precede any
5970 * Similarly, the declaration of a general entity must
5971 * precede any reference to it which appears in a default
5972 * value in an attribute-list declaration. Note that if
5973 * entities are declared in the external subset or in
5974 * external parameter entities, a non-validating processor
5975 * is not obligated to read and process their declarations;
5976 * for such documents, the rule that an entity must be
5977 * declared is a well-formedness constraint only if
5981 if ((ctxt->standalone == 1) ||
5982 ((ctxt->hasExternalSubset == 0) &&
5983 (ctxt->hasPErefs == 0))) {
/* Standalone, or no external subset and no PE references: hard error. */
5984 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5986 ctxt->sax->error(ctxt->userData,
5987 "Entity '%s' not defined\n", name);
5988 ctxt->wellFormed = 0;
5989 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Remaining visible path: only a warning, wellFormed is left untouched. */
5991 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5992 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5993 ctxt->sax->warning(ctxt->userData,
5994 "Entity '%s' not defined\n", name);
5999 * [ WFC: Parsed Entity ]
6000 * An entity reference must not contain the name of an
6003 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
/* The resolved entity is an unparsed one. */
6004 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6006 ctxt->sax->error(ctxt->userData,
6007 "Entity reference to unparsed entity %s\n", name);
6008 ctxt->wellFormed = 0;
6009 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6013 * [ WFC: No External Entity References ]
6014 * Attribute values cannot contain direct or indirect
6015 * entity references to external entities.
6017 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6018 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
/* External parsed entity referenced while in an attribute value. */
6019 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6021 ctxt->sax->error(ctxt->userData,
6022 "Attribute references external entity '%s'\n", name);
6023 ctxt->wellFormed = 0;
6024 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6027 * [ WFC: No < in Attribute Values ]
6028 * The replacement text of any entity referred to directly or
6029 * indirectly in an attribute value (other than "<") must
6032 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6034 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6035 (ent->content != NULL) &&
6036 (xmlStrchr(ent->content, '<'))) {
/*
 * In an attribute value, entity not named "lt", content contains '<'.
 * NOTE(review): embedded line 6033, part of this condition, is not shown.
 */
6037 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6039 ctxt->sax->error(ctxt->userData,
6040 "'<' in entity '%s' is not allowed in attributes values\n", name);
6041 ctxt->wellFormed = 0;
6042 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6046 * Internal check, no parameter entities here ...
6049 switch (ent->etype) {
6050 case XML_INTERNAL_PARAMETER_ENTITY:
6051 case XML_EXTERNAL_PARAMETER_ENTITY:
/* The lookup handed back a parameter entity for a general reference. */
6052 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6054 ctxt->sax->error(ctxt->userData,
6055 "Attempt to reference the parameter entity '%s'\n", name);
6056 ctxt->wellFormed = 0;
6057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6065 * [ WFC: No Recursion ]
6066 * A parsed entity must not contain a recursive reference
6067 * to itself, either directly or indirectly.
6068 * Done somewhere else
6072 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
/* Error path for a missing ';' terminator (the guarding test is not shown). */
6073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6074 ctxt->sax->error(ctxt->userData,
6075 "xmlParseStringEntityRef: expecting ';'\n");
6076 ctxt->wellFormed = 0;
6077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6087 * xmlParsePEReference:
6088 * @ctxt: an XML parser context
6090 * parse PEReference declarations
6091 * The entity content is handled directly by pushing its content as
6092 * a new input stream.
6094 * [69] PEReference ::= '%' Name ';'
6096 * [ WFC: No Recursion ]
6097 * A parsed entity must not contain a recursive
6098 * reference to itself, either directly or indirectly.
6100 * [ WFC: Entity Declared ]
6101 * In a document without any DTD, a document with only an internal DTD
6102 * subset which contains no parameter entity references, or a document
6103 * with "standalone='yes'", ... ... The declaration of a parameter
6104 * entity must precede any reference to it...
6106 * [ VC: Entity Declared ]
6107 * In a document with an external subset or external parameter entities
6108 * with "standalone='no'", ... ... The declaration of a parameter entity
6109 * must precede any reference to it...
6112 * Parameter-entity references may only appear in the DTD.
6113 * NOTE: misleading but this is handled.
6116 xmlParsePEReference(xmlParserCtxtPtr ctxt) {
/*
 * Parses the Name of a parameter-entity reference, resolves it through the
 * SAX getParameterEntity() callback and, for a genuine parameter entity,
 * pushes its content as a new parser input.  ctxt->hasPErefs is set to 1
 * further down.
 * NOTE(review): the embedded line numbers in this listing skip, so guards,
 * braces and else arms are not shown; comments describe only what is
 * visible.
 */
6118 xmlEntityPtr entity = NULL;
6119 xmlParserInputPtr input;
6123 name = xmlParseName(ctxt);
/* Error path for a missing name (the guarding test is not shown). */
6125 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6127 ctxt->sax->error(ctxt->userData,
6128 "xmlParsePEReference: no name\n");
6129 ctxt->wellFormed = 0;
6130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Only the SAX callback is consulted for parameter entities here. */
6134 if ((ctxt->sax != NULL) &&
6135 (ctxt->sax->getParameterEntity != NULL))
6136 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6138 if (entity == NULL) {
6140 * [ WFC: Entity Declared ]
6141 * In a document without any DTD, a document with only an
6142 * internal DTD subset which contains no parameter entity
6143 * references, or a document with "standalone='yes'", ...
6144 * ... The declaration of a parameter entity must precede
6145 * any reference to it...
6147 if ((ctxt->standalone == 1) ||
6148 ((ctxt->hasExternalSubset == 0) &&
6149 (ctxt->hasPErefs == 0))) {
/*
 * Hard error.  Unlike most reports in this file, the callback is skipped
 * here when ctxt->disableSAX is already set.
 */
6150 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6151 if ((!ctxt->disableSAX) &&
6152 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153 ctxt->sax->error(ctxt->userData,
6154 "PEReference: %%%s; not found\n", name);
6155 ctxt->wellFormed = 0;
6156 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6159 * [ VC: Entity Declared ]
6160 * In a document with an external subset or external
6161 * parameter entities with "standalone='no'", ...
6162 * ... The declaration of a parameter entity must precede
6163 * any reference to it...
6165 if ((!ctxt->disableSAX) &&
6166 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6167 ctxt->sax->warning(ctxt->userData,
6168 "PEReference: %%%s; not found\n", name);
6173 * Internal checking in case the entity quest barfed
6175 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6176 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
/* The callback returned something that is not a parameter entity. */
6177 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6178 ctxt->sax->warning(ctxt->userData,
6179 "Internal: %%%s; is not a parameter entity\n", name);
6180 } else if (ctxt->input->free != deallocblankswrapper) {
/*
 * The current input is not already a blanks wrapper (judged by its free
 * hook): push a wrapper stream built by xmlNewBlanksWrapperInputStream().
 */
6181 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6182 xmlPushInput(ctxt, input);
6186 * handle the extra spaces added before and after
6187 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6189 input = xmlNewEntityInputStream(ctxt, entity);
6190 xmlPushInput(ctxt, input);
/*
 * An external parameter entity whose input starts with "<?xml" followed
 * by a blank carries a text declaration, which is parsed first.
 */
6191 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6192 (RAW == '<') && (NXT(1) == '?') &&
6193 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6194 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6195 xmlParseTextDecl(ctxt);
6196 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6198 * The XML REC instructs us to stop parsing
6201 ctxt->instate = XML_PARSER_EOF;
6208 ctxt->hasPErefs = 1;
/* Error path for a missing ';' terminator (the guarding test is not shown). */
6210 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6212 ctxt->sax->error(ctxt->userData,
6213 "xmlParsePEReference: expecting ';'\n");
6214 ctxt->wellFormed = 0;
6215 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6223 * xmlParseStringPEReference:
6224 * @ctxt: an XML parser context
6225 * @str: a pointer to an index in the string
6227 * parse PEReference declarations
6229 * [69] PEReference ::= '%' Name ';'
6231 * [ WFC: No Recursion ]
6232 * A parsed entity must not contain a recursive
6233 * reference to itself, either directly or indirectly.
6235 * [ WFC: Entity Declared ]
6236 * In a document without any DTD, a document with only an internal DTD
6237 * subset which contains no parameter entity references, or a document
6238 * with "standalone='yes'", ... ... The declaration of a parameter
6239 * entity must precede any reference to it...
6241 * [ VC: Entity Declared ]
6242 * In a document with an external subset or external parameter entities
6243 * with "standalone='no'", ... ... The declaration of a parameter entity
6244 * must precede any reference to it...
6247 * Parameter-entity references may only appear in the DTD.
6248 * NOTE: misleading but this is handled.
6250 * Returns the string of the entity content.
6251 * str is updated to the current value of the index
6254 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
/*
 * String-based counterpart of xmlParsePEReference(): the Name is read with
 * xmlParseStringName() through a local cursor (ptr), the entity is
 * resolved through the SAX getParameterEntity() callback, and no input
 * stream is pushed in the visible code.  Returns NULL immediately when
 * str or *str is NULL.
 * NOTE(review): the embedded line numbers in this listing skip, so guards,
 * braces, the update of *str and the final return are not shown.
 */
6258 xmlEntityPtr entity = NULL;
6260 if ((str == NULL) || (*str == NULL)) return(NULL);
6266 name = xmlParseStringName(ctxt, &ptr);
/* Error path for a missing name (the guarding test is not shown). */
6268 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData,
6271 "xmlParseStringPEReference: no name\n");
6272 ctxt->wellFormed = 0;
6273 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/* Only the SAX callback is consulted for parameter entities here. */
6279 if ((ctxt->sax != NULL) &&
6280 (ctxt->sax->getParameterEntity != NULL))
6281 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6283 if (entity == NULL) {
6285 * [ WFC: Entity Declared ]
6286 * In a document without any DTD, a document with only an
6287 * internal DTD subset which contains no parameter entity
6288 * references, or a document with "standalone='yes'", ...
6289 * ... The declaration of a parameter entity must precede
6290 * any reference to it...
6292 if ((ctxt->standalone == 1) ||
6293 ((ctxt->hasExternalSubset == 0) &&
6294 (ctxt->hasPErefs == 0))) {
/* Standalone, or no external subset and no PE references: hard error. */
6295 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6297 ctxt->sax->error(ctxt->userData,
6298 "PEReference: %%%s; not found\n", name);
6299 ctxt->wellFormed = 0;
6300 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6303 * [ VC: Entity Declared ]
6304 * In a document with an external subset or external
6305 * parameter entities with "standalone='no'", ...
6306 * ... The declaration of a parameter entity must
6307 * precede any reference to it...
6309 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6310 ctxt->sax->warning(ctxt->userData,
6311 "PEReference: %%%s; not found\n", name);
6316 * Internal checking in case the entity quest barfed
6318 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6319 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
/* The callback returned something that is not a parameter entity. */
6320 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6321 ctxt->sax->warning(ctxt->userData,
6322 "Internal: %%%s; is not a parameter entity\n", name);
6325 ctxt->hasPErefs = 1;
/* Error path for a missing ';' terminator (the guarding test is not shown). */
6327 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6329 ctxt->sax->error(ctxt->userData,
6330 "xmlParseStringPEReference: expecting ';'\n");
6331 ctxt->wellFormed = 0;
6332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6342 * xmlParseDocTypeDecl:
6343 * @ctxt: an XML parser context
6345 * parse a DOCTYPE declaration
6347 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6348 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6350 * [ VC: Root Element Type ]
6351 * The Name in the document type declaration must match the element
6352 * type of the root element.
6356 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
/*
 * Parses the head of a DOCTYPE declaration: the document type name and the
 * optional external identifier.  Both are stored in the parser context
 * (intSubName, extSubURI, extSubSystem) and passed to the SAX
 * internalSubset() callback.
 * NOTE(review): the embedded line numbers in this listing skip, so the
 * input-skipping statements, guards and braces are not shown.
 */
6357 xmlChar *name = NULL;
6358 xmlChar *ExternalID = NULL;
6359 xmlChar *URI = NULL;
6362 * We know that '<!DOCTYPE' has been detected.
6369 * Parse the DOCTYPE name.
6371 name = xmlParseName(ctxt);
/* Error path for a missing name (the guarding test is not shown). */
6373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6375 ctxt->sax->error(ctxt->userData,
6376 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6377 ctxt->wellFormed = 0;
6378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6380 ctxt->intSubName = name;
6385 * Check for SystemID and ExternalID
6387 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
/* Either identifier being present marks the document as having an external subset. */
6389 if ((URI != NULL) || (ExternalID != NULL)) {
6390 ctxt->hasExternalSubset = 1;
6392 ctxt->extSubURI = URI;
6393 ctxt->extSubSystem = ExternalID;
6398 * Create and update the internal subset.
6400 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6401 (!ctxt->disableSAX))
6402 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6405 * Is there any internal subset declarations ?
6406 * they are handled separately in xmlParseInternalSubset()
6412 * We should be at the end of the DOCTYPE declaration.
6415 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
/* Error path for an unterminated declaration (the guarding test is not shown). */
6416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6417 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
6418 ctxt->wellFormed = 0;
6419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6425 * xmlParseInternalSubset:
6426 * @ctxt: an XML parser context
6428 * parse the internal subset declaration
6430 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6434 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
/*
 * Parses the bracketed internal subset of a DOCTYPE declaration: the
 * parser state is switched to XML_PARSER_DTD and markup declarations and
 * parameter-entity references are consumed until ']' is reached.
 * NOTE(review): the embedded line numbers in this listing skip, so guards,
 * braces and several statements are not shown.
 */
6436 * Is there any DTD definition ?
6439 ctxt->instate = XML_PARSER_DTD;
6442 * Parse the succession of Markup declarations and
6444 * Subsequence (markupdecl | PEReference | S)*
6446 while (RAW != ']') {
/* Snapshot the input position so a pass that consumes nothing is detected. */
6447 const xmlChar *check = CUR_PTR;
6448 int cons = ctxt->input->consumed;
6451 xmlParseMarkupDecl(ctxt);
6452 xmlParsePEReference(ctxt);
6455 * Pop-up of finished entities.
6457 while ((RAW == 0) && (ctxt->inputNr > 1))
6460 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
/*
 * Neither the cursor nor the consumed counter moved: report an internal
 * error.  NOTE(review): whatever leaves the loop afterwards is not shown.
 */
6461 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6463 ctxt->sax->error(ctxt->userData,
6464 "xmlParseInternalSubset: error detected in Markup declaration\n");
6465 ctxt->wellFormed = 0;
6466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6477 * We should be at the end of the DOCTYPE declaration.
6480 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
/* Error path for an unterminated declaration (the guarding test is not shown). */
6481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6482 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
6483 ctxt->wellFormed = 0;
6484 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6490 * xmlParseAttribute:
6491 * @ctxt: an XML parser context
6492 * @value: a xmlChar ** used to store the value of the attribute
6494 * parse an attribute
6496 * [41] Attribute ::= Name Eq AttValue
6498 * [ WFC: No External Entity References ]
6499 * Attribute values cannot contain direct or indirect entity references
6500 * to external entities.
6502 * [ WFC: No < in Attribute Values ]
6503 * The replacement text of any entity referred to directly or indirectly in
6504 * an attribute value (other than "<") must not contain a <.
6506 * [ VC: Attribute Value Type ]
6507 * The attribute must have been declared; the value must be of the type
6510 * [25] Eq ::= S? '=' S?
6514 * [NS 11] Attribute ::= QName Eq AttValue
6516 * Also the case QName == xmlns:??? is handled independently as a namespace
6519 * Returns the attribute name, and the value in *value.
6523 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
 * Parses one attribute: its name with xmlParseName() and its value with
 * xmlParseAttValue(), then applies the xml:lang and xml:space checks
 * below.
 * NOTE(review): the embedded line numbers in this listing skip, so guards,
 * braces, the '=' handling, the store into *value and the return
 * statements are not shown.
 */
6524 xmlChar *name, *val;
6528 name = xmlParseName(ctxt);
/* Error path for a missing name (the guarding test is not shown). */
6530 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6532 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6533 ctxt->wellFormed = 0;
6534 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6545 val = xmlParseAttValue(ctxt);
/* The parser state is set to XML_PARSER_CONTENT once the value is read. */
6546 ctxt->instate = XML_PARSER_CONTENT;
6548 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6550 ctxt->sax->error(ctxt->userData,
6551 "Specification mandate value for attribute %s\n", name);
6552 ctxt->wellFormed = 0;
6553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6559 * Check that xml:lang conforms to the specification
6560 * No more registered as an error, just generate a warning now
6561 * since this was deprecated in XML second edition
6563 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
/* Pedantic mode only: a value rejected by xmlCheckLanguageID() draws a warning. */
6564 if (!xmlCheckLanguageID(val)) {
6565 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6566 ctxt->sax->warning(ctxt->userData,
6567 "Malformed value for xml:lang : %s\n", val);
6572 * Check that xml:space conforms to the specification
6574 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
/*
 * "default" and "preserve" are the accepted values; the statements run
 * for each are not shown.  NOTE(review): any other value is reported with
 * errNo XML_ERR_ATTRIBUTE_WITHOUT_VALUE even though a value is present;
 * confirm that reuse of the code is intended.
 */
6575 if (xmlStrEqual(val, BAD_CAST "default"))
6577 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6580 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6582 ctxt->sax->error(ctxt->userData,
6583 "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6585 ctxt->wellFormed = 0;
6586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6596 * @ctxt: an XML parser context
6598 * parse a start of tag either for rule element or
6599 * EmptyElement. In both case we don't parse the tag closing chars.
6601 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6603 * [ WFC: Unique Att Spec ]
6604 * No attribute name may appear more than once in the same start-tag or
6605 * empty-element tag.
6607 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6609 * [ WFC: Unique Att Spec ]
6610 * No attribute name may appear more than once in the same start-tag or
6611 * empty-element tag.
6615 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6617 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6619 * Returns the element name parsed
6623 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6627 const xmlChar **atts = NULL;
6632 if (RAW != '<') return(NULL);
6635 name = xmlParseName(ctxt);
6637 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6639 ctxt->sax->error(ctxt->userData,
6640 "xmlParseStartTag: invalid element name\n");
6641 ctxt->wellFormed = 0;
6642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6647 * Now parse the attributes, it ends up with the ending
6654 while ((RAW != '>') &&
6655 ((RAW != '/') || (NXT(1) != '>')) &&
6657 const xmlChar *q = CUR_PTR;
6658 int cons = ctxt->input->consumed;
6660 attname = xmlParseAttribute(ctxt, &attvalue);
6661 if ((attname != NULL) && (attvalue != NULL)) {
6663 * [ WFC: Unique Att Spec ]
6664 * No attribute name may appear more than once in the same
6665 * start-tag or empty-element tag.
6667 for (i = 0; i < nbatts;i += 2) {
6668 if (xmlStrEqual(atts[i], attname)) {
6669 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6671 ctxt->sax->error(ctxt->userData,
6672 "Attribute %s redefined\n",
6674 ctxt->wellFormed = 0;
6675 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6683 * Add the pair to atts
6687 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6689 xmlGenericError(xmlGenericErrorContext,
6690 "malloc of %ld byte failed\n",
6691 maxatts * (long)sizeof(xmlChar *));
6694 } else if (nbatts + 4 > maxatts) {
6696 atts = (const xmlChar **) xmlRealloc((void *) atts,
6697 maxatts * sizeof(xmlChar *));
6699 xmlGenericError(xmlGenericErrorContext,
6700 "realloc of %ld byte failed\n",
6701 maxatts * (long)sizeof(xmlChar *));
6705 atts[nbatts++] = attname;
6706 atts[nbatts++] = attvalue;
6707 atts[nbatts] = NULL;
6708 atts[nbatts + 1] = NULL;
6710 if (attname != NULL)
6712 if (attvalue != NULL)
6719 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6721 if (!IS_BLANK(RAW)) {
6722 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6724 ctxt->sax->error(ctxt->userData,
6725 "attributes construct error\n");
6726 ctxt->wellFormed = 0;
6727 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6730 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6731 (attname == NULL) && (attvalue == NULL)) {
6732 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6734 ctxt->sax->error(ctxt->userData,
6735 "xmlParseStartTag: problem parsing attributes\n");
6736 ctxt->wellFormed = 0;
6737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6744 * SAX: Start of Element !
6746 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6747 (!ctxt->disableSAX))
6748 ctxt->sax->startElement(ctxt->userData, name, atts);
6751 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6752 xmlFree((void *) atts);
6759 * @ctxt: an XML parser context
6761 * parse an end of tag
6763 * [42] ETag ::= '</' Name S? '>'
6767 * [NS 9] ETag ::= '</' QName S? '>'
6771 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6776 if ((RAW != '<') || (NXT(1) != '/')) {
6777 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6779 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6780 ctxt->wellFormed = 0;
6781 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6786 name = xmlParseNameAndCompare(ctxt,ctxt->name);
6789 * We should definitely be at the ending "S? '>'" part
6793 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6794 ctxt->errNo = XML_ERR_GT_REQUIRED;
6795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6796 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6797 ctxt->wellFormed = 0;
6798 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6803 * [ WFC: Element Type Match ]
6804 * The Name in an element's end-tag must match the element type in the
6808 if (name != (xmlChar*)1) {
6809 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6812 ctxt->sax->error(ctxt->userData,
6813 "Opening and ending tag mismatch: %s and %s\n",
6816 ctxt->sax->error(ctxt->userData,
6817 "Ending tag error for: %s\n", ctxt->name);
6821 ctxt->wellFormed = 0;
6822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6826 * Recover in case of one missing close
6828 if ((ctxt->nameNr > 2) &&
6829 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6842 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6843 (!ctxt->disableSAX))
6844 ctxt->sax->endElement(ctxt->userData, ctxt->name);
6846 oldname = namePop(ctxt);
6848 if (oldname != NULL) {
6850 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6859 * @ctxt: an XML parser context
6861 * Parse escaped pure raw content.
6863 * [18] CDSect ::= CDStart CData CDEnd
6865 * [19] CDStart ::= '<![CDATA['
6867 * [20] CData ::= (Char* - (Char* ']]>' Char*))
6869 * [21] CDEnd ::= ']]>'
6872 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6873 xmlChar *buf = NULL;
6875 int size = XML_PARSER_BUFFER_SIZE;
6881 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6882 (NXT(2) == '[') && (NXT(3) == 'C') &&
6883 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6884 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6890 ctxt->instate = XML_PARSER_CDATA_SECTION;
6893 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6895 ctxt->sax->error(ctxt->userData,
6896 "CData section not finished\n");
6897 ctxt->wellFormed = 0;
6898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6899 ctxt->instate = XML_PARSER_CONTENT;
6905 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6907 ctxt->sax->error(ctxt->userData,
6908 "CData section not finished\n");
6909 ctxt->wellFormed = 0;
6910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6911 ctxt->instate = XML_PARSER_CONTENT;
6916 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6918 xmlGenericError(xmlGenericErrorContext,
6919 "malloc of %d byte failed\n", size);
6922 while (IS_CHAR(cur) &&
6923 ((r != ']') || (s != ']') || (cur != '>'))) {
6924 if (len + 5 >= size) {
6926 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6928 xmlGenericError(xmlGenericErrorContext,
6929 "realloc of %d byte failed\n", size);
6933 COPY_BUF(rl,buf,len,r);
6947 ctxt->instate = XML_PARSER_CONTENT;
6949 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6951 ctxt->sax->error(ctxt->userData,
6952 "CData section not finished\n%.50s\n", buf);
6953 ctxt->wellFormed = 0;
6954 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6961 * OK the buffer is to be consumed as cdata.
6963 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6964 if (ctxt->sax->cdataBlock != NULL)
6965 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6966 else if (ctxt->sax->characters != NULL)
6967 ctxt->sax->characters(ctxt->userData, buf, len);
6974 * @ctxt: an XML parser context
6978 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6982 xmlParseContent(xmlParserCtxtPtr ctxt) {
6984 while ((RAW != 0) &&
6985 ((RAW != '<') || (NXT(1) != '/'))) {
6986 const xmlChar *test = CUR_PTR;
6987 int cons = ctxt->input->consumed;
6988 const xmlChar *cur = ctxt->input->cur;
6991 * First case : a Processing Instruction.
6993 if ((*cur == '<') && (cur[1] == '?')) {
6998 * Second case : a CDSection
7000 else if ((*cur == '<') && (NXT(1) == '!') &&
7001 (NXT(2) == '[') && (NXT(3) == 'C') &&
7002 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7003 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7005 xmlParseCDSect(ctxt);
7009 * Third case : a comment
7011 else if ((*cur == '<') && (NXT(1) == '!') &&
7012 (NXT(2) == '-') && (NXT(3) == '-')) {
7013 xmlParseComment(ctxt);
7014 ctxt->instate = XML_PARSER_CONTENT;
7018 * Fourth case : a sub-element.
7020 else if (*cur == '<') {
7021 xmlParseElement(ctxt);
7025 * Fifth case : a reference. If it has not been resolved,
7026 * parsing returns its Name, create the node
7029 else if (*cur == '&') {
7030 xmlParseReference(ctxt);
7034 * Last case, text. Note that References are handled directly.
7037 xmlParseCharData(ctxt, 0);
7042 * Pop-up of finished entities.
7044 while ((RAW == 0) && (ctxt->inputNr > 1))
7048 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
7049 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7051 ctxt->sax->error(ctxt->userData,
7052 "detected an error in element content\n");
7053 ctxt->wellFormed = 0;
7054 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7055 ctxt->instate = XML_PARSER_EOF;
7063 * @ctxt: an XML parser context
7065 * parse an XML element, this is highly recursive
7067 * [39] element ::= EmptyElemTag | STag content ETag
7069 * [ WFC: Element Type Match ]
7070 * The Name in an element's end-tag must match the element type in the
7073 * [ VC: Element Valid ]
7074 * An element is valid if there is a declaration matching elementdecl
7075 * where the Name matches the element type and one of the following holds:
7076 * - The declaration matches EMPTY and the element has no content.
7077 * - The declaration matches children and the sequence of child elements
7078 * belongs to the language generated by the regular expression in the
7079 * content model, with optional white space (characters matching the
7080 * nonterminal S) between each pair of child elements.
7081 * - The declaration matches Mixed and the content consists of character
7082 * data and child elements whose types match names in the content model.
7083 * - The declaration matches ANY, and the types of any child elements have
7088 xmlParseElement(xmlParserCtxtPtr ctxt) {
7091 xmlParserNodeInfo node_info;
7094 /* Capture start position */
7095 if (ctxt->record_info) {
7096 node_info.begin_pos = ctxt->input->consumed +
7097 (CUR_PTR - ctxt->input->base);
7098 node_info.begin_line = ctxt->input->line;
7101 if (ctxt->spaceNr == 0)
7102 spacePush(ctxt, -1);
7104 spacePush(ctxt, *ctxt->space);
7106 name = xmlParseStartTag(ctxt);
7111 namePush(ctxt, name);
7115 * [ VC: Root Element Type ]
7116 * The Name in the document type declaration must match the element
7117 * type of the root element.
7119 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7120 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7121 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7124 * Check for an Empty Element.
7126 if ((RAW == '/') && (NXT(1) == '>')) {
7128 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7129 (!ctxt->disableSAX))
7130 ctxt->sax->endElement(ctxt->userData, name);
7131 oldname = namePop(ctxt);
7133 if (oldname != NULL) {
7135 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7139 if ( ret != NULL && ctxt->record_info ) {
7140 node_info.end_pos = ctxt->input->consumed +
7141 (CUR_PTR - ctxt->input->base);
7142 node_info.end_line = ctxt->input->line;
7143 node_info.node = ret;
7144 xmlParserAddNodeInfo(ctxt, &node_info);
7151 ctxt->errNo = XML_ERR_GT_REQUIRED;
7152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7153 ctxt->sax->error(ctxt->userData,
7154 "Couldn't find end of Start Tag %s\n",
7156 ctxt->wellFormed = 0;
7157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7160 * end of parsing of this node.
7163 oldname = namePop(ctxt);
7165 if (oldname != NULL) {
7167 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7173 * Capture end position and add node
7175 if ( ret != NULL && ctxt->record_info ) {
7176 node_info.end_pos = ctxt->input->consumed +
7177 (CUR_PTR - ctxt->input->base);
7178 node_info.end_line = ctxt->input->line;
7179 node_info.node = ret;
7180 xmlParserAddNodeInfo(ctxt, &node_info);
7186 * Parse the content of the element:
7188 xmlParseContent(ctxt);
7189 if (!IS_CHAR(RAW)) {
7190 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
7191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7192 ctxt->sax->error(ctxt->userData,
7193 "Premature end of data in tag %s\n", name);
7194 ctxt->wellFormed = 0;
7195 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7198 * end of parsing of this node.
7201 oldname = namePop(ctxt);
7203 if (oldname != NULL) {
7205 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7213 * parse the end of tag: '</' should be here.
7215 xmlParseEndTag(ctxt);
7218 * Capture end position and add node
7220 if ( ret != NULL && ctxt->record_info ) {
7221 node_info.end_pos = ctxt->input->consumed +
7222 (CUR_PTR - ctxt->input->base);
7223 node_info.end_line = ctxt->input->line;
7224 node_info.node = ret;
7225 xmlParserAddNodeInfo(ctxt, &node_info);
7230 * xmlParseVersionNum:
7231 * @ctxt: an XML parser context
7233 * parse the XML version value.
7235 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7237 * Returns the string giving the XML version number, or NULL
7240 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7241 xmlChar *buf = NULL;
7246 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7248 xmlGenericError(xmlGenericErrorContext,
7249 "malloc of %d byte failed\n", size);
7253 while (((cur >= 'a') && (cur <= 'z')) ||
7254 ((cur >= 'A') && (cur <= 'Z')) ||
7255 ((cur >= '0') && (cur <= '9')) ||
7256 (cur == '_') || (cur == '.') ||
7257 (cur == ':') || (cur == '-')) {
7258 if (len + 1 >= size) {
7260 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7262 xmlGenericError(xmlGenericErrorContext,
7263 "realloc of %d byte failed\n", size);
7276 * xmlParseVersionInfo:
7277 * @ctxt: an XML parser context
7279 * parse the XML version.
7281 * [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
7283 * [25] Eq ::= S? '=' S?
7285 * Returns the version string, e.g. "1.0"
7289 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7290 xmlChar *version = NULL;
7293 if ((RAW == 'v') && (NXT(1) == 'e') &&
7294 (NXT(2) == 'r') && (NXT(3) == 's') &&
7295 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7300 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData,
7303 "xmlParseVersionInfo : expected '='\n");
7304 ctxt->wellFormed = 0;
7305 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7313 version = xmlParseVersionNum(ctxt);
7315 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7317 ctxt->sax->error(ctxt->userData,
7318 "String not closed\n%.50s\n", q);
7319 ctxt->wellFormed = 0;
7320 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7323 } else if (RAW == '\''){
7326 version = xmlParseVersionNum(ctxt);
7328 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7330 ctxt->sax->error(ctxt->userData,
7331 "String not closed\n%.50s\n", q);
7332 ctxt->wellFormed = 0;
7333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7337 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7339 ctxt->sax->error(ctxt->userData,
7340 "xmlParseVersionInfo : expected ' or \"\n");
7341 ctxt->wellFormed = 0;
7342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7350 * @ctxt: an XML parser context
7352 * parse the XML encoding name
7354 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7356 * Returns the encoding name value or NULL
7359 xmlParseEncName(xmlParserCtxtPtr ctxt) {
7360 xmlChar *buf = NULL;
7366 if (((cur >= 'a') && (cur <= 'z')) ||
7367 ((cur >= 'A') && (cur <= 'Z'))) {
7368 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7370 xmlGenericError(xmlGenericErrorContext,
7371 "malloc of %d byte failed\n", size);
7378 while (((cur >= 'a') && (cur <= 'z')) ||
7379 ((cur >= 'A') && (cur <= 'Z')) ||
7380 ((cur >= '0') && (cur <= '9')) ||
7381 (cur == '.') || (cur == '_') ||
7383 if (len + 1 >= size) {
7385 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7387 xmlGenericError(xmlGenericErrorContext,
7388 "realloc of %d byte failed\n", size);
7403 ctxt->errNo = XML_ERR_ENCODING_NAME;
7404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7405 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7406 ctxt->wellFormed = 0;
7407 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7413 * xmlParseEncodingDecl:
7414 * @ctxt: an XML parser context
7416 * parse the XML encoding declaration
7418 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7420 * this sets up the conversion filters.
7422 * Returns the encoding value or NULL
7426 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7427 xmlChar *encoding = NULL;
7431 if ((RAW == 'e') && (NXT(1) == 'n') &&
7432 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7433 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7434 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7438 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7440 ctxt->sax->error(ctxt->userData,
7441 "xmlParseEncodingDecl : expected '='\n");
7442 ctxt->wellFormed = 0;
7443 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7451 encoding = xmlParseEncName(ctxt);
7453 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7455 ctxt->sax->error(ctxt->userData,
7456 "String not closed\n%.50s\n", q);
7457 ctxt->wellFormed = 0;
7458 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7461 } else if (RAW == '\''){
7464 encoding = xmlParseEncName(ctxt);
7466 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7468 ctxt->sax->error(ctxt->userData,
7469 "String not closed\n%.50s\n", q);
7470 ctxt->wellFormed = 0;
7471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7475 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7477 ctxt->sax->error(ctxt->userData,
7478 "xmlParseEncodingDecl : expected ' or \"\n");
7479 ctxt->wellFormed = 0;
7480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7482 if (encoding != NULL) {
7483 xmlCharEncoding enc;
7484 xmlCharEncodingHandlerPtr handler;
7486 if (ctxt->input->encoding != NULL)
7487 xmlFree((xmlChar *) ctxt->input->encoding);
7488 ctxt->input->encoding = encoding;
7490 enc = xmlParseCharEncoding((const char *) encoding);
7492 * registered set of known encodings
7494 if (enc != XML_CHAR_ENCODING_ERROR) {
7495 xmlSwitchEncoding(ctxt, enc);
7496 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7497 ctxt->input->encoding = NULL;
7503 * fallback for unknown encodings
7505 handler = xmlFindCharEncodingHandler((const char *) encoding);
7506 if (handler != NULL) {
7507 xmlSwitchToEncoding(ctxt, handler);
7509 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7511 ctxt->sax->error(ctxt->userData,
7512 "Unsupported encoding %s\n", encoding);
7523 * @ctxt: an XML parser context
7525 * parse the XML standalone declaration
7527 * [32] SDDecl ::= S 'standalone' Eq
7528 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7530 * [ VC: Standalone Document Declaration ]
7531 * TODO The standalone document declaration must have the value "no"
7532 * if any external markup declarations contain declarations of:
7533 * - attributes with default values, if elements to which these
7534 * attributes apply appear in the document without specifications
7535 * of values for these attributes, or
7536 * - entities (other than amp, lt, gt, apos, quot), if references
7537 * to those entities appear in the document, or
7538 * - attributes with values subject to normalization, where the
7539 * attribute appears in the document with a value which will change
7540 * as a result of normalization, or
7541 * - element types with element content, if white space occurs directly
7542 * within any instance of those types.
7544 * Returns 1 if standalone, 0 otherwise
7548 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7549 int standalone = -1;
7552 if ((RAW == 's') && (NXT(1) == 't') &&
7553 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7554 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7555 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7556 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7560 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7562 ctxt->sax->error(ctxt->userData,
7563 "XML standalone declaration : expected '='\n");
7564 ctxt->wellFormed = 0;
7565 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7572 if ((RAW == 'n') && (NXT(1) == 'o')) {
7575 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7580 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7582 ctxt->sax->error(ctxt->userData,
7583 "standalone accepts only 'yes' or 'no'\n");
7584 ctxt->wellFormed = 0;
7585 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7588 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7590 ctxt->sax->error(ctxt->userData, "String not closed\n");
7591 ctxt->wellFormed = 0;
7592 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7595 } else if (RAW == '"'){
7597 if ((RAW == 'n') && (NXT(1) == 'o')) {
7600 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7605 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7607 ctxt->sax->error(ctxt->userData,
7608 "standalone accepts only 'yes' or 'no'\n");
7609 ctxt->wellFormed = 0;
7610 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7613 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7615 ctxt->sax->error(ctxt->userData, "String not closed\n");
7616 ctxt->wellFormed = 0;
7617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7621 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7623 ctxt->sax->error(ctxt->userData,
7624 "Standalone value not found\n");
7625 ctxt->wellFormed = 0;
7626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7634 * @ctxt: an XML parser context
7636 * parse an XML declaration header
7638 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7642 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7646 * We know that '<?xml' is here.
7650 if (!IS_BLANK(RAW)) {
7651 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7653 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7654 ctxt->wellFormed = 0;
7655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7660 * We must have the VersionInfo here.
7662 version = xmlParseVersionInfo(ctxt);
7663 if (version == NULL) {
7664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7665 ctxt->sax->error(ctxt->userData,
7666 "Malformed declaration expecting version\n");
7667 ctxt->wellFormed = 0;
7668 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7670 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7672 * TODO: Blueberry should be detected here
7674 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7675 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7678 if (ctxt->version != NULL)
7679 xmlFree((void *) ctxt->version);
7680 ctxt->version = version;
7684 * We may have the encoding declaration
7686 if (!IS_BLANK(RAW)) {
7687 if ((RAW == '?') && (NXT(1) == '>')) {
7691 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7693 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7694 ctxt->wellFormed = 0;
7695 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7697 xmlParseEncodingDecl(ctxt);
7698 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7700 * The XML REC instructs us to stop parsing right here
7706 * We may have the standalone status.
7708 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7709 if ((RAW == '?') && (NXT(1) == '>')) {
7713 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7715 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7716 ctxt->wellFormed = 0;
7717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7720 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7723 if ((RAW == '?') && (NXT(1) == '>')) {
7725 } else if (RAW == '>') {
7726 /* Deprecated old WD ... */
7727 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7729 ctxt->sax->error(ctxt->userData,
7730 "XML declaration must end-up with '?>'\n");
7731 ctxt->wellFormed = 0;
7732 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7735 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7737 ctxt->sax->error(ctxt->userData,
7738 "parsing XML declaration: '?>' expected\n");
7739 ctxt->wellFormed = 0;
7740 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7741 MOVETO_ENDTAG(CUR_PTR);
7748 * @ctxt: an XML parser context
7750 * parse an XML Misc* optional field.
7752 * [27] Misc ::= Comment | PI | S
7756 xmlParseMisc(xmlParserCtxtPtr ctxt) {
7757 while (((RAW == '<') && (NXT(1) == '?')) ||
7758 ((RAW == '<') && (NXT(1) == '!') &&
7759 (NXT(2) == '-') && (NXT(3) == '-')) ||
7761 if ((RAW == '<') && (NXT(1) == '?')) {
7763 } else if (IS_BLANK(CUR)) {
7766 xmlParseComment(ctxt);
7772 * @ctxt: an XML parser context
7774 * parse an XML document (and build a tree if using the standard SAX
7777 * [1] document ::= prolog element Misc*
7779 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7781 * Returns 0, or -1 in case of error. The parser context is augmented
7782 * as a result of the parsing.
7786 xmlParseDocument(xmlParserCtxtPtr ctxt) {
7788 xmlCharEncoding enc;
7795 * SAX: beginning of the document processing.
7797 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7798 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7800 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
7802 * Get the 4 first bytes and decode the charset
7803 * if enc != XML_CHAR_ENCODING_NONE
7804 * plug some encoding conversion routines.
7810 enc = xmlDetectCharEncoding(start, 4);
7811 if (enc != XML_CHAR_ENCODING_NONE) {
7812 xmlSwitchEncoding(ctxt, enc);
7818 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7820 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7821 ctxt->wellFormed = 0;
7822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7826 * Check for the XMLDecl in the Prolog.
7829 if ((RAW == '<') && (NXT(1) == '?') &&
7830 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7831 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7834 * Note that we will switch encoding on the fly.
7836 xmlParseXMLDecl(ctxt);
7837 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7839 * The XML REC instructs us to stop parsing right here
7843 ctxt->standalone = ctxt->input->standalone;
7846 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7848 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7849 ctxt->sax->startDocument(ctxt->userData);
7852 * The Misc part of the Prolog
7858 * Then possibly doc type declaration(s) and more Misc
7859 * (doctypedecl Misc*)?
7862 if ((RAW == '<') && (NXT(1) == '!') &&
7863 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7864 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7865 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7869 xmlParseDocTypeDecl(ctxt);
7871 ctxt->instate = XML_PARSER_DTD;
7872 xmlParseInternalSubset(ctxt);
7876 * Create and update the external subset.
7879 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7880 (!ctxt->disableSAX))
7881 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7882 ctxt->extSubSystem, ctxt->extSubURI);
7886 ctxt->instate = XML_PARSER_PROLOG;
7891 * Time to start parsing the tree itself
7895 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7897 ctxt->sax->error(ctxt->userData,
7898 "Start tag expected, '<' not found\n");
7899 ctxt->wellFormed = 0;
7900 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7901 ctxt->instate = XML_PARSER_EOF;
7903 ctxt->instate = XML_PARSER_CONTENT;
7904 xmlParseElement(ctxt);
7905 ctxt->instate = XML_PARSER_EPILOG;
7909 * The Misc part at the end
7914 ctxt->errNo = XML_ERR_DOCUMENT_END;
7915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7916 ctxt->sax->error(ctxt->userData,
7917 "Extra content at the end of the document\n");
7918 ctxt->wellFormed = 0;
7919 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7921 ctxt->instate = XML_PARSER_EOF;
7925 * SAX: end of the document processing.
7927 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7928 ctxt->sax->endDocument(ctxt->userData);
7931 * Remove locally kept entity definitions if the tree was not built
7933 if ((ctxt->myDoc != NULL) &&
7934 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7935 xmlFreeDoc(ctxt->myDoc);
7939 if (! ctxt->wellFormed) {
7947 * xmlParseExtParsedEnt:
7948 * @ctxt: an XML parser context
7950 * parse a general parsed entity
7951 * An external general parsed entity is well-formed if it matches the
7952 * production labeled extParsedEnt.
7954 * [78] extParsedEnt ::= TextDecl? content
7956 * Returns 0, or -1 in case of error. The parser context is augmented
7957 * as a result of the parsing.
7961 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7963 xmlCharEncoding enc;
7965 xmlDefaultSAXHandlerInit();
7970 * SAX: beginning of the document processing.
7972 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7973 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7976 * Get the 4 first bytes and decode the charset
7977 * if enc != XML_CHAR_ENCODING_NONE
7978 * plug some encoding conversion routines.
7984 enc = xmlDetectCharEncoding(start, 4);
7985 if (enc != XML_CHAR_ENCODING_NONE) {
7986 xmlSwitchEncoding(ctxt, enc);
7991 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7993 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7994 ctxt->wellFormed = 0;
7995 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7999 * Check for the XMLDecl in the Prolog.
8002 if ((RAW == '<') && (NXT(1) == '?') &&
8003 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8004 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8007 * Note that we will switch encoding on the fly.
8009 xmlParseXMLDecl(ctxt);
8010 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8012 * The XML REC instructs us to stop parsing right here
8018 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8020 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8021 ctxt->sax->startDocument(ctxt->userData);
8024 * Doing validity checking on chunk doesn't make sense
8026 ctxt->instate = XML_PARSER_CONTENT;
8028 ctxt->loadsubset = 0;
8031 xmlParseContent(ctxt);
8033 if ((RAW == '<') && (NXT(1) == '/')) {
8034 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8036 ctxt->sax->error(ctxt->userData,
8037 "chunk is not well balanced\n");
8038 ctxt->wellFormed = 0;
8039 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8040 } else if (RAW != 0) {
8041 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8043 ctxt->sax->error(ctxt->userData,
8044 "extra content at the end of well balanced chunk\n");
8045 ctxt->wellFormed = 0;
8046 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8050 * SAX: end of the document processing.
8052 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8053 ctxt->sax->endDocument(ctxt->userData);
8055 if (! ctxt->wellFormed) return(-1);
8059 /************************************************************************
8061 * Progressive parsing interfaces *
8063 ************************************************************************/
8066 * xmlParseLookupSequence:
8067 * @ctxt: an XML parser context
8068 * @first: the first char to look up
8069 * @next: the next char to look up or zero
8070 * @third: the third char to look up or zero
8072 * Try to find if a sequence (first, next, third) or just (first next) or
8073 * (first) is available in the input stream.
8074 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8075 * to avoid rescanning sequences of bytes. It DOES change the state of the
8076 * parser, so do not use it liberally.
8078 * Returns the offset of the match from the current parsing point if the full
8079 * sequence is available, -1 otherwise.
8082 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8083 xmlChar next, xmlChar third) {
8085 xmlParserInputPtr in;
8089 if (in == NULL) return(-1);
8090 base = in->cur - in->base;
8091 if (base < 0) return(-1);
8092 if (ctxt->checkIndex > base)
8093 base = ctxt->checkIndex;
8094 if (in->buf == NULL) {
8098 buf = in->buf->buffer->content;
8099 len = in->buf->buffer->use;
8101 /* take into account the sequence length */
8102 if (third) len -= 2;
8103 else if (next) len --;
8104 for (;base < len;base++) {
8105 if (buf[base] == first) {
8107 if ((buf[base + 1] != next) ||
8108 (buf[base + 2] != third)) continue;
8109 } else if (next != 0) {
8110 if (buf[base + 1] != next) continue;
8112 ctxt->checkIndex = 0;
8115 xmlGenericError(xmlGenericErrorContext,
8116 "PP: lookup '%c' found at %d\n",
8118 else if (third == 0)
8119 xmlGenericError(xmlGenericErrorContext,
8120 "PP: lookup '%c%c' found at %d\n",
8123 xmlGenericError(xmlGenericErrorContext,
8124 "PP: lookup '%c%c%c' found at %d\n",
8125 first, next, third, base);
8127 return(base - (in->cur - in->base));
8130 ctxt->checkIndex = base;
8133 xmlGenericError(xmlGenericErrorContext,
8134 "PP: lookup '%c' failed\n", first);
8135 else if (third == 0)
8136 xmlGenericError(xmlGenericErrorContext,
8137 "PP: lookup '%c%c' failed\n", first, next);
8139 xmlGenericError(xmlGenericErrorContext,
8140 "PP: lookup '%c%c%c' failed\n", first, next, third);
/*
 * Core state machine of the push (progressive) parser.
 *
 * Dispatches on ctxt->instate and parses as much as the data buffered so
 * far allows. Before parsing a construct, most states call
 * xmlParseLookupSequence() to check that its terminator is already in the
 * buffer (the closing > of a tag, the ?> of a PI, the --> of a comment,
 * the ; of a reference, the ]]> of a CDATA section).
 *
 * State transitions visible in this listing:
 *   START         -> MISC, or EOF on an empty document or an unsupported
 *                    encoding
 *   MISC          -> MISC (comment), DTD or PROLOG (after the DOCTYPE),
 *                    START_TAG otherwise
 *   PROLOG        -> PROLOG (comment), START_TAG otherwise
 *   START_TAG     -> CONTENT, EPILOG (empty element with an empty name
 *                    stack), EOF on error
 *   CONTENT       -> CONTENT, CDATA_SECTION, END_TAG, START_TAG, or EOF
 *                    when no input was consumed
 *   CDATA_SECTION -> CONTENT once the terminator was found
 *   END_TAG       -> EPILOG when ctxt->name is NULL afterwards, else CONTENT
 *   DTD           -> PROLOG once the whole internal subset is buffered
 *   EPILOG        -> EPILOG (PI, comment), EOF on any other content
 *   COMMENT, PI, ENTITY_DECL, ENTITY_VALUE, ATTRIBUTE_VALUE, SYSTEM_LITERAL,
 *   PUBLIC_LITERAL and IGNORE are reported as internal errors and replaced
 *   by a fallback state.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering. The
 * local declarations, the uses of @terminate, most goto/break statements
 * and the preprocessor guards around the trace output are not shown, so
 * the comments added here describe only what the visible lines establish.
 * NOTE(review): the ENTITY_VALUE case sets instate to XML_PARSER_CONTENT
 * while its trace message announces DTD - confirm which one is intended.
 * NOTE(review): the IGNORE internal-error message has no trailing newline,
 * unlike the other internal-error messages.
 * NOTE(review): the trace message for a failed internal-subset lookup
 * reads filed where failed is presumably meant.
 */
8146 * xmlParseTryOrFinish:
8147 * @ctxt: an XML parser context
8148 * @terminate: last chunk indicator
8150 * Try to progress on parsing
8152 * Returns zero if no parsing was possible
8155 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
/* Trace the state on entry (presumably debug builds only; the guard is not visible here). */
8161 switch (ctxt->instate) {
8162 case XML_PARSER_EOF:
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: try EOF\n"); break;
8165 case XML_PARSER_START:
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: try START\n"); break;
8168 case XML_PARSER_MISC:
8169 xmlGenericError(xmlGenericErrorContext,
8170 "PP: try MISC\n");break;
8171 case XML_PARSER_COMMENT:
8172 xmlGenericError(xmlGenericErrorContext,
8173 "PP: try COMMENT\n");break;
8174 case XML_PARSER_PROLOG:
8175 xmlGenericError(xmlGenericErrorContext,
8176 "PP: try PROLOG\n");break;
8177 case XML_PARSER_START_TAG:
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: try START_TAG\n");break;
8180 case XML_PARSER_CONTENT:
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: try CONTENT\n");break;
8183 case XML_PARSER_CDATA_SECTION:
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: try CDATA_SECTION\n");break;
8186 case XML_PARSER_END_TAG:
8187 xmlGenericError(xmlGenericErrorContext,
8188 "PP: try END_TAG\n");break;
8189 case XML_PARSER_ENTITY_DECL:
8190 xmlGenericError(xmlGenericErrorContext,
8191 "PP: try ENTITY_DECL\n");break;
8192 case XML_PARSER_ENTITY_VALUE:
8193 xmlGenericError(xmlGenericErrorContext,
8194 "PP: try ENTITY_VALUE\n");break;
8195 case XML_PARSER_ATTRIBUTE_VALUE:
8196 xmlGenericError(xmlGenericErrorContext,
8197 "PP: try ATTRIBUTE_VALUE\n");break;
8198 case XML_PARSER_DTD:
8199 xmlGenericError(xmlGenericErrorContext,
8200 "PP: try DTD\n");break;
8201 case XML_PARSER_EPILOG:
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: try EPILOG\n");break;
8205 xmlGenericError(xmlGenericErrorContext,
8206 "PP: try PI\n");break;
8207 case XML_PARSER_IGNORE:
8208 xmlGenericError(xmlGenericErrorContext,
8209 "PP: try IGNORE\n");break;
/* Before dispatching: drop exhausted entity inputs and work out how many unread bytes are available. */
8217 * Pop-up of finished entities.
8219 while ((RAW == 0) && (ctxt->inputNr > 1))
8222 if (ctxt->input ==NULL) break;
8223 if (ctxt->input->buf == NULL)
8224 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8227 * If we are operating on converted input, try to flush
8228 * remainng chars to avoid them stalling in the non-converted
8231 if ((ctxt->input->buf->raw != NULL) &&
8232 (ctxt->input->buf->raw->use > 0)) {
/* Offsets are saved and the pointers rebuilt after the push, presumably because the push may move buffer->content. */
8233 int base = ctxt->input->base -
8234 ctxt->input->buf->buffer->content;
8235 int current = ctxt->input->cur - ctxt->input->base;
8237 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8238 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8239 ctxt->input->cur = ctxt->input->base + current;
8241 &ctxt->input->buf->buffer->content[
8242 ctxt->input->buf->buffer->use];
8244 avail = ctxt->input->buf->buffer->use -
8245 (ctxt->input->cur - ctxt->input->base);
8249 switch (ctxt->instate) {
8250 case XML_PARSER_EOF:
8252 * Document parsing is done !
/* START: detect the encoding from the first 4 bytes, handle an optional XML declaration, then move on to MISC. */
8255 case XML_PARSER_START:
8256 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8258 xmlCharEncoding enc;
8261 * Very first chars read from the document flow.
8267 * Get the 4 first bytes and decode the charset
8268 * if enc != XML_CHAR_ENCODING_NONE
8269 * plug some encoding conversion routines.
8275 enc = xmlDetectCharEncoding(start, 4);
8276 if (enc != XML_CHAR_ENCODING_NONE) {
8277 xmlSwitchEncoding(ctxt, enc);
8282 cur = ctxt->input->cur[0];
8283 next = ctxt->input->cur[1];
8285 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8286 ctxt->sax->setDocumentLocator(ctxt->userData,
8287 &xmlDefaultSAXLocator);
8288 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8290 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8291 ctxt->wellFormed = 0;
8292 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8293 ctxt->instate = XML_PARSER_EOF;
8295 xmlGenericError(xmlGenericErrorContext,
8296 "PP: entering EOF\n");
8298 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8299 ctxt->sax->endDocument(ctxt->userData);
8302 if ((cur == '<') && (next == '?')) {
8303 /* PI or XML decl */
8304 if (avail < 5) return(ret);
8306 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8308 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8309 ctxt->sax->setDocumentLocator(ctxt->userData,
8310 &xmlDefaultSAXLocator);
8311 if ((ctxt->input->cur[2] == 'x') &&
8312 (ctxt->input->cur[3] == 'm') &&
8313 (ctxt->input->cur[4] == 'l') &&
8314 (IS_BLANK(ctxt->input->cur[5]))) {
8317 xmlGenericError(xmlGenericErrorContext,
8318 "PP: Parsing XML Decl\n");
8320 xmlParseXMLDecl(ctxt);
8321 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8323 * The XML REC instructs us to stop parsing right
8326 ctxt->instate = XML_PARSER_EOF;
8329 ctxt->standalone = ctxt->input->standalone;
8330 if ((ctxt->encoding == NULL) &&
8331 (ctxt->input->encoding != NULL))
8332 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8333 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8334 (!ctxt->disableSAX))
8335 ctxt->sax->startDocument(ctxt->userData);
8336 ctxt->instate = XML_PARSER_MISC;
8338 xmlGenericError(xmlGenericErrorContext,
8339 "PP: entering MISC\n");
8342 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8343 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8344 (!ctxt->disableSAX))
8345 ctxt->sax->startDocument(ctxt->userData);
8346 ctxt->instate = XML_PARSER_MISC;
8348 xmlGenericError(xmlGenericErrorContext,
8349 "PP: entering MISC\n");
8353 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8354 ctxt->sax->setDocumentLocator(ctxt->userData,
8355 &xmlDefaultSAXLocator);
8356 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8357 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8358 (!ctxt->disableSAX))
8359 ctxt->sax->startDocument(ctxt->userData);
8360 ctxt->instate = XML_PARSER_MISC;
8362 xmlGenericError(xmlGenericErrorContext,
8363 "PP: entering MISC\n");
/* MISC: accepts PIs, comments and the DOCTYPE declaration; anything else starts the root element. */
8367 case XML_PARSER_MISC:
8369 if (ctxt->input->buf == NULL)
8370 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8372 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8375 cur = ctxt->input->cur[0];
8376 next = ctxt->input->cur[1];
8377 if ((cur == '<') && (next == '?')) {
8379 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8382 xmlGenericError(xmlGenericErrorContext,
8383 "PP: Parsing PI\n");
8386 } else if ((cur == '<') && (next == '!') &&
8387 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8389 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8392 xmlGenericError(xmlGenericErrorContext,
8393 "PP: Parsing Comment\n");
8395 xmlParseComment(ctxt);
8396 ctxt->instate = XML_PARSER_MISC;
8397 } else if ((cur == '<') && (next == '!') &&
8398 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8399 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8400 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8401 (ctxt->input->cur[8] == 'E')) {
8403 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: Parsing internal subset\n");
8410 xmlParseDocTypeDecl(ctxt);
8412 ctxt->instate = XML_PARSER_DTD;
8414 xmlGenericError(xmlGenericErrorContext,
8415 "PP: entering DTD\n");
8419 * Create and update the external subset.
8422 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8423 (ctxt->sax->externalSubset != NULL))
8424 ctxt->sax->externalSubset(ctxt->userData,
8425 ctxt->intSubName, ctxt->extSubSystem,
8428 ctxt->instate = XML_PARSER_PROLOG;
8430 xmlGenericError(xmlGenericErrorContext,
8431 "PP: entering PROLOG\n");
8434 } else if ((cur == '<') && (next == '!') &&
8438 ctxt->instate = XML_PARSER_START_TAG;
8440 xmlGenericError(xmlGenericErrorContext,
8441 "PP: entering START_TAG\n");
/* IGNORE is reported as an internal error here and replaced by the DTD state. */
8445 case XML_PARSER_IGNORE:
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: internal error, state == IGNORE");
8448 ctxt->instate = XML_PARSER_DTD;
8450 xmlGenericError(xmlGenericErrorContext,
8451 "PP: entering DTD\n");
/* PROLOG: PIs and comments are parsed in place; anything else starts the root element. */
8454 case XML_PARSER_PROLOG:
8456 if (ctxt->input->buf == NULL)
8457 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8459 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8462 cur = ctxt->input->cur[0];
8463 next = ctxt->input->cur[1];
8464 if ((cur == '<') && (next == '?')) {
8466 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8469 xmlGenericError(xmlGenericErrorContext,
8470 "PP: Parsing PI\n");
8473 } else if ((cur == '<') && (next == '!') &&
8474 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8476 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8479 xmlGenericError(xmlGenericErrorContext,
8480 "PP: Parsing Comment\n");
8482 xmlParseComment(ctxt);
8483 ctxt->instate = XML_PARSER_PROLOG;
8484 } else if ((cur == '<') && (next == '!') &&
8488 ctxt->instate = XML_PARSER_START_TAG;
8490 xmlGenericError(xmlGenericErrorContext,
8491 "PP: entering START_TAG\n");
/* EPILOG: only PIs and comments are accepted after the root element; other content is an error that ends the document. */
8495 case XML_PARSER_EPILOG:
8497 if (ctxt->input->buf == NULL)
8498 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8500 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8503 cur = ctxt->input->cur[0];
8504 next = ctxt->input->cur[1];
8505 if ((cur == '<') && (next == '?')) {
8507 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8510 xmlGenericError(xmlGenericErrorContext,
8511 "PP: Parsing PI\n");
8514 ctxt->instate = XML_PARSER_EPILOG;
8515 } else if ((cur == '<') && (next == '!') &&
8516 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8518 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8521 xmlGenericError(xmlGenericErrorContext,
8522 "PP: Parsing Comment\n");
8524 xmlParseComment(ctxt);
8525 ctxt->instate = XML_PARSER_EPILOG;
8526 } else if ((cur == '<') && (next == '!') &&
8530 ctxt->errNo = XML_ERR_DOCUMENT_END;
8531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8532 ctxt->sax->error(ctxt->userData,
8533 "Extra content at the end of the document\n");
8534 ctxt->wellFormed = 0;
8535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8536 ctxt->instate = XML_PARSER_EOF;
8538 xmlGenericError(xmlGenericErrorContext,
8539 "PP: entering EOF\n");
8541 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8542 ctxt->sax->endDocument(ctxt->userData);
/* START_TAG: parses one start tag, pushes its name on the name stack and handles the empty-element form. */
8546 case XML_PARSER_START_TAG: {
8547 xmlChar *name, *oldname;
8549 if ((avail < 2) && (ctxt->inputNr == 1))
8551 cur = ctxt->input->cur[0];
8553 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8555 ctxt->sax->error(ctxt->userData,
8556 "Start tag expect, '<' not found\n");
8557 ctxt->wellFormed = 0;
8558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8559 ctxt->instate = XML_PARSER_EOF;
8561 xmlGenericError(xmlGenericErrorContext,
8562 "PP: entering EOF\n");
8564 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8565 ctxt->sax->endDocument(ctxt->userData);
8569 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8571 if (ctxt->spaceNr == 0)
8572 spacePush(ctxt, -1);
8574 spacePush(ctxt, *ctxt->space);
8575 name = xmlParseStartTag(ctxt);
8578 ctxt->instate = XML_PARSER_EOF;
8580 xmlGenericError(xmlGenericErrorContext,
8581 "PP: entering EOF\n");
8583 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8584 ctxt->sax->endDocument(ctxt->userData);
8587 namePush(ctxt, xmlStrdup(name));
8590 * [ VC: Root Element Type ]
8591 * The Name in the document type declaration must match
8592 * the element type of the root element.
8594 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8595 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8596 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8599 * Check for an Empty Element.
8601 if ((RAW == '/') && (NXT(1) == '>')) {
8603 if ((ctxt->sax != NULL) &&
8604 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8605 ctxt->sax->endElement(ctxt->userData, name);
8607 oldname = namePop(ctxt);
8609 if (oldname != NULL) {
8611 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8615 if (ctxt->name == NULL) {
8616 ctxt->instate = XML_PARSER_EPILOG;
8618 xmlGenericError(xmlGenericErrorContext,
8619 "PP: entering EPILOG\n");
8622 ctxt->instate = XML_PARSER_CONTENT;
8624 xmlGenericError(xmlGenericErrorContext,
8625 "PP: entering CONTENT\n");
8633 ctxt->errNo = XML_ERR_GT_REQUIRED;
8634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8635 ctxt->sax->error(ctxt->userData,
8636 "Couldn't find end of Start Tag %s\n",
8638 ctxt->wellFormed = 0;
8639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8642 * end of parsing of this node.
8645 oldname = namePop(ctxt);
8647 if (oldname != NULL) {
8649 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8655 ctxt->instate = XML_PARSER_CONTENT;
8657 xmlGenericError(xmlGenericErrorContext,
8658 "PP: entering CONTENT\n");
/* CONTENT: dispatch on the next characters (PI, comment, CDATA start, end tag, start tag, reference, character data); consuming nothing is treated as an internal error. */
8662 case XML_PARSER_CONTENT: {
8663 const xmlChar *test;
8665 if ((avail < 2) && (ctxt->inputNr == 1))
8667 cur = ctxt->input->cur[0];
8668 next = ctxt->input->cur[1];
8671 cons = ctxt->input->consumed;
8672 if ((cur == '<') && (next == '?')) {
8674 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8677 xmlGenericError(xmlGenericErrorContext,
8678 "PP: Parsing PI\n");
8681 } else if ((cur == '<') && (next == '!') &&
8682 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8684 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8687 xmlGenericError(xmlGenericErrorContext,
8688 "PP: Parsing Comment\n");
8690 xmlParseComment(ctxt);
8691 ctxt->instate = XML_PARSER_CONTENT;
8692 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8693 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8694 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8695 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8696 (ctxt->input->cur[8] == '[')) {
8698 ctxt->instate = XML_PARSER_CDATA_SECTION;
8700 xmlGenericError(xmlGenericErrorContext,
8701 "PP: entering CDATA_SECTION\n");
8704 } else if ((cur == '<') && (next == '!') &&
8707 } else if ((cur == '<') && (next == '/')) {
8708 ctxt->instate = XML_PARSER_END_TAG;
8710 xmlGenericError(xmlGenericErrorContext,
8711 "PP: entering END_TAG\n");
8714 } else if (cur == '<') {
8715 ctxt->instate = XML_PARSER_START_TAG;
8717 xmlGenericError(xmlGenericErrorContext,
8718 "PP: entering START_TAG\n");
8721 } else if (cur == '&') {
8723 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8726 xmlGenericError(xmlGenericErrorContext,
8727 "PP: Parsing Reference\n");
8729 xmlParseReference(ctxt);
8731 /* TODO Avoid the extra copy, handle directly !!! */
8733 * Goal of the following test is:
8734 * - minimize calls to the SAX 'character' callback
8735 * when they are mergeable
8736 * - handle an problem for isBlank when we only parse
8737 * a sequence of blank chars and the next one is
8738 * not available to check against '<' presence.
8739 * - tries to homogenize the differences in SAX
8740 * callbacks between the push and pull versions
8743 if ((ctxt->inputNr == 1) &&
8744 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8746 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8749 ctxt->checkIndex = 0;
8751 xmlGenericError(xmlGenericErrorContext,
8752 "PP: Parsing char data\n");
8754 xmlParseCharData(ctxt, 0);
8757 * Pop-up of finished entities.
8759 while ((RAW == 0) && (ctxt->inputNr > 1))
8761 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8762 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8764 ctxt->sax->error(ctxt->userData,
8765 "detected an error in element content\n");
8766 ctxt->wellFormed = 0;
8767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8768 ctxt->instate = XML_PARSER_EOF;
/* CDATA_SECTION: looks for the ]]> terminator; a large pending section is handed to cdataBlock in XML_PARSER_BIG_BUFFER_SIZE pieces, and once the terminator is found the bytes before it are delivered and the state returns to CONTENT. */
8773 case XML_PARSER_CDATA_SECTION: {
8775 * The Push mode need to have the SAX callback for
8776 * cdataBlock merge back contiguous callbacks.
8780 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8782 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8783 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8784 if (ctxt->sax->cdataBlock != NULL)
8785 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8786 XML_PARSER_BIG_BUFFER_SIZE);
8788 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8789 ctxt->checkIndex = 0;
8793 if ((ctxt->sax != NULL) && (base > 0) &&
8794 (!ctxt->disableSAX)) {
8795 if (ctxt->sax->cdataBlock != NULL)
8796 ctxt->sax->cdataBlock(ctxt->userData,
8797 ctxt->input->cur, base);
8800 ctxt->checkIndex = 0;
8801 ctxt->instate = XML_PARSER_CONTENT;
8803 xmlGenericError(xmlGenericErrorContext,
8804 "PP: entering CONTENT\n");
/* END_TAG: parses the end tag once its closing > is buffered; an empty name stack afterwards means the root element was closed. */
8809 case XML_PARSER_END_TAG:
8813 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8815 xmlParseEndTag(ctxt);
8816 if (ctxt->name == NULL) {
8817 ctxt->instate = XML_PARSER_EPILOG;
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: entering EPILOG\n");
8823 ctxt->instate = XML_PARSER_CONTENT;
8825 xmlGenericError(xmlGenericErrorContext,
8826 "PP: entering CONTENT\n");
/* DTD: the buffer is first scanned, tracking quoted strings, for the end of the internal subset; xmlParseInternalSubset is only called once that end was found, and ctxt->checkIndex records how far the scan went otherwise. */
8830 case XML_PARSER_DTD: {
8832 * Sorry but progressive parsing of the internal subset
8833 * is not expected to be supported. We first check that
8834 * the full content of the internal subset is available and
8835 * the parsing is launched only at that point.
8836 * Internal subset ends up with "']' S? '>'" in an unescaped
8837 * section and not in a ']]>' sequence which are conditional
8838 * sections (whoever argued to keep that crap in XML deserve
8839 * a place in hell !).
8845 base = ctxt->input->cur - ctxt->input->base;
8846 if (base < 0) return(0);
8847 if (ctxt->checkIndex > base)
8848 base = ctxt->checkIndex;
8849 buf = ctxt->input->buf->buffer->content;
8850 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8853 if (buf[base] == quote)
8857 if (buf[base] == '"') {
8861 if (buf[base] == '\'') {
8865 if (buf[base] == ']') {
8866 if ((unsigned int) base +1 >=
8867 ctxt->input->buf->buffer->use)
8869 if (buf[base + 1] == ']') {
8870 /* conditional crap, skip both ']' ! */
8875 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8877 if (buf[base + i] == '>')
8878 goto found_end_int_subset;
8884 * We didn't found the end of the Internal subset
8887 ctxt->checkIndex = base;
8890 xmlGenericError(xmlGenericErrorContext,
8891 "PP: lookup of int subset end filed\n");
8895 found_end_int_subset:
8896 xmlParseInternalSubset(ctxt);
8898 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8899 (ctxt->sax->externalSubset != NULL))
8900 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8901 ctxt->extSubSystem, ctxt->extSubURI);
8903 ctxt->instate = XML_PARSER_PROLOG;
8904 ctxt->checkIndex = 0;
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: entering PROLOG\n");
8911 case XML_PARSER_COMMENT:
8912 xmlGenericError(xmlGenericErrorContext,
8913 "PP: internal error, state == COMMENT\n");
8914 ctxt->instate = XML_PARSER_CONTENT;
8916 xmlGenericError(xmlGenericErrorContext,
8917 "PP: entering CONTENT\n");
8921 xmlGenericError(xmlGenericErrorContext,
8922 "PP: internal error, state == PI\n");
8923 ctxt->instate = XML_PARSER_CONTENT;
8925 xmlGenericError(xmlGenericErrorContext,
8926 "PP: entering CONTENT\n");
8929 case XML_PARSER_ENTITY_DECL:
8930 xmlGenericError(xmlGenericErrorContext,
8931 "PP: internal error, state == ENTITY_DECL\n");
8932 ctxt->instate = XML_PARSER_DTD;
8934 xmlGenericError(xmlGenericErrorContext,
8935 "PP: entering DTD\n");
8938 case XML_PARSER_ENTITY_VALUE:
8939 xmlGenericError(xmlGenericErrorContext,
8940 "PP: internal error, state == ENTITY_VALUE\n");
8941 ctxt->instate = XML_PARSER_CONTENT;
8943 xmlGenericError(xmlGenericErrorContext,
8944 "PP: entering DTD\n");
8947 case XML_PARSER_ATTRIBUTE_VALUE:
8948 xmlGenericError(xmlGenericErrorContext,
8949 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8950 ctxt->instate = XML_PARSER_START_TAG;
8952 xmlGenericError(xmlGenericErrorContext,
8953 "PP: entering START_TAG\n");
8956 case XML_PARSER_SYSTEM_LITERAL:
8957 xmlGenericError(xmlGenericErrorContext,
8958 "PP: internal error, state == SYSTEM_LITERAL\n");
8959 ctxt->instate = XML_PARSER_START_TAG;
8961 xmlGenericError(xmlGenericErrorContext,
8962 "PP: entering START_TAG\n");
8965 case XML_PARSER_PUBLIC_LITERAL:
8966 xmlGenericError(xmlGenericErrorContext,
8967 "PP: internal error, state == PUBLIC_LITERAL\n");
8968 ctxt->instate = XML_PARSER_START_TAG;
8970 xmlGenericError(xmlGenericErrorContext,
8971 "PP: entering START_TAG\n");
8978 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
/*
 * Public entry point of the push parser: appends @size bytes from @chunk
 * to the input buffer of @ctxt and runs xmlParseTryOrFinish() on what is
 * buffered.
 *
 * - With data, a usable input buffer and a parser not yet at EOF, the bytes
 *   are pushed, the input base/cur pointers are rebuilt from the saved
 *   offsets, and parsing is attempted when @terminate is set or more than
 *   80 bytes are buffered.
 * - Otherwise, if the parser is not at EOF, pending raw bytes are run
 *   through the encoder when one is installed and parsing is attempted;
 *   the encoder error path returns XML_ERR_INVALID_ENCODING.
 * - The termination check reports XML_ERR_DOCUMENT_END when the parser
 *   stops in a state other than EOF or EPILOG, or in EPILOG with unread
 *   bytes left; endDocument is fired unless EOF was already reached, and
 *   the state is then forced to EOF.
 *
 * Returns ctxt->errNo cast to xmlParserErrors.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering (the
 * return type, the last parameter and several conditions are not shown).
 * NOTE(review): ctxt itself is never tested against NULL, and the
 * termination check reads ctxt->input->buf without the ctxt->input != NULL
 * test used in the first condition - confirm callers guarantee both.
 */
8985 * @ctxt: an XML parser context
8986 * @chunk: an char array
8987 * @size: the size in byte of the chunk
8988 * @terminate: last chunk indicator
8990 * Parse a Chunk of memory
8992 * Returns zero if no error, the xmlParserErrors otherwise.
8995 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8997 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8998 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/* remember the offsets so the pointers can be rebuilt after the push */
8999 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9000 int cur = ctxt->input->cur - ctxt->input->base;
9002 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9003 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9004 ctxt->input->cur = ctxt->input->base + cur;
9006 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
9008 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/* small inputs stay buffered until more data or the final chunk arrives */
9011 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9012 xmlParseTryOrFinish(ctxt, terminate);
9013 } else if (ctxt->instate != XML_PARSER_EOF) {
9014 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9015 xmlParserInputBufferPtr in = ctxt->input->buf;
/* convert whatever is still pending in the raw buffer */
9016 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9017 (in->raw != NULL)) {
9020 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9022 xmlGenericError(xmlGenericErrorContext,
9023 "xmlParseChunk: encoder error\n");
9024 return(XML_ERR_INVALID_ENCODING);
9029 xmlParseTryOrFinish(ctxt, terminate);
9032 * Check for termination
/* avail is the number of unread bytes left in the current input */
9035 if (ctxt->input->buf == NULL)
9036 avail = ctxt->input->length -
9037 (ctxt->input->cur - ctxt->input->base);
9039 avail = ctxt->input->buf->buffer->use -
9040 (ctxt->input->cur - ctxt->input->base);
9042 if ((ctxt->instate != XML_PARSER_EOF) &&
9043 (ctxt->instate != XML_PARSER_EPILOG)) {
9044 ctxt->errNo = XML_ERR_DOCUMENT_END;
9045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9046 ctxt->sax->error(ctxt->userData,
9047 "Extra content at the end of the document\n");
9048 ctxt->wellFormed = 0;
9049 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9051 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9052 ctxt->errNo = XML_ERR_DOCUMENT_END;
9053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9054 ctxt->sax->error(ctxt->userData,
9055 "Extra content at the end of the document\n");
9056 ctxt->wellFormed = 0;
9057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9060 if (ctxt->instate != XML_PARSER_EOF) {
9061 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9062 ctxt->sax->endDocument(ctxt->userData);
9064 ctxt->instate = XML_PARSER_EOF;
9066 return((xmlParserErrors) ctxt->errNo);
9069 /************************************************************************
9071 * I/O front end functions to the parser *
9073 ************************************************************************/
9077 * @ctxt: an XML parser context
9079 * Blocks further parser processing
9082 xmlStopParser(xmlParserCtxtPtr ctxt) {
9083 ctxt->instate = XML_PARSER_EOF;
9084 if (ctxt->input != NULL)
9085 ctxt->input->cur = BAD_CAST"";
/*
 * Builds a parser context ready for push-mode parsing.
 *
 * Steps visible in this listing, in order:
 *  - when @chunk is non-NULL and @size >= 4, the encoding is detected from
 *    the chunk with xmlDetectCharEncoding();
 *  - an input buffer is allocated for that encoding and a new parser
 *    context is created;
 *  - a private copy of the caller SAX handler block is allocated with
 *    xmlMalloc() and filled with memcpy(); @user_data replaces
 *    ctxt->userData only when it is non-NULL;
 *  - ctxt->directory is derived from @filename with
 *    xmlParserGetDirectory(), or left NULL without a file name;
 *  - a new input stream is created, named with xmlCanonicPath(@filename),
 *    attached to the buffer and pushed on the context;
 *  - the initial chunk, if any, is pushed into the buffer and the input
 *    base/cur pointers are rebuilt from the saved offsets;
 *  - when an encoding was detected, xmlSwitchEncoding() is called last.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering; the
 * return type, the error handling after xmlNewParserCtxt() and the final
 * return are not shown.
 * NOTE(review): the description below mentions @buffer while the parameter
 * is named @chunk.
 * NOTE(review): when xmlNewInputStream() fails, the visible lines free only
 * the context; buf does not appear to be released on that path - confirm.
 */
9089 * xmlCreatePushParserCtxt:
9090 * @sax: a SAX handler
9091 * @user_data: The user data returned on SAX callbacks
9092 * @chunk: a pointer to an array of chars
9093 * @size: number of chars in the array
9094 * @filename: an optional file name or URI
9096 * Create a parser context for using the XML parser in push mode.
9097 * If @buffer and @size are non-NULL, the data is used to detect
9098 * the encoding. The remaining characters will be parsed so they
9099 * don't need to be fed in again through xmlParseChunk.
9100 * To allow content encoding detection, @size should be >= 4
9101 * The value of @filename is used for fetching external entities
9102 * and error/warning reports.
9104 * Returns the new parser context or NULL
9108 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9109 const char *chunk, int size, const char *filename) {
9110 xmlParserCtxtPtr ctxt;
9111 xmlParserInputPtr inputStream;
9112 xmlParserInputBufferPtr buf;
9113 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9116 * plug some encoding conversion routines
9118 if ((chunk != NULL) && (size >= 4))
9119 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9121 buf = xmlAllocParserInputBuffer(enc);
9122 if (buf == NULL) return(NULL);
9124 ctxt = xmlNewParserCtxt();
9130 if (ctxt->sax != &xmlDefaultSAXHandler)
9132 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9133 if (ctxt->sax == NULL) {
9138 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9139 if (user_data != NULL)
9140 ctxt->userData = user_data;
9142 if (filename == NULL) {
9143 ctxt->directory = NULL;
9145 ctxt->directory = xmlParserGetDirectory(filename);
9148 inputStream = xmlNewInputStream(ctxt);
9149 if (inputStream == NULL) {
9150 xmlFreeParserCtxt(ctxt);
9154 if (filename == NULL)
9155 inputStream->filename = NULL;
9157 inputStream->filename = (char *)
9158 xmlCanonicPath((const xmlChar *) filename);
9159 inputStream->buf = buf;
9160 inputStream->base = inputStream->buf->buffer->content;
9161 inputStream->cur = inputStream->buf->buffer->content;
9163 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
9165 inputPush(ctxt, inputStream);
9167 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9168 (ctxt->input->buf != NULL)) {
9169 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9170 int cur = ctxt->input->cur - ctxt->input->base;
9172 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9174 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9175 ctxt->input->cur = ctxt->input->base + cur;
9177 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
9179 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9183 if (enc != XML_CHAR_ENCODING_NONE) {
9184 xmlSwitchEncoding(ctxt, enc);
/*
 * Builds a parser context that reads its input through caller-supplied
 * I/O callbacks.
 *
 * An input buffer is created from @ioread, @ioclose and @ioctx for the
 * encoding @enc, a new parser context is allocated, a private copy of the
 * caller SAX handler block is installed (xmlMalloc + memcpy), @user_data
 * replaces ctxt->userData only when it is non-NULL, and an I/O input
 * stream wrapping the buffer is pushed on the context.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering; the
 * return type, the error handling after xmlNewParserCtxt() and the final
 * return are not shown.
 * NOTE(review): when xmlNewIOInputStream() fails, the visible lines free
 * only the context; buf does not appear to be released on that path -
 * confirm.
 */
9191 * xmlCreateIOParserCtxt:
9192 * @sax: a SAX handler
9193 * @user_data: The user data returned on SAX callbacks
9194 * @ioread: an I/O read function
9195 * @ioclose: an I/O close function
9196 * @ioctx: an I/O handler
9197 * @enc: the charset encoding if known
9199 * Create a parser context for using the XML parser with an existing
9202 * Returns the new parser context or NULL
9205 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9206 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9207 void *ioctx, xmlCharEncoding enc) {
9208 xmlParserCtxtPtr ctxt;
9209 xmlParserInputPtr inputStream;
9210 xmlParserInputBufferPtr buf;
/* wrap the caller callbacks in an input buffer first */
9212 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9213 if (buf == NULL) return(NULL);
9215 ctxt = xmlNewParserCtxt();
9221 if (ctxt->sax != &xmlDefaultSAXHandler)
/* the context gets its own copy of the handler block */
9223 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9224 if (ctxt->sax == NULL) {
9229 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9230 if (user_data != NULL)
9231 ctxt->userData = user_data;
9234 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9235 if (inputStream == NULL) {
9236 xmlFreeParserCtxt(ctxt);
9239 inputPush(ctxt, inputStream);
9244 /************************************************************************
9246 * Front ends when parsing a DTD *
9248 ************************************************************************/
/*
 * Loads and parses a DTD read from the input buffer @input.
 *
 * A temporary parser context is created, an I/O input stream built from
 * @input is pushed on it, and a scratch document with an external subset
 * named none is created to receive the declarations. When @enc is
 * XML_CHAR_ENCODING_NONE the encoding is detected from the first 4 bytes
 * and switched to if recognised. After xmlParseExternalSubset() the
 * external subset is detached from the scratch document and returned when
 * the input was well formed; the scratch document and the context are
 * freed in all cases shown.
 *
 * ctxt->sax is reset to NULL before xmlFreeParserCtxt() whenever the caller
 * supplied @sax, presumably so that the caller handler block is not freed
 * together with the context.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering; the
 * return type, the checks on @input and on xmlNewParserCtxt() and the final
 * return are not shown.
 * NOTE(review): the result of xmlNewDoc() is dereferenced on the following
 * line without a NULL test, although ctxt->myDoc is tested against NULL
 * after parsing - confirm whether an allocation failure can reach this.
 */
9252 * @sax: the SAX handler block or NULL
9253 * @input: an Input Buffer
9254 * @enc: the charset encoding if known
9256 * Load and parse a DTD
9258 * Returns the resulting xmlDtdPtr or NULL in case of error.
9259 * @input will be freed at parsing end.
9263 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9264 xmlCharEncoding enc) {
9265 xmlDtdPtr ret = NULL;
9266 xmlParserCtxtPtr ctxt;
9267 xmlParserInputPtr pinput = NULL;
9273 ctxt = xmlNewParserCtxt();
9279 * Set-up the SAX context
9282 if (ctxt->sax != NULL)
9285 ctxt->userData = NULL;
9289 * generate a parser input from the I/O handler
9292 pinput = xmlNewIOInputStream(ctxt, input, enc);
9293 if (pinput == NULL) {
9294 if (sax != NULL) ctxt->sax = NULL;
9295 xmlFreeParserCtxt(ctxt);
9300 * plug some encoding conversion routines here.
9302 xmlPushInput(ctxt, pinput);
9304 pinput->filename = NULL;
9307 pinput->base = ctxt->input->cur;
9308 pinput->cur = ctxt->input->cur;
9309 pinput->free = NULL;
9312 * let's parse that entity knowing it's an external subset.
/* scratch document that owns the external subset while it is being parsed */
9315 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9316 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9317 BAD_CAST "none", BAD_CAST "none");
9319 if (enc == XML_CHAR_ENCODING_NONE) {
9321 * Get the 4 first bytes and decode the charset
9322 * if enc != XML_CHAR_ENCODING_NONE
9323 * plug some encoding conversion routines.
9329 enc = xmlDetectCharEncoding(start, 4);
9330 if (enc != XML_CHAR_ENCODING_NONE) {
9331 xmlSwitchEncoding(ctxt, enc);
9335 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9337 if (ctxt->myDoc != NULL) {
/* detach the subset so that freeing the scratch document does not free the result */
9338 if (ctxt->wellFormed) {
9339 ret = ctxt->myDoc->extSubset;
9340 ctxt->myDoc->extSubset = NULL;
9344 xmlFreeDoc(ctxt->myDoc);
9347 if (sax != NULL) ctxt->sax = NULL;
9348 xmlFreeParserCtxt(ctxt);
/*
 * Loads and parses the external subset identified by @ExternalID and
 * @SystemID.
 *
 * Returns NULL at once when both identifiers are NULL. Otherwise a
 * temporary parser context is created, the entity is loaded through the
 * resolveEntity SAX callback, the resulting input is pushed on the context
 * and its encoding is detected from the first 4 bytes. A scratch document
 * with an external subset named none is created to receive the
 * declarations; after xmlParseExternalSubset() that subset is detached and
 * returned when the input was well formed. The scratch document and the
 * context are freed in all cases shown.
 *
 * ctxt->sax is reset to NULL before xmlFreeParserCtxt() whenever the caller
 * supplied @sax, presumably so that the caller handler block is not freed
 * together with the context.
 *
 * NOTE(review): this listing has gaps in its embedded line numbering; the
 * return type, the check on xmlNewParserCtxt() and the final return are
 * not shown.
 * NOTE(review): xmlSwitchEncoding() is called here with whatever
 * xmlDetectCharEncoding() returned, without a test against
 * XML_CHAR_ENCODING_NONE - confirm this is intended.
 * NOTE(review): the result of xmlNewDoc() is dereferenced on the following
 * line without a NULL test, although ctxt->myDoc is tested against NULL
 * after parsing - confirm whether an allocation failure can reach this.
 */
9355 * @sax: the SAX handler block
9356 * @ExternalID: a NAME* containing the External ID of the DTD
9357 * @SystemID: a NAME* containing the URL to the DTD
9359 * Load and parse an external subset.
9361 * Returns the resulting xmlDtdPtr or NULL in case of error.
9365 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9366 const xmlChar *SystemID) {
9367 xmlDtdPtr ret = NULL;
9368 xmlParserCtxtPtr ctxt;
9369 xmlParserInputPtr input = NULL;
9370 xmlCharEncoding enc;
9372 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9374 ctxt = xmlNewParserCtxt();
9380 * Set-up the SAX context
9383 if (ctxt->sax != NULL)
9386 ctxt->userData = NULL;
9390 * Ask the Entity resolver to load the damn thing
9393 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9394 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9395 if (input == NULL) {
9396 if (sax != NULL) ctxt->sax = NULL;
9397 xmlFreeParserCtxt(ctxt);
9402 * plug some encoding conversion routines here.
9404 xmlPushInput(ctxt, input);
9405 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9406 xmlSwitchEncoding(ctxt, enc);
/* fall back to the system identifier as the file name of the input */
9408 if (input->filename == NULL)
9409 input->filename = (char *) xmlStrdup(SystemID);
9412 input->base = ctxt->input->cur;
9413 input->cur = ctxt->input->cur;
9417 * let's parse that entity knowing it's an external subset.
/* scratch document that owns the external subset while it is being parsed */
9420 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9421 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9422 ExternalID, SystemID);
9423 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9425 if (ctxt->myDoc != NULL) {
/* detach the subset so that freeing the scratch document does not free the result */
9426 if (ctxt->wellFormed) {
9427 ret = ctxt->myDoc->extSubset;
9428 ctxt->myDoc->extSubset = NULL;
9432 xmlFreeDoc(ctxt->myDoc);
9435 if (sax != NULL) ctxt->sax = NULL;
9436 xmlFreeParserCtxt(ctxt);
9443 * @ExternalID: a NAME* containing the External ID of the DTD
9444 * @SystemID: a NAME* containing the URL to the DTD
9446 * Load and parse an external subset.
9448 * Returns the resulting xmlDtdPtr or NULL in case of error.
9452 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
/* Convenience wrapper: xmlSAXParseDTD() with no custom SAX handler (sax == NULL). */
9453 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9456 /************************************************************************
9458 * Front ends when parsing an Entity *
9460 ************************************************************************/
9463 * xmlParseCtxtExternalEntity:
9464 * @ctx: the existing parsing context
9465 * @URL: the URL for the entity to load
9466 * @ID: the System ID for the entity to load
9467 * @lst: the return value for the set of parsed nodes
9469 * Parse an external general entity within an existing parsing context
9470 * An external general parsed entity is well-formed if it matches the
9471 * production labeled extParsedEnt.
9473 * [78] extParsedEnt ::= TextDecl? content
9475 * Returns 0 if the entity is well formed, -1 in case of args problem and
9476 * the parser error code otherwise
9480 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
9481 const xmlChar *ID, xmlNodePtr *lst) {
/*
 * Parse the external entity identified by URL/ID in a separate child
 * context that borrows its SAX handler, document and parsing options
 * from the existing context `ctx`.
 */
9482 xmlParserCtxtPtr ctxt;
9484 xmlSAXHandlerPtr oldsax = NULL;
9487 xmlCharEncoding enc;
/* Nesting deeper than 40 levels is reported as an entity loop. */
9489 if (ctx->depth > 40) {
9490 return(XML_ERR_ENTITY_LOOP);
9495 if ((URL == NULL) && (ID == NULL))
9497 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9501 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9502 if (ctxt == NULL) return(-1);
/* SAX callbacks receive the child context itself as user data. */
9503 ctxt->userData = ctxt;
9504 ctxt->_private = ctx->_private;
/* The child context uses the parent's SAX handler table. */
9506 ctxt->sax = ctx->sax;
9507 newDoc = xmlNewDoc(BAD_CAST "1.0");
9508 if (newDoc == NULL) {
9509 xmlFreeParserCtxt(ctxt);
/* The scratch document borrows (does not copy) the parent's DTD subsets. */
9512 if (ctx->myDoc != NULL) {
9513 newDoc->intSubset = ctx->myDoc->intSubset;
9514 newDoc->extSubset = ctx->myDoc->extSubset;
/*
 * NOTE(review): verify that ctx->myDoc cannot be NULL when its URL is
 * read here -- the != NULL test a few lines up suggests it might be.
 */
9516 if (ctx->myDoc->URL != NULL) {
9517 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
/* Parsed content is collected under a temporary "pseudoroot" element. */
9519 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9520 if (newDoc->children == NULL) {
9522 xmlFreeParserCtxt(ctxt);
/* Drop the borrowed subset pointers (presumably so they are not freed with newDoc). */
9523 newDoc->intSubset = NULL;
9524 newDoc->extSubset = NULL;
9528 nodePush(ctxt, newDoc->children);
9529 if (ctx->myDoc == NULL) {
9530 ctxt->myDoc = newDoc;
9532 ctxt->myDoc = ctx->myDoc;
9533 newDoc->children->doc = ctx->myDoc;
9537 * Get the 4 first bytes and decode the charset
9538 * if enc != XML_CHAR_ENCODING_NONE
9539 * plug some encoding conversion routines.
9546 enc = xmlDetectCharEncoding(start, 4);
9547 if (enc != XML_CHAR_ENCODING_NONE) {
9548 xmlSwitchEncoding(ctxt, enc);
9552 * Parse a possible text declaration first
/* A text declaration is recognised as "<?xml" followed by a blank character. */
9554 if ((RAW == '<') && (NXT(1) == '?') &&
9555 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9556 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9557 xmlParseTextDecl(ctxt);
9561 * Doing validity checking on chunk doesn't make sense
/* Inherit the parent's options; the child runs one nesting level deeper. */
9563 ctxt->instate = XML_PARSER_CONTENT;
9564 ctxt->validate = ctx->validate;
9565 ctxt->loadsubset = ctx->loadsubset;
9566 ctxt->depth = ctx->depth + 1;
9567 ctxt->replaceEntities = ctx->replaceEntities;
9568 if (ctxt->validate) {
9569 ctxt->vctxt.error = ctx->vctxt.error;
9570 ctxt->vctxt.warning = ctx->vctxt.warning;
9572 ctxt->vctxt.error = NULL;
9573 ctxt->vctxt.warning = NULL;
9575 ctxt->vctxt.nodeTab = NULL;
9576 ctxt->vctxt.nodeNr = 0;
9577 ctxt->vctxt.nodeMax = 0;
9578 ctxt->vctxt.node = NULL;
9580 xmlParseContent(ctxt);
/*
 * Post-parse checks: a leftover "</" means an unmatched end tag, any
 * other leftover input is extra content, and a node stack that did not
 * return to the pseudoroot is also reported as not well balanced.
 */
9582 if ((RAW == '<') && (NXT(1) == '/')) {
9583 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9585 ctxt->sax->error(ctxt->userData,
9586 "chunk is not well balanced\n");
9587 ctxt->wellFormed = 0;
9588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9589 } else if (RAW != 0) {
9590 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9592 ctxt->sax->error(ctxt->userData,
9593 "extra content at the end of well balanced chunk\n");
9594 ctxt->wellFormed = 0;
9595 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9597 if (ctxt->node != newDoc->children) {
9598 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9600 ctxt->sax->error(ctxt->userData,
9601 "chunk is not well balanced\n");
9602 ctxt->wellFormed = 0;
9603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9606 if (!ctxt->wellFormed) {
9607 if (ctxt->errNo == 0)
9616 * Return the newly created nodeset after unlinking it from
9617 * they pseudo parent.
9619 cur = newDoc->children->children;
9621 while (cur != NULL) {
/* The pseudoroot no longer references the nodes that were walked above. */
9625 newDoc->children->children = NULL;
9630 xmlFreeParserCtxt(ctxt);
/* Drop the borrowed subset pointers again on the normal exit path. */
9631 newDoc->intSubset = NULL;
9632 newDoc->extSubset = NULL;
9639 * xmlParseExternalEntityPrivate:
9640 * @doc: the document the chunk pertains to
9641 * @oldctxt: the previous parser context if available
9642 * @sax: the SAX handler block (possibly NULL)
9643 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9644 * @depth: Used for loop detection, use 0
9645 * @URL: the URL for the entity to load
9646 * @ID: the System ID for the entity to load
9647 * @list: the return value for the set of parsed nodes
9649 * Private version of xmlParseExternalEntity()
9651 * Returns 0 if the entity is well formed, -1 in case of args problem and
9652 * the parser error code otherwise
9656 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9657 xmlSAXHandlerPtr sax,
9658 void *user_data, int depth, const xmlChar *URL,
9659 const xmlChar *ID, xmlNodePtr *list) {
/*
 * Parse the external entity identified by URL/ID in a new context.
 * When a previous context `oldctxt` is given, its options and node_seq
 * bookkeeping are carried into the new context and written back at the end.
 */
9660 xmlParserCtxtPtr ctxt;
9662 xmlSAXHandlerPtr oldsax = NULL;
9665 xmlCharEncoding enc;
9668 return(XML_ERR_ENTITY_LOOP);
9675 if ((URL == NULL) && (ID == NULL))
9677 if (doc == NULL) /* @@ relax but check for dereferences */
9681 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9682 if (ctxt == NULL) return(-1);
/* Default user data is the context itself; a non-NULL user_data overrides it below. */
9683 ctxt->userData = ctxt;
9684 if (oldctxt != NULL) {
9685 ctxt->_private = oldctxt->_private;
9686 ctxt->loadsubset = oldctxt->loadsubset;
9687 ctxt->validate = oldctxt->validate;
9688 ctxt->external = oldctxt->external;
9689 ctxt->record_info = oldctxt->record_info;
/* node_seq is shared with the parent: the buffer pointer is copied, not the data. */
9690 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9691 ctxt->node_seq.length = oldctxt->node_seq.length;
9692 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
9695 * Doing validity checking on chunk without context
9696 * doesn't make sense
9698 ctxt->_private = NULL;
9701 ctxt->loadsubset = 0;
9706 if (user_data != NULL)
9707 ctxt->userData = user_data;
9709 newDoc = xmlNewDoc(BAD_CAST "1.0");
9710 if (newDoc == NULL) {
/*
 * Clear the borrowed node_seq fields before freeing the child context
 * (presumably so the parent's buffer is not released with it).
 */
9711 ctxt->node_seq.maximum = 0;
9712 ctxt->node_seq.length = 0;
9713 ctxt->node_seq.buffer = NULL;
9714 xmlFreeParserCtxt(ctxt);
/* The scratch document borrows (does not copy) the DTD subsets of `doc`. */
9718 newDoc->intSubset = doc->intSubset;
9719 newDoc->extSubset = doc->extSubset;
9721 if (doc->URL != NULL) {
9722 newDoc->URL = xmlStrdup(doc->URL);
/* Parsed content is collected under a temporary "pseudoroot" element. */
9724 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9725 if (newDoc->children == NULL) {
9728 ctxt->node_seq.maximum = 0;
9729 ctxt->node_seq.length = 0;
9730 ctxt->node_seq.buffer = NULL;
9731 xmlFreeParserCtxt(ctxt);
9732 newDoc->intSubset = NULL;
9733 newDoc->extSubset = NULL;
9737 nodePush(ctxt, newDoc->children);
9739 ctxt->myDoc = newDoc;
9742 newDoc->children->doc = doc;
9746 * Get the 4 first bytes and decode the charset
9747 * if enc != XML_CHAR_ENCODING_NONE
9748 * plug some encoding conversion routines.
9755 enc = xmlDetectCharEncoding(start, 4);
9756 if (enc != XML_CHAR_ENCODING_NONE) {
9757 xmlSwitchEncoding(ctxt, enc);
9761 * Parse a possible text declaration first
/* A text declaration is recognised as "<?xml" followed by a blank character. */
9763 if ((RAW == '<') && (NXT(1) == '?') &&
9764 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9765 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9766 xmlParseTextDecl(ctxt);
9769 ctxt->instate = XML_PARSER_CONTENT;
/* Unlike xmlParseCtxtExternalEntity(), the caller-supplied depth is used as is. */
9770 ctxt->depth = depth;
9772 xmlParseContent(ctxt);
/*
 * Post-parse checks: a leftover "</" means an unmatched end tag, any
 * other leftover input is extra content, and a node stack that did not
 * return to the pseudoroot is also reported as not well balanced.
 */
9774 if ((RAW == '<') && (NXT(1) == '/')) {
9775 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9777 ctxt->sax->error(ctxt->userData,
9778 "chunk is not well balanced\n");
9779 ctxt->wellFormed = 0;
9780 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9781 } else if (RAW != 0) {
9782 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9784 ctxt->sax->error(ctxt->userData,
9785 "extra content at the end of well balanced chunk\n");
9786 ctxt->wellFormed = 0;
9787 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9789 if (ctxt->node != newDoc->children) {
9790 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9792 ctxt->sax->error(ctxt->userData,
9793 "chunk is not well balanced\n");
9794 ctxt->wellFormed = 0;
9795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9798 if (!ctxt->wellFormed) {
9799 if (ctxt->errNo == 0)
9808 * Return the newly created nodeset after unlinking it from
9809 * they pseudo parent.
9811 cur = newDoc->children->children;
9813 while (cur != NULL) {
9817 newDoc->children->children = NULL;
/*
 * Hand the (possibly grown) node_seq storage back to the parent context.
 * NOTE(review): oldctxt can be NULL (it is NULL-tested above, and
 * xmlParseExternalEntity() passes NULL) -- confirm this write-back is
 * guarded.
 */
9823 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9824 oldctxt->node_seq.length = ctxt->node_seq.length;
9825 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
9826 ctxt->node_seq.maximum = 0;
9827 ctxt->node_seq.length = 0;
9828 ctxt->node_seq.buffer = NULL;
9829 xmlFreeParserCtxt(ctxt);
/* Drop the borrowed subset pointers (presumably so they are not freed with newDoc). */
9830 newDoc->intSubset = NULL;
9831 newDoc->extSubset = NULL;
9838 * xmlParseExternalEntity:
9839 * @doc: the document the chunk pertains to
9840 * @sax: the SAX handler block (possibly NULL)
9841 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9842 * @depth: Used for loop detection, use 0
9843 * @URL: the URL for the entity to load
9844 * @ID: the System ID for the entity to load
9845 * @lst: the return value for the set of parsed nodes
9847 * Parse an external general entity
9848 * An external general parsed entity is well-formed if it matches the
9849 * production labeled extParsedEnt.
9851 * [78] extParsedEnt ::= TextDecl? content
9853 * Returns 0 if the entity is well formed, -1 in case of args problem and
9854 * the parser error code otherwise
9858 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9859 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
/* Public entry point: forwards to the private version with no previous parser context (NULL). */
9860 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9865 * xmlParseBalancedChunkMemory:
9866 * @doc: the document the chunk pertains to
9867 * @sax: the SAX handler block (possibly NULL)
9868 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9869 * @depth: Used for loop detection, use 0
9870 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9871 * @lst: the return value for the set of parsed nodes
9873 * Parse a well-balanced chunk of an XML document
9874 * called by the parser
9875 * The allowed sequence for the Well Balanced Chunk is the one defined by
9876 * the content production in the XML grammar:
9878 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9880 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9881 * the parser error code otherwise
9885 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9886 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
/* Non-recovering variant: forwards to xmlParseBalancedChunkMemoryRecover() with recover == 0. */
9887 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9888 depth, string, lst, 0 );
9892 * xmlParseBalancedChunkMemoryInternal:
9893 * @oldctxt: the existing parsing context
9894 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9895 * @user_data: the user data field for the parser context
9896 * @lst: the return value for the set of parsed nodes
9899 * Parse a well-balanced chunk of an XML document
9900 * called by the parser
9901 * The allowed sequence for the Well Balanced Chunk is the one defined by
9902 * the content production in the XML grammar:
9904 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9906 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9907 * the parser error code otherwise
9909 * In case recover is set to 1, the nodelist will not be empty even if
9910 * the parsed chunk is not well balanced.
9913 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9914 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
/*
 * Parse `string` as a content chunk in a child memory context that
 * reuses the SAX handler, private data and (when present) the document
 * of the existing context `oldctxt`.
 */
9915 xmlParserCtxtPtr ctxt;
9916 xmlDocPtr newDoc = NULL;
9917 xmlSAXHandlerPtr oldsax = NULL;
9918 xmlNodePtr content = NULL;
/* Nesting deeper than 40 levels is reported as an entity loop. */
9922 if (oldctxt->depth > 40) {
9923 return(XML_ERR_ENTITY_LOOP);
/* The chunk length comes from xmlStrlen(), so `string` is read up to its terminating zero. */
9932 size = xmlStrlen(string);
9934 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9935 if (ctxt == NULL) return(-1);
/* User data is the caller's value when supplied; the context itself is the fallback. */
9936 if (user_data != NULL)
9937 ctxt->userData = user_data;
9939 ctxt->userData = ctxt;
/* The child context uses the parent's SAX handler table. */
9942 ctxt->sax = oldctxt->sax;
9943 ctxt->_private = oldctxt->_private;
/*
 * Without a parent document a scratch one is created; otherwise the
 * parent's document is reused and its current children are remembered
 * in `content` so they can be put back afterwards.
 */
9944 if (oldctxt->myDoc == NULL) {
9945 newDoc = xmlNewDoc(BAD_CAST "1.0");
9946 if (newDoc == NULL) {
9948 xmlFreeParserCtxt(ctxt);
9951 ctxt->myDoc = newDoc;
9953 ctxt->myDoc = oldctxt->myDoc;
9954 content = ctxt->myDoc->children;
/* The document's child list is replaced by a temporary "pseudoroot" element for the parse. */
9956 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
9957 BAD_CAST "pseudoroot", NULL);
9958 if (ctxt->myDoc->children == NULL) {
9960 xmlFreeParserCtxt(ctxt);
9965 nodePush(ctxt, ctxt->myDoc->children);
9966 ctxt->instate = XML_PARSER_CONTENT;
9967 ctxt->depth = oldctxt->depth + 1;
9970 ctxt->loadsubset = oldctxt->loadsubset;
9972 xmlParseContent(ctxt);
/*
 * Post-parse checks: a leftover "</" means an unmatched end tag, any
 * other leftover input is extra content, and a node stack that did not
 * return to the pseudoroot is also reported as not well balanced.
 */
9973 if ((RAW == '<') && (NXT(1) == '/')) {
9974 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9976 ctxt->sax->error(ctxt->userData,
9977 "chunk is not well balanced\n");
9978 ctxt->wellFormed = 0;
9979 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9980 } else if (RAW != 0) {
9981 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9983 ctxt->sax->error(ctxt->userData,
9984 "extra content at the end of well balanced chunk\n");
9985 ctxt->wellFormed = 0;
9986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9988 if (ctxt->node != ctxt->myDoc->children) {
9989 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9991 ctxt->sax->error(ctxt->userData,
9992 "chunk is not well balanced\n");
9993 ctxt->wellFormed = 0;
9994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
9997 if (!ctxt->wellFormed) {
9998 if (ctxt->errNo == 0)
/* Nodes are only handed back when the caller asked for them and the parse succeeded. */
10006 if ((lst != NULL) && (ret == 0)) {
10010 * Return the newly created nodeset after unlinking it from
10011 * they pseudo parent.
10013 cur = ctxt->myDoc->children->children;
10015 while (cur != NULL) {
/*
 * When the parent is validating, still well formed, and has an internal
 * subset, each top-level node is validated against the parent's document
 * and the outcome is AND-ed into oldctxt->valid.
 */
10016 if (oldctxt->validate && oldctxt->wellFormed &&
10017 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10018 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10019 oldctxt->myDoc, cur);
10021 cur->parent = NULL;
10024 ctxt->myDoc->children->children = NULL;
/* Dispose of the pseudoroot and restore the document's original child list. */
10026 if (ctxt->myDoc != NULL) {
10027 xmlFreeNode(ctxt->myDoc->children);
10028 ctxt->myDoc->children = content;
/*
 * Swap the handler pointer back to `oldsax` before freeing the context
 * (presumably so the parent's handler table is not released with it).
 */
10031 ctxt->sax = oldsax;
10032 xmlFreeParserCtxt(ctxt);
/* newDoc is only non-NULL when a scratch document was created above. */
10033 if (newDoc != NULL)
10034 xmlFreeDoc(newDoc);
10040 * xmlParseBalancedChunkMemoryRecover:
10041 * @doc: the document the chunk pertains to
10042 * @sax: the SAX handler block (possibly NULL)
10043 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10044 * @depth: Used for loop detection, use 0
10045 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10046 * @lst: the return value for the set of parsed nodes
10047 * @recover: return nodes even if the data is broken (use 0)
10050 * Parse a well-balanced chunk of an XML document
10051 * called by the parser
10052 * The allowed sequence for the Well Balanced Chunk is the one defined by
10053 * the content production in the XML grammar:
10055 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10057 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10058 * the parser error code otherwise
10060 * In case recover is set to 1, the nodelist will not be empty even if
10061 * the parsed chunk is not well balanced.
10064 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10065 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10067 xmlParserCtxtPtr ctxt;
10069 xmlSAXHandlerPtr oldsax = NULL;
10070 xmlNodePtr content;
/*
 * Parse `string` as a content chunk under a temporary "pseudoroot"
 * element of a scratch document; the resulting nodes are reported
 * through `lst` on success, or also on failure when recover == 1.
 */
10075 return(XML_ERR_ENTITY_LOOP);
10081 if (string == NULL)
/* The chunk length comes from xmlStrlen(), so `string` is read up to its terminating zero. */
10084 size = xmlStrlen(string);
10086 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10087 if (ctxt == NULL) return(-1);
/* Default user data is the context itself; a non-NULL user_data overrides it below. */
10088 ctxt->userData = ctxt;
/* Remember the context's own handler so it can be restored before the context is freed. */
10090 oldsax = ctxt->sax;
10092 if (user_data != NULL)
10093 ctxt->userData = user_data;
10095 newDoc = xmlNewDoc(BAD_CAST "1.0");
10096 if (newDoc == NULL) {
10097 xmlFreeParserCtxt(ctxt);
/* The scratch document borrows (does not copy) the DTD subsets of `doc`. */
10101 newDoc->intSubset = doc->intSubset;
10102 newDoc->extSubset = doc->extSubset;
10104 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10105 if (newDoc->children == NULL) {
10107 ctxt->sax = oldsax;
10108 xmlFreeParserCtxt(ctxt);
/* Drop the borrowed subset pointers before the scratch document is freed. */
10109 newDoc->intSubset = NULL;
10110 newDoc->extSubset = NULL;
10111 xmlFreeDoc(newDoc);
10114 nodePush(ctxt, newDoc->children);
10116 ctxt->myDoc = newDoc;
10118 ctxt->myDoc = newDoc;
10119 newDoc->children->doc = doc;
10121 ctxt->instate = XML_PARSER_CONTENT;
10122 ctxt->depth = depth;
10125 * Doing validity checking on chunk doesn't make sense
10127 ctxt->validate = 0;
10128 ctxt->loadsubset = 0;
/* With a document supplied, its child list is hidden for the duration of the parse and then restored. */
10130 if ( doc != NULL ){
10131 content = doc->children;
10132 doc->children = NULL;
10133 xmlParseContent(ctxt);
10134 doc->children = content;
10137 xmlParseContent(ctxt);
/*
 * Post-parse checks: a leftover "</" means an unmatched end tag, any
 * other leftover input is extra content, and a node stack that did not
 * return to the pseudoroot is also reported as not well balanced.
 */
10139 if ((RAW == '<') && (NXT(1) == '/')) {
10140 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10142 ctxt->sax->error(ctxt->userData,
10143 "chunk is not well balanced\n");
10144 ctxt->wellFormed = 0;
10145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10146 } else if (RAW != 0) {
10147 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10149 ctxt->sax->error(ctxt->userData,
10150 "extra content at the end of well balanced chunk\n");
10151 ctxt->wellFormed = 0;
10152 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10154 if (ctxt->node != newDoc->children) {
10155 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10157 ctxt->sax->error(ctxt->userData,
10158 "chunk is not well balanced\n");
10159 ctxt->wellFormed = 0;
10160 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
10163 if (!ctxt->wellFormed) {
10164 if (ctxt->errNo == 0)
/* Nodes are handed back on success, or regardless of the outcome when recover == 1. */
10172 if (lst != NULL && (ret == 0 || recover == 1)) {
10176 * Return the newly created nodeset after unlinking it from
10177 * they pseudo parent.
10179 cur = newDoc->children->children;
10181 while (cur != NULL) {
10182 cur->parent = NULL;
10185 newDoc->children->children = NULL;
/* Restore the context's own handler, then release the context and the scratch document. */
10189 ctxt->sax = oldsax;
10190 xmlFreeParserCtxt(ctxt);
10191 newDoc->intSubset = NULL;
10192 newDoc->extSubset = NULL;
10193 xmlFreeDoc(newDoc);
10199 * xmlSAXParseEntity:
10200 * @sax: the SAX handler block
10201 * @filename: the filename
10203 * parse an XML external entity out of context and build a tree.
10204 * It uses the given SAX function block to handle the parsing callback.
10205 * If sax is NULL, fallback to the default DOM tree building routines.
10207 * [78] extParsedEnt ::= TextDecl? content
10209 * This corresponds to a "Well Balanced" chunk
10211 * Returns the resulting document tree
10215 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
/*
 * Create a file parser context for `filename`, parse it with
 * xmlParseExtParsedEnt() and free the context afterwards.
 */
10217 xmlParserCtxtPtr ctxt;
10218 char *directory = NULL;
10220 ctxt = xmlCreateFileParserCtxt(filename);
10221 if (ctxt == NULL) {
/* Release the handler the new context came with, when it has one. */
10225 if (ctxt->sax != NULL)
10226 xmlFree(ctxt->sax);
10228 ctxt->userData = NULL;
/*
 * NOTE(review): `directory` is computed here, but no later use or
 * release of it is visible in this function -- confirm it is stored
 * or freed.
 */
10231 if ((ctxt->directory == NULL) && (directory == NULL))
10232 directory = xmlParserGetDirectory(filename);
10234 xmlParseExtParsedEnt(ctxt);
10236 if (ctxt->wellFormed)
10240 xmlFreeDoc(ctxt->myDoc);
10241 ctxt->myDoc = NULL;
10245 xmlFreeParserCtxt(ctxt);
10252 * @filename: the filename
10254 * parse an XML external entity out of context and build a tree.
10256 * [78] extParsedEnt ::= TextDecl? content
10258 * This corresponds to a "Well Balanced" chunk
10260 * Returns the resulting document tree
10264 xmlParseEntity(const char *filename) {
/* Convenience wrapper: xmlSAXParseEntity() with no custom SAX handler (sax == NULL). */
10265 return(xmlSAXParseEntity(NULL, filename));
10269 * xmlCreateEntityParserCtxt:
10270 * @URL: the entity URL
10271 * @ID: the entity PUBLIC ID
10272 * @base: a possible base for the target URI
10274 * Create a parser context for an external entity
10275 * Automatic support for ZLIB/Compress compressed document is provided
10276 * by default if found at compile-time.
10278 * Returns the new parser context or NULL
10281 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10282 const xmlChar *base) {
/*
 * Build a new parser context whose current input is the external entity
 * loaded through xmlLoadExternalEntity(), and record the entity's
 * directory on the context when none is set yet.
 */
10283 xmlParserCtxtPtr ctxt;
10284 xmlParserInputPtr inputStream;
10285 char *directory = NULL;
10288 ctxt = xmlNewParserCtxt();
10289 if (ctxt == NULL) {
/* Resolve URL against the optional base. */
10293 uri = xmlBuildURI(URL, base);
/* This load uses URL exactly as given. */
10296 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10297 if (inputStream == NULL) {
10298 xmlFreeParserCtxt(ctxt);
10302 inputPush(ctxt, inputStream);
/* An already-set ctxt->directory is kept; otherwise it is derived from URL. */
10304 if ((ctxt->directory == NULL) && (directory == NULL))
10305 directory = xmlParserGetDirectory((char *)URL);
10306 if ((ctxt->directory == NULL) && (directory != NULL))
10307 ctxt->directory = directory;
/* This load uses the URI built from URL and base. */
10309 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10310 if (inputStream == NULL) {
10312 xmlFreeParserCtxt(ctxt);
10316 inputPush(ctxt, inputStream);
/* Same directory bookkeeping as above, derived from the built URI. */
10318 if ((ctxt->directory == NULL) && (directory == NULL))
10319 directory = xmlParserGetDirectory((char *)uri);
10320 if ((ctxt->directory == NULL) && (directory != NULL))
10321 ctxt->directory = directory;
10328 /************************************************************************
10330 * Front ends when parsing from a file *
10332 ************************************************************************/
10335 * xmlCreateFileParserCtxt:
10336 * @filename: the filename
10338 * Create a parser context for a file content.
10339 * Automatic support for ZLIB/Compress compressed document is provided
10340 * by default if found at compile-time.
10342 * Returns the new parser context or NULL
10345 xmlCreateFileParserCtxt(const char *filename)
10347 xmlParserCtxtPtr ctxt;
10348 xmlParserInputPtr inputStream;
10349 char *canonicFilename;
10350 char *directory = NULL;
/*
 * Build a new parser context whose current input is the file, loaded
 * through xmlLoadExternalEntity() under its canonic path.
 */
10352 ctxt = xmlNewParserCtxt();
10353 if (ctxt == NULL) {
/* With no context available, the failure is reported through the default SAX error callback. */
10354 if (xmlDefaultSAXHandler.error != NULL) {
10355 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10360 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
/*
 * NOTE(review): no xmlFreeParserCtxt(ctxt) is visible on this failure
 * path -- confirm the context is not leaked when the canonic path
 * cannot be built.
 */
10361 if (canonicFilename == NULL) {
10362 if (xmlDefaultSAXHandler.error != NULL) {
10363 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10368 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
/* The canonic path is only needed for the load itself. */
10369 xmlFree(canonicFilename);
10370 if (inputStream == NULL) {
10371 xmlFreeParserCtxt(ctxt);
10375 inputPush(ctxt, inputStream);
/* The directory is derived from the original `filename`, not from the canonic path. */
10376 if ((ctxt->directory == NULL) && (directory == NULL))
10377 directory = xmlParserGetDirectory(filename);
10378 if ((ctxt->directory == NULL) && (directory != NULL))
10379 ctxt->directory = directory;
10385 * xmlSAXParseFileWithData:
10386 * @sax: the SAX handler block
10387 * @filename: the filename
10388 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10390 * @data: the userdata
10392 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10393 * compressed document is provided by default if found at compile-time.
10394 * It uses the given SAX function block to handle the parsing callback.
10395 * If sax is NULL, fallback to the default DOM tree building routines.
10397 * User data (void *) is stored within the parser context in the
10398 * context's _private member, so it is available nearly everywhere in libxml
10400 * Returns the resulting document tree
10404 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10405 int recovery, void *data) {
/*
 * Parse the file as a complete document in a fresh file parser context,
 * with `data` stored in the context's _private field.
 */
10407 xmlParserCtxtPtr ctxt;
10408 char *directory = NULL;
10412 ctxt = xmlCreateFileParserCtxt(filename);
10413 if (ctxt == NULL) {
/* Release the handler the new context came with, when it has one. */
10417 if (ctxt->sax != NULL)
10418 xmlFree(ctxt->sax);
10422 ctxt->_private=data;
/*
 * NOTE(review): the context receives an xmlStrdup() copy of `directory`;
 * no release of the original string is visible in this function --
 * confirm it is freed.
 */
10425 if ((ctxt->directory == NULL) && (directory == NULL))
10426 directory = xmlParserGetDirectory(filename);
10427 if ((ctxt->directory == NULL) && (directory != NULL))
10428 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10430 ctxt->recovery = recovery;
10432 xmlParseDocument(ctxt);
/* The document is kept when it is well formed, or unconditionally in recovery mode. */
10434 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10437 xmlFreeDoc(ctxt->myDoc);
10438 ctxt->myDoc = NULL;
10442 xmlFreeParserCtxt(ctxt);
10449 * @sax: the SAX handler block
10450 * @filename: the filename
10451 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10454 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10455 * compressed document is provided by default if found at compile-time.
10456 * It uses the given SAX function block to handle the parsing callback.
10457 * If sax is NULL, fallback to the default DOM tree building routines.
10459 * Returns the resulting document tree
10463 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10465 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); /* same parse, with no user data (NULL) */
10470 * @cur: a pointer to an array of xmlChar
10472 * parse an XML in-memory document and build a tree.
10473 * In the case the document is not Well Formed, a tree is built anyway
10475 * Returns the resulting document tree
10479 xmlRecoverDoc(xmlChar *cur) {
/* Recovery-mode wrapper: xmlSAXParseDoc() with no SAX handler and recovery set to 1. */
10480 return(xmlSAXParseDoc(NULL, cur, 1));
10485 * @filename: the filename
10487 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10488 * compressed document is provided by default if found at compile-time.
10490 * Returns the resulting document tree if the file was wellformed,
10495 xmlParseFile(const char *filename) {
/* Strict wrapper: xmlSAXParseFile() with no SAX handler and recovery set to 0. */
10496 return(xmlSAXParseFile(NULL, filename, 0));
10501 * @filename: the filename
10503 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10504 * compressed document is provided by default if found at compile-time.
10505 * In the case the document is not Well Formed, a tree is built anyway
10507 * Returns the resulting document tree
10511 xmlRecoverFile(const char *filename) {
/* Recovery-mode wrapper: xmlSAXParseFile() with no SAX handler and recovery set to 1. */
10512 return(xmlSAXParseFile(NULL, filename, 1));
10517 * xmlSetupParserForBuffer:
10518 * @ctxt: an XML parser context
10519 * @buffer: a xmlChar * buffer
10520 * @filename: a file name
10522 * Setup the parser context to parse a new buffer; Clears any prior
10523 * contents from the parser context. The buffer parameter must not be
10524 * NULL, but the filename parameter can be
10527 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10528 const char* filename)
10530 xmlParserInputPtr input;
/* Reset `ctxt` and make `buffer` its only input stream. */
10532 input = xmlNewInputStream(ctxt);
10533 if (input == NULL) {
10534 xmlGenericError(xmlGenericErrorContext,
/* Prior parser state is cleared only after the new input stream was obtained. */
10540 xmlClearParserCtxt(ctxt);
10541 if (filename != NULL)
10542 input->filename = xmlMemStrdup(filename);
/*
 * The input points straight at the caller's buffer (no copy is made
 * here), and its end is found with xmlStrlen(), so `buffer` is read up
 * to its terminating zero. Presumably the buffer must stay alive while
 * the context is in use -- confirm with callers.
 */
10543 input->base = buffer;
10544 input->cur = buffer;
10545 input->end = &buffer[xmlStrlen(buffer)];
10546 inputPush(ctxt, input);
10550 * xmlSAXUserParseFile:
10551 * @sax: a SAX handler
10552 * @user_data: The user data returned on SAX callbacks
10553 * @filename: a file name
10555 * parse an XML file and call the given SAX handler routines.
10556 * Automatic support for ZLIB/Compress compressed document is provided
10558 * Returns 0 in case of success or an error number otherwise
10561 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10562 const char *filename) {
/* Parse the file as a complete document in a fresh file parser context, then free the context. */
10564 xmlParserCtxtPtr ctxt;
10566 ctxt = xmlCreateFileParserCtxt(filename);
10567 if (ctxt == NULL) return -1;
/* The context's handler is freed unless it is the global default table. */
10568 if (ctxt->sax != &xmlDefaultSAXHandler)
10569 xmlFree(ctxt->sax);
/* A NULL user_data leaves the context's existing userData untouched. */
10571 if (user_data != NULL)
10572 ctxt->userData = user_data;
10574 xmlParseDocument(ctxt);
/* The result is derived from ctxt->wellFormed and ctxt->errNo. */
10576 if (ctxt->wellFormed)
10579 if (ctxt->errNo != 0)
10586 xmlFreeParserCtxt(ctxt);
10591 /************************************************************************
10593 * Front ends when parsing from memory *
10595 ************************************************************************/
10598 * xmlCreateMemoryParserCtxt:
10599 * @buffer: a pointer to a char array
10600 * @size: the size of the array
10602 * Create a parser context for an XML in-memory document.
10604 * Returns the new parser context or NULL
10607 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
/*
 * Build a new parser context whose current input is a memory input
 * buffer created over `buffer`/`size`.
 */
10608 xmlParserCtxtPtr ctxt;
10609 xmlParserInputPtr input;
10610 xmlParserInputBufferPtr buf;
10612 if (buffer == NULL)
10617 ctxt = xmlNewParserCtxt();
/* The memory buffer is created with no declared encoding (XML_CHAR_ENCODING_NONE). */
10621 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10623 xmlFreeParserCtxt(ctxt);
10627 input = xmlNewInputStream(ctxt);
10628 if (input == NULL) {
/* Both the input buffer and the context are released when no input stream can be created. */
10629 xmlFreeParserInputBuffer(buf);
10630 xmlFreeParserCtxt(ctxt);
10634 input->filename = NULL;
/* The input's base/cur/end window spans the `use` bytes currently held by the input buffer. */
10636 input->base = input->buf->buffer->content;
10637 input->cur = input->buf->buffer->content;
10638 input->end = &input->buf->buffer->content[input->buf->buffer->use];
10640 inputPush(ctxt, input);
10645 * xmlSAXParseMemoryWithData:
10646 * @sax: the SAX handler block
10647 * @buffer: a pointer to a char array
10648 * @size: the size of the array
10649 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10651 * @data: the userdata
10653 * parse an XML in-memory block and use the given SAX function block
10654 * to handle the parsing callback. If sax is NULL, fallback to the default
10655 * DOM tree building routines.
10657 * User data (void *) is stored within the parser context in the
10658 * context's _private member, so it is available nearly everywhere in libxml
10660 * Returns the resulting document tree
10664 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10665 int size, int recovery, void *data) {
/*
 * Parse the memory block as a complete document in a fresh memory
 * parser context, with `data` stored in the context's _private field.
 */
10667 xmlParserCtxtPtr ctxt;
10669 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10670 if (ctxt == NULL) return(NULL);
/* Release the handler the new context came with, when it has one. */
10672 if (ctxt->sax != NULL)
10673 xmlFree(ctxt->sax);
10677 ctxt->_private=data;
10680 xmlParseDocument(ctxt);
/* The document is kept when it is well formed, or unconditionally in recovery mode. */
10682 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10685 xmlFreeDoc(ctxt->myDoc);
10686 ctxt->myDoc = NULL;
10690 xmlFreeParserCtxt(ctxt);
10696 * xmlSAXParseMemory:
10697 * @sax: the SAX handler block
10698 * @buffer: a pointer to a char array
10699 * @size: the size of the array
10700 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10703 * parse an XML in-memory block and use the given SAX function block
10704 * to handle the parsing callback. If sax is NULL, fallback to the default
10705 * DOM tree building routines.
10707 * Returns the resulting document tree
10710 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10711 int size, int recovery) {
/* Same parse as xmlSAXParseMemoryWithData(), with no user data (NULL). */
10712 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
10717 * @buffer: a pointer to a char array
10718 * @size: the size of the array
10720 * parse an XML in-memory block and build a tree.
10722 * Returns the resulting document tree
10725 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
/* Strict wrapper: xmlSAXParseMemory() with no SAX handler and recovery set to 0. */
10726 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10730 * xmlRecoverMemory:
10731 * @buffer: a pointer to a char array
10732 * @size: the size of the array
10734 * parse an XML in-memory block and build a tree.
10735 * In the case the document is not Well Formed, a tree is built anyway
10737 * Returns the resulting document tree
10740 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
/* Identical call to the one in xmlParseMemory except that the recovery flag is 1. */
10741 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10745 * xmlSAXUserParseMemory:
10746 * @sax: a SAX handler
10747 * @user_data: The user data returned on SAX callbacks
10748 * @buffer: an in-memory XML document input
10749 * @size: the length of the XML document in bytes
10751 * A better SAX parsing routine.
10752 * parse an XML in-memory buffer and call the given SAX handler routines.
10754 * Returns 0 in case of success or an error number otherwise
10756 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10757 const char *buffer, int size) {
/*
 * Runs xmlParseDocument over buffer/size on a freshly created memory
 * parser context. Returns -1 right away when sax is NULL or when the
 * context cannot be created.
 */
10759 xmlParserCtxtPtr ctxt;
10760 xmlSAXHandlerPtr oldsax = NULL;
10762 if (sax == NULL) return -1;
10763 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10764 if (ctxt == NULL) return -1;
/*
 * Remember the SAX block the context was created with; it is put back
 * into ctxt->sax before the context is freed.
 * NOTE(review): the statement installing the caller's sax is not visible
 * in this excerpt -- confirm.
 */
10765 oldsax = ctxt->sax;
/* Only override the context's userData when the caller supplied one. */
10767 if (user_data != NULL)
10768 ctxt->userData = user_data;
10770 xmlParseDocument(ctxt);
/*
 * The result is chosen from ctxt->wellFormed and ctxt->errNo.
 * NOTE(review): the assignments under these two tests are not visible in
 * this excerpt; per the function's doc comment the outcome is 0 on
 * success and an error number otherwise -- confirm.
 */
10772 if (ctxt->wellFormed)
10775 if (ctxt->errNo != 0)
/*
 * Restore the original handler before freeing the context; presumably so
 * that xmlFreeParserCtxt does not touch the caller's handler -- confirm.
 */
10780 ctxt->sax = oldsax;
10781 xmlFreeParserCtxt(ctxt);
10787 * xmlCreateDocParserCtxt:
10788 * @cur: a pointer to an array of xmlChar
10790 * Creates a parser context for an XML in-memory document.
10792 * Returns the new parser context or NULL
10795 xmlCreateDocParserCtxt(xmlChar *cur) {
/*
 * Measures cur with xmlStrlen and delegates to xmlCreateMemoryParserCtxt;
 * the cast only presents the xmlChar buffer as char for that call.
 * NOTE(review): the declaration of len and any guard on cur are not
 * visible in this excerpt -- confirm.
 */
10800 len = xmlStrlen(cur);
10801 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10806 * @sax: the SAX handler block
10807 * @cur: a pointer to an array of xmlChar
10808 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10811 * parse an XML in-memory document and build a tree.
10812 * It uses the given SAX function block to handle the parsing callbacks.
10813 * If sax is NULL, fallback to the default DOM tree building routines.
10815 * Returns the resulting document tree
10819 xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
/*
 * Creates a parser context for the in-memory document cur, runs
 * xmlParseDocument on it and selects ctxt->myDoc as the result when the
 * parse was well formed or recovery is non-zero. A NULL cur or a failed
 * context creation yields NULL.
 */
10821 xmlParserCtxtPtr ctxt;
10823 if (cur == NULL) return(NULL);
10826 ctxt = xmlCreateDocParserCtxt(cur);
10827 if (ctxt == NULL) return(NULL);
/*
 * Clear the context's userData.
 * NOTE(review): the enclosing condition and the statement installing the
 * caller's sax are not visible in this excerpt. Unlike
 * xmlSAXParseMemoryWithData, no xmlFree(ctxt->sax) is visible here --
 * check whether the context's original SAX block is leaked when a
 * caller-supplied handler replaces it.
 */
10830 ctxt->userData = NULL;
10833 xmlParseDocument(ctxt);
/* Keep the tree when the input was well formed or recovery was requested. */
10834 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
/*
 * Free the tree and clear the context's pointer to it.
 * NOTE(review): presumably the not-well-formed, non-recovery path; the
 * guarding else is not visible in this excerpt -- confirm.
 */
10837 xmlFreeDoc(ctxt->myDoc);
10838 ctxt->myDoc = NULL;
10842 xmlFreeParserCtxt(ctxt);
10849 * @cur: a pointer to an array of xmlChar
10851 * parse an XML in-memory document and build a tree.
10853 * Returns the resulting document tree
10857 xmlParseDoc(xmlChar *cur) {
/* NULL sax handler and recovery flag 0: forwarded unchanged to xmlSAXParseDoc. */
10858 return(xmlSAXParseDoc(NULL, cur, 0));
10861 /************************************************************************
10863 * Specific function to keep track of entities references *
10864 * and used by the XSLT debugger *
10866 ************************************************************************/
/*
 * Entity-reference hook: overwritten by xmlSetEntityReferenceFunc and
 * called by xmlAddEntityReference when it is not NULL. Starts out NULL.
 */
10868 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10871 * xmlAddEntityReference:
10872 * @ent : A valid entity
10873 * @firstNode : A valid first node for children of entity
10874 * @lastNode : A valid last node of children entity
10876 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10879 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10880 xmlNodePtr lastNode)
/*
 * Pass the three arguments through to the registered hook; when
 * xmlEntityRefFunc is NULL nothing is forwarded.
 */
10882 if (xmlEntityRefFunc != NULL) {
10883 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10889 * xmlSetEntityReferenceFunc:
10890 * @func: A valid function
10892 * Set the callback function to invoke when an XML entity reference has been made
10895 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
/*
 * Plain overwrite of the file-level hook. Storing NULL stops
 * xmlAddEntityReference from forwarding, since it tests for NULL first.
 */
10897 xmlEntityRefFunc = func;
10900 /************************************************************************
10904 ************************************************************************/
10906 #ifdef LIBXML_XPATH_ENABLED
10907 #include <libxml/xpath.h>
/* Declared here so xmlInitParser can compare xmlGenericError against it. */
10910 extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/*
 * Guard flag for xmlInitParser: tested on entry there, set to 1 at the end
 * of it, and reset to 0 by xmlCleanupParser.
 */
10911 static int xmlParserInitialized = 0;
10916 * Initialization function for the XML parser.
10917 * This is not reentrant. Call once before processing in case of
10918 * use in multithreaded programs.
10922 xmlInitParser(void) {
/*
 * One-time global setup. xmlParserInitialized is tested on entry and set
 * to 1 at the end. When xmlGenericError is unset or still the default
 * handler, initGenericErrorDefaultFunc(NULL) is called before the
 * subsystem initialisers run.
 * NOTE(review): the statement guarded by the entry test is not visible in
 * this excerpt -- presumably an early return; confirm. The body of the
 * LIBXML_XPATH_ENABLED section is not visible either.
 */
10923 if (xmlParserInitialized != 0)
10926 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10927 (xmlGenericError == NULL))
10928 initGenericErrorDefaultFunc(NULL);
/*
 * Subsystem initialisers, called in this order: encoding handlers,
 * predefined entities, default SAX handler, default I/O callbacks.
 */
10931 xmlInitCharEncodingHandlers();
10932 xmlInitializePredefinedEntities();
10933 xmlDefaultSAXHandlerInit();
10934 xmlRegisterDefaultInputCallbacks();
10935 xmlRegisterDefaultOutputCallbacks();
/* HTML parser setup is compiled in only when LIBXML_HTML_ENABLED is defined. */
10936 #ifdef LIBXML_HTML_ENABLED
10937 htmlInitAutoClose();
10938 htmlDefaultSAXHandlerInit();
10940 #ifdef LIBXML_XPATH_ENABLED
10943 xmlParserInitialized = 1;
10947 * xmlCleanupParser:
10949 * Cleanup function for the XML parser. It tries to reclaim all
10950 * parsing related global memory allocated for the parser processing.
10951 * It doesn't deallocate any document related memory. Calling this
10952 * function should not prevent reusing the parser.
10953 * One should call xmlCleanupParser() only when the process has
10954 * finished using the library or XML document built with it.
10958 xmlCleanupParser(void) {
10959 xmlCleanupCharEncodingHandlers();
10960 xmlCleanupPredefinedEntities();
10961 #ifdef LIBXML_CATALOG_ENABLED
10962 xmlCatalogCleanup();
10964 xmlCleanupThreads();
10965 xmlParserInitialized = 0;