2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
5 * See Copyright for the status of this software.
13 #if defined(WIN32) && !defined (__CYGWIN__)
14 #define XML_DIR_SEP '\\'
16 #define XML_DIR_SEP '/'
26 #ifdef HAVE_SYS_STAT_H
39 #include <libxml/xmlmemory.h>
40 #include <libxml/tree.h>
41 #include <libxml/parser.h>
42 #include <libxml/parserInternals.h>
43 #include <libxml/valid.h>
44 #include <libxml/entities.h>
45 #include <libxml/xmlerror.h>
46 #include <libxml/encoding.h>
47 #include <libxml/valid.h>
48 #include <libxml/xmlIO.h>
49 #include <libxml/uri.h>
50 #include <libxml/SAX.h>
51 #ifdef LIBXML_CATALOG_ENABLED
52 #include <libxml/catalog.h>
54 #include <libxml/globals.h>
56 void xmlUpgradeOldNs(xmlDocPtr doc);
59 * Various global defaults for parsing
64 * @version: the include version number
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
70 xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
75 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
91 static const char *xmlFeaturesList[] = {
96 "fetch external entities",
97 "substitute entities",
107 "SAX function internalSubset",
108 "SAX function isStandalone",
109 "SAX function hasInternalSubset",
110 "SAX function hasExternalSubset",
111 "SAX function resolveEntity",
112 "SAX function getEntity",
113 "SAX function entityDecl",
114 "SAX function notationDecl",
115 "SAX function attributeDecl",
116 "SAX function elementDecl",
117 "SAX function unparsedEntityDecl",
118 "SAX function setDocumentLocator",
119 "SAX function startDocument",
120 "SAX function endDocument",
121 "SAX function startElement",
122 "SAX function endElement",
123 "SAX function reference",
124 "SAX function characters",
125 "SAX function ignorableWhitespace",
126 "SAX function processingInstruction",
127 "SAX function comment",
128 "SAX function warning",
129 "SAX function error",
130 "SAX function fatalError",
131 "SAX function getParameterEntity",
132 "SAX function cdataBlock",
133 "SAX function externalSubset",
137 * xmlGetFeaturesList:
138 * @len: the length of the feature name array (input/output)
139 * @result: an array of strings to be filled with the feature names.
141 * Copy at most *@len feature names into the @result array
143 * Returns -1 in case of error, or the total number of features;
144 * len is updated with the number of strings copied, and the
145 * strings must not be deallocated.
148 xmlGetFeaturesList(int *len, const char **result) {
151 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
152 if ((len == NULL) || (result == NULL))
154 if ((*len < 0) || (*len >= 1000))
158 for (i = 0;i < *len;i++)
159 result[i] = xmlFeaturesList[i];
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
169 * Read the current value of one feature of this parser instance
171 * Returns -1 in case of error, 0 otherwise
174 xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
270 * Change the current value of one feature of this parser instance
272 * Returns -1 in case of error, 0 otherwise
275 xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
286 ctxt->vctxt.nodeMax = 0;
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
373 /************************************************************************
375 * Some functions to avoid too large macros *
377 ************************************************************************/
381 * @c: a Unicode character (int)
383 * Check whether the character is allowed by the production
384 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
385 * | [#x10000-#x10FFFF]
386 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
387 * Also available as a macro IS_CHAR()
389 * Returns 0 if not, non-zero otherwise
394 ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||
395 (((c) >= 0x20) && ((c) <= 0xD7FF)) ||
396 (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||
397 (((c) >= 0x10000) && ((c) <= 0x10FFFF)));
402 * @c: a Unicode character (int)
404 * Check whether the character is allowed by the production
405 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
406 * Also available as a macro IS_BLANK()
408 * Returns 0 if not, non-zero otherwise
412 return(((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || ((c) == 0x0D));
415 static int xmlBaseArray[] = {
416 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
418 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
419 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
420 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
421 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
422 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
423 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
424 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
425 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
428 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
429 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
430 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
431 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
436 * @c: a Unicode character (int)
438 * Check whether the character is allowed by the production
439 * [85] BaseChar ::= ... long list see REC ...
441 * VI is your friend !
442 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
444 * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
446 * Returns 0 if not, non-zero otherwise
449 xmlIsBaseChar(int c) {
451 (((c) < 0x0100) ? xmlBaseArray[c] :
453 (((c) >= 0x0100) && ((c) <= 0x0131)) ||
454 (((c) >= 0x0134) && ((c) <= 0x013E)) ||
455 (((c) >= 0x0141) && ((c) <= 0x0148)) ||
456 (((c) >= 0x014A) && ((c) <= 0x017E)) ||
457 (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
458 (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
459 (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
460 (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
461 (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
462 (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
464 (((c) >= 0x0388) && ((c) <= 0x038A)) ||
466 (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
467 (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
468 (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
473 (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
474 (((c) >= 0x0401) && ((c) <= 0x040C)) ||
475 (((c) >= 0x040E) && ((c) <= 0x044F)) ||
476 (((c) >= 0x0451) && ((c) <= 0x045C)) ||
477 (((c) >= 0x045E) && ((c) <= 0x0481)) ||
478 (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
479 (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
480 (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
481 (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
482 (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
483 (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
484 (((c) >= 0x0531) && ((c) <= 0x0556)) ||
486 (((c) >= 0x0561) && ((c) <= 0x0586)) ||
487 (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
488 (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
489 (((c) >= 0x0621) && ((c) <= 0x063A)) ||
490 (((c) >= 0x0641) && ((c) <= 0x064A)) ||
491 (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
492 (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
493 (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
494 (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
496 (((c) >= 0x06E5) && ((c) <= 0x06E6)) ||
497 (((c) >= 0x905) && ( /* accelerator */
498 (((c) >= 0x0905) && ((c) <= 0x0939)) ||
500 (((c) >= 0x0958) && ((c) <= 0x0961)) ||
501 (((c) >= 0x0985) && ((c) <= 0x098C)) ||
502 (((c) >= 0x098F) && ((c) <= 0x0990)) ||
503 (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
504 (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
506 (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
507 (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
508 (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
509 (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
510 (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
511 (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
512 (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
513 (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
514 (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
515 (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
516 (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
517 (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
519 (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
520 (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
522 (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
523 (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
524 (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
525 (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
526 (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
529 (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
530 (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
531 (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
532 (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
533 (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
534 (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
536 (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
537 (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
538 (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
539 (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
540 (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
541 (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
543 (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
544 (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
545 (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
546 (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
547 (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
548 (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
549 (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
550 (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
551 (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
552 (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
553 (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
554 (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
555 (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
556 (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
557 (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
558 (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
560 (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
561 (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
562 (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
563 (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
564 (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
565 (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
566 (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
568 (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
569 (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
570 (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
572 (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
575 (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
576 (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
577 (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
580 (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
581 (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
583 (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
585 (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
586 (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
587 (((c) >= 0x0F49) && ((c) <= 0x0F69)) ||
588 (((c) >= 0x10A0) && ( /* accelerator */
589 (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
590 (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
592 (((c) >= 0x1102) && ((c) <= 0x1103)) ||
593 (((c) >= 0x1105) && ((c) <= 0x1107)) ||
595 (((c) >= 0x110B) && ((c) <= 0x110C)) ||
596 (((c) >= 0x110E) && ((c) <= 0x1112)) ||
603 (((c) >= 0x1154) && ((c) <= 0x1155)) ||
605 (((c) >= 0x115F) && ((c) <= 0x1161)) ||
610 (((c) >= 0x116D) && ((c) <= 0x116E)) ||
611 (((c) >= 0x1172) && ((c) <= 0x1173)) ||
616 (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
617 (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
619 (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
623 (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
624 (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
625 (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
626 (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
627 (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
628 (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
629 (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
633 (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
634 (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
635 (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
637 (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
638 (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
639 (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
640 (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
641 (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
642 (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
643 (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
645 (((c) >= 0x212A) && ((c) <= 0x212B)) ||
647 (((c) >= 0x2180) && ((c) <= 0x2182)) ||
648 (((c) >= 0x3041) && ((c) <= 0x3094)) ||
649 (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
650 (((c) >= 0x3105) && ((c) <= 0x312C)) ||
651 (((c) >= 0xAC00) && ((c) <= 0xD7A3))) /* accelerators */ ))))));
656 * @c: a Unicode character (int)
658 * Check whether the character is allowed by the production
659 * [88] Digit ::= ... long list see REC ...
661 * Returns 0 if not, non-zero otherwise
666 (((c) >= 0x0030) && ((c) <= 0x0039)) ||
667 (((c) >= 0x660) && ( /* accelerator */
668 (((c) >= 0x0660) && ((c) <= 0x0669)) ||
669 (((c) >= 0x06F0) && ((c) <= 0x06F9)) ||
670 (((c) >= 0x0966) && ((c) <= 0x096F)) ||
671 (((c) >= 0x09E6) && ((c) <= 0x09EF)) ||
672 (((c) >= 0x0A66) && ((c) <= 0x0A6F)) ||
673 (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) ||
674 (((c) >= 0x0B66) && ((c) <= 0x0B6F)) ||
675 (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) ||
676 (((c) >= 0x0C66) && ((c) <= 0x0C6F)) ||
677 (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) ||
678 (((c) >= 0x0D66) && ((c) <= 0x0D6F)) ||
679 (((c) >= 0x0E50) && ((c) <= 0x0E59)) ||
680 (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) ||
681 (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* accelerator */ ));
686 * @c: a Unicode character (int)
688 * Check whether the character is allowed by the production
689 * [87] CombiningChar ::= ... long list see REC ...
691 * Returns 0 if not, non-zero otherwise
694 xmlIsCombining(int c) {
696 (((c) >= 0x300) && ( /* accelerator */
697 (((c) >= 0x0300) && ((c) <= 0x0345)) ||
698 (((c) >= 0x0360) && ((c) <= 0x0361)) ||
699 (((c) >= 0x0483) && ((c) <= 0x0486)) ||
700 (((c) >= 0x0591) && ((c) <= 0x05A1)) ||
701 (((c) >= 0x05A3) && ((c) <= 0x05B9)) ||
702 (((c) >= 0x05BB) && ((c) <= 0x05BD)) ||
704 (((c) >= 0x05C1) && ((c) <= 0x05C2)) ||
706 (((c) >= 0x064B) && ((c) <= 0x0652)) ||
708 (((c) >= 0x06D6) && ((c) <= 0x06DC)) ||
709 (((c) >= 0x06DD) && ((c) <= 0x06DF)) ||
710 (((c) >= 0x06E0) && ((c) <= 0x06E4)) ||
711 (((c) >= 0x06E7) && ((c) <= 0x06E8)) ||
712 (((c) >= 0x06EA) && ((c) <= 0x06ED)) ||
713 (((c) >= 0x0901) && ( /* accelerator */
714 (((c) >= 0x0901) && ((c) <= 0x0903)) ||
716 (((c) >= 0x093E) && ((c) <= 0x094C)) ||
718 (((c) >= 0x0951) && ((c) <= 0x0954)) ||
719 (((c) >= 0x0962) && ((c) <= 0x0963)) ||
720 (((c) >= 0x0981) && ((c) <= 0x0983)) ||
724 (((c) >= 0x09C0) && ((c) <= 0x09C4)) ||
725 (((c) >= 0x09C7) && ((c) <= 0x09C8)) ||
726 (((c) >= 0x09CB) && ((c) <= 0x09CD)) ||
728 (((c) >= 0x09E2) && ((c) <= 0x09E3)) ||
729 (((c) >= 0x0A02) && ( /* accelerator */
734 (((c) >= 0x0A40) && ((c) <= 0x0A42)) ||
735 (((c) >= 0x0A47) && ((c) <= 0x0A48)) ||
736 (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) ||
737 (((c) >= 0x0A70) && ((c) <= 0x0A71)) ||
738 (((c) >= 0x0A81) && ((c) <= 0x0A83)) ||
740 (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) ||
741 (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) ||
742 (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) ||
743 (((c) >= 0x0B01) && ((c) <= 0x0B03)) ||
745 (((c) >= 0x0B3E) && ((c) <= 0x0B43)) ||
746 (((c) >= 0x0B47) && ((c) <= 0x0B48)) ||
747 (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) ||
748 (((c) >= 0x0B56) && ((c) <= 0x0B57)) ||
749 (((c) >= 0x0B82) && ((c) <= 0x0B83)) ||
750 (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) ||
751 (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) ||
752 (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) ||
754 (((c) >= 0x0C01) && ((c) <= 0x0C03)) ||
755 (((c) >= 0x0C3E) && ((c) <= 0x0C44)) ||
756 (((c) >= 0x0C46) && ((c) <= 0x0C48)) ||
757 (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) ||
758 (((c) >= 0x0C55) && ((c) <= 0x0C56)) ||
759 (((c) >= 0x0C82) && ((c) <= 0x0C83)) ||
760 (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) ||
761 (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) ||
762 (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) ||
763 (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) ||
764 (((c) >= 0x0D02) && ((c) <= 0x0D03)) ||
765 (((c) >= 0x0D3E) && ((c) <= 0x0D43)) ||
766 (((c) >= 0x0D46) && ((c) <= 0x0D48)) ||
767 (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) ||
769 (((c) >= 0x0E31) && ( /* accelerator */
771 (((c) >= 0x0E34) && ((c) <= 0x0E3A)) ||
772 (((c) >= 0x0E47) && ((c) <= 0x0E4E)) ||
774 (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) ||
775 (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) ||
776 (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) ||
777 (((c) >= 0x0F18) && ((c) <= 0x0F19)) ||
783 (((c) >= 0x0F71) && ((c) <= 0x0F84)) ||
784 (((c) >= 0x0F86) && ((c) <= 0x0F8B)) ||
785 (((c) >= 0x0F90) && ((c) <= 0x0F95)) ||
787 (((c) >= 0x0F99) && ((c) <= 0x0FAD)) ||
788 (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) ||
790 (((c) >= 0x20D0) && ((c) <= 0x20DC)) ||
792 (((c) >= 0x302A) && ((c) <= 0x302F)) ||
794 ((c) == 0x309A))))))))));
799 * @c: a Unicode character (int)
801 * Check whether the character is allowed by the production
802 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
803 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
804 * [#x309D-#x309E] | [#x30FC-#x30FE]
806 * Returns 0 if not, non-zero otherwise
809 xmlIsExtender(int c) {
811 case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
812 case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
813 case 0x3031: case 0x3032: case 0x3033: case 0x3034:
814 case 0x3035: case 0x309D: case 0x309E: case 0x30FC:
815 case 0x30FD: case 0x30FE:
824 * @c: a Unicode character (int)
826 * Check whether the character is allowed by the production
827 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
829 * Returns 0 if not, non-zero otherwise
832 xmlIsIdeographic(int c) {
833 return(((c) < 0x0100) ? 0 :
834 (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
835 (((c) >= 0xf900) && ((c) <= 0xfa2d)) ||
836 (((c) >= 0x3021) && ((c) <= 0x3029)) ||
842 * @c: a Unicode character (int)
844 * Check whether the character is allowed by the production
845 * [84] Letter ::= BaseChar | Ideographic
847 * Returns 0 if not, non-zero otherwise
851 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
856 * @c: a Unicode character (int)
858 * Check whether the character is allowed by the production
859 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
861 * Returns 0 if not, non-zero otherwise
864 xmlIsPubidChar(int c) {
866 ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||
867 (((c) >= 'a') && ((c) <= 'z')) ||
868 (((c) >= 'A') && ((c) <= 'Z')) ||
869 (((c) >= '0') && ((c) <= '9')) ||
870 ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||
871 ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||
872 ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||
873 ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||
874 ((c) == '$') || ((c) == '_') || ((c) == '%'));
877 /************************************************************************
879 * Input handling functions for progressive parsing *
881 ************************************************************************/
883 /* #define DEBUG_INPUT */
884 /* #define DEBUG_STACK */
885 /* #define DEBUG_PUSH */
888 /* we need to keep enough input to show errors in context */
892 #define CHECK_BUFFER(in) check_buffer(in)
895 void check_buffer(xmlParserInputPtr in) {
896 if (in->base != in->buf->buffer->content) {
897 xmlGenericError(xmlGenericErrorContext,
898 "xmlParserInput: base mismatch problem\n");
900 if (in->cur < in->base) {
901 xmlGenericError(xmlGenericErrorContext,
902 "xmlParserInput: cur < base problem\n");
904 if (in->cur > in->base + in->buf->buffer->use) {
905 xmlGenericError(xmlGenericErrorContext,
906 "xmlParserInput: cur > base + use problem\n");
908 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
909 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
910 in->buf->buffer->use, in->buf->buffer->size);
914 #define CHECK_BUFFER(in)
919 * xmlParserInputRead:
920 * @in: an XML parser input
921 * @len: an indicative size for the lookahead
923 * This function refreshes the input for the parser. It doesn't try to
924 * preserve pointers to the input buffer, and discards already read data
926 * Returns the number of xmlChars read, or -1 in case of error, 0 indicates the
930 xmlParserInputRead(xmlParserInputPtr in, int len) {
936 xmlGenericError(xmlGenericErrorContext, "Read\n");
938 if (in->buf == NULL) return(-1);
939 if (in->base == NULL) return(-1);
940 if (in->cur == NULL) return(-1);
941 if (in->buf->buffer == NULL) return(-1);
942 if (in->buf->readcallback == NULL) return(-1);
946 used = in->cur - in->buf->buffer->content;
947 ret = xmlBufferShrink(in->buf->buffer, used);
952 ret = xmlParserInputBufferRead(in->buf, len);
953 if (in->base != in->buf->buffer->content) {
955 * the buffer has been reallocated
957 indx = in->cur - in->base;
958 in->base = in->buf->buffer->content;
959 in->cur = &in->buf->buffer->content[indx];
961 in->end = &in->buf->buffer->content[in->buf->buffer->use];
969 * xmlParserInputGrow:
970 * @in: an XML parser input
971 * @len: an indicative size for the lookahead
973 * This function increases the input for the parser. It tries to
974 * preserve pointers to the input buffer, and keeps already read data
976 * Returns the number of xmlChars read, or -1 in case of error, 0 indicates the
980 xmlParserInputGrow(xmlParserInputPtr in, int len) {
985 xmlGenericError(xmlGenericErrorContext, "Grow\n");
987 if (in->buf == NULL) return(-1);
988 if (in->base == NULL) return(-1);
989 if (in->cur == NULL) return(-1);
990 if (in->buf->buffer == NULL) return(-1);
994 indx = in->cur - in->base;
995 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
1001 if (in->buf->readcallback != NULL)
1002 ret = xmlParserInputBufferGrow(in->buf, len);
1007 * NOTE: in->base may be a "dangling", i.e. freed, pointer in this
1008 * block, but we really only use it as an integer to do some
1009 * pointer arithmetic. Insure will report it as a bug, but in
1010 * this specific case it is not one.
1012 if (in->base != in->buf->buffer->content) {
1014 * the buffer has been reallocated
1016 indx = in->cur - in->base;
1017 in->base = in->buf->buffer->content;
1018 in->cur = &in->buf->buffer->content[indx];
1020 in->end = &in->buf->buffer->content[in->buf->buffer->use];
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1031 * This function removes used input for the parser.
/*
 * Discard input that the parser has already gone past, then top the
 * buffer up again through xmlParserInputBufferRead and re-derive
 * base/cur/end from the buffer content.
 * NOTE(review): not every original line is visible in this listing; the
 * statements guarded by the two size tests on buffer->use are missing
 * and should be checked in the complete file.
 */
1034 xmlParserInputShrink(xmlParserInputPtr in) {
1040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
/* Nothing is done when the buffer or a position pointer is missing. */
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
/* used = offset of the current position inside the buffer content. */
1049 used = in->cur - in->buf->buffer->content;
1051 * Do not shrink on large buffers whose only a tiny fraction
1055 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
/* Shrink only past INPUT_CHUNK consumed bytes, keeping LINE_LEN bytes before cur. */
1058 if (used > INPUT_CHUNK) {
1059 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
/* consumed accumulates the value returned by xmlBufferShrink. */
1062 in->consumed += ret;
1064 in->end = &in->buf->buffer->content[in->buf->buffer->use];
1069 if (in->buf->buffer->use > INPUT_CHUNK) {
/* Request 2 * INPUT_CHUNK more from the I/O layer; the return value is not used. */
1072 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
/* If the content pointer changed, keep cur at the same offset from the new base. */
1073 if (in->base != in->buf->buffer->content) {
1075 * the buffer has been reallocated
1077 indx = in->cur - in->base;
1078 in->base = in->buf->buffer->content;
1079 in->cur = &in->buf->buffer->content[indx];
1081 in->end = &in->buf->buffer->content[in->buf->buffer->use];
1086 /************************************************************************
1088 * UTF8 character input and related functions *
1090 ************************************************************************/
1094 * @ctxt: the XML parser context
1096 * Skip to the next char in the input.
/*
 * Advance ctxt->input->cur by one character.
 *
 * In UTF-8 mode the visible branches consume 2, 3 or 4 bytes after
 * checking that each following byte has the 10xxxxxx form, calling
 * xmlParserInputGrow before looking at the next byte. A decoded 3 or 4
 * byte value in 0xD800-0xDFFF, equal to 0xFFFE or 0xFFFF, or at or above
 * 0x110000 is reported as out of range, clears wellFormed and, unless
 * recovery is set, disables SAX. A malformed sequence jumps to
 * encoding_error, where the visible lines report the problem with the
 * four bytes at input->cur, clear wellFormed and switch ctxt->charset to
 * XML_CHAR_ENCODING_8859_1.
 *
 * In any other charset the visible lines only track newlines and grow
 * the input when a zero byte is reached. A '%' at the new position
 * triggers xmlParserHandlePEReference unless ctxt->html is set.
 *
 * NOTE(review): not every original line is visible in this listing (the
 * single-byte advance, the entity pop, the encoding_error label and
 * several declarations are missing); confirm against the complete file.
 */
1100 xmlNextChar(xmlParserCtxtPtr ctxt)
1102 if (ctxt->instate == XML_PARSER_EOF)
1105 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
/* A zero byte with no more data to grow, outside of a comment, takes the end-of-entity branch. */
1106 if ((*ctxt->input->cur == 0) &&
1107 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1108 (ctxt->instate != XML_PARSER_COMMENT)) {
1110 * If we are at the end of the current entity and
1111 * the context allows it, we pop consumed entities
1113 * the auto closing should be blocked in other cases
1117 const unsigned char *cur;
1121 * 2.11 End-of-Line Handling
1122 * the literal two-character sequence "#xD#xA" or a standalone
1123 * literal #xD, an XML processor must pass to the application
1124 * the single character #xA.
/* A newline increments the line counter and resets the column to 1. */
1126 if (*(ctxt->input->cur) == '\n') {
1127 ctxt->input->line++;
1128 ctxt->input->col = 1;
1133 * We are supposed to handle UTF8, check it's valid
1134 * From rfc2044: encoding of the Unicode values on UTF-8:
1136 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1137 * 0000 0000-0000 007F 0xxxxxxx
1138 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1139 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1141 * Check for the 0x110000 limit too
1143 cur = ctxt->input->cur;
1148 goto encoding_error;
1150 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1151 if ((cur[1] & 0xc0) != 0x80)
1152 goto encoding_error;
1153 if ((c & 0xe0) == 0xe0) {
1157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1158 if ((cur[2] & 0xc0) != 0x80)
1159 goto encoding_error;
1160 if ((c & 0xf0) == 0xf0) {
1162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1163 if (((c & 0xf8) != 0xf0) ||
1164 ((cur[3] & 0xc0) != 0x80))
1165 goto encoding_error;
1167 ctxt->input->cur += 4;
1168 val = (cur[0] & 0x7) << 18;
1169 val |= (cur[1] & 0x3f) << 12;
1170 val |= (cur[2] & 0x3f) << 6;
1171 val |= cur[3] & 0x3f;
1174 ctxt->input->cur += 3;
1175 val = (cur[0] & 0xf) << 12;
1176 val |= (cur[1] & 0x3f) << 6;
1177 val |= cur[2] & 0x3f;
1179 if (((val > 0xd7ff) && (val < 0xe000)) ||
1180 ((val > 0xfffd) && (val < 0x10000)) ||
1181 (val >= 0x110000)) {
1182 if ((ctxt->sax != NULL) &&
1183 (ctxt->sax->error != NULL))
1184 ctxt->sax->error(ctxt->userData,
1185 "Char 0x%X out of allowed range\n",
1187 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1188 ctxt->wellFormed = 0;
1189 if (ctxt->recovery == 0)
1190 ctxt->disableSAX = 1;
1194 ctxt->input->cur += 2;
1200 if (*ctxt->input->cur == 0)
1201 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1205 * Assume it's a fixed length encoding (1) with
1206 * a compatible encoding for the ASCII set, since
1207 * XML constructs only use < 128 chars
1210 if (*(ctxt->input->cur) == '\n') {
1211 ctxt->input->line++;
1212 ctxt->input->col = 1;
1217 if (*ctxt->input->cur == 0)
1218 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1220 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1221 xmlParserHandlePEReference(ctxt);
1222 if ((*ctxt->input->cur == 0) &&
1223 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1228 * If we detect an UTF8 error that probably mean that the
1229 * input encoding didn't get properly advertised in the
1230 * declaration header. Report the error and switch the encoding
1231 * to ISO-Latin-1 (if you don't like this policy, just declare the
1234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1235 ctxt->sax->error(ctxt->userData,
1236 "Input is not proper UTF-8, indicate encoding !\n");
1237 ctxt->sax->error(ctxt->userData,
1238 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1239 ctxt->input->cur[0], ctxt->input->cur[1],
1240 ctxt->input->cur[2], ctxt->input->cur[3]);
1242 ctxt->wellFormed = 0;
1243 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1245 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1252 * @ctxt: the XML parser context
1253 * @len: pointer to the length of the char read
1255 * The current char value, if using UTF-8 this may actually span multiple
1256 * bytes in the input buffer. Implement the end of line normalization:
1257 * 2.11 End-of-Line Handling
1258 * Wherever an external parsed entity or the literal entity value
1259 * of an internal parsed entity contains either the literal two-character
1260 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1261 * must pass to the application the single character #xA.
1262 * This behavior can conveniently be produced by normalizing all
1263 * line breaks to #xA on input, before parsing.)
1265 * Returns the current char value and its length
/*
 * Return the character at ctxt->input->cur without consuming it.
 *
 * A byte in the range 0x20-0x7F is returned directly. In UTF-8 mode a
 * 2, 3 or 4 byte sequence is validated (each following byte must have
 * the 10xxxxxx form, with xmlParserInputGrow called before it is looked
 * at) and assembled into val; a value rejected by IS_CHAR is reported as
 * out of range, clears wellFormed and, unless recovery is set, disables
 * SAX. A 0xD byte is tested for a following 0xA in both the UTF-8 and
 * the fixed-length branches. On a malformed sequence the visible lines
 * report the four bytes at input->cur, clear wellFormed, switch
 * ctxt->charset to XML_CHAR_ENCODING_8859_1 and return the raw byte.
 *
 * NOTE(review): not every original line is visible in this listing (the
 * assignments to *len, the return of val, the body of the 0xD/0xA case,
 * the truncated-input branch and the encoding_error label are missing);
 * confirm against the complete file.
 */
1269 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1270 if (ctxt->instate == XML_PARSER_EOF)
/* Fast path for a single byte between 0x20 and 0x7F inclusive. */
1273 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1275 return((int) *ctxt->input->cur);
1277 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1279 * We are supposed to handle UTF8, check it's valid
1280 * From rfc2044: encoding of the Unicode values on UTF-8:
1282 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1283 * 0000 0000-0000 007F 0xxxxxxx
1284 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1285 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1287 * Check for the 0x110000 limit too
1289 const unsigned char *cur = ctxt->input->cur;
1296 goto encoding_error;
1298 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1299 if ((cur[1] & 0xc0) != 0x80)
1300 goto encoding_error;
1301 if ((c & 0xe0) == 0xe0) {
1304 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1305 if ((cur[2] & 0xc0) != 0x80)
1306 goto encoding_error;
1307 if ((c & 0xf0) == 0xf0) {
1309 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1310 if (((c & 0xf8) != 0xf0) ||
1311 ((cur[3] & 0xc0) != 0x80))
1312 goto encoding_error;
1315 val = (cur[0] & 0x7) << 18;
1316 val |= (cur[1] & 0x3f) << 12;
1317 val |= (cur[2] & 0x3f) << 6;
1318 val |= cur[3] & 0x3f;
1322 val = (cur[0] & 0xf) << 12;
1323 val |= (cur[1] & 0x3f) << 6;
1324 val |= cur[2] & 0x3f;
1329 val = (cur[0] & 0x1f) << 6;
1330 val |= cur[1] & 0x3f;
1332 if (!IS_CHAR(val)) {
1333 if ((ctxt->sax != NULL) &&
1334 (ctxt->sax->error != NULL))
1335 ctxt->sax->error(ctxt->userData,
1336 "Char 0x%X out of allowed range\n", val);
1337 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1338 ctxt->wellFormed = 0;
1339 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1345 if (*ctxt->input->cur == 0xD) {
1346 if (ctxt->input->cur[1] == 0xA) {
1352 return((int) *ctxt->input->cur);
1356 * Assume it's a fixed length encoding (1) with
1357 * a compatible encoding for the ASCII set, since
1358 * XML constructs only use < 128 chars
1361 if (*ctxt->input->cur == 0xD) {
1362 if (ctxt->input->cur[1] == 0xA) {
1368 return((int) *ctxt->input->cur);
1371 * An encoding problem may arise from a truncated input buffer
1372 * splitting a character in the middle. In that case do not raise
1373 * an error but return 0 to endicate an end of stream problem
1375 if (ctxt->input->end - ctxt->input->cur < 4) {
1381 * If we detect an UTF8 error that probably mean that the
1382 * input encoding didn't get properly advertised in the
1383 * declaration header. Report the error and switch the encoding
1384 * to ISO-Latin-1 (if you don't like this policy, just declare the
1387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1388 ctxt->sax->error(ctxt->userData,
1389 "Input is not proper UTF-8, indicate encoding !\n");
1390 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1391 ctxt->input->cur[0], ctxt->input->cur[1],
1392 ctxt->input->cur[2], ctxt->input->cur[3]);
1394 ctxt->wellFormed = 0;
1395 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1397 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1399 return((int) *ctxt->input->cur);
1403 * xmlStringCurrentChar:
1404 * @ctxt: the XML parser context
1405 * @cur: pointer to the beginning of the char
1406 * @len: pointer to the length of the char read
1408 * The current char value, if using UTF-8 this may actually span multiple
1409 * bytes in the input buffer.
1411 * Returns the current char value and its length
/*
 * Return the character found at cur, a pointer into a caller-supplied
 * string rather than into the parser input.
 *
 * A NULL ctxt is accepted and treated like a UTF-8 context. In UTF-8
 * mode a 2, 3 or 4 byte sequence is validated and assembled into val; a
 * value rejected by IS_CHAR is reported as out of range. In any other
 * charset, and on the error path, the raw byte at cur is returned.
 *
 * NOTE(review): the "Bytes:" diagnostic prints ctxt->input->cur[0..3]
 * and not the bytes at the cur argument, so it may describe a different
 * location than the one that failed - confirm whether this is intended.
 * NOTE(review): not every original line is visible in this listing (the
 * assignments to *len, the return of val, the encoding_error label and
 * any ctxt NULL guard around the ctxt-> accesses at original lines
 * 1469-1471 and 1498-1507 are missing); confirm against the complete file.
 */
1415 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
/* With no context, or a UTF-8 context, the bytes are decoded as UTF-8. */
1417 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
1419 * We are supposed to handle UTF8, check it's valid
1420 * From rfc2044: encoding of the Unicode values on UTF-8:
1422 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1423 * 0000 0000-0000 007F 0xxxxxxx
1424 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1425 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1427 * Check for the 0x110000 limit too
1434 if ((cur[1] & 0xc0) != 0x80)
1435 goto encoding_error;
1436 if ((c & 0xe0) == 0xe0) {
1438 if ((cur[2] & 0xc0) != 0x80)
1439 goto encoding_error;
1440 if ((c & 0xf0) == 0xf0) {
1441 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1442 goto encoding_error;
1445 val = (cur[0] & 0x7) << 18;
1446 val |= (cur[1] & 0x3f) << 12;
1447 val |= (cur[2] & 0x3f) << 6;
1448 val |= cur[3] & 0x3f;
1452 val = (cur[0] & 0xf) << 12;
1453 val |= (cur[1] & 0x3f) << 6;
1454 val |= cur[2] & 0x3f;
1459 val = (cur[0] & 0x1f) << 6;
1460 val |= cur[1] & 0x3f;
1462 if (!IS_CHAR(val)) {
1463 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1464 (ctxt->sax->error != NULL))
1465 ctxt->sax->error(ctxt->userData,
1466 "Char 0x%X out of allowed range\n",
1469 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1470 ctxt->wellFormed = 0;
1471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1478 return ((int) *cur);
1482 * Assume it's a fixed length encoding (1) with
1483 * a compatible encoding for the ASCII set, since
1484 * XML constructs only use < 128 chars
1487 return ((int) *cur);
1491 * If we detect an UTF8 error that probably mean that the
1492 * input encoding didn't get properly advertised in the
1493 * declaration header. Report the error and switch the encoding
1494 * to ISO-Latin-1 (if you don't like this policy, just declare the
1498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1499 ctxt->sax->error(ctxt->userData,
1500 "Input is not proper UTF-8, indicate encoding !\n");
1501 ctxt->sax->error(ctxt->userData,
1502 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1503 ctxt->input->cur[0], ctxt->input->cur[1],
1504 ctxt->input->cur[2], ctxt->input->cur[3]);
1506 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1507 ctxt->wellFormed = 0;
1511 return ((int) *cur);
1515 * xmlCopyCharMultiByte:
1516 * @out: pointer to an array of xmlChar
1517 * @val: the char value
1519 * append the char value in the array
1521 * Returns the number of xmlChar written
/*
 * Write val at out as UTF-8.
 *
 * For the multi-byte case the lead byte is chosen from the magnitude of
 * val (0xC0 below 0x800, 0xE0 below 0x10000, 0xF0 below 0x110000) and
 * the loop then emits one 10xxxxxx byte per remaining group of 6 bits;
 * the byte count out - savedout is returned. A value of 0x110000 or more
 * is reported through xmlGenericError. The last visible line stores val
 * as a single xmlChar.
 *
 * The caller must provide room at out; no size is passed in.
 * NOTE(review): not every original line is visible in this listing (the
 * test selecting the multi-byte path, the declaration of bits and the
 * return values of the error and single-byte paths are missing).
 */
1524 xmlCopyCharMultiByte(xmlChar *out, int val) {
1526 * We are supposed to handle UTF8, check it's valid
1527 * From rfc2044: encoding of the Unicode values on UTF-8:
1529 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1530 * 0000 0000-0000 007F 0xxxxxxx
1531 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1532 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1535 xmlChar *savedout = out;
1537 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1538 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1539 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1541 xmlGenericError(xmlGenericErrorContext,
1542 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1546 for ( ; bits >= 0; bits-= 6)
1547 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1548 return (out - savedout);
1550 *out = (xmlChar) val;
1556 * @len: Ignored, compatibility
1557 * @out: pointer to an array of xmlChar
1558 * @val: the char value
1560 * append the char value in the array
1562 * Returns the number of xmlChar written
/*
 * Compatibility entry point for writing one character at out. The len
 * parameter is marked ATTRIBUTE_UNUSED and is never read. The visible
 * lines either delegate to xmlCopyCharMultiByte or store val as a single
 * xmlChar.
 * NOTE(review): the test choosing between the two paths and the return
 * of the single-byte path are not visible in this listing.
 */
1566 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1567 /* the len parameter is ignored */
1569 return(xmlCopyCharMultiByte (out, val));
1571 *out = (xmlChar) val;
1575 /************************************************************************
1577 * Commodity functions to switch encodings *
1579 ************************************************************************/
1582 * xmlSwitchEncoding:
1583 * @ctxt: the parser context
1584 * @enc: the encoding value (number)
1586 * change the input functions when discovering the character encoding
1587 * of a given entity.
1589 * Returns 0 in case of success, -1 otherwise
/*
 * Switch the parser to the encoding identified by enc.
 *
 * First switch: XML_CHAR_ENCODING_ERROR reports "encoding unknown" and
 * clears wellFormed; NONE and UTF8 set ctxt->charset to UTF-8; the UTF8
 * and UTF16LE/BE cases skip the three bytes EF BB BF when they sit at
 * input->cur.
 * A handler is then requested with xmlGetCharEncodingHandler(enc). When
 * none is returned, a second switch reports the encodings that are not
 * supported; for the ISO-8859-1..9 cases it copies input->encoding into
 * ctxt->encoding (only when this is the sole input and no encoding is
 * recorded yet) and sets ctxt->charset to enc.
 * The last visible statements set ctxt->charset to UTF-8 and return the
 * result of xmlSwitchToEncoding(ctxt, handler).
 *
 * NOTE(review): three runtime messages look misspelled - "USC4" (twice,
 * for the UCS4 cases), "ISO-2022-JPnot supported" and "EUC-JPnot
 * supported" (missing space). They are left untouched here because they
 * are emitted at run time.
 * NOTE(review): not every original line is visible in this listing (the
 * switch headers, break/return statements and closing braces are
 * missing); confirm the fall-through behaviour in the complete file.
 */
1592 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1594 xmlCharEncodingHandlerPtr handler;
1597 case XML_CHAR_ENCODING_ERROR:
1598 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1600 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1601 ctxt->wellFormed = 0;
1602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1604 case XML_CHAR_ENCODING_NONE:
1605 /* let's assume it's UTF-8 without the XML decl */
1606 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1608 case XML_CHAR_ENCODING_UTF8:
1609 /* default encoding, no conversion should be needed */
1610 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1613 * Errata on XML-1.0 June 20 2001
1614 * Specific handling of the Byte Order Mark for
1617 if ((ctxt->input != NULL) &&
1618 (ctxt->input->cur[0] == 0xEF) &&
1619 (ctxt->input->cur[1] == 0xBB) &&
1620 (ctxt->input->cur[2] == 0xBF)) {
1621 ctxt->input->cur += 3;
1624 case XML_CHAR_ENCODING_UTF16LE:
1625 case XML_CHAR_ENCODING_UTF16BE:
1626 /*The raw input characters are encoded
1627 *in UTF-16. As we expect this function
1628 *to be called after xmlCharEncInFunc, we expect
1629 *ctxt->input->cur to contain UTF-8 encoded characters.
1630 *So the raw UTF16 Byte Order Mark
1631 *has also been converted into
1632 *an UTF-8 BOM. Let's skip that BOM.
1634 if ((ctxt->input != NULL) &&
1635 (ctxt->input->cur[0] == 0xEF) &&
1636 (ctxt->input->cur[1] == 0xBB) &&
1637 (ctxt->input->cur[2] == 0xBF)) {
1638 ctxt->input->cur += 3;
1644 handler = xmlGetCharEncodingHandler(enc);
1645 if (handler == NULL) {
1650 case XML_CHAR_ENCODING_ERROR:
1651 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1653 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1654 ctxt->wellFormed = 0;
1655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1656 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1658 case XML_CHAR_ENCODING_NONE:
1659 /* let's assume it's UTF-8 without the XML decl */
1660 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1662 case XML_CHAR_ENCODING_UTF8:
1663 case XML_CHAR_ENCODING_ASCII:
1664 /* default encoding, no conversion should be needed */
1665 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1667 case XML_CHAR_ENCODING_UTF16LE:
1669 case XML_CHAR_ENCODING_UTF16BE:
1671 case XML_CHAR_ENCODING_UCS4LE:
1672 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1674 ctxt->sax->error(ctxt->userData,
1675 "char encoding USC4 little endian not supported\n");
1677 case XML_CHAR_ENCODING_UCS4BE:
1678 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1680 ctxt->sax->error(ctxt->userData,
1681 "char encoding USC4 big endian not supported\n");
1683 case XML_CHAR_ENCODING_EBCDIC:
1684 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1686 ctxt->sax->error(ctxt->userData,
1687 "char encoding EBCDIC not supported\n");
1689 case XML_CHAR_ENCODING_UCS4_2143:
1690 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1692 ctxt->sax->error(ctxt->userData,
1693 "char encoding UCS4 2143 not supported\n");
1695 case XML_CHAR_ENCODING_UCS4_3412:
1696 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698 ctxt->sax->error(ctxt->userData,
1699 "char encoding UCS4 3412 not supported\n");
1701 case XML_CHAR_ENCODING_UCS2:
1702 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1704 ctxt->sax->error(ctxt->userData,
1705 "char encoding UCS2 not supported\n");
1707 case XML_CHAR_ENCODING_8859_1:
1708 case XML_CHAR_ENCODING_8859_2:
1709 case XML_CHAR_ENCODING_8859_3:
1710 case XML_CHAR_ENCODING_8859_4:
1711 case XML_CHAR_ENCODING_8859_5:
1712 case XML_CHAR_ENCODING_8859_6:
1713 case XML_CHAR_ENCODING_8859_7:
1714 case XML_CHAR_ENCODING_8859_8:
1715 case XML_CHAR_ENCODING_8859_9:
1717 * We used to keep the internal content in the
1718 * document encoding however this turns being unmaintainable
1719 * So xmlGetCharEncodingHandler() will return non-null
1720 * values for this now.
1722 if ((ctxt->inputNr == 1) &&
1723 (ctxt->encoding == NULL) &&
1724 (ctxt->input->encoding != NULL)) {
1725 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1727 ctxt->charset = enc;
1729 case XML_CHAR_ENCODING_2022_JP:
1730 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1732 ctxt->sax->error(ctxt->userData,
1733 "char encoding ISO-2022-JPnot supported\n");
1735 case XML_CHAR_ENCODING_SHIFT_JIS:
1736 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1738 ctxt->sax->error(ctxt->userData,
1739 "char encoding Shift_JIS not supported\n");
1741 case XML_CHAR_ENCODING_EUC_JP:
1742 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1744 ctxt->sax->error(ctxt->userData,
1745 "char encoding EUC-JPnot supported\n");
1749 if (handler == NULL)
1751 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1752 return(xmlSwitchToEncoding(ctxt, handler));
1756 * xmlSwitchToEncoding:
1757 * @ctxt: the parser context
1758 * @handler: the encoding handler
1760 * change the input functions when discovering the character encoding
1761 * of a given entity.
1763 * Returns 0 in case of success, -1 otherwise
/*
 * Install handler as the decoder of the current input and convert the
 * data already buffered.
 *
 * When the input has an I/O buffer:
 *  - if an encoder is already set, the visible lines test for the same
 *    handler and for both names starting with "UTF-16", and otherwise
 *    close the old encoder with xmlCharEncCloseFunc and replace it;
 *  - if none is set, handler becomes the encoder and, when the buffer
 *    already holds data, a byte-order mark matching the handler name is
 *    skipped (FF FE for UTF-16LE, FE FF for UTF-16BE, EF BB BF for
 *    UTF-8), the consumed bytes are removed, the buffer is moved to
 *    buf->raw, a fresh buffer is created and filled by xmlCharEncInFunc
 *    or xmlCharEncFirstLine, and input->cur is reset to the new content.
 * When input->length is 0 or input->buf is NULL, "no input" is reported.
 * The later visible lines copy the unread part of the input into a new
 * raw buffer, convert it with xmlCharEncInFunc, release the old base
 * through input->free when set, and point cur at the converted content.
 * The last visible statement sets ctxt->charset to UTF-8.
 *
 * NOTE(review): the results of xmlBufferCreate are used without a NULL
 * check in the visible lines - confirm whether hidden lines handle it.
 * NOTE(review): not every original line is visible in this listing (the
 * returns, the else branches, the test choosing between the two
 * conversion calls and the assignments to input->base and input->end are
 * missing); confirm against the complete file.
 */
1766 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1770 if (handler != NULL) {
1771 if (ctxt->input != NULL) {
1772 if (ctxt->input->buf != NULL) {
1773 if (ctxt->input->buf->encoder != NULL) {
1775 * Check in case the auto encoding detetection triggered
1778 if (ctxt->input->buf->encoder == handler)
1782 * "UTF-16" can be used for both LE and BE
1783 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1784 BAD_CAST "UTF-16", 6)) &&
1785 (!xmlStrncmp(BAD_CAST handler->name,
1786 BAD_CAST "UTF-16", 6))) {
1792 * Note: this is a bit dangerous, but that's what it
1793 * takes to use nearly compatible signature for different
1796 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1797 ctxt->input->buf->encoder = handler;
1800 ctxt->input->buf->encoder = handler;
1803 * Is there already some content down the pipe to convert ?
1805 if ((ctxt->input->buf->buffer != NULL) &&
1806 (ctxt->input->buf->buffer->use > 0)) {
1810 * Specific handling of the Byte Order Mark for
1813 if ((handler->name != NULL) &&
1814 (!strcmp(handler->name, "UTF-16LE")) &&
1815 (ctxt->input->cur[0] == 0xFF) &&
1816 (ctxt->input->cur[1] == 0xFE)) {
1817 ctxt->input->cur += 2;
1819 if ((handler->name != NULL) &&
1820 (!strcmp(handler->name, "UTF-16BE")) &&
1821 (ctxt->input->cur[0] == 0xFE) &&
1822 (ctxt->input->cur[1] == 0xFF)) {
1823 ctxt->input->cur += 2;
1826 * Errata on XML-1.0 June 20 2001
1827 * Specific handling of the Byte Order Mark for
1830 if ((handler->name != NULL) &&
1831 (!strcmp(handler->name, "UTF-8")) &&
1832 (ctxt->input->cur[0] == 0xEF) &&
1833 (ctxt->input->cur[1] == 0xBB) &&
1834 (ctxt->input->cur[2] == 0xBF)) {
1835 ctxt->input->cur += 3;
1839 * Shrink the current input buffer.
1840 * Move it as the raw buffer and create a new input buffer
1842 processed = ctxt->input->cur - ctxt->input->base;
1843 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1844 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1845 ctxt->input->buf->buffer = xmlBufferCreate();
1849 * convert as much as possible of the buffer
1851 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1852 ctxt->input->buf->buffer,
1853 ctxt->input->buf->raw);
1856 * convert just enough to get
1857 * '<?xml version="1.0" encoding="xxx"?>'
1858 * parsed with the autodetected encoding
1859 * into the parser reading buffer.
1861 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1862 ctxt->input->buf->buffer,
1863 ctxt->input->buf->raw);
1866 xmlGenericError(xmlGenericErrorContext,
1867 "xmlSwitchToEncoding: encoder error\n");
1871 ctxt->input->cur = ctxt->input->buf->buffer->content;
1873 &ctxt->input->base[ctxt->input->buf->buffer->use];
1878 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1880 * When parsing a static memory array one must know the
1881 * size to be able to convert the buffer.
1883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1884 ctxt->sax->error(ctxt->userData,
1885 "xmlSwitchToEncoding : no input\n");
1891 * Shrink the current input buffer.
1892 * Move it as the raw buffer and create a new input buffer
1894 processed = ctxt->input->cur - ctxt->input->base;
1896 ctxt->input->buf->raw = xmlBufferCreate();
1897 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1898 ctxt->input->length - processed);
1899 ctxt->input->buf->buffer = xmlBufferCreate();
1902 * convert as much as possible of the raw input
1903 * to the parser reading buffer.
1905 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1906 ctxt->input->buf->buffer,
1907 ctxt->input->buf->raw);
1909 xmlGenericError(xmlGenericErrorContext,
1910 "xmlSwitchToEncoding: encoder error\n");
1915 * Conversion succeeded, get rid of the old buffer
1917 if ((ctxt->input->free != NULL) &&
1918 (ctxt->input->base != NULL))
1919 ctxt->input->free((xmlChar *) ctxt->input->base);
1921 ctxt->input->cur = ctxt->input->buf->buffer->content;
1923 &ctxt->input->base[ctxt->input->buf->buffer->use];
1927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1928 ctxt->sax->error(ctxt->userData,
1929 "xmlSwitchToEncoding : no input\n");
1933 * The parsing is now done in UTF8 natively
1935 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1942 /************************************************************************
1944 * Commodity functions to handle entities processing *
1946 ************************************************************************/
1949 * xmlFreeInputStream:
1950 * @input: an xmlParserInputPtr
1952 * Free up an input stream.
/*
 * Release the resources attached to an input stream. A NULL input is
 * accepted and ignored.
 * NOTE(review): not every original line is visible in this listing; the
 * release of the input structure itself cannot be seen here.
 */
1955 xmlFreeInputStream(xmlParserInputPtr input) {
1956 if (input == NULL) return;
/* The four strings below are released with xmlFree when set. */
1958 if (input->filename != NULL) xmlFree((char *) input->filename);
1959 if (input->directory != NULL) xmlFree((char *) input->directory);
1960 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1961 if (input->version != NULL) xmlFree((char *) input->version);
/* The content at base is released only through the input's own free callback, when one is set. */
1962 if ((input->free != NULL) && (input->base != NULL))
1963 input->free((xmlChar *) input->base);
/* An attached I/O buffer is released with xmlFreeParserInputBuffer. */
1964 if (input->buf != NULL)
1965 xmlFreeParserInputBuffer(input->buf);
1970 * xmlNewInputStream:
1971 * @ctxt: an XML parser context
1973 * Create a new input stream structure
1974 * Returns the new input stream or NULL
/*
 * Allocate an xmlParserInput with xmlMalloc, clear it with memset and
 * set its standalone field to -1. On allocation failure XML_ERR_NO_MEMORY
 * is recorded in ctxt->errNo and the SAX error callback, if any, is
 * called.
 * NOTE(review): ctxt->errNo is assigned XML_ERR_NO_MEMORY twice on the
 * failure path (original lines 1983 and 1987); one looks redundant.
 * NOTE(review): not every original line is visible in this listing (the
 * return statements and some field initialisations are missing).
 */
1977 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1978 xmlParserInputPtr input;
1980 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1981 if (input == NULL) {
1983 ctxt->errNo = XML_ERR_NO_MEMORY;
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
1986 "malloc: couldn't allocate a new input stream\n");
1987 ctxt->errNo = XML_ERR_NO_MEMORY;
/* Every field starts at zero before the specific defaults are applied. */
1991 memset(input, 0, sizeof(xmlParserInput));
1994 input->standalone = -1;
1999 * xmlNewIOInputStream:
2000 * @ctxt: an XML parser context
2001 * @input: an I/O Input
2002 * @enc: the charset encoding if known
2004 * Create a new input stream structure encapsulating the @input into
2005 * a stream suitable for the parser.
2007 * Returns the new input stream or NULL
/*
 * Wrap an existing I/O input buffer into a new parser input stream.
 * The stream stores the input pointer itself (no copy) and its base and
 * cur both start at the beginning of the buffer content, with end placed
 * after the bytes in use. When enc is not XML_CHAR_ENCODING_NONE,
 * xmlSwitchEncoding(ctxt, enc) is called; its result is not checked in
 * the visible lines.
 * NOTE(review): input and input->buffer are dereferenced without a NULL
 * check in the visible lines - confirm callers guarantee both.
 * NOTE(review): not every original line is visible in this listing (the
 * failure return after xmlNewInputStream is missing).
 */
2010 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
2011 xmlCharEncoding enc) {
2012 xmlParserInputPtr inputStream;
2014 if (xmlParserDebugEntities)
2015 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
2016 inputStream = xmlNewInputStream(ctxt);
2017 if (inputStream == NULL) {
2020 inputStream->filename = NULL;
2021 inputStream->buf = input;
2022 inputStream->base = inputStream->buf->buffer->content;
2023 inputStream->cur = inputStream->buf->buffer->content;
2024 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
2025 if (enc != XML_CHAR_ENCODING_NONE) {
2026 xmlSwitchEncoding(ctxt, enc);
2029 return(inputStream);
2033 * xmlNewEntityInputStream:
2034 * @ctxt: an XML parser context
2035 * @entity: an Entity pointer
2037 * Create a new input stream based on an xmlEntityPtr
2039 * Returns the new input stream or NULL
/*
 * Build an input stream over the content of an entity.
 *
 * A NULL entity is reported as an internal error. When the entity has no
 * content, the result depends on entity->etype: unparsed entities and
 * content-less internal, parameter and predefined entities are reported
 * as errors, while external parsed and external parameter entities are
 * loaded through xmlLoadExternalEntity(URI, ExternalID, ctxt).
 * Otherwise a new stream is created whose base and cur point at
 * entity->content (no copy is made in the visible lines), with length
 * taken from entity->length and end placed after that many bytes.
 *
 * NOTE(review): input->filename is set to entity->URI itself, not to a
 * copy, while xmlFreeInputStream calls xmlFree on input->filename -
 * confirm the ownership of that string.
 * NOTE(review): ctxt->errNo is assigned XML_ERR_INTERNAL_ERROR twice in
 * the NULL-entity path (original lines 2046 and 2050).
 * NOTE(review): not every original line is visible in this listing (the
 * return statements and break statements are missing).
 */
2042 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2043 xmlParserInputPtr input;
2045 if (entity == NULL) {
2046 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2048 ctxt->sax->error(ctxt->userData,
2049 "internal: xmlNewEntityInputStream entity = NULL\n");
2050 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2053 if (xmlParserDebugEntities)
2054 xmlGenericError(xmlGenericErrorContext,
2055 "new input from entity: %s\n", entity->name);
2056 if (entity->content == NULL) {
2057 switch (entity->etype) {
2058 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2059 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2061 ctxt->sax->error(ctxt->userData,
2062 "xmlNewEntityInputStream unparsed entity !\n");
2064 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2065 case XML_EXTERNAL_PARAMETER_ENTITY:
2066 return(xmlLoadExternalEntity((char *) entity->URI,
2067 (char *) entity->ExternalID, ctxt));
2068 case XML_INTERNAL_GENERAL_ENTITY:
2069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2070 ctxt->sax->error(ctxt->userData,
2071 "Internal entity %s without content !\n", entity->name);
2073 case XML_INTERNAL_PARAMETER_ENTITY:
2074 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2076 ctxt->sax->error(ctxt->userData,
2077 "Internal parameter entity %s without content !\n", entity->name);
2079 case XML_INTERNAL_PREDEFINED_ENTITY:
2080 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2082 ctxt->sax->error(ctxt->userData,
2083 "Predefined entity %s without content !\n", entity->name);
2088 input = xmlNewInputStream(ctxt);
2089 if (input == NULL) {
2092 input->filename = (char *) entity->URI;
2093 input->base = entity->content;
2094 input->cur = entity->content;
2095 input->length = entity->length;
2096 input->end = &entity->content[input->length];
2101 * xmlNewStringInputStream:
2102 * @ctxt: an XML parser context
2103 * @buffer: a memory buffer
2105 * Create a new input stream based on a memory buffer.
2106 * Returns the new input stream
/*
 * Build an input stream over a zero-terminated memory buffer.
 * A NULL buffer is reported as an internal error. Otherwise the new
 * stream points base and cur directly at buffer (no copy is made in the
 * visible lines), takes its length from xmlStrlen(buffer) and places end
 * after that many bytes, so the buffer must stay valid while the stream
 * is in use.
 * NOTE(review): not every original line is visible in this listing (the
 * return statements are missing).
 */
2109 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2110 xmlParserInputPtr input;
2112 if (buffer == NULL) {
2113 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2115 ctxt->sax->error(ctxt->userData,
2116 "internal: xmlNewStringInputStream string = NULL\n");
2119 if (xmlParserDebugEntities)
2120 xmlGenericError(xmlGenericErrorContext,
2121 "new fixed input: %.30s\n", buffer);
2122 input = xmlNewInputStream(ctxt);
2123 if (input == NULL) {
2126 input->base = buffer;
2127 input->cur = buffer;
2128 input->length = xmlStrlen(buffer);
2129 input->end = &buffer[input->length];
2134 * xmlNewInputFromFile:
2135 * @ctxt: an XML parser context
2136 * @filename: the filename to use as entity
2138 * Create a new input stream based on a file.
2140 * Returns the new input stream or NULL in case of error
/*
 * Build an input stream reading from the named file.
 * The I/O buffer comes from xmlParserInputBufferCreateFilename with
 * XML_CHAR_ENCODING_NONE. The stream records a duplicate of filename as
 * its filename and the result of xmlParserGetDirectory as its directory;
 * base and cur start at the beginning of the buffer content. When
 * ctxt->directory is not set yet it receives its own copy of the
 * directory.
 * Returns NULL when ctxt is NULL; note that this test comes after the
 * debug message has been printed.
 * NOTE(review): when xmlNewInputStream fails, the visible lines free
 * directory and URI but show no release of buf - confirm whether a
 * hidden line handles it.
 * NOTE(review): not every original line is visible in this listing (the
 * handling of a NULL buf and the failure return are missing).
 */
2143 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2144 xmlParserInputBufferPtr buf;
2145 xmlParserInputPtr inputStream;
2146 char *directory = NULL;
2147 xmlChar *URI = NULL;
2149 if (xmlParserDebugEntities)
2150 xmlGenericError(xmlGenericErrorContext,
2151 "new input from file: %s\n", filename);
2152 if (ctxt == NULL) return(NULL);
2153 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2157 URI = xmlStrdup((xmlChar *) filename);
2158 directory = xmlParserGetDirectory((const char *) URI);
2160 inputStream = xmlNewInputStream(ctxt);
2161 if (inputStream == NULL) {
2162 if (directory != NULL) xmlFree((char *) directory);
2163 if (URI != NULL) xmlFree((char *) URI);
2167 inputStream->filename = (const char *) URI;
2168 inputStream->directory = directory;
2169 inputStream->buf = buf;
2171 inputStream->base = inputStream->buf->buffer->content;
2172 inputStream->cur = inputStream->buf->buffer->content;
2173 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
2174 if ((ctxt->directory == NULL) && (directory != NULL))
2175 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2176 return(inputStream);
2179 /************************************************************************
2181 * Commodity functions to handle parser contexts *
2183 ************************************************************************/
2186 * xmlInitParserCtxt:
2187 * @ctxt: an XML parser context
2189 * Initialize a parser context
2191 * Returns 0 in case of success and -1 in case of error
/*
 * Put a parser context into its initial state: a private copy of the
 * default SAX handler, freshly allocated input, node, name and space
 * stacks, and option fields taken from the library-wide default values.
 * Each allocation failure is reported through xmlGenericError.
 * NOTE(review): not every original line is visible in this listing (the
 * NULL test on ctxt, the return statements, the cleanup done after a
 * failed allocation and many field initialisations are missing).
 */
2195 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2198 xmlGenericError(xmlGenericErrorContext,
2199 "xmlInitParserCtxt: NULL context given\n");
2203 xmlDefaultSAXHandlerInit();
/* The context gets its own copy of xmlDefaultSAXHandler. */
2205 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2206 if (ctxt->sax == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "xmlInitParserCtxt: out of memory\n");
2212 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2214 /* Allocate the Input stack */
2215 ctxt->inputTab = (xmlParserInputPtr *)
2216 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2217 if (ctxt->inputTab == NULL) {
2218 xmlGenericError(xmlGenericErrorContext,
2219 "xmlInitParserCtxt: out of memory\n");
2229 ctxt->version = NULL;
2230 ctxt->encoding = NULL;
2231 ctxt->standalone = -1;
2232 ctxt->hasExternalSubset = 0;
2233 ctxt->hasPErefs = 0;
2236 ctxt->instate = XML_PARSER_START;
2238 ctxt->directory = NULL;
2240 /* Allocate the Node stack */
2241 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2242 if (ctxt->nodeTab == NULL) {
2243 xmlGenericError(xmlGenericErrorContext,
2244 "xmlInitParserCtxt: out of memory\n");
2257 /* Allocate the Name stack */
2258 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2259 if (ctxt->nameTab == NULL) {
2260 xmlGenericError(xmlGenericErrorContext,
2261 "xmlInitParserCtxt: out of memory\n");
2277 /* Allocate the space stack */
2278 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2279 if (ctxt->spaceTab == NULL) {
2280 xmlGenericError(xmlGenericErrorContext,
2281 "xmlInitParserCtxt: out of memory\n");
/* The space stack starts with a single entry holding -1. */
2297 ctxt->spaceMax = 10;
2298 ctxt->spaceTab[0] = -1;
2299 ctxt->space = &ctxt->spaceTab[0];
/* The context itself is the user data handed to the SAX callbacks. */
2300 ctxt->userData = ctxt;
2302 ctxt->wellFormed = 1;
/* Parsing options are seeded from the library default values. */
2304 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2305 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2306 ctxt->pedantic = xmlPedanticParserDefaultValue;
2307 ctxt->linenumbers = xmlLineNumbersDefaultValue;
2308 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
/* When blanks are not kept, the ignorableWhitespace SAX slot is set to the ignorableWhitespace function. */
2309 if (ctxt->keepBlanks == 0)
2310 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
/* Validity errors and warnings are routed to the parser validity callbacks. */
2312 ctxt->vctxt.userData = ctxt;
2313 ctxt->vctxt.error = xmlParserValidityError;
2314 ctxt->vctxt.warning = xmlParserValidityWarning;
/* When validating, the warning callback is dropped if xmlGetWarningsDefaultValue is 0. */
2315 if (ctxt->validate) {
2316 if (xmlGetWarningsDefaultValue == 0)
2317 ctxt->vctxt.warning = NULL;
2319 ctxt->vctxt.warning = xmlParserValidityWarning;
2320 ctxt->vctxt.nodeMax = 0;
2322 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2323 ctxt->record_info = 0;
2325 ctxt->checkIndex = 0;
2327 ctxt->errNo = XML_ERR_OK;
/* Parsing starts in UTF-8 until an encoding switch says otherwise. */
2329 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2330 ctxt->catalogs = NULL;
2331 xmlInitNodeInfoSeq(&ctxt->node_seq);
2336 * xmlFreeParserCtxt:
2337 * @ctxt: an XML parser context
2339 * Free all the memory used by a parser context. However the parsed
2340 * document in ctxt->myDoc is not freed.
/*
 * xmlFreeParserCtxt: release the resources held by a parser context.
 * The visible lines free the input streams still on the input stack, drain
 * the name stack, then free the stack arrays and the strings hanging off
 * the context, each behind a NULL check.
 * NOTE(review): several lines are missing from this listing (numbering
 * gaps), including the body of the name-stack loop, the statement guarded
 * by the ctxt->sax test, and whatever releases the context itself.
 */
2344 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2346 xmlParserInputPtr input;
/* A NULL context is accepted and ignored. */
2349 if (ctxt == NULL) return;
/* Pop and free every input stream still on the stack. */
2351 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2352 xmlFreeInputStream(input);
/* Drain the name stack (what is done with each popped name is not visible here). */
2354 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
/* Release the stack arrays, then the strings referenced by the context. */
2357 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2358 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2359 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2360 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2361 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2362 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2363 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2364 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2365 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
/* The shared xmlDefaultSAXHandler table must never be released; the guarded
 * statement is on a missing line (presumably it frees ctxt->sax -- confirm). */
2366 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2368 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2369 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
/* Catalog cleanup only exists in catalog-enabled builds. */
2370 #ifdef LIBXML_CATALOG_ENABLED
2371 if (ctxt->catalogs != NULL)
2372 xmlCatalogFreeLocal(ctxt->catalogs);
2380 * Allocate and initialize a new parser context.
2382 * Returns the xmlParserCtxtPtr or NULL
/*
 * Body of the context allocator; its signature line is missing from this
 * listing, and the name xmlNewParserCtxt is taken from the error message
 * below. It allocates an xmlParserCtxt, zeroes it and initialises it with
 * xmlInitParserCtxt, freeing the context when initialisation fails.
 * NOTE(review): the allocation-failure test and the return statements are
 * on missing lines.
 */
2388 xmlParserCtxtPtr ctxt;
2390 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
/* NOTE(review): "malloc failed" has no trailing newline, unlike the other messages. */
2392 xmlGenericError(xmlGenericErrorContext,
2393 "xmlNewParserCtxt : cannot allocate context\n");
2394 xmlGenericError(xmlGenericErrorContext, "malloc failed");
/* Zero first: if xmlInitParserCtxt fails part-way, every pointer that
 * xmlFreeParserCtxt tests is either NULL or a valid allocation. */
2397 memset(ctxt, 0, sizeof(xmlParserCtxt));
2398 if (xmlInitParserCtxt(ctxt) < 0) {
2399 xmlFreeParserCtxt(ctxt);
2405 /************************************************************************
2407 * Handling of node information *
2409 ************************************************************************/
2412 * xmlClearParserCtxt:
2413 * @ctxt: an XML parser context
2415 * Clear (release owned resources) and reinitialize a parser context
/*
 * xmlClearParserCtxt: release the node-info sequence of the context and
 * run xmlInitParserCtxt on it again.
 * NOTE(review): xmlInitParserCtxt unconditionally allocates a new SAX table
 * and new input/node/name/space stacks. Only node_seq is released in the
 * visible lines, so the previous buffers look leaked unless the missing
 * lines (2420-2422) free them -- confirm.
 */
2419 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2423 xmlClearNodeInfoSeq(&ctxt->node_seq);
2424 xmlInitParserCtxt(ctxt);
2428 * xmlParserFindNodeInfo:
2429 * @ctx: an XML parser context
2430 * @node: an XML node within the tree
2432 * Find the parser node info struct for a given node
2434 * Returns an xmlParserNodeInfo block pointer or NULL
/*
 * xmlParserFindNodeInfo: look up the info record stored for node in
 * ctx->node_seq and return a pointer to it when present.
 * NOTE(review): the declaration of pos and the not-found return are on
 * lines missing from this listing.
 */
2436 const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2437 const xmlNodePtr node)
2441 /* Find position where node should be at */
2442 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
/* The index may only be an insertion point, so check both the bound and the
 * stored node before treating it as a hit. */
2443 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
2444 return &ctx->node_seq.buffer[pos];
2451 * xmlInitNodeInfoSeq:
2452 * @seq: a node info sequence pointer
2454 * -- Initialize (set to initial state) node info sequence
/*
 * xmlInitNodeInfoSeq: put a node info sequence into its initial state.
 * NOTE(review): only the signature survives in this listing; the body
 * (lines 2458-2462) is missing, so the fields it resets cannot be confirmed.
 */
2457 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2465 * xmlClearNodeInfoSeq:
2466 * @seq: a node info sequence pointer
2468 * -- Clear (release memory and reinitialize) node info sequence
/*
 * xmlClearNodeInfoSeq: free the record buffer of the sequence, if any,
 * then hand the sequence to xmlInitNodeInfoSeq to reset it.
 */
2472 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2474 if ( seq->buffer != NULL )
2475 xmlFree(seq->buffer);
/* The freed pointer is still stored in seq->buffer here; presumably the
 * reset below clears it -- confirm against xmlInitNodeInfoSeq. */
2476 xmlInitNodeInfoSeq(seq);
2481 * xmlParserFindNodeInfoIndex:
2482 * @seq: a node info sequence pointer
2483 * @node: an XML node pointer
2486 * xmlParserFindNodeInfoIndex : Find the index at which the info record for
2487 * the given node is, or should be inserted, in the sorted sequence
2489 * Returns an unsigned long giving the position of the record
/*
 * xmlParserFindNodeInfoIndex: binary search over seq->buffer, whose records
 * are compared by node pointer value.
 * The search positions are 1-based: each probe reads buffer[middle - 1].
 * NOTE(review): the initial values of lower, middle and found, the branch
 * bodies and the return statements are on lines missing from this listing.
 */
2491 unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2492 const xmlNodePtr node)
2494 unsigned long upper, lower, middle;
2497 /* Do a binary search for the key */
2499 upper = seq->length;
/* The midpoint is computed as lower + (upper - lower) / 2 rather than
 * (lower + upper) / 2, which cannot overflow. */
2501 while ( lower <= upper && !found) {
2502 middle = lower + (upper - lower) / 2;
2503 if ( node == seq->buffer[middle - 1].node )
2505 else if ( node < seq->buffer[middle - 1].node )
2511 /* Return position */
/* middle == 0 is tested first so buffer[middle - 1] is never read when no
 * probe was made. */
2512 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2520 * xmlParserAddNodeInfo:
2521 * @ctxt: an XML parser context
2522 * @info: a node info sequence pointer
2524 * Insert node info record into the sorted sequence
/*
 * xmlParserAddNodeInfo: store *info in ctxt->node_seq at the position
 * reported by xmlParserFindNodeInfoIndex. A record already present for the
 * same node is overwritten in place; otherwise the buffer is grown when
 * needed, later records are shifted up by one and the new record is copied in.
 * NOTE(review): braces, else lines and the early return after an allocation
 * failure are on lines missing from this listing.
 */
2527 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2528 const xmlParserNodeInfoPtr info)
2532 /* Find pos and check to see if node is already in the sequence */
2533 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2535 if (pos < ctxt->node_seq.length
2536 && ctxt->node_seq.buffer[pos].node == info->node) {
2537 ctxt->node_seq.buffer[pos] = *info;
2540 /* Otherwise, we need to add new node to buffer */
/* Growth policy: an empty sequence starts from a maximum of 2, and the
 * request is always for twice the current maximum. */
2542 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2543 xmlParserNodeInfo *tmp_buffer;
/* NOTE(review): byte_size is an unsigned int; for a very large maximum the
 * product below could wrap -- confirm the type of node_seq.maximum. */
2544 unsigned int byte_size;
2546 if (ctxt->node_seq.maximum == 0)
2547 ctxt->node_seq.maximum = 2;
2548 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2549 (2 * ctxt->node_seq.maximum));
2551 if (ctxt->node_seq.buffer == NULL)
2552 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2555 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
/* The result went to tmp_buffer, so on failure node_seq.buffer still holds
 * the old block; the failure is reported through the SAX error callback. */
2558 if (tmp_buffer == NULL) {
2559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2560 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2561 ctxt->errNo = XML_ERR_NO_MEMORY;
2564 ctxt->node_seq.buffer = tmp_buffer;
2565 ctxt->node_seq.maximum *= 2;
2568 /* If position is not at end, move elements out of the way */
2569 if (pos != ctxt->node_seq.length) {
/* The copy runs from the top down so no record is overwritten before it moves. */
2572 for (i = ctxt->node_seq.length; i > pos; i--)
2573 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2576 /* Copy element and increase length */
2577 ctxt->node_seq.buffer[pos] = *info;
2578 ctxt->node_seq.length++;
2582 /************************************************************************
2584 * Defaults settings *
2586 ************************************************************************/
2588 * xmlPedanticParserDefault:
2591 * Set and return the previous value for enabling pedantic warnings.
2593 * Returns the previous value of the setting: 0 for disabled, 1 for enabled.
/*
 * xmlPedanticParserDefault: store val in xmlPedanticParserDefaultValue,
 * after saving the value it replaces in old.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably it returns old.
 */
2597 xmlPedanticParserDefault(int val) {
2598 int old = xmlPedanticParserDefaultValue;
2600 xmlPedanticParserDefaultValue = val;
2605 * xmlLineNumbersDefault:
2608 * Set and return the previous value for enabling line numbers in elements
2609 * contents. This may break old applications and is turned off by default.
2611 * Returns the previous value of the setting: 0 for disabled, 1 for enabled.
/*
 * xmlLineNumbersDefault: store val in xmlLineNumbersDefaultValue, after
 * saving the value it replaces in old.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably it returns old.
 */
2615 xmlLineNumbersDefault(int val) {
2616 int old = xmlLineNumbersDefaultValue;
2618 xmlLineNumbersDefaultValue = val;
2623 * xmlSubstituteEntitiesDefault:
2626 * Set and return the previous value for default entity support.
2627 * Initially the parser always keeps entity references instead of substituting
2628 * entity values in the output. This function has to be used to change the
2629 * default parser behavior.
2630 * SAX::substituteEntities() has to be used for changing that on a file by file basis.
2633 * Returns the previous value: 0 for no substitution, 1 for substitution.
/*
 * xmlSubstituteEntitiesDefault: store val in
 * xmlSubstituteEntitiesDefaultValue, after saving the value it replaces in old.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably it returns old.
 */
2637 xmlSubstituteEntitiesDefault(int val) {
2638 int old = xmlSubstituteEntitiesDefaultValue;
2640 xmlSubstituteEntitiesDefaultValue = val;
2645 * xmlKeepBlanksDefault:
2648 * Set and return the previous value for default blanks text nodes support.
2649 * The 1.x version of the parser used a heuristic to try to detect
2650 * ignorable white space. As a result the SAX interface was generating
2651 * ignorableWhitespace() callbacks instead of characters() ones, and when
2652 * using the DOM output, text nodes containing those blanks were not generated.
2653 * The 2.x and later versions follow the XML standard:
2654 * ignorableWhitespace() callbacks are only generated when running the parser in
2655 * validating mode and when the current element doesn't allow CDATA or mixed content.
2657 * This function is provided as a way to force the standard behavior
2658 * on 1.X libs and to switch back to the old mode for compatibility when
2659 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2660 * by using xmlIsBlankNode() commodity function to detect the "empty"
2662 * This value also affects the automatic generation of indentation when
2663 * saving: if blank sections are kept, indentation is not generated.
2665 * Returns the previous value: 0 if blanks were not kept, 1 if they were kept.
/*
 * xmlKeepBlanksDefault: store val in xmlKeepBlanksDefaultValue, after
 * saving the value it replaces in old.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably it returns old.
 */
2669 xmlKeepBlanksDefault(int val) {
2670 int old = xmlKeepBlanksDefaultValue;
2672 xmlKeepBlanksDefaultValue = val;
/* Side effect: tree-output indentation is set to the logical negation of
 * val, so keeping blanks turns automatic indentation off. */
2673 xmlIndentTreeOutput = !val;
2677 /************************************************************************
2679 * Deprecated functions kept for compatibility *
2681 ************************************************************************/
2684 * xmlCheckLanguageID:
2685 * @lang: pointer to the string value
2687 * Checks that the value conforms to the LanguageID production:
2689 * NOTE: this is somewhat deprecated, those productions were removed from
2690 * the XML Second edition.
2692 * [33] LanguageID ::= Langcode ('-' Subcode)*
2693 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2694 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2695 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2696 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2697 * [38] Subcode ::= ([a-z] | [A-Z])+
2699 * Returns 1 if correct 0 otherwise
/*
 * xmlCheckLanguageID: check lang against the LanguageID productions.
 * The visible tests recognise three openings: an "i-"/"I-" prefix followed
 * by ASCII letters, an "x-"/"X-" prefix followed by ASCII letters, or a
 * leading ASCII letter followed by a second letter test. A final loop then
 * runs until the terminating 0, accepting further runs of ASCII letters.
 * NOTE(review): the pointer advances, the separator test inside the final
 * loop and every return statement are on lines missing from this listing.
 */
2702 xmlCheckLanguageID(const xmlChar *lang) {
2703 const xmlChar *cur = lang;
/* IanaCode opening: 'i' or 'I' followed by '-'. */
2707 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2708 ((cur[0] == 'I') && (cur[1] == '-'))) {
2713 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2714 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2716 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2717 ((cur[0] == 'X') && (cur[1] == '-'))) {
2722 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2723 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2725 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2726 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2731 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2732 ((cur[0] >= 'a') && (cur[0] <= 'z')))
/* Remaining text: presumably '-'-separated Subcodes per production [33]. */
2738 while (cur[0] != 0) { /* non input consuming */
2742 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2743 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2747 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2748 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2755 * xmlDecodeEntities:
2756 * @ctxt: the parser context
2757 * @len: the len to decode (in bytes !), -1 for no size limit
2758 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2759 * @end: an end marker xmlChar, 0 if none
2760 * @end2: an end marker xmlChar, 0 if none
2761 * @end3: an end marker xmlChar, 0 if none
2763 * This function is deprecated, we now always process entities content
2764 * through xmlStringDecodeEntities
2766 * TODO: remove it in next major release.
2768 * [67] Reference ::= EntityRef | CharRef
2770 * [69] PEReference ::= '%' Name ';'
2772 * Returns a newly allocated string with the substitution done. The caller
2773 * must deallocate it !
/*
 * xmlDecodeEntities: deprecated entity decoder. The visible text reports
 * the deprecation through xmlGenericError and contains the old decoding
 * loop: it copies input into a growable buffer, expanding character
 * references, entity references (when XML_SUBSTITUTE_REF is set in what)
 * and stopping at parameter-entity references (XML_SUBSTITUTE_PEREF).
 * NOTE(review): every parameter is marked ATTRIBUTE_UNUSED although the
 * visible body uses them; presumably the decoding code is compiled out by
 * preprocessor lines missing from this listing -- confirm. Braces, buffer
 * growth statements and returns are also on missing lines.
 * Fix: the entity-reference branch read "else if (c == '&') &&", which has
 * unbalanced parentheses; the opening parenthesis is restored.
 */
2776 xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2777 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
2779 xmlChar *buffer = NULL;
2780 unsigned int buffer_size = 0;
2781 unsigned int nbchars = 0;
2783 xmlChar *current = NULL;
/* len == -1 ("no size limit") becomes the largest unsigned value here. */
2785 unsigned int max = (unsigned int) len;
2789 static int deprecated = 0;
2791 xmlGenericError(xmlGenericErrorContext,
2792 "xmlDecodeEntities() deprecated function reached\n");
/* Nesting deeper than 40 levels is treated as an entity reference loop. */
2797 if (ctxt->depth > 40) {
2798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2799 ctxt->sax->error(ctxt->userData,
2800 "Detected entity reference loop\n");
2801 ctxt->wellFormed = 0;
2802 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2803 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2808 * allocate a translation buffer.
2810 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2811 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2812 if (buffer == NULL) {
2813 xmlGenericError(xmlGenericErrorContext,
2814 "xmlDecodeEntities: malloc failed");
2819 * OK loop until we reach one of the ending char or a size limit.
2823 while ((nbchars < max) && (c != end) && /* NOTUSED */
2824 (c != end2) && (c != end3)) {
2827 if ((c == '&') && (NXT(1) == '#')) {
2828 int val = xmlParseCharRef(ctxt);
2829 COPY_BUF(0,buffer,nbchars,val);
2831 } else if ((c == '&') &&
2832 (what & XML_SUBSTITUTE_REF)) {
2833 if (xmlParserDebugEntities)
2834 xmlGenericError(xmlGenericErrorContext,
2835 "decoding Entity Reference\n");
2836 ent = xmlParseEntityRef(ctxt);
/* A known entity is expanded only when substitution is enabled on the
 * context; otherwise it is written back as "&name;". */
2837 if ((ent != NULL) &&
2838 (ctxt->replaceEntities != 0)) {
2839 current = ent->content;
2840 while (*current != 0) { /* non input consuming loop */
2841 buffer[nbchars++] = *current++;
2842 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2846 } else if (ent != NULL) {
2847 const xmlChar *cur = ent->name;
2849 buffer[nbchars++] = '&';
2850 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2853 while (*cur != 0) { /* non input consuming loop */
2854 buffer[nbchars++] = *cur++;
2856 buffer[nbchars++] = ';';
2858 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2860 * a PEReference induce to switch the entity flow,
2861 * we break here to flush the current set of chars
2862 * parsed if any. We will be called back later.
2864 if (xmlParserDebugEntities)
2865 xmlGenericError(xmlGenericErrorContext,
2866 "decoding PE Reference\n");
2867 if (nbchars != 0) break;
2869 xmlParsePEReference(ctxt);
2872 * Pop-up of finished entities.
2874 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2879 COPY_BUF(l,buffer,nbchars,c);
2881 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
/* The result is terminated with a 0 byte. */
2887 buffer[nbchars++] = 0;
2894 * xmlNamespaceParseNCName:
2895 * @ctxt: an XML parser context
2897 * parse an XML namespace name.
2899 * TODO: this seems not in use anymore, the namespace handling is done on
2900 * top of the SAX interfaces, i.e. not on raw input.
2902 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2904 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2905 * CombiningChar | Extender
2907 * Returns the namespace name or NULL
/*
 * xmlNamespaceParseNCName: deprecated NCName reader. The visible text
 * reports the deprecation through xmlGenericError and contains the old
 * loop: it requires a letter or '_' first, copies name characters into a
 * stack buffer and returns a heap copy of the collected bytes via xmlStrndup.
 * NOTE(review): ctxt is marked ATTRIBUTE_UNUSED although the visible body
 * reads input through macros; presumably the parsing code is compiled out
 * by preprocessor lines missing from this listing -- confirm.
 */
2911 xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
/* The 5 bytes beyond XML_MAX_NAMELEN presumably leave room for one last
 * multi-byte character written by COPY_BUF before the length test. */
2913 xmlChar buf[XML_MAX_NAMELEN + 5];
2915 int cur = CUR_CHAR(l);
2918 static int deprecated = 0;
2920 xmlGenericError(xmlGenericErrorContext,
2921 "xmlNamespaceParseNCName() deprecated function reached\n");
2926 /* load first the value of the char !!! */
2928 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2930 xmlGenericError(xmlGenericErrorContext,
2931 "xmlNamespaceParseNCName: reached loop 3\n");
2932 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2933 (cur == '.') || (cur == '-') ||
2935 (IS_COMBINING(cur)) ||
2936 (IS_EXTENDER(cur))) {
2937 COPY_BUF(l,buf,len,cur);
/* Once the limit is hit, a second loop tests the same character classes
 * again; its body is on missing lines. */
2940 if (len >= XML_MAX_NAMELEN) {
2941 xmlGenericError(xmlGenericErrorContext,
2942 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2943 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2944 (cur == '.') || (cur == '-') ||
2946 (IS_COMBINING(cur)) ||
2947 (IS_EXTENDER(cur))) {
2954 return(xmlStrndup(buf, len));
2960 * xmlNamespaceParseQName:
2961 * @ctxt: an XML parser context
2962 * @prefix: a xmlChar **
2964 * TODO: this seems not in use anymore, the namespace handling is done on
2965 * top of the SAX interfaces, i.e. not on raw input.
2967 * parse an XML qualified name
2969 * [NS 5] QName ::= (Prefix ':')? LocalPart
2971 * [NS 6] Prefix ::= NCName
2973 * [NS 7] LocalPart ::= NCName
2975 * Returns the local part, and prefix is updated
2976 * to get the Prefix if any.
/*
 * xmlNamespaceParseQName: deprecated QName reader. The visible text reports
 * the deprecation through xmlGenericError and calls xmlNamespaceParseNCName
 * twice, storing each result in ret.
 * NOTE(review): how the first result becomes *prefix, and the ':' test
 * between the two calls, are on lines missing from this listing. Both
 * parameters are marked ATTRIBUTE_UNUSED, so the parsing code is presumably
 * compiled out -- confirm.
 */
2980 xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
2982 static int deprecated = 0;
2984 xmlGenericError(xmlGenericErrorContext,
2985 "xmlNamespaceParseQName() deprecated function reached\n");
2990 xmlChar *ret = NULL;
2993 ret = xmlNamespaceParseNCName(ctxt);
2997 ret = xmlNamespaceParseNCName(ctxt);
3006 * xmlNamespaceParseNSDef:
3007 * @ctxt: an XML parser context
3009 * parse a namespace prefix declaration
3011 * TODO: this seems not in use anymore, the namespace handling is done on
3012 * top of the SAX interfaces, i.e. not on raw input.
3014 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3016 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
3018 * Returns the namespace name
/*
 * xmlNamespaceParseNSDef: deprecated namespace-declaration reader. The
 * visible text reports the deprecation through xmlGenericError, tests the
 * input for the letters 'x' 'm' 'l' 'n' and reads a name with
 * xmlNamespaceParseNCName.
 * NOTE(review): the rest of the "xmlns" test and the return are on lines
 * missing from this listing. ctxt is marked ATTRIBUTE_UNUSED, so the
 * parsing code is presumably compiled out -- confirm.
 */
3022 xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3023 static int deprecated = 0;
3025 xmlGenericError(xmlGenericErrorContext,
3026 "xmlNamespaceParseNSDef() deprecated function reached\n");
3031 xmlChar *name = NULL;
3033 if ((RAW == 'x') && (NXT(1) == 'm') &&
3034 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3039 name = xmlNamespaceParseNCName(ctxt);
3047 * xmlParseQuotedString:
3048 * @ctxt: an XML parser context
3050 * Parse and return a string between quotes or doublequotes
3052 * TODO: Deprecated, to be removed at next drop of binary compatibility
3054 * Returns the string parsed or NULL.
/*
 * xmlParseQuotedString: deprecated reader for a string between double or
 * single quotes. The visible text reports the deprecation through
 * xmlGenericError and contains the old code: it allocates a buffer of
 * XML_PARSER_BUFFER_SIZE bytes, copies characters until the closing quote,
 * growing the buffer as needed, and raises XML_ERR_STRING_NOT_CLOSED when
 * the string is not terminated.
 * NOTE(review): ctxt is marked ATTRIBUTE_UNUSED although the visible body
 * uses it; presumably the parsing code is compiled out by preprocessor
 * lines missing from this listing -- confirm. Size updates, NULL tests and
 * returns are on missing lines too.
 */
3057 xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3058 static int deprecated = 0;
3060 xmlGenericError(xmlGenericErrorContext,
3061 "xmlParseQuotedString() deprecated function reached\n");
3067 xmlChar *buf = NULL;
3069 int size = XML_PARSER_BUFFER_SIZE;
3072 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3074 xmlGenericError(xmlGenericErrorContext,
3075 "malloc of %d byte failed\n", size);
3078 xmlGenericError(xmlGenericErrorContext,
3079 "xmlParseQuotedString: reached loop 4\n");
3083 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
/* NOTE(review): the xmlRealloc result is assigned straight back to buf, so
 * the old block is lost if the call fails. */
3084 if (len + 5 >= size) {
3086 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3088 xmlGenericError(xmlGenericErrorContext,
3089 "realloc of %d byte failed\n", size);
3093 COPY_BUF(l,buf,len,c);
3098 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3100 ctxt->sax->error(ctxt->userData,
3101 "String not closed \"%.50s\"\n", buf);
3102 ctxt->wellFormed = 0;
3103 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3107 } else if (RAW == '\''){
3110 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
/* NOTE(review): this branch keeps 1 byte of headroom where the double-quote
 * branch keeps 5. The copy statement is not visible; if it can write a
 * multi-byte character the margin may be too small -- confirm. */
3111 if (len + 1 >= size) {
3113 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3115 xmlGenericError(xmlGenericErrorContext,
3116 "realloc of %d byte failed\n", size);
3125 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3127 ctxt->sax->error(ctxt->userData,
3128 "String not closed \"%.50s\"\n", buf);
3129 ctxt->wellFormed = 0;
3130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3140 * xmlParseNamespace:
3141 * @ctxt: an XML parser context
3143 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3145 * This is what the older xml-name Working Draft specified, a bunch of
3146 * other stuff may still rely on it, so support is still here as
3147 * if it was declared on the root of the Tree:-(
3149 * TODO: remove from library
3151 * To be removed at next drop of binary compatibility
/*
 * xmlParseNamespace: deprecated parser for the old '<?namespace ...'
 * processing instruction. The visible text reports the deprecation through
 * xmlGenericError and contains the old loop: it scans attributes up to '>',
 * reading "ns"/"href" into href and "prefix"/"AS" into prefix with
 * xmlParseQuotedString, then passes both to the SAX globalNamespace
 * callback and frees them.
 * NOTE(review): ctxt is marked ATTRIBUTE_UNUSED although the visible body
 * uses it; presumably the parsing code is compiled out by preprocessor
 * lines missing from this listing -- confirm. The SKIP/blank-handling
 * statements between the tests are on missing lines too.
 */
3155 xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3156 static int deprecated = 0;
3158 xmlGenericError(xmlGenericErrorContext,
3159 "xmlParseNamespace() deprecated function reached\n");
3164 xmlChar *href = NULL;
3165 xmlChar *prefix = NULL;
3169 * We just skipped "namespace" or "xml:namespace"
3173 xmlGenericError(xmlGenericErrorContext,
3174 "xmlParseNamespace: reached loop 5\n");
3175 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3177 * We can have "ns" or "prefix" attributes
3178 * Old encoding as 'href' or 'AS' attributes is still supported
3180 if ((RAW == 'n') && (NXT(1) == 's')) {
3185 if (RAW != '=') continue;
/* NOTE(review): a repeated "ns"/"href" (or "prefix"/"AS") attribute would
 * overwrite the earlier string without freeing it, as far as the visible
 * lines show. */
3189 href = xmlParseQuotedString(ctxt);
3191 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3192 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3197 if (RAW != '=') continue;
3201 href = xmlParseQuotedString(ctxt);
3203 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3204 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3205 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3210 if (RAW != '=') continue;
3214 prefix = xmlParseQuotedString(ctxt);
3216 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3221 if (RAW != '=') continue;
3225 prefix = xmlParseQuotedString(ctxt);
3227 } else if ((RAW == '?') && (NXT(1) == '>')) {
3232 * Found garbage when parsing the namespace
3235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData,
3237 "xmlParseNamespace found garbage\n");
3239 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3240 ctxt->wellFormed = 0;
3241 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3246 MOVETO_ENDTAG(CUR_PTR);
/* Hand the collected pair to the application, then release the strings. */
3252 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3253 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3256 if (prefix != NULL) xmlFree(prefix);
3257 if (href != NULL) xmlFree(href);
3263 * @ctxt: an XML parser context
3265 * Trickery: parse an XML name but without consuming the input flow
3266 * Needed for rollback cases. Used only when parsing entities references.
3268 * TODO: seems deprecated now, only used in the default part of
3269 * xmlParserHandleReference
3271 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3272 * CombiningChar | Extender
3274 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3276 * [6] Names ::= Name (S Name)*
3278 * Returns the Name parsed or NULL
/*
 * xmlScanName: deprecated look-ahead name scanner. The visible text reports
 * the deprecation through xmlGenericError and contains the old code: it
 * inspects the input through NXT(len) look-ahead, copies name characters
 * into a stack buffer and returns a heap copy of them via xmlStrndup.
 * NOTE(review): ctxt is marked ATTRIBUTE_UNUSED although the macros
 * presumably read from it, so the parsing code is presumably compiled out
 * by preprocessor lines missing from this listing -- confirm. The
 * increments of len and the early return are on missing lines too.
 */
3282 xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3283 static int deprecated = 0;
3285 xmlGenericError(xmlGenericErrorContext,
3286 "xmlScanName() deprecated function reached\n");
3292 xmlChar buf[XML_MAX_NAMELEN];
3296 if (!IS_LETTER(RAW) && (RAW != '_') &&
3302 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3303 (NXT(len) == '.') || (NXT(len) == '-') ||
3304 (NXT(len) == '_') || (NXT(len) == ':') ||
3305 (IS_COMBINING(NXT(len))) ||
3306 (IS_EXTENDER(NXT(len)))) {
/* One byte is stored per step, and the limit test below stops copying once
 * the buffer is full. */
3308 buf[len] = NXT(len);
3310 if (len >= XML_MAX_NAMELEN) {
3311 xmlGenericError(xmlGenericErrorContext,
3312 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3313 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3314 (IS_DIGIT(NXT(len))) ||
3315 (NXT(len) == '.') || (NXT(len) == '-') ||
3316 (NXT(len) == '_') || (NXT(len) == ':') ||
3317 (IS_COMBINING(NXT(len))) ||
3318 (IS_EXTENDER(NXT(len))))
3323 return(xmlStrndup(buf, len));
3328 * xmlParserHandleReference:
3329 * @ctxt: the parser context
3331 * TODO: Remove, now deprecated ... the test is done directly in the
3335 * [67] Reference ::= EntityRef | CharRef
3337 * [68] EntityRef ::= '&' Name ';'
3339 * [ WFC: Entity Declared ]
3340 * the Name given in the entity reference must match that in an entity
3341 * declaration, except that well-formed documents need not declare any
3342 * of the following entities: amp, lt, gt, apos, quot.
3344 * [ WFC: Parsed Entity ]
3345 * An entity reference must not contain the name of an unparsed entity
3347 * [66] CharRef ::= '&#' [0-9]+ ';' |
3348 * '&#x' [0-9a-fA-F]+ ';'
3350 * A PEReference may have been detected in the current input stream
3351 * the handling is done according to
3352 * http://www.w3.org/TR/REC-xml#entproc
/*
 * xmlParserHandleReference: deprecated entry point. The only behaviour
 * visible in this listing is the deprecation message sent through
 * xmlGenericError.
 * NOTE(review): the test and update of the static flag are on missing
 * lines; presumably the message is printed once per process -- confirm.
 */
3355 xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3356 static int deprecated = 0;
3358 xmlGenericError(xmlGenericErrorContext,
3359 "xmlParserHandleReference() deprecated function reached\n");
3368 * @ctxt: an XML parser context
3369 * @entity: an XML entity pointer.
3371 * Default handling of defined entities, when should we define a new input
3372 * stream ? When do we just handle that as a set of chars ?
3374 * OBSOLETE: to be removed at some point.
/*
 * xmlHandleEntity: obsolete default handling of a defined entity. The
 * visible text reports the deprecation through xmlGenericError and contains
 * the old code: an entity without content is an internal error; content of
 * 2 bytes or fewer jumps to handle_as_char, where it is delivered through
 * the SAX characters callback; longer content is pushed as a new input stream.
 * NOTE(review): both parameters are marked ATTRIBUTE_UNUSED although the
 * visible body uses them; presumably this code is compiled out by
 * preprocessor lines missing from this listing -- confirm. The
 * handle_as_char label and the returns are on missing lines too.
 */
3378 xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
3379 static int deprecated = 0;
3381 xmlGenericError(xmlGenericErrorContext,
3382 "xmlHandleEntity() deprecated function reached\n");
3388 xmlParserInputPtr input;
3390 if (entity->content == NULL) {
3391 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3393 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3395 ctxt->wellFormed = 0;
3396 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3399 len = xmlStrlen(entity->content);
3400 if (len <= 2) goto handle_as_char;
3403 * Redefine its content as an input stream.
/* NOTE(review): the result of xmlNewEntityInputStream is pushed without a
 * visible NULL check. */
3405 input = xmlNewEntityInputStream(ctxt, entity);
3406 xmlPushInput(ctxt, input);
3411 * Just handle the content as a set of chars.
3413 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3414 (ctxt->sax->characters != NULL))
3415 ctxt->sax->characters(ctxt->userData, entity->content, len);
3421 * @doc: the document carrying the namespace
3422 * @href: the URI associated
3423 * @prefix: the prefix for the namespace
3425 * Creation of a Namespace, the old way using PI and without scoping
3427 * It now creates a namespace on the root element of the document if found.
3428 * Returns NULL; this functionality has been removed.
/*
 * xmlNewGlobalNs: deprecated creation of an old-style global namespace.
 * The visible text reports the deprecation through xmlGenericError and
 * contains the old code: with a root element present the namespace is
 * created on it via xmlNewNs; otherwise an xmlNs of type
 * XML_GLOBAL_NAMESPACE is allocated, filled with copies of href and prefix
 * and appended to the doc->oldNs list.
 * NOTE(review): all parameters are marked ATTRIBUTE_UNUSED although the
 * visible body uses them; presumably this code is compiled out by
 * preprocessor lines missing from this listing -- confirm. NULL tests,
 * list-link assignments and returns are on missing lines too.
 */
3431 xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3432 const xmlChar *prefix ATTRIBUTE_UNUSED) {
3433 static int deprecated = 0;
3435 xmlGenericError(xmlGenericErrorContext,
3436 "xmlNewGlobalNs() deprecated function reached\n");
3445 root = xmlDocGetRootElement(doc);
3447 return(xmlNewNs(root, href, prefix));
3450 * if there is no root element yet, create an old Namespace type
3451 * and it will be moved to the root at save time.
3453 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3455 xmlGenericError(xmlGenericErrorContext,
3456 "xmlNewGlobalNs : malloc failed\n");
/* Zero the node so every field not set below starts out as 0/NULL. */
3459 memset(cur, 0, sizeof(xmlNs));
3460 cur->type = XML_GLOBAL_NAMESPACE;
3463 cur->href = xmlStrdup(href);
3465 cur->prefix = xmlStrdup(prefix);
3468 * Add it at the end to preserve parsing order ...
3471 if (doc->oldNs == NULL) {
3474 xmlNsPtr prev = doc->oldNs;
/* Walk to the tail of the list; the link assignment is on a missing line. */
3476 while (prev->next != NULL) prev = prev->next;
3487 * @doc: a document pointer
3489 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3493 xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
3494 static int deprecated = 0;
3496 xmlGenericError(xmlGenericErrorContext,
3497 "xmlUpgradeOldNs() deprecated function reached\n");
3503 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3504 if (doc->children == NULL) {
3506 xmlGenericError(xmlGenericErrorContext,
3507 "xmlUpgradeOldNs: failed no root !\n");
3513 while (cur->next != NULL) {
3514 cur->type = XML_LOCAL_NAMESPACE;
3517 cur->type = XML_LOCAL_NAMESPACE;
3518 cur->next = doc->children->nsDef;
3519 doc->children->nsDef = doc->oldNs;