2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
5 * See Copyright for the status of this software.
13 #if defined(WIN32) && !defined (__CYGWIN__)
14 #define XML_DIR_SEP '\\'
16 #define XML_DIR_SEP '/'
26 #ifdef HAVE_SYS_STAT_H
39 #include <libxml/xmlmemory.h>
40 #include <libxml/tree.h>
41 #include <libxml/parser.h>
42 #include <libxml/parserInternals.h>
43 #include <libxml/valid.h>
44 #include <libxml/entities.h>
45 #include <libxml/xmlerror.h>
46 #include <libxml/encoding.h>
47 #include <libxml/valid.h>
48 #include <libxml/xmlIO.h>
49 #include <libxml/uri.h>
50 #include <libxml/SAX.h>
51 #ifdef LIBXML_CATALOG_ENABLED
52 #include <libxml/catalog.h>
54 #include <libxml/globals.h>
56 void xmlUpgradeOldNs(xmlDocPtr doc);
59 * Various global defaults for parsing
64 * @version: the include version number
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
70 xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION;
75 if ((myversion / 10000) != (version / 10000)) {
76 xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000));
80 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000));
83 if ((myversion / 100) < (version / 100)) {
84 xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100));
/*
 * Names of the parser features, in the order reported by
 * xmlGetFeaturesList(). Both the strings and the table itself are
 * read-only.
 * NOTE(review): the short entries ("validate", "load subset",
 * "keep blanks", "disable SAX", "gather line info", "user data",
 * "is html", "is standalone", "stop parser", "document",
 * "is well formed", "is valid", "SAX block") were restored from
 * upstream - confirm against the real file.
 */
static const char * const xmlFeaturesList[] = {
    "validate",
    "load subset",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/**
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 * When @len or @result is NULL nothing is copied and only the total
 * number of features is returned, which lets a caller size its array.
 *
 * Returns -1 when *@len is negative or not below 1000, otherwise the
 *         total number of features; *@len is updated with the number of
 *         strings copied. The strings must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int i;

    /* NOTE(review): return value on NULL arguments restored from upstream */
    if ((len == NULL) || (result == NULL))
        return(total);
    if ((*len < 0) || (*len >= 1000))
        return(-1);
    if (*len > total)
        *len = total;
    for (i = 0; i < *len; i++)
        result[i] = xmlFeaturesList[i];
    return(total);
}
165 * @ctxt: an XML/HTML parser context
166 * @name: the feature name
167 * @result: location to store the result
169 * Read the current value of one feature of this parser instance
171 * Returns -1 in case or error, 0 otherwise
174 xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
175 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
178 if (!strcmp(name, "validate")) {
179 *((int *) result) = ctxt->validate;
180 } else if (!strcmp(name, "keep blanks")) {
181 *((int *) result) = ctxt->keepBlanks;
182 } else if (!strcmp(name, "disable SAX")) {
183 *((int *) result) = ctxt->disableSAX;
184 } else if (!strcmp(name, "fetch external entities")) {
185 *((int *) result) = ctxt->loadsubset;
186 } else if (!strcmp(name, "substitute entities")) {
187 *((int *) result) = ctxt->replaceEntities;
188 } else if (!strcmp(name, "gather line info")) {
189 *((int *) result) = ctxt->record_info;
190 } else if (!strcmp(name, "user data")) {
191 *((void **)result) = ctxt->userData;
192 } else if (!strcmp(name, "is html")) {
193 *((int *) result) = ctxt->html;
194 } else if (!strcmp(name, "is standalone")) {
195 *((int *) result) = ctxt->standalone;
196 } else if (!strcmp(name, "document")) {
197 *((xmlDocPtr *) result) = ctxt->myDoc;
198 } else if (!strcmp(name, "is well formed")) {
199 *((int *) result) = ctxt->wellFormed;
200 } else if (!strcmp(name, "is valid")) {
201 *((int *) result) = ctxt->valid;
202 } else if (!strcmp(name, "SAX block")) {
203 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
204 } else if (!strcmp(name, "SAX function internalSubset")) {
205 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
206 } else if (!strcmp(name, "SAX function isStandalone")) {
207 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
208 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
209 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
210 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
211 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
212 } else if (!strcmp(name, "SAX function resolveEntity")) {
213 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
214 } else if (!strcmp(name, "SAX function getEntity")) {
215 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
216 } else if (!strcmp(name, "SAX function entityDecl")) {
217 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
218 } else if (!strcmp(name, "SAX function notationDecl")) {
219 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
220 } else if (!strcmp(name, "SAX function attributeDecl")) {
221 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
222 } else if (!strcmp(name, "SAX function elementDecl")) {
223 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
224 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
225 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
226 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
227 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
228 } else if (!strcmp(name, "SAX function startDocument")) {
229 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
230 } else if (!strcmp(name, "SAX function endDocument")) {
231 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
232 } else if (!strcmp(name, "SAX function startElement")) {
233 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
234 } else if (!strcmp(name, "SAX function endElement")) {
235 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
236 } else if (!strcmp(name, "SAX function reference")) {
237 *((referenceSAXFunc *) result) = ctxt->sax->reference;
238 } else if (!strcmp(name, "SAX function characters")) {
239 *((charactersSAXFunc *) result) = ctxt->sax->characters;
240 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
241 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
242 } else if (!strcmp(name, "SAX function processingInstruction")) {
243 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
244 } else if (!strcmp(name, "SAX function comment")) {
245 *((commentSAXFunc *) result) = ctxt->sax->comment;
246 } else if (!strcmp(name, "SAX function warning")) {
247 *((warningSAXFunc *) result) = ctxt->sax->warning;
248 } else if (!strcmp(name, "SAX function error")) {
249 *((errorSAXFunc *) result) = ctxt->sax->error;
250 } else if (!strcmp(name, "SAX function fatalError")) {
251 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
252 } else if (!strcmp(name, "SAX function getParameterEntity")) {
253 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
254 } else if (!strcmp(name, "SAX function cdataBlock")) {
255 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
256 } else if (!strcmp(name, "SAX function externalSubset")) {
257 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
266 * @ctxt: an XML/HTML parser context
267 * @name: the feature name
268 * @value: pointer to the location of the new value
270 * Change the current value of one feature of this parser instance
272 * Returns -1 in case or error, 0 otherwise
275 xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
276 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
279 if (!strcmp(name, "validate")) {
280 int newvalidate = *((int *) value);
281 if ((!ctxt->validate) && (newvalidate != 0)) {
282 if (ctxt->vctxt.warning == NULL)
283 ctxt->vctxt.warning = xmlParserValidityWarning;
284 if (ctxt->vctxt.error == NULL)
285 ctxt->vctxt.error = xmlParserValidityError;
286 ctxt->vctxt.nodeMax = 0;
288 ctxt->validate = newvalidate;
289 } else if (!strcmp(name, "keep blanks")) {
290 ctxt->keepBlanks = *((int *) value);
291 } else if (!strcmp(name, "disable SAX")) {
292 ctxt->disableSAX = *((int *) value);
293 } else if (!strcmp(name, "fetch external entities")) {
294 ctxt->loadsubset = *((int *) value);
295 } else if (!strcmp(name, "substitute entities")) {
296 ctxt->replaceEntities = *((int *) value);
297 } else if (!strcmp(name, "gather line info")) {
298 ctxt->record_info = *((int *) value);
299 } else if (!strcmp(name, "user data")) {
300 ctxt->userData = *((void **)value);
301 } else if (!strcmp(name, "is html")) {
302 ctxt->html = *((int *) value);
303 } else if (!strcmp(name, "is standalone")) {
304 ctxt->standalone = *((int *) value);
305 } else if (!strcmp(name, "document")) {
306 ctxt->myDoc = *((xmlDocPtr *) value);
307 } else if (!strcmp(name, "is well formed")) {
308 ctxt->wellFormed = *((int *) value);
309 } else if (!strcmp(name, "is valid")) {
310 ctxt->valid = *((int *) value);
311 } else if (!strcmp(name, "SAX block")) {
312 ctxt->sax = *((xmlSAXHandlerPtr *) value);
313 } else if (!strcmp(name, "SAX function internalSubset")) {
314 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
315 } else if (!strcmp(name, "SAX function isStandalone")) {
316 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
317 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
318 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
319 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
320 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
321 } else if (!strcmp(name, "SAX function resolveEntity")) {
322 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
323 } else if (!strcmp(name, "SAX function getEntity")) {
324 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
325 } else if (!strcmp(name, "SAX function entityDecl")) {
326 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
327 } else if (!strcmp(name, "SAX function notationDecl")) {
328 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
329 } else if (!strcmp(name, "SAX function attributeDecl")) {
330 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
331 } else if (!strcmp(name, "SAX function elementDecl")) {
332 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
333 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
334 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
336 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function startDocument")) {
338 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function endDocument")) {
340 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function startElement")) {
342 ctxt->sax->startElement = *((startElementSAXFunc *) value);
343 } else if (!strcmp(name, "SAX function endElement")) {
344 ctxt->sax->endElement = *((endElementSAXFunc *) value);
345 } else if (!strcmp(name, "SAX function reference")) {
346 ctxt->sax->reference = *((referenceSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function characters")) {
348 ctxt->sax->characters = *((charactersSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
350 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function processingInstruction")) {
352 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function comment")) {
354 ctxt->sax->comment = *((commentSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function warning")) {
356 ctxt->sax->warning = *((warningSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function error")) {
358 ctxt->sax->error = *((errorSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function fatalError")) {
360 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function getParameterEntity")) {
362 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
363 } else if (!strcmp(name, "SAX function cdataBlock")) {
364 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function externalSubset")) {
366 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
373 /************************************************************************
375 * Some functions to avoid too large macros *
377 ************************************************************************/
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* below the space: only TAB, LF and CR are legal */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    /* everything up to the surrogate blocks */
    if (c <= 0xD7FF)
        return(1);
    /* the surrogate blocks themselves */
    if (c < 0xE000)
        return(0);
    /* rest of the BMP, without FFFE and FFFF */
    if (c <= 0xFFFD)
        return(1);
    /* supplementary planes */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:      /* space */
        case 0x09:      /* tab */
        case 0x0A:      /* line feed */
        case 0x0D:      /* carriage return */
            return(1);
        default:
            return(0);
    }
}
/*
 * BaseChar membership for the code points 0x00 - 0xFF, one entry per
 * code point: [A-Z], [a-z], [#xC0-#xD6], [#xD8-#xF6] and [#xF8-#xFF].
 */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
 *
 * Negative values are not code points and are rejected before the
 * lookup table is consulted.
 * NOTE(review): the single-code-point tests were restored from
 * production [85] of the XML 1.0 Recommendation - confirm.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* never index the 256 entries table with a negative value */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    /* 0x0100 - 0x0904 : Latin extended, Greek, Cyrillic, ... Arabic */
    if (c < 0x0905)
        return(
            ((c >= 0x0100) && (c <= 0x0131)) ||
            ((c >= 0x0134) && (c <= 0x013E)) ||
            ((c >= 0x0141) && (c <= 0x0148)) ||
            ((c >= 0x014A) && (c <= 0x017E)) ||
            ((c >= 0x0180) && (c <= 0x01C3)) ||
            ((c >= 0x01CD) && (c <= 0x01F0)) ||
            ((c >= 0x01F4) && (c <= 0x01F5)) ||
            ((c >= 0x01FA) && (c <= 0x0217)) ||
            ((c >= 0x0250) && (c <= 0x02A8)) ||
            ((c >= 0x02BB) && (c <= 0x02C1)) ||
            (c == 0x0386) ||
            ((c >= 0x0388) && (c <= 0x038A)) ||
            (c == 0x038C) ||
            ((c >= 0x038E) && (c <= 0x03A1)) ||
            ((c >= 0x03A3) && (c <= 0x03CE)) ||
            ((c >= 0x03D0) && (c <= 0x03D6)) ||
            (c == 0x03DA) ||
            (c == 0x03DC) ||
            (c == 0x03DE) ||
            (c == 0x03E0) ||
            ((c >= 0x03E2) && (c <= 0x03F3)) ||
            ((c >= 0x0401) && (c <= 0x040C)) ||
            ((c >= 0x040E) && (c <= 0x044F)) ||
            ((c >= 0x0451) && (c <= 0x045C)) ||
            ((c >= 0x045E) && (c <= 0x0481)) ||
            ((c >= 0x0490) && (c <= 0x04C4)) ||
            ((c >= 0x04C7) && (c <= 0x04C8)) ||
            ((c >= 0x04CB) && (c <= 0x04CC)) ||
            ((c >= 0x04D0) && (c <= 0x04EB)) ||
            ((c >= 0x04EE) && (c <= 0x04F5)) ||
            ((c >= 0x04F8) && (c <= 0x04F9)) ||
            ((c >= 0x0531) && (c <= 0x0556)) ||
            (c == 0x0559) ||
            ((c >= 0x0561) && (c <= 0x0586)) ||
            ((c >= 0x05D0) && (c <= 0x05EA)) ||
            ((c >= 0x05F0) && (c <= 0x05F2)) ||
            ((c >= 0x0621) && (c <= 0x063A)) ||
            ((c >= 0x0641) && (c <= 0x064A)) ||
            ((c >= 0x0671) && (c <= 0x06B7)) ||
            ((c >= 0x06BA) && (c <= 0x06BE)) ||
            ((c >= 0x06C0) && (c <= 0x06CE)) ||
            ((c >= 0x06D0) && (c <= 0x06D3)) ||
            (c == 0x06D5) ||
            ((c >= 0x06E5) && (c <= 0x06E6)));

    /* 0x0905 - 0x109F : Indic scripts, Thai, Lao, Tibetan */
    if (c < 0x10A0)
        return(
            ((c >= 0x0905) && (c <= 0x0939)) ||
            (c == 0x093D) ||
            ((c >= 0x0958) && (c <= 0x0961)) ||
            ((c >= 0x0985) && (c <= 0x098C)) ||
            ((c >= 0x098F) && (c <= 0x0990)) ||
            ((c >= 0x0993) && (c <= 0x09A8)) ||
            ((c >= 0x09AA) && (c <= 0x09B0)) ||
            (c == 0x09B2) ||
            ((c >= 0x09B6) && (c <= 0x09B9)) ||
            ((c >= 0x09DC) && (c <= 0x09DD)) ||
            ((c >= 0x09DF) && (c <= 0x09E1)) ||
            ((c >= 0x09F0) && (c <= 0x09F1)) ||
            ((c >= 0x0A05) && (c <= 0x0A0A)) ||
            ((c >= 0x0A0F) && (c <= 0x0A10)) ||
            ((c >= 0x0A13) && (c <= 0x0A28)) ||
            ((c >= 0x0A2A) && (c <= 0x0A30)) ||
            ((c >= 0x0A32) && (c <= 0x0A33)) ||
            ((c >= 0x0A35) && (c <= 0x0A36)) ||
            ((c >= 0x0A38) && (c <= 0x0A39)) ||
            ((c >= 0x0A59) && (c <= 0x0A5C)) ||
            (c == 0x0A5E) ||
            ((c >= 0x0A72) && (c <= 0x0A74)) ||
            ((c >= 0x0A85) && (c <= 0x0A8B)) ||
            (c == 0x0A8D) ||
            ((c >= 0x0A8F) && (c <= 0x0A91)) ||
            ((c >= 0x0A93) && (c <= 0x0AA8)) ||
            ((c >= 0x0AAA) && (c <= 0x0AB0)) ||
            ((c >= 0x0AB2) && (c <= 0x0AB3)) ||
            ((c >= 0x0AB5) && (c <= 0x0AB9)) ||
            (c == 0x0ABD) ||
            (c == 0x0AE0) ||
            ((c >= 0x0B05) && (c <= 0x0B0C)) ||
            ((c >= 0x0B0F) && (c <= 0x0B10)) ||
            ((c >= 0x0B13) && (c <= 0x0B28)) ||
            ((c >= 0x0B2A) && (c <= 0x0B30)) ||
            ((c >= 0x0B32) && (c <= 0x0B33)) ||
            ((c >= 0x0B36) && (c <= 0x0B39)) ||
            (c == 0x0B3D) ||
            ((c >= 0x0B5C) && (c <= 0x0B5D)) ||
            ((c >= 0x0B5F) && (c <= 0x0B61)) ||
            ((c >= 0x0B85) && (c <= 0x0B8A)) ||
            ((c >= 0x0B8E) && (c <= 0x0B90)) ||
            ((c >= 0x0B92) && (c <= 0x0B95)) ||
            ((c >= 0x0B99) && (c <= 0x0B9A)) ||
            (c == 0x0B9C) ||
            ((c >= 0x0B9E) && (c <= 0x0B9F)) ||
            ((c >= 0x0BA3) && (c <= 0x0BA4)) ||
            ((c >= 0x0BA8) && (c <= 0x0BAA)) ||
            ((c >= 0x0BAE) && (c <= 0x0BB5)) ||
            ((c >= 0x0BB7) && (c <= 0x0BB9)) ||
            ((c >= 0x0C05) && (c <= 0x0C0C)) ||
            ((c >= 0x0C0E) && (c <= 0x0C10)) ||
            ((c >= 0x0C12) && (c <= 0x0C28)) ||
            ((c >= 0x0C2A) && (c <= 0x0C33)) ||
            ((c >= 0x0C35) && (c <= 0x0C39)) ||
            ((c >= 0x0C60) && (c <= 0x0C61)) ||
            ((c >= 0x0C85) && (c <= 0x0C8C)) ||
            ((c >= 0x0C8E) && (c <= 0x0C90)) ||
            ((c >= 0x0C92) && (c <= 0x0CA8)) ||
            ((c >= 0x0CAA) && (c <= 0x0CB3)) ||
            ((c >= 0x0CB5) && (c <= 0x0CB9)) ||
            (c == 0x0CDE) ||
            ((c >= 0x0CE0) && (c <= 0x0CE1)) ||
            ((c >= 0x0D05) && (c <= 0x0D0C)) ||
            ((c >= 0x0D0E) && (c <= 0x0D10)) ||
            ((c >= 0x0D12) && (c <= 0x0D28)) ||
            ((c >= 0x0D2A) && (c <= 0x0D39)) ||
            ((c >= 0x0D60) && (c <= 0x0D61)) ||
            ((c >= 0x0E01) && (c <= 0x0E2E)) ||
            (c == 0x0E30) ||
            ((c >= 0x0E32) && (c <= 0x0E33)) ||
            ((c >= 0x0E40) && (c <= 0x0E45)) ||
            ((c >= 0x0E81) && (c <= 0x0E82)) ||
            (c == 0x0E84) ||
            ((c >= 0x0E87) && (c <= 0x0E88)) ||
            (c == 0x0E8A) ||
            (c == 0x0E8D) ||
            ((c >= 0x0E94) && (c <= 0x0E97)) ||
            ((c >= 0x0E99) && (c <= 0x0E9F)) ||
            ((c >= 0x0EA1) && (c <= 0x0EA3)) ||
            (c == 0x0EA5) ||
            (c == 0x0EA7) ||
            ((c >= 0x0EAA) && (c <= 0x0EAB)) ||
            ((c >= 0x0EAD) && (c <= 0x0EAE)) ||
            (c == 0x0EB0) ||
            ((c >= 0x0EB2) && (c <= 0x0EB3)) ||
            (c == 0x0EBD) ||
            ((c >= 0x0EC0) && (c <= 0x0EC4)) ||
            ((c >= 0x0F40) && (c <= 0x0F47)) ||
            ((c >= 0x0F49) && (c <= 0x0F69)));

    /* 0x10A0 and above : Georgian, Hangul Jamo, Greek extended, Kana ... */
    return(
        ((c >= 0x10A0) && (c <= 0x10C5)) ||
        ((c >= 0x10D0) && (c <= 0x10F6)) ||
        (c == 0x1100) ||
        ((c >= 0x1102) && (c <= 0x1103)) ||
        ((c >= 0x1105) && (c <= 0x1107)) ||
        (c == 0x1109) ||
        ((c >= 0x110B) && (c <= 0x110C)) ||
        ((c >= 0x110E) && (c <= 0x1112)) ||
        (c == 0x113C) ||
        (c == 0x113E) ||
        (c == 0x1140) ||
        (c == 0x114C) ||
        (c == 0x114E) ||
        (c == 0x1150) ||
        ((c >= 0x1154) && (c <= 0x1155)) ||
        (c == 0x1159) ||
        ((c >= 0x115F) && (c <= 0x1161)) ||
        (c == 0x1163) ||
        (c == 0x1165) ||
        (c == 0x1167) ||
        (c == 0x1169) ||
        ((c >= 0x116D) && (c <= 0x116E)) ||
        ((c >= 0x1172) && (c <= 0x1173)) ||
        (c == 0x1175) ||
        (c == 0x119E) ||
        (c == 0x11A8) ||
        (c == 0x11AB) ||
        ((c >= 0x11AE) && (c <= 0x11AF)) ||
        ((c >= 0x11B7) && (c <= 0x11B8)) ||
        (c == 0x11BA) ||
        ((c >= 0x11BC) && (c <= 0x11C2)) ||
        (c == 0x11EB) ||
        (c == 0x11F0) ||
        (c == 0x11F9) ||
        ((c >= 0x1E00) && (c <= 0x1E9B)) ||
        ((c >= 0x1EA0) && (c <= 0x1EF9)) ||
        ((c >= 0x1F00) && (c <= 0x1F15)) ||
        ((c >= 0x1F18) && (c <= 0x1F1D)) ||
        ((c >= 0x1F20) && (c <= 0x1F45)) ||
        ((c >= 0x1F48) && (c <= 0x1F4D)) ||
        ((c >= 0x1F50) && (c <= 0x1F57)) ||
        (c == 0x1F59) ||
        (c == 0x1F5B) ||
        (c == 0x1F5D) ||
        ((c >= 0x1F5F) && (c <= 0x1F7D)) ||
        ((c >= 0x1F80) && (c <= 0x1FB4)) ||
        ((c >= 0x1FB6) && (c <= 0x1FBC)) ||
        (c == 0x1FBE) ||
        ((c >= 0x1FC2) && (c <= 0x1FC4)) ||
        ((c >= 0x1FC6) && (c <= 0x1FCC)) ||
        ((c >= 0x1FD0) && (c <= 0x1FD3)) ||
        ((c >= 0x1FD6) && (c <= 0x1FDB)) ||
        ((c >= 0x1FE0) && (c <= 0x1FEC)) ||
        ((c >= 0x1FF2) && (c <= 0x1FF4)) ||
        ((c >= 0x1FF6) && (c <= 0x1FFC)) ||
        (c == 0x2126) ||
        ((c >= 0x212A) && (c <= 0x212B)) ||
        (c == 0x212E) ||
        ((c >= 0x2180) && (c <= 0x2182)) ||
        ((c >= 0x3041) && (c <= 0x3094)) ||
        ((c >= 0x30A1) && (c <= 0x30FA)) ||
        ((c >= 0x3105) && (c <= 0x312C)) ||
        ((c >= 0xAC00) && (c <= 0xD7A3)));
}
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* inclusive [first, last] blocks of digits, in increasing order */
    static const int digitBlocks[][2] = {
        { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F }, { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 }, { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F29 }
    };
    const unsigned int nbBlocks =
        sizeof(digitBlocks) / sizeof(digitBlocks[0]);
    unsigned int idx;

    for (idx = 0; idx < nbBlocks; idx++) {
        /* blocks are sorted: once past @c there is nothing left to match */
        if (c < digitBlocks[idx][0])
            return(0);
        if (c <= digitBlocks[idx][1])
            return(1);
    }
    return(0);
}
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * NOTE(review): the single-code-point entries of the table were restored
 * from production [87] of the XML 1.0 Recommendation - confirm.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* inclusive [first, last] blocks, in increasing order */
    static const int combiningBlocks[][2] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED }, { 0x0901, 0x0903 }, { 0x093C, 0x093C },
        { 0x093E, 0x094C }, { 0x094D, 0x094D }, { 0x0951, 0x0954 },
        { 0x0962, 0x0963 }, { 0x0981, 0x0983 }, { 0x09BC, 0x09BC },
        { 0x09BE, 0x09BE }, { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 },
        { 0x09C7, 0x09C8 }, { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 },
        { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C },
        { 0x0A3E, 0x0A3E }, { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 },
        { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 },
        { 0x0A81, 0x0A83 }, { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 },
        { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 },
        { 0x0B3C, 0x0B3C }, { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 },
        { 0x0B4B, 0x0B4D }, { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 },
        { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD },
        { 0x0BD7, 0x0BD7 }, { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 },
        { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
        { 0x0C82, 0x0C83 }, { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 },
        { 0x0CCA, 0x0CCD }, { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 },
        { 0x0D3E, 0x0D43 }, { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D },
        { 0x0D57, 0x0D57 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
        { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
        { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
        { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
        { 0x0F3E, 0x0F3E }, { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 },
        { 0x0F86, 0x0F8B }, { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 },
        { 0x0F99, 0x0FAD }, { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 },
        { 0x20D0, 0x20DC }, { 0x20E1, 0x20E1 }, { 0x302A, 0x302F },
        { 0x3099, 0x3099 }, { 0x309A, 0x309A }
    };
    const unsigned int nbBlocks =
        sizeof(combiningBlocks) / sizeof(combiningBlocks[0]);
    unsigned int idx;

    for (idx = 0; idx < nbBlocks; idx++) {
        /* blocks are sorted: once past @c there is nothing left to match */
        if (c < combiningBlocks[idx][0])
            return(0);
        if (c <= combiningBlocks[idx][1])
            return(1);
    }
    return(0);
}
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    /* the three contiguous runs of the production */
    if ((c >= 0x3031) && (c <= 0x3035))
        return 1;
    if ((c == 0x309D) || (c == 0x309E))
        return 1;
    if ((c >= 0x30FC) && (c <= 0x30FE))
        return 1;

    /* the isolated code points */
    if ((c == 0x00B7) || (c == 0x02D0) || (c == 0x02D1) || (c == 0x0387) ||
        (c == 0x0640) || (c == 0x0E46) || (c == 0x0EC6) || (c == 0x3005))
        return 1;

    return 0;
}
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * block 0xF900 - 0xFA2D is not part of it and is rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    /* a letter is either a base character ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an ideographic one */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* the three blank characters allowed in a public identifier */
        case 0x20: case 0x0D: case 0x0A:
        /* [-'()+,./:=?;!*#@$_%] */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
877 /************************************************************************
879 * Input handling functions for progressive parsing *
881 ************************************************************************/
883 /* #define DEBUG_INPUT */
884 /* #define DEBUG_STACK */
885 /* #define DEBUG_PUSH */
888 /* we need to keep enough input to show errors in context */
892 #define CHECK_BUFFER(in) check_buffer(in)
895 void check_buffer(xmlParserInputPtr in) {
896 if (in->base != in->buf->buffer->content) {
897 xmlGenericError(xmlGenericErrorContext,
898 "xmlParserInput: base mismatch problem\n");
900 if (in->cur < in->base) {
901 xmlGenericError(xmlGenericErrorContext,
902 "xmlParserInput: cur < base problem\n");
904 if (in->cur > in->base + in->buf->buffer->use) {
905 xmlGenericError(xmlGenericErrorContext,
906 "xmlParserInput: cur > base + use problem\n");
908 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
909 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
910 in->buf->buffer->use, in->buf->buffer->size);
914 #define CHECK_BUFFER(in)
919 * xmlParserInputRead:
920 * @in: an XML parser input
921 * @len: an indicative size for the lookahead
923 * This function refresh the input for the parser. It doesn't try to
924 * preserve pointers to the input buffer, and discard already read data
926 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
930 xmlParserInputRead(xmlParserInputPtr in, int len) {
936 xmlGenericError(xmlGenericErrorContext, "Read\n");
938 if (in->buf == NULL) return(-1);
939 if (in->base == NULL) return(-1);
940 if (in->cur == NULL) return(-1);
941 if (in->buf->buffer == NULL) return(-1);
942 if (in->buf->readcallback == NULL) return(-1);
946 used = in->cur - in->buf->buffer->content;
947 ret = xmlBufferShrink(in->buf->buffer, used);
952 ret = xmlParserInputBufferRead(in->buf, len);
953 if (in->base != in->buf->buffer->content) {
955 * the buffer has been reallocated
957 indx = in->cur - in->base;
958 in->base = in->buf->buffer->content;
959 in->cur = &in->buf->buffer->content[indx];
961 in->end = &in->buf->buffer->content[in->buf->buffer->use];
969 * xmlParserInputGrow:
970 * @in: an XML parser input
971 * @len: an indicative size for the lookahead
973 * This function increase the input for the parser. It tries to
974 * preserve pointers to the input buffer, and keep already read data
976 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
/*
 * xmlParserInputGrow -- summary of the statements visible in this listing:
 *  - returns -1 when in->buf, in->base, in->cur or in->buf->buffer is NULL;
 *  - computes `indx`, the offset of in->cur from in->base, and tests whether
 *    the buffer already holds more than indx + INPUT_CHUNK units (the
 *    statements guarded by that test are not visible here);
 *  - when a read callback is installed, calls xmlParserInputBufferGrow()
 *    with @len and keeps its result in `ret`;
 *  - when the buffer content pointer differs from in->base, recomputes the
 *    offset from the old in->base (used only for pointer arithmetic, see the
 *    NOTE below) and rebases in->base and in->cur on the new content;
 *  - recomputes in->end from the buffer `use` count.
 * NOTE(review): short lines (declarations, braces, returns, the branch taken
 * when no read callback exists) are missing from this listing -- confirm
 * against the complete file.
 */
980 xmlParserInputGrow(xmlParserInputPtr in, int len) {
985 xmlGenericError(xmlGenericErrorContext, "Grow\n");
987 if (in->buf == NULL) return(-1);
988 if (in->base == NULL) return(-1);
989 if (in->cur == NULL) return(-1);
990 if (in->buf->buffer == NULL) return(-1);
994 indx = in->cur - in->base;
995 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
1001 if (in->buf->readcallback != NULL)
1002 ret = xmlParserInputBufferGrow(in->buf, len);
1007 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
1008 * block, but we use it really as an integer to do some
1009 * pointer arithmetic. Insure will raise it as a bug but in
1010 * that specific case, that's not !
1012 if (in->base != in->buf->buffer->content) {
1014 * the buffer has been reallocated
1016 indx = in->cur - in->base;
1017 in->base = in->buf->buffer->content;
1018 in->cur = &in->buf->buffer->content[indx];
1020 in->end = &in->buf->buffer->content[in->buf->buffer->use];
1028 * xmlParserInputShrink:
1029 * @in: an XML parser input
1031 * This function removes used input for the parser.
/*
 * xmlParserInputShrink -- summary of the statements visible in this listing:
 *  - does nothing when in->buf, in->base, in->cur or in->buf->buffer is NULL;
 *  - computes `used`, the offset of in->cur from the start of the buffer
 *    content;
 *  - tests whether the buffer holds more than used + 2 * INPUT_CHUNK units
 *    (per the comment, large buffers with little consumed data are not
 *    shrunk; the guarded statement itself is not visible here);
 *  - when more than INPUT_CHUNK units were consumed, drops
 *    used - LINE_LEN units with xmlBufferShrink(), adds the result to
 *    in->consumed and recomputes in->end;
 *  - tests whether the buffer still holds more than INPUT_CHUNK units (the
 *    guarded statement is not visible), then reads 2 * INPUT_CHUNK more via
 *    xmlParserInputBufferRead();
 *  - rebases in->base / in->cur if the content pointer changed and
 *    recomputes in->end.
 * NOTE(review): short lines (declarations, braces, returns, the in->cur
 * adjustment after shrinking) are missing from this listing -- confirm
 * against the complete file.
 */
1034 xmlParserInputShrink(xmlParserInputPtr in) {
1040 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1042 if (in->buf == NULL) return;
1043 if (in->base == NULL) return;
1044 if (in->cur == NULL) return;
1045 if (in->buf->buffer == NULL) return;
1049 used = in->cur - in->buf->buffer->content;
1051 * Do not shrink on large buffers whose only a tiny fraction
1054 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1056 if (used > INPUT_CHUNK) {
1057 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1060 in->consumed += ret;
1062 in->end = &in->buf->buffer->content[in->buf->buffer->use];
1067 if (in->buf->buffer->use > INPUT_CHUNK) {
1070 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1071 if (in->base != in->buf->buffer->content) {
1073 * the buffer has been reallocated
1075 indx = in->cur - in->base;
1076 in->base = in->buf->buffer->content;
1077 in->cur = &in->buf->buffer->content[indx];
1079 in->end = &in->buf->buffer->content[in->buf->buffer->use];
1084 /************************************************************************
1086 * UTF8 character input and related functions *
1088 ************************************************************************/
1092 * @ctxt: the XML parser context
1094 * Skip to the next char input char.
/*
 * xmlNextChar -- summary of the statements visible in this listing:
 *  - starts with a test for ctxt->instate == XML_PARSER_EOF (the guarded
 *    statement is not visible here);
 *  - in the UTF-8 charset branch: when the current byte is 0, a grow attempt
 *    yields nothing and the state is not XML_PARSER_COMMENT, the end of the
 *    current entity is handled (per the comment, consumed entities are
 *    popped); otherwise line/col are updated ('\n' bumps line and resets col
 *    to 1, anything else bumps col) and the lead byte is inspected;
 *  - continuation bytes must match (byte & 0xc0) == 0x80, a 4-byte lead must
 *    match (c & 0xf8) == 0xf0; ctxt->input->cur advances by 4, 3 or 2;
 *  - a decoded 3/4-byte value inside 0xd800..0xdfff, 0xfffe..0xffff or
 *    >= 0x110000 is reported through ctxt->sax->error, sets errNo to
 *    XML_ERR_INVALID_ENCODING, clears wellFormed and, unless recovery is on,
 *    sets disableSAX;
 *  - after advancing, more input is requested when the current byte is 0,
 *    a '%' outside HTML mode triggers xmlParserHandlePEReference(), and a
 *    final 0 byte with nothing more to grow is tested for;
 *  - the encoding_error path reports the problem and the four bytes at
 *    ctxt->input->cur, clears wellFormed, sets errNo and switches
 *    ctxt->charset to XML_CHAR_ENCODING_8859_1.
 * NOTE(review): `cur` is captured from ctxt->input->cur before the
 * xmlParserInputGrow() calls; xmlParserInputGrow() rebases in->base and
 * in->cur when the buffer content moves, so the later cur[1..3] reads look
 * like they may use a stale pointer -- confirm and refresh `cur` after
 * growing if so.
 * NOTE(review): short lines (declarations, braces, returns, 1-byte advance)
 * are missing from this listing -- confirm against the complete file.
 */
1098 xmlNextChar(xmlParserCtxtPtr ctxt) {
1099 if (ctxt->instate == XML_PARSER_EOF)
1103 * 2.11 End-of-Line Handling
1104 * the literal two-character sequence "#xD#xA" or a standalone
1105 * literal #xD, an XML processor must pass to the application
1106 * the single character #xA.
1108 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1109 if ((*ctxt->input->cur == 0) &&
1110 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1111 (ctxt->instate != XML_PARSER_COMMENT)) {
1113 * If we are at the end of the current entity and
1114 * the context allows it, we pop consumed entities
1116 * the auto closing should be blocked in other cases
1120 if (*(ctxt->input->cur) == '\n') {
1121 ctxt->input->line++; ctxt->input->col = 1;
1122 } else ctxt->input->col++;
1123 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1125 * We are supposed to handle UTF8, check it's valid
1126 * From rfc2044: encoding of the Unicode values on UTF-8:
1128 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1129 * 0000 0000-0000 007F 0xxxxxxx
1130 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1131 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1133 * Check for the 0x110000 limit too
1135 const unsigned char *cur = ctxt->input->cur;
1141 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1142 if ((cur[1] & 0xc0) != 0x80)
1143 goto encoding_error;
1144 if ((c & 0xe0) == 0xe0) {
1148 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1149 if ((cur[2] & 0xc0) != 0x80)
1150 goto encoding_error;
1151 if ((c & 0xf0) == 0xf0) {
1153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1154 if (((c & 0xf8) != 0xf0) ||
1155 ((cur[3] & 0xc0) != 0x80))
1156 goto encoding_error;
1158 ctxt->input->cur += 4;
1159 val = (cur[0] & 0x7) << 18;
1160 val |= (cur[1] & 0x3f) << 12;
1161 val |= (cur[2] & 0x3f) << 6;
1162 val |= cur[3] & 0x3f;
1165 ctxt->input->cur += 3;
1166 val = (cur[0] & 0xf) << 12;
1167 val |= (cur[1] & 0x3f) << 6;
1168 val |= cur[2] & 0x3f;
1170 if (((val > 0xd7ff) && (val < 0xe000)) ||
1171 ((val > 0xfffd) && (val < 0x10000)) ||
1172 (val >= 0x110000)) {
1173 if ((ctxt->sax != NULL) &&
1174 (ctxt->sax->error != NULL))
1175 ctxt->sax->error(ctxt->userData,
1176 "Char 0x%X out of allowed range\n", val);
1177 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1178 ctxt->wellFormed = 0;
1179 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1183 ctxt->input->cur += 2;
1189 * Assume it's a fixed length encoding (1) with
1190 * a compatible encoding for the ASCII set, since
1191 * XML constructs only use < 128 chars
1196 if (*ctxt->input->cur == 0)
1197 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1202 if (*ctxt->input->cur == 0)
1203 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1205 if ((*ctxt->input->cur == '%') && (!ctxt->html))
1206 xmlParserHandlePEReference(ctxt);
1207 if ((*ctxt->input->cur == 0) &&
1208 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1213 * If we detect an UTF8 error that probably mean that the
1214 * input encoding didn't get properly advertised in the
1215 * declaration header. Report the error and switch the encoding
1216 * to ISO-Latin-1 (if you don't like this policy, just declare the
1219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1220 ctxt->sax->error(ctxt->userData,
1221 "Input is not proper UTF-8, indicate encoding !\n");
1222 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1223 ctxt->input->cur[0], ctxt->input->cur[1],
1224 ctxt->input->cur[2], ctxt->input->cur[3]);
1226 ctxt->wellFormed = 0;
1227 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1229 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1236 * @ctxt: the XML parser context
1237 * @len: pointer to the length of the char read
1239 * The current char value, if using UTF-8 this may actually span multiple
1240 * bytes in the input buffer. Implement the end of line normalization:
1241 * 2.11 End-of-Line Handling
1242 * Wherever an external parsed entity or the literal entity value
1243 * of an internal parsed entity contains either the literal two-character
1244 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1245 * must pass to the application the single character #xA.
1246 * This behavior can conveniently be produced by normalizing all
1247 * line breaks to #xA on input, before parsing.)
1249 * Returns the current char value and its length
/*
 * xmlCurrentChar -- summary of the statements visible in this listing:
 *  - starts with a test for ctxt->instate == XML_PARSER_EOF (the guarded
 *    statement is not visible here);
 *  - fast path: a current byte in 0x20..0x7F is returned as is;
 *  - in the UTF-8 charset branch the lead byte is inspected; continuation
 *    bytes must match (byte & 0xc0) == 0x80 and a 4-byte lead must match
 *    (c & 0xf8) == 0xf0, otherwise control jumps to encoding_error;
 *  - the value is assembled from 2, 3 or 4 bytes; a value failing IS_CHAR()
 *    is reported through ctxt->sax->error, sets errNo to
 *    XML_ERR_INVALID_ENCODING, clears wellFormed and, unless recovery is on,
 *    sets disableSAX;
 *  - both the UTF-8 single-byte path and the fixed-length path test for 0xD
 *    followed by 0xA (end-of-line normalization; the guarded statements are
 *    not visible here) before returning the current byte;
 *  - encoding_error: when fewer than 4 bytes remain before
 *    ctxt->input->end the problem is treated as a truncated buffer (per the
 *    comment, 0 is returned without raising an error); otherwise the error
 *    and the four bytes at ctxt->input->cur are reported, wellFormed is
 *    cleared, errNo is set, ctxt->charset becomes XML_CHAR_ENCODING_8859_1
 *    and the current byte is returned.
 * NOTE(review): `cur` is captured from ctxt->input->cur before the
 * xmlParserInputGrow() calls; xmlParserInputGrow() rebases in->base and
 * in->cur when the buffer content moves, so the later cur[1..3] reads look
 * like they may use a stale pointer -- confirm and refresh `cur` after
 * growing if so.
 * NOTE(review): short lines (declarations, braces, the *len stores, returns)
 * are missing from this listing -- confirm against the complete file.
 */
1253 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1254 if (ctxt->instate == XML_PARSER_EOF)
1257 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
1259 return((int) *ctxt->input->cur);
1261 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1263 * We are supposed to handle UTF8, check it's valid
1264 * From rfc2044: encoding of the Unicode values on UTF-8:
1266 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1267 * 0000 0000-0000 007F 0xxxxxxx
1268 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1269 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1271 * Check for the 0x110000 limit too
1273 const unsigned char *cur = ctxt->input->cur;
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((cur[1] & 0xc0) != 0x80)
1282 goto encoding_error;
1283 if ((c & 0xe0) == 0xe0) {
1286 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1287 if ((cur[2] & 0xc0) != 0x80)
1288 goto encoding_error;
1289 if ((c & 0xf0) == 0xf0) {
1291 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1292 if (((c & 0xf8) != 0xf0) ||
1293 ((cur[3] & 0xc0) != 0x80))
1294 goto encoding_error;
1297 val = (cur[0] & 0x7) << 18;
1298 val |= (cur[1] & 0x3f) << 12;
1299 val |= (cur[2] & 0x3f) << 6;
1300 val |= cur[3] & 0x3f;
1304 val = (cur[0] & 0xf) << 12;
1305 val |= (cur[1] & 0x3f) << 6;
1306 val |= cur[2] & 0x3f;
1311 val = (cur[0] & 0x1f) << 6;
1312 val |= cur[1] & 0x3f;
1314 if (!IS_CHAR(val)) {
1315 if ((ctxt->sax != NULL) &&
1316 (ctxt->sax->error != NULL))
1317 ctxt->sax->error(ctxt->userData,
1318 "Char 0x%X out of allowed range\n", val);
1319 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1320 ctxt->wellFormed = 0;
1321 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1327 if (*ctxt->input->cur == 0xD) {
1328 if (ctxt->input->cur[1] == 0xA) {
1334 return((int) *ctxt->input->cur);
1338 * Assume it's a fixed length encoding (1) with
1339 * a compatible encoding for the ASCII set, since
1340 * XML constructs only use < 128 chars
1343 if (*ctxt->input->cur == 0xD) {
1344 if (ctxt->input->cur[1] == 0xA) {
1350 return((int) *ctxt->input->cur);
1353 * An encoding problem may arise from a truncated input buffer
1354 * splitting a character in the middle. In that case do not raise
1355 * an error but return 0 to endicate an end of stream problem
1357 if (ctxt->input->end - ctxt->input->cur < 4) {
1363 * If we detect an UTF8 error that probably mean that the
1364 * input encoding didn't get properly advertised in the
1365 * declaration header. Report the error and switch the encoding
1366 * to ISO-Latin-1 (if you don't like this policy, just declare the
1369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1370 ctxt->sax->error(ctxt->userData,
1371 "Input is not proper UTF-8, indicate encoding !\n");
1372 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1373 ctxt->input->cur[0], ctxt->input->cur[1],
1374 ctxt->input->cur[2], ctxt->input->cur[3]);
1376 ctxt->wellFormed = 0;
1377 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1379 ctxt->charset = XML_CHAR_ENCODING_8859_1;
1381 return((int) *ctxt->input->cur);
1385 * xmlStringCurrentChar:
1386 * @ctxt: the XML parser context
1387 * @cur: pointer to the beginning of the char
1388 * @len: pointer to the length of the char read
1390 * The current char value, if using UTF-8 this may actually span multiple
1391 * bytes in the input buffer.
1393 * Returns the current char value and its length
1397 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
1399 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
1401 * We are supposed to handle UTF8, check it's valid
1402 * From rfc2044: encoding of the Unicode values on UTF-8:
1404 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1405 * 0000 0000-0000 007F 0xxxxxxx
1406 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1407 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1409 * Check for the 0x110000 limit too
1416 if ((cur[1] & 0xc0) != 0x80)
1417 goto encoding_error;
1418 if ((c & 0xe0) == 0xe0) {
1420 if ((cur[2] & 0xc0) != 0x80)
1421 goto encoding_error;
1422 if ((c & 0xf0) == 0xf0) {
1423 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
1424 goto encoding_error;
1427 val = (cur[0] & 0x7) << 18;
1428 val |= (cur[1] & 0x3f) << 12;
1429 val |= (cur[2] & 0x3f) << 6;
1430 val |= cur[3] & 0x3f;
1434 val = (cur[0] & 0xf) << 12;
1435 val |= (cur[1] & 0x3f) << 6;
1436 val |= cur[2] & 0x3f;
1441 val = (cur[0] & 0x1f) << 6;
1442 val |= cur[1] & 0x3f;
1444 if (!IS_CHAR(val)) {
1445 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1446 (ctxt->sax->error != NULL))
1447 ctxt->sax->error(ctxt->userData,
1448 "Char 0x%X out of allowed range\n",
1451 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1452 ctxt->wellFormed = 0;
1453 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1460 return ((int) *cur);
1464 * Assume it's a fixed length encoding (1) with
1465 * a compatible encoding for the ASCII set, since
1466 * XML constructs only use < 128 chars
1469 return ((int) *cur);
1473 * If we detect an UTF8 error that probably mean that the
1474 * input encoding didn't get properly advertised in the
1475 * declaration header. Report the error and switch the encoding
1476 * to ISO-Latin-1 (if you don't like this policy, just declare the
1480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1481 ctxt->sax->error(ctxt->userData,
1482 "Input is not proper UTF-8, indicate encoding !\n");
1483 ctxt->sax->error(ctxt->userData,
1484 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1485 ctxt->input->cur[0], ctxt->input->cur[1],
1486 ctxt->input->cur[2], ctxt->input->cur[3]);
1488 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1489 ctxt->wellFormed = 0;
1493 return ((int) *cur);
1497 * xmlCopyCharMultiByte:
1498 * @out: pointer to an array of xmlChar
1499 * @val: the char value
1501 * append the char value in the array
1503 * Returns the number of xmlChar written
1506 xmlCopyCharMultiByte(xmlChar *out, int val) {
1508 * We are supposed to handle UTF8, check it's valid
1509 * From rfc2044: encoding of the Unicode values on UTF-8:
1511 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1512 * 0000 0000-0000 007F 0xxxxxxx
1513 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1514 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1517 xmlChar *savedout = out;
1519 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1520 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1521 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1523 xmlGenericError(xmlGenericErrorContext,
1524 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1528 for ( ; bits >= 0; bits-= 6)
1529 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1530 return (out - savedout);
1532 *out = (xmlChar) val;
1538 * @len: Ignored, compatibility
1539 * @out: pointer to an array of xmlChar
1540 * @val: the char value
1542 * append the char value in the array
1544 * Returns the number of xmlChar written
1548 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1549 /* the len parameter is ignored */
1551 return(xmlCopyCharMultiByte (out, val));
1553 *out = (xmlChar) val;
1557 /************************************************************************
1559 * Commodity functions to switch encodings *
1561 ************************************************************************/
1564 * xmlSwitchEncoding:
1565 * @ctxt: the parser context
1566 * @enc: the encoding value (number)
1568 * change the input functions when discovering the character encoding
1569 * of a given entity.
1571 * Returns 0 in case of success, -1 otherwise
1574 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1576 xmlCharEncodingHandlerPtr handler;
1579 case XML_CHAR_ENCODING_ERROR:
1580 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1582 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1583 ctxt->wellFormed = 0;
1584 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1586 case XML_CHAR_ENCODING_NONE:
1587 /* let's assume it's UTF-8 without the XML decl */
1588 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1590 case XML_CHAR_ENCODING_UTF8:
1591 /* default encoding, no conversion should be needed */
1592 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1595 * Errata on XML-1.0 June 20 2001
1596 * Specific handling of the Byte Order Mark for
1599 if ((ctxt->input != NULL) &&
1600 (ctxt->input->cur[0] == 0xEF) &&
1601 (ctxt->input->cur[1] == 0xBB) &&
1602 (ctxt->input->cur[2] == 0xBF)) {
1603 ctxt->input->cur += 3;
1609 handler = xmlGetCharEncodingHandler(enc);
1610 if (handler == NULL) {
1615 case XML_CHAR_ENCODING_ERROR:
1616 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1618 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1619 ctxt->wellFormed = 0;
1620 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
1621 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1623 case XML_CHAR_ENCODING_NONE:
1624 /* let's assume it's UTF-8 without the XML decl */
1625 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1627 case XML_CHAR_ENCODING_UTF8:
1628 case XML_CHAR_ENCODING_ASCII:
1629 /* default encoding, no conversion should be needed */
1630 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1632 case XML_CHAR_ENCODING_UTF16LE:
1634 case XML_CHAR_ENCODING_UTF16BE:
1636 case XML_CHAR_ENCODING_UCS4LE:
1637 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1639 ctxt->sax->error(ctxt->userData,
1640 "char encoding USC4 little endian not supported\n");
1642 case XML_CHAR_ENCODING_UCS4BE:
1643 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "char encoding USC4 big endian not supported\n");
1648 case XML_CHAR_ENCODING_EBCDIC:
1649 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651 ctxt->sax->error(ctxt->userData,
1652 "char encoding EBCDIC not supported\n");
1654 case XML_CHAR_ENCODING_UCS4_2143:
1655 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "char encoding UCS4 2143 not supported\n");
1660 case XML_CHAR_ENCODING_UCS4_3412:
1661 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663 ctxt->sax->error(ctxt->userData,
1664 "char encoding UCS4 3412 not supported\n");
1666 case XML_CHAR_ENCODING_UCS2:
1667 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "char encoding UCS2 not supported\n");
1672 case XML_CHAR_ENCODING_8859_1:
1673 case XML_CHAR_ENCODING_8859_2:
1674 case XML_CHAR_ENCODING_8859_3:
1675 case XML_CHAR_ENCODING_8859_4:
1676 case XML_CHAR_ENCODING_8859_5:
1677 case XML_CHAR_ENCODING_8859_6:
1678 case XML_CHAR_ENCODING_8859_7:
1679 case XML_CHAR_ENCODING_8859_8:
1680 case XML_CHAR_ENCODING_8859_9:
1682 * We used to keep the internal content in the
1683 * document encoding however this turns being unmaintainable
1684 * So xmlGetCharEncodingHandler() will return non-null
1685 * values for this now.
1687 if ((ctxt->inputNr == 1) &&
1688 (ctxt->encoding == NULL) &&
1689 (ctxt->input->encoding != NULL)) {
1690 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1692 ctxt->charset = enc;
1694 case XML_CHAR_ENCODING_2022_JP:
1695 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697 ctxt->sax->error(ctxt->userData,
1698 "char encoding ISO-2022-JPnot supported\n");
1700 case XML_CHAR_ENCODING_SHIFT_JIS:
1701 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1703 ctxt->sax->error(ctxt->userData,
1704 "char encoding Shift_JIS not supported\n");
1706 case XML_CHAR_ENCODING_EUC_JP:
1707 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
1710 "char encoding EUC-JPnot supported\n");
1714 if (handler == NULL)
1716 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1717 return(xmlSwitchToEncoding(ctxt, handler));
1721 * xmlSwitchToEncoding:
1722 * @ctxt: the parser context
1723 * @handler: the encoding handler
1725 * change the input functions when discovering the character encoding
1726 * of a given entity.
1728 * Returns 0 in case of success, -1 otherwise
/*
 * xmlSwitchToEncoding -- summary of the statements visible in this listing:
 *  - everything happens only when @handler and ctxt->input are not NULL;
 *  - with an I/O buffer (ctxt->input->buf != NULL):
 *      - if an encoder is already installed, the function checks whether it
 *        is the same handler, or whether both names start with "UTF-16";
 *        otherwise the old encoder is closed with xmlCharEncCloseFunc() and
 *        @handler is installed; with no previous encoder @handler is simply
 *        installed;
 *      - if the buffer already holds data, a byte order mark matching the
 *        handler name ("UTF-16LE": FF FE, "UTF-16BE": FE FF, "UTF-8":
 *        EF BB BF) is skipped, the processed bytes are dropped with
 *        xmlBufferShrink(), the current buffer becomes buf->raw and a fresh
 *        buffer is created with xmlBufferCreate();
 *      - the raw data is converted either completely (xmlCharEncInFunc) or
 *        only far enough to parse the XML declaration
 *        (xmlCharEncFirstLine); the test selecting between the two is not
 *        visible here; a failure prints
 *        "xmlSwitchToEncoding: encoder error";
 *      - ctxt->input->cur and the end pointer are reset on the new buffer;
 *  - without a usable buffer: "xmlSwitchToEncoding : no input" is reported
 *    when ctxt->input->length is 0 or ctxt->input->buf is NULL; the
 *    following statements build buf->raw from the static memory block,
 *    convert it and release the old base through ctxt->input->free;
 *  - finally ctxt->charset is set to XML_CHAR_ENCODING_UTF8.
 * NOTE(review): the statements that write ctxt->input->buf->raw in the
 * static-memory part look reachable only when ctxt->input->buf is not NULL,
 * yet they appear to sit in the branch taken when it is NULL -- the nesting
 * cannot be verified from this listing, confirm against the complete file.
 * NOTE(review): the results of xmlBufferCreate() are used without a visible
 * NULL check.
 */
1731 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1735 if (handler != NULL) {
1736 if (ctxt->input != NULL) {
1737 if (ctxt->input->buf != NULL) {
1738 if (ctxt->input->buf->encoder != NULL) {
1740 * Check in case the auto encoding detetection triggered
1743 if (ctxt->input->buf->encoder == handler)
1747 * "UTF-16" can be used for both LE and BE
1749 if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
1750 BAD_CAST "UTF-16", 6)) &&
1751 (!xmlStrncmp(BAD_CAST handler->name,
1752 BAD_CAST "UTF-16", 6))) {
1757 * Note: this is a bit dangerous, but that's what it
1758 * takes to use nearly compatible signature for different
1761 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1762 ctxt->input->buf->encoder = handler;
1765 ctxt->input->buf->encoder = handler;
1768 * Is there already some content down the pipe to convert ?
1770 if ((ctxt->input->buf->buffer != NULL) &&
1771 (ctxt->input->buf->buffer->use > 0)) {
1775 * Specific handling of the Byte Order Mark for
1778 if ((handler->name != NULL) &&
1779 (!strcmp(handler->name, "UTF-16LE")) &&
1780 (ctxt->input->cur[0] == 0xFF) &&
1781 (ctxt->input->cur[1] == 0xFE)) {
1782 ctxt->input->cur += 2;
1784 if ((handler->name != NULL) &&
1785 (!strcmp(handler->name, "UTF-16BE")) &&
1786 (ctxt->input->cur[0] == 0xFE) &&
1787 (ctxt->input->cur[1] == 0xFF)) {
1788 ctxt->input->cur += 2;
1791 * Errata on XML-1.0 June 20 2001
1792 * Specific handling of the Byte Order Mark for
1795 if ((handler->name != NULL) &&
1796 (!strcmp(handler->name, "UTF-8")) &&
1797 (ctxt->input->cur[0] == 0xEF) &&
1798 (ctxt->input->cur[1] == 0xBB) &&
1799 (ctxt->input->cur[2] == 0xBF)) {
1800 ctxt->input->cur += 3;
1804 * Shrink the current input buffer.
1805 * Move it as the raw buffer and create a new input buffer
1807 processed = ctxt->input->cur - ctxt->input->base;
1808 xmlBufferShrink(ctxt->input->buf->buffer, processed);
1809 ctxt->input->buf->raw = ctxt->input->buf->buffer;
1810 ctxt->input->buf->buffer = xmlBufferCreate();
1814 * convert as much as possible of the buffer
1816 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1817 ctxt->input->buf->buffer,
1818 ctxt->input->buf->raw);
1821 * convert just enough to get
1822 * '<?xml version="1.0" encoding="xxx"?>'
1823 * parsed with the autodetected encoding
1824 * into the parser reading buffer.
1826 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1827 ctxt->input->buf->buffer,
1828 ctxt->input->buf->raw);
1831 xmlGenericError(xmlGenericErrorContext,
1832 "xmlSwitchToEncoding: encoder error\n");
1836 ctxt->input->cur = ctxt->input->buf->buffer->content;
1838 &ctxt->input->base[ctxt->input->buf->buffer->use];
1843 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1845 * When parsing a static memory array one must know the
1846 * size to be able to convert the buffer.
1848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1849 ctxt->sax->error(ctxt->userData,
1850 "xmlSwitchToEncoding : no input\n");
1856 * Shrink the current input buffer.
1857 * Move it as the raw buffer and create a new input buffer
1859 processed = ctxt->input->cur - ctxt->input->base;
1861 ctxt->input->buf->raw = xmlBufferCreate();
1862 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1863 ctxt->input->length - processed);
1864 ctxt->input->buf->buffer = xmlBufferCreate();
1867 * convert as much as possible of the raw input
1868 * to the parser reading buffer.
1870 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1871 ctxt->input->buf->buffer,
1872 ctxt->input->buf->raw);
1874 xmlGenericError(xmlGenericErrorContext,
1875 "xmlSwitchToEncoding: encoder error\n");
1880 * Conversion succeeded, get rid of the old buffer
1882 if ((ctxt->input->free != NULL) &&
1883 (ctxt->input->base != NULL))
1884 ctxt->input->free((xmlChar *) ctxt->input->base);
1886 ctxt->input->cur = ctxt->input->buf->buffer->content;
1888 &ctxt->input->base[ctxt->input->buf->buffer->use];
1892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1893 ctxt->sax->error(ctxt->userData,
1894 "xmlSwitchToEncoding : no input\n");
1898 * The parsing is now done in UTF8 natively
1900 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1907 /************************************************************************
1909 * Commodity functions to handle entities processing *
1911 ************************************************************************/
1914 * xmlFreeInputStream:
1915 * @input: an xmlParserInputPtr
1917 * Free up an input stream.
1920 xmlFreeInputStream(xmlParserInputPtr input) {
1921 if (input == NULL) return;
1923 if (input->filename != NULL) xmlFree((char *) input->filename);
1924 if (input->directory != NULL) xmlFree((char *) input->directory);
1925 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1926 if (input->version != NULL) xmlFree((char *) input->version);
1927 if ((input->free != NULL) && (input->base != NULL))
1928 input->free((xmlChar *) input->base);
1929 if (input->buf != NULL)
1930 xmlFreeParserInputBuffer(input->buf);
1935 * xmlNewInputStream:
1936 * @ctxt: an XML parser context
1938 * Create a new input stream structure
1939 * Returns the new input stream or NULL
1942 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1943 xmlParserInputPtr input;
1945 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1946 if (input == NULL) {
1948 ctxt->errNo = XML_ERR_NO_MEMORY;
1949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1950 ctxt->sax->error(ctxt->userData,
1951 "malloc: couldn't allocate a new input stream\n");
1952 ctxt->errNo = XML_ERR_NO_MEMORY;
1956 memset(input, 0, sizeof(xmlParserInput));
1959 input->standalone = -1;
1964 * xmlNewIOInputStream:
1965 * @ctxt: an XML parser context
1966 * @input: an I/O Input
1967 * @enc: the charset encoding if known
1969 * Create a new input stream structure encapsulating the @input into
1970 * a stream suitable for the parser.
1972 * Returns the new input stream or NULL
1975 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1976 xmlCharEncoding enc) {
1977 xmlParserInputPtr inputStream;
1979 if (xmlParserDebugEntities)
1980 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1981 inputStream = xmlNewInputStream(ctxt);
1982 if (inputStream == NULL) {
1985 inputStream->filename = NULL;
1986 inputStream->buf = input;
1987 inputStream->base = inputStream->buf->buffer->content;
1988 inputStream->cur = inputStream->buf->buffer->content;
1989 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1990 if (enc != XML_CHAR_ENCODING_NONE) {
1991 xmlSwitchEncoding(ctxt, enc);
1994 return(inputStream);
1998 * xmlNewEntityInputStream:
1999 * @ctxt: an XML parser context
2000 * @entity: an Entity pointer
2002 * Create a new input stream based on an xmlEntityPtr
2004 * Returns the new input stream or NULL
2007 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2008 xmlParserInputPtr input;
2010 if (entity == NULL) {
2011 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
2014 "internal: xmlNewEntityInputStream entity = NULL\n");
2015 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2018 if (xmlParserDebugEntities)
2019 xmlGenericError(xmlGenericErrorContext,
2020 "new input from entity: %s\n", entity->name);
2021 if (entity->content == NULL) {
2022 switch (entity->etype) {
2023 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2024 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
2025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2026 ctxt->sax->error(ctxt->userData,
2027 "xmlNewEntityInputStream unparsed entity !\n");
2029 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2030 case XML_EXTERNAL_PARAMETER_ENTITY:
2031 return(xmlLoadExternalEntity((char *) entity->URI,
2032 (char *) entity->ExternalID, ctxt));
2033 case XML_INTERNAL_GENERAL_ENTITY:
2034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2035 ctxt->sax->error(ctxt->userData,
2036 "Internal entity %s without content !\n", entity->name);
2038 case XML_INTERNAL_PARAMETER_ENTITY:
2039 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2041 ctxt->sax->error(ctxt->userData,
2042 "Internal parameter entity %s without content !\n", entity->name);
2044 case XML_INTERNAL_PREDEFINED_ENTITY:
2045 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2047 ctxt->sax->error(ctxt->userData,
2048 "Predefined entity %s without content !\n", entity->name);
2053 input = xmlNewInputStream(ctxt);
2054 if (input == NULL) {
2057 input->filename = (char *) entity->URI;
2058 input->base = entity->content;
2059 input->cur = entity->content;
2060 input->length = entity->length;
2061 input->end = &entity->content[input->length];
2066 * xmlNewStringInputStream:
2067 * @ctxt: an XML parser context
2068 * @buffer: an memory buffer
2070 * Create a new input stream based on a memory buffer.
2071 * Returns the new input stream
2074 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2075 xmlParserInputPtr input;
2077 if (buffer == NULL) {
2078 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2080 ctxt->sax->error(ctxt->userData,
2081 "internal: xmlNewStringInputStream string = NULL\n");
2084 if (xmlParserDebugEntities)
2085 xmlGenericError(xmlGenericErrorContext,
2086 "new fixed input: %.30s\n", buffer);
2087 input = xmlNewInputStream(ctxt);
2088 if (input == NULL) {
2091 input->base = buffer;
2092 input->cur = buffer;
2093 input->length = xmlStrlen(buffer);
2094 input->end = &buffer[input->length];
2099 * xmlNewInputFromFile:
2100 * @ctxt: an XML parser context
2101 * @filename: the filename to use as entity
2103 * Create a new input stream based on a file.
2105 * Returns the new input stream or NULL in case of error
2108 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2109 xmlParserInputBufferPtr buf;
2110 xmlParserInputPtr inputStream;
2111 char *directory = NULL;
2112 xmlChar *URI = NULL;
2114 if (xmlParserDebugEntities)
2115 xmlGenericError(xmlGenericErrorContext,
2116 "new input from file: %s\n", filename);
2117 if (ctxt == NULL) return(NULL);
2118 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2122 URI = xmlStrdup((xmlChar *) filename);
2123 directory = xmlParserGetDirectory((const char *) URI);
2125 inputStream = xmlNewInputStream(ctxt);
2126 if (inputStream == NULL) {
2127 if (directory != NULL) xmlFree((char *) directory);
2128 if (URI != NULL) xmlFree((char *) URI);
2132 inputStream->filename = (const char *) URI;
2133 inputStream->directory = directory;
2134 inputStream->buf = buf;
2136 inputStream->base = inputStream->buf->buffer->content;
2137 inputStream->cur = inputStream->buf->buffer->content;
2138 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
2139 if ((ctxt->directory == NULL) && (directory != NULL))
2140 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2141 return(inputStream);
2144 /************************************************************************
2146 * Commodity functions to handle parser contexts *
2148 ************************************************************************/
2151 * xmlInitParserCtxt:
2152 * @ctxt: an XML parser context
2154 * Initialize a parser context
/*
 * xmlInitParserCtxt -- summary of the statements visible in this listing:
 *  - prints "xmlInitParserCtxt: NULL context given" through xmlGenericError
 *    (the test guarding it is not visible here);
 *  - calls xmlDefaultSAXHandlerInit(), allocates ctxt->sax and fills it with
 *    a copy of xmlDefaultSAXHandler;
 *  - allocates the input stack (5 entries), the node stack (10 entries),
 *    the name stack (10 entries) and the space stack (10 ints); each failed
 *    allocation prints "xmlInitParserCtxt: out of memory";
 *  - resets version, encoding, directory and catalogs to NULL, standalone
 *    to -1, the listed counters/flags to 0, instate to XML_PARSER_START,
 *    wellFormed to 1, errNo to XML_ERR_OK and charset to
 *    XML_CHAR_ENCODING_UTF8;
 *  - spaceTab[0] is -1 and ctxt->space points at it; userData is the
 *    context itself;
 *  - loadsubset, validate, pedantic, linenumbers, keepBlanks and
 *    replaceEntities are taken from the library-wide default variables;
 *    with keepBlanks == 0 the SAX ignorableWhitespace callback is set to
 *    ignorableWhitespace;
 *  - the validation context reports through xmlParserValidityError /
 *    xmlParserValidityWarning; when validating with
 *    xmlGetWarningsDefaultValue == 0 the warning callback is cleared;
 *  - finishes with xmlInitNodeInfoSeq() on ctxt->node_seq.
 * NOTE(review): short lines (braces, returns, several field resets and any
 * cleanup on the out-of-memory paths) are missing from this listing, so
 * whether earlier allocations are released on failure cannot be told from
 * here -- confirm against the complete file.
 */
2158 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2161 xmlGenericError(xmlGenericErrorContext,
2162 "xmlInitParserCtxt: NULL context given\n");
2166 xmlDefaultSAXHandlerInit();
2168 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2169 if (ctxt->sax == NULL) {
2170 xmlGenericError(xmlGenericErrorContext,
2171 "xmlInitParserCtxt: out of memory\n");
2174 memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2176 /* Allocate the Input stack */
2177 ctxt->inputTab = (xmlParserInputPtr *)
2178 xmlMalloc(5 * sizeof(xmlParserInputPtr));
2179 if (ctxt->inputTab == NULL) {
2180 xmlGenericError(xmlGenericErrorContext,
2181 "xmlInitParserCtxt: out of memory\n");
2191 ctxt->version = NULL;
2192 ctxt->encoding = NULL;
2193 ctxt->standalone = -1;
2194 ctxt->hasExternalSubset = 0;
2195 ctxt->hasPErefs = 0;
2198 ctxt->instate = XML_PARSER_START;
2200 ctxt->directory = NULL;
2202 /* Allocate the Node stack */
2203 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2204 if (ctxt->nodeTab == NULL) {
2205 xmlGenericError(xmlGenericErrorContext,
2206 "xmlInitParserCtxt: out of memory\n");
2219 /* Allocate the Name stack */
2220 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2221 if (ctxt->nameTab == NULL) {
2222 xmlGenericError(xmlGenericErrorContext,
2223 "xmlInitParserCtxt: out of memory\n");
2239 /* Allocate the space stack */
2240 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2241 if (ctxt->spaceTab == NULL) {
2242 xmlGenericError(xmlGenericErrorContext,
2243 "xmlInitParserCtxt: out of memory\n");
2259 ctxt->spaceMax = 10;
2260 ctxt->spaceTab[0] = -1;
2261 ctxt->space = &ctxt->spaceTab[0];
2262 ctxt->userData = ctxt;
2264 ctxt->wellFormed = 1;
2266 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2267 ctxt->validate = xmlDoValidityCheckingDefaultValue;
2268 ctxt->pedantic = xmlPedanticParserDefaultValue;
2269 ctxt->linenumbers = xmlLineNumbersDefaultValue;
2270 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2271 if (ctxt->keepBlanks == 0)
2272 ctxt->sax->ignorableWhitespace = ignorableWhitespace;
2274 ctxt->vctxt.userData = ctxt;
2275 ctxt->vctxt.error = xmlParserValidityError;
2276 ctxt->vctxt.warning = xmlParserValidityWarning;
2277 if (ctxt->validate) {
2278 if (xmlGetWarningsDefaultValue == 0)
2279 ctxt->vctxt.warning = NULL;
2281 ctxt->vctxt.warning = xmlParserValidityWarning;
2282 ctxt->vctxt.nodeMax = 0;
2284 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2285 ctxt->record_info = 0;
2287 ctxt->checkIndex = 0;
2289 ctxt->errNo = XML_ERR_OK;
2291 ctxt->charset = XML_CHAR_ENCODING_UTF8;
2292 ctxt->catalogs = NULL;
2293 xmlInitNodeInfoSeq(&ctxt->node_seq);
2297 * xmlFreeParserCtxt:
2298 * @ctxt: an XML parser context
2300 * Free all the memory used by a parser context. However the parsed
2301 * document in ctxt->myDoc is not freed.
/*
 * Releases what a parser context owns: pops every remaining input stream
 * and name, then frees the stack arrays and the strings held by the
 * context. A NULL ctxt is accepted and ignored.
 * NOTE(review): several lines of this function are absent from this
 * listing; only the visible statements are described.
 */
2305 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2307 xmlParserInputPtr input;
/* Nothing to release for a NULL context. */
2310 if (ctxt == NULL) return;
/* Unwind the input stack, freeing each stream as it is popped. */
2312 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2313 xmlFreeInputStream(input);
/* Unwind the name stack until namePop() reports it is empty. */
2315 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
/* Free the stack arrays and owned strings; each pointer is NULL-checked. */
2318 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2319 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2320 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2321 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2322 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2323 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2324 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2325 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2326 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
/*
 * The shared default SAX handler is excluded by this test.
 * NOTE(review): the guarded statement is not visible here; presumably it
 * frees ctxt->sax - confirm.
 */
2327 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2329 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2330 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
/* Per-context catalogs only exist when catalog support is compiled in. */
2331 #ifdef LIBXML_CATALOG_ENABLED
2332 if (ctxt->catalogs != NULL)
2333 xmlCatalogFreeLocal(ctxt->catalogs);
2341 * Allocate and initialize a new parser context.
2343 * Returns the xmlParserCtxtPtr or NULL
/* Receives the newly allocated context. */
2349 xmlParserCtxtPtr ctxt;
/* Allocate room for one xmlParserCtxt with the library allocator. */
2351 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
/* Allocation failure is reported through the generic error channel. */
2353 xmlGenericError(xmlGenericErrorContext,
2354 "xmlNewParserCtxt : cannot allocate context\n");
2355 xmlGenericError(xmlGenericErrorContext, "malloc failed");
/* Zero the whole structure before xmlInitParserCtxt() fills in defaults. */
2358 memset(ctxt, 0, sizeof(xmlParserCtxt));
2359 xmlInitParserCtxt(ctxt);
2363 /************************************************************************
2365 * Handling of node information *
2367 ************************************************************************/
2370 * xmlClearParserCtxt:
2371 * @ctxt: an XML parser context
2373 * Clear (release owned resources) and reinitialize a parser context
/*
 * Resets a context for reuse: the node info sequence is cleared first,
 * then xmlInitParserCtxt() re-runs the default initialization.
 */
2377 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2381 xmlClearNodeInfoSeq(&ctxt->node_seq);
2382 xmlInitParserCtxt(ctxt);
2386 * xmlParserFindNodeInfo:
2387 * @ctx: an XML parser context
2388 * @node: an XML node within the tree
2390 * Find the parser node info struct for a given node
2392 * Returns an xmlParserNodeInfo block pointer or NULL
/*
 * Looks up the info record stored for node in ctx->node_seq. The candidate
 * slot comes from xmlParserFindNodeInfoIndex(); it is returned only when
 * it lies inside the sequence and actually refers to node.
 */
2394 const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,
2395 const xmlNodePtr node)
2399 /* Find position where node should be at */
2400 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
/* The index may be an insertion point, so verify the slot holds node. */
2401 if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node)
2402 return &ctx->node_seq.buffer[pos];
2409 * xmlInitNodeInfoSeq:
2410 * @seq: a node info sequence pointer
2412 * -- Initialize (set to initial state) node info sequence
/*
 * Puts a node info sequence into its initial state.
 * NOTE(review): the body is not visible in this listing; confirm which
 * fields of seq it resets.
 */
2415 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2423 * xmlClearNodeInfoSeq:
2424 * @seq: a node info sequence pointer
2426 * -- Clear (release memory and reinitialize) node info sequence
/*
 * Frees the record buffer of a node info sequence (if any) and then
 * re-initializes the sequence through xmlInitNodeInfoSeq().
 */
2430 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2432 if ( seq->buffer != NULL )
2433 xmlFree(seq->buffer);
2434 xmlInitNodeInfoSeq(seq);
2439 * xmlParserFindNodeInfoIndex:
2440 * @seq: a node info sequence pointer
2441 * @node: an XML node pointer
2444 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2445 * the given node is or should be at in a sorted sequence
2447 * Returns a long indicating the position of the record
/*
 * Binary search over seq->buffer, comparing records by node pointer value.
 * The result is either the slot holding node or the slot where it would
 * have to be inserted.
 * NOTE(review): the code indexes buffer[middle - 1], which suggests the
 * search bounds are 1-based; the initial value of lower is not visible
 * here - confirm.
 */
2449 unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2450 const xmlNodePtr node)
2452 unsigned long upper, lower, middle;
2455 /* Do a binary search for the key */
2457 upper = seq->length;
2459 while ( lower <= upper && !found) {
/* Midpoint computed from the span so that lower + upper is never formed. */
2460 middle = lower + (upper - lower) / 2;
2461 if ( node == seq->buffer[middle - 1].node )
2463 else if ( node < seq->buffer[middle - 1].node )
2469 /* Return position */
/* middle == 0 is tested first, so buffer[middle - 1] is never read at 0. */
2470 if ( middle == 0 || seq->buffer[middle - 1].node < node )
2478 * xmlParserAddNodeInfo:
2479 * @ctxt: an XML parser context
2480 * @info: a node info record pointer
2482 * Insert node info record into the sorted sequence
/*
 * Stores *info in ctxt->node_seq at the position reported by
 * xmlParserFindNodeInfoIndex(). An existing record for the same node is
 * overwritten in place; otherwise the buffer is grown when full, later
 * records are shifted up by one slot, and the new record is inserted.
 */
2485 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2486 const xmlParserNodeInfoPtr info)
2490 /* Find pos and check to see if node is already in the sequence */
2491 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr)
2493 if (pos < ctxt->node_seq.length
2494 && ctxt->node_seq.buffer[pos].node == info->node) {
2495 ctxt->node_seq.buffer[pos] = *info;
2498 /* Otherwise, we need to add new node to buffer */
2500 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
2501 xmlParserNodeInfo *tmp_buffer;
/*
 * NOTE(review): byte_size is an unsigned int; the size product below could
 * be truncated for very large sequences - confirm this is acceptable.
 */
2502 unsigned int byte_size;
/* An empty sequence starts from a nominal capacity of 2. */
2504 if (ctxt->node_seq.maximum == 0)
2505 ctxt->node_seq.maximum = 2;
/* Request room for twice the current capacity. */
2506 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2507 (2 * ctxt->node_seq.maximum));
2509 if (ctxt->node_seq.buffer == NULL)
2510 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2513 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
/* The result goes to a temporary so the old buffer is not lost on failure. */
2516 if (tmp_buffer == NULL) {
2517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2518 ctxt->sax->error(ctxt->userData, "Out of memory\n");
2519 ctxt->errNo = XML_ERR_NO_MEMORY;
/* Commit the new buffer and record the doubled capacity. */
2522 ctxt->node_seq.buffer = tmp_buffer;
2523 ctxt->node_seq.maximum *= 2;
2526 /* If position is not at end, move elements out of the way */
2527 if (pos != ctxt->node_seq.length) {
/* Walk from the end downwards so no record is overwritten before it moves. */
2530 for (i = ctxt->node_seq.length; i > pos; i--)
2531 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2534 /* Copy element and increase length */
2535 ctxt->node_seq.buffer[pos] = *info;
2536 ctxt->node_seq.length++;
2540 /************************************************************************
2542 * Default settings *
2544 ************************************************************************/
2546 * xmlPedanticParserDefault:
2549 * Set and return the previous value for enabling pedantic warnings.
2551 * Returns the previous value: 0 if pedantic warnings were disabled, 1 if enabled.
/* Replaces the global pedantic-parser default with val. */
2555 xmlPedanticParserDefault(int val) {
/* Capture the current setting before it is overwritten. */
2556 int old = xmlPedanticParserDefaultValue;
2558 xmlPedanticParserDefaultValue = val;
2563 * xmlLineNumbersDefault:
2566 * Set and return the previous value for enabling line numbers in elements
2567 * contents. This may break on old application and is turned off by default.
2569 * Returns the previous value: 0 if line numbers were disabled, 1 if enabled.
/* Replaces the global line-numbers default with val. */
2573 xmlLineNumbersDefault(int val) {
/* Capture the current setting before it is overwritten. */
2574 int old = xmlLineNumbersDefaultValue;
2576 xmlLineNumbersDefaultValue = val;
2581 * xmlSubstituteEntitiesDefault:
2584 * Set and return the previous value for default entity support.
2585 * Initially the parser always keeps entity references instead of substituting
2586 * entity values in the output. This function has to be used to change the
2587 * default parser behavior
2588 * SAX::substituteEntities() has to be used for changing that on a file by file basis.
2591 * Returns the previous value: 0 for no substitution, 1 for substitution.
/* Replaces the global entity-substitution default with val. */
2595 xmlSubstituteEntitiesDefault(int val) {
/* Capture the current setting before it is overwritten. */
2596 int old = xmlSubstituteEntitiesDefaultValue;
2598 xmlSubstituteEntitiesDefaultValue = val;
2603 * xmlKeepBlanksDefault:
2606 * Set and return the previous value for default blanks text nodes support.
2607 * The 1.x version of the parser used a heuristic to try to detect
2608 * ignorable white spaces. As a result the SAX callback was generating
2609 * ignorableWhitespace() callbacks instead of characters() one, and when
2610 * using the DOM output text nodes containing those blanks were not generated.
2611 * The 2.x and later version will switch to the XML standard way and
2612 * ignorableWhitespace() are only generated when running the parser in
2613 * validating mode and when the current element doesn't allow CDATA or
2615 * This function is provided as a way to force the standard behavior
2616 * on 1.X libs and to switch back to the old mode for compatibility when
2617 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2618 * by using xmlIsBlankNode() commodity function to detect the "empty"
2620 * This value also affects autogeneration of indentation when saving code:
2621 * if blanks sections are kept, indentation is not generated.
2623 * Returns the previous value: 0 if blank text nodes were not kept, 1 if they were kept.
/* Replaces the global keep-blanks default with val. */
2627 xmlKeepBlanksDefault(int val) {
/* Capture the current setting before it is overwritten. */
2628 int old = xmlKeepBlanksDefaultValue;
2630 xmlKeepBlanksDefaultValue = val;
/* Output indentation is set to the logical inverse of keeping blanks. */
2631 xmlIndentTreeOutput = !val;
2635 /************************************************************************
2637 * Deprecated functions kept for compatibility *
2639 ************************************************************************/
2642 * xmlCheckLanguageID:
2643 * @lang: pointer to the string value
2645 * Checks that the value conforms to the LanguageID production:
2647 * NOTE: this is somewhat deprecated, those productions were removed from
2648 * the XML Second edition.
2650 * [33] LanguageID ::= Langcode ('-' Subcode)*
2651 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2652 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2653 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2654 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2655 * [38] Subcode ::= ([a-z] | [A-Z])+
2657 * Returns 1 if correct 0 otherwise
/*
 * Checks lang against the LanguageID grammar by walking it with cur.
 * The leading code is classified by its first characters, then the rest
 * of the string is scanned in a loop that runs until the terminating 0.
 * NOTE(review): the pointer advances and return statements are absent
 * from this listing; only the visible tests are described.
 */
2660 xmlCheckLanguageID(const xmlChar *lang) {
2661 const xmlChar *cur = lang;
/* "i-" / "I-" prefix, followed by a run of ASCII letters. */
2665 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2666 ((cur[0] == 'I') && (cur[1] == '-'))) {
2671 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2672 ((cur[0] >= 'a') && (cur[0] <= 'z')))
/* "x-" / "X-" prefix, followed by a run of ASCII letters. */
2674 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2675 ((cur[0] == 'X') && (cur[1] == '-'))) {
2680 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2681 ((cur[0] >= 'a') && (cur[0] <= 'z')))
/* Otherwise the code must start with an ASCII letter. */
2683 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2684 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2689 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2690 ((cur[0] >= 'a') && (cur[0] <= 'z')))
/* Remaining input is processed until the end of the string. */
2696 while (cur[0] != 0) { /* non input consuming */
2700 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2701 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2705 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2706 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2713 * xmlDecodeEntities:
2714 * @ctxt: the parser context
2715 * @len: the len to decode (in bytes !), -1 for no size limit
2716 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2717 * @end: an end marker xmlChar, 0 if none
2718 * @end2: an end marker xmlChar, 0 if none
2719 * @end3: an end marker xmlChar, 0 if none
2721 * This function is deprecated, we now always process entities content
2722 * through xmlStringDecodeEntities
2724 * TODO: remove it in next major release.
2726 * [67] Reference ::= EntityRef | CharRef
2728 * [69] PEReference ::= '%' Name ';'
2730 * Returns A newly allocated string with the substitution done. The caller
2731 * must deallocate it !
/*
 * Deprecated entity decoder. It emits a deprecation message, then (in the
 * visible body) copies input into a heap buffer while expanding character
 * references, entity references and parameter-entity references according
 * to the what flags, stopping at an end marker or after max characters.
 * NOTE(review): every parameter is marked ATTRIBUTE_UNUSED, which suggests
 * the body after the deprecation notice may be compiled out; the
 * preprocessor lines are not visible in this listing - confirm.
 */
2734 xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED,
2735 xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) {
2737 xmlChar *buffer = NULL;
2738 unsigned int buffer_size = 0;
2739 unsigned int nbchars = 0;
2741 xmlChar *current = NULL;
/* A len of -1 converts to the largest unsigned value, i.e. no size limit. */
2743 unsigned int max = (unsigned int) len;
2747 static int deprecated = 0;
2749 xmlGenericError(xmlGenericErrorContext,
2750 "xmlDecodeEntities() deprecated function reached\n");
/* Nesting deeper than 40 is reported as an entity reference loop. */
2755 if (ctxt->depth > 40) {
2756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2757 ctxt->sax->error(ctxt->userData,
2758 "Detected entity reference loop\n");
2759 ctxt->wellFormed = 0;
2760 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
2761 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2766 * allocate a translation buffer.
2768 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2769 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2770 if (buffer == NULL) {
2771 xmlGenericError(xmlGenericErrorContext,
2772 "xmlDecodeEntities: malloc failed");
2777 * OK loop until we reach one of the ending char or a size limit.
2781 while ((nbchars < max) && (c != end) && /* NOTUSED */
2782 (c != end2) && (c != end3)) {
/* "&#" starts a character reference: parse it and copy the resulting value. */
2785 if ((c == '&') && (NXT(1) == '#')) {
2786 int val = xmlParseCharRef(ctxt);
2787 COPY_BUF(0,buffer,nbchars,val);
/*
 * NOTE(review): the parentheses in the condition below look unbalanced
 * ("(c == '&') &&" closes the if before the second operand) - confirm
 * against the complete file.
 */
2789 } else if (c == '&') &&
2790 (what & XML_SUBSTITUTE_REF)) {
2791 if (xmlParserDebugEntities)
2792 xmlGenericError(xmlGenericErrorContext,
2793 "decoding Entity Reference\n");
2794 ent = xmlParseEntityRef(ctxt);
/* With substitution enabled the entity content is copied byte by byte. */
2795 if ((ent != NULL) &&
2796 (ctxt->replaceEntities != 0)) {
2797 current = ent->content;
2798 while (*current != 0) { /* non input consuming loop */
2799 buffer[nbchars++] = *current++;
2800 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
/* Otherwise the reference is written back out as '&' name ';'. */
2804 } else if (ent != NULL) {
2805 const xmlChar *cur = ent->name;
2807 buffer[nbchars++] = '&';
2808 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2811 while (*cur != 0) { /* non input consuming loop */
2812 buffer[nbchars++] = *cur++;
2814 buffer[nbchars++] = ';';
2816 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2818 * a PEReference induce to switch the entity flow,
2819 * we break here to flush the current set of chars
2820 * parsed if any. We will be called back later.
2822 if (xmlParserDebugEntities)
2823 xmlGenericError(xmlGenericErrorContext,
2824 "decoding PE Reference\n");
2825 if (nbchars != 0) break;
2827 xmlParsePEReference(ctxt);
2830 * Pop-up of finished entities.
2832 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
/* Any other character is copied through unchanged. */
2837 COPY_BUF(l,buffer,nbchars,c);
2839 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
/* Terminate the output string. */
2845 buffer[nbchars++] = 0;
2852 * xmlNamespaceParseNCName:
2853 * @ctxt: an XML parser context
2855 * parse an XML namespace name.
2857 * TODO: this seems not in use anymore, the namespace handling is done on
2858 * top of the SAX interfaces, i.e. not on raw input.
2860 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2862 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2863 * CombiningChar | Extender
2865 * Returns the namespace name or NULL
/*
 * Deprecated NCName reader. It emits a deprecation message; the visible
 * body then accumulates name characters into a fixed stack buffer and
 * returns a copy made with xmlStrndup().
 * NOTE(review): the parameter is marked ATTRIBUTE_UNUSED, so the body may
 * be compiled out; the preprocessor lines are not visible here - confirm.
 */
2869 xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
/* Five bytes of slack beyond XML_MAX_NAMELEN. */
2871 xmlChar buf[XML_MAX_NAMELEN + 5];
2873 int cur = CUR_CHAR(l);
2876 static int deprecated = 0;
2878 xmlGenericError(xmlGenericErrorContext,
2879 "xmlNamespaceParseNCName() deprecated function reached\n");
2884 /* load first the value of the char !!! */
/* An NCName must start with a letter or an underscore. */
2886 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2888 xmlGenericError(xmlGenericErrorContext,
2889 "xmlNamespaceParseNCName: reached loop 3\n");
2890 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2891 (cur == '.') || (cur == '-') ||
2893 (IS_COMBINING(cur)) ||
2894 (IS_EXTENDER(cur))) {
2895 COPY_BUF(l,buf,len,cur);
/* Once the length limit is reached it is reported and a second scan loop runs. */
2898 if (len >= XML_MAX_NAMELEN) {
2899 xmlGenericError(xmlGenericErrorContext,
2900 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2901 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2902 (cur == '.') || (cur == '-') ||
2904 (IS_COMBINING(cur)) ||
2905 (IS_EXTENDER(cur))) {
/* The caller receives a newly allocated copy of the collected bytes. */
2912 return(xmlStrndup(buf, len));
2918 * xmlNamespaceParseQName:
2919 * @ctxt: an XML parser context
2920 * @prefix: a xmlChar **
2922 * TODO: this seems not in use anymore, the namespace handling is done on
2923 * top of the SAX interfaces, i.e. not on raw input.
2925 * parse an XML qualified name
2927 * [NS 5] QName ::= (Prefix ':')? LocalPart
2929 * [NS 6] Prefix ::= NCName
2931 * [NS 7] LocalPart ::= NCName
2933 * Returns the local part, and prefix is updated
2934 * to get the Prefix if any.
/*
 * Deprecated QName reader. It emits a deprecation message; the visible
 * body calls xmlNamespaceParseNCName() twice.
 * NOTE(review): presumably the first call reads the prefix candidate and
 * the second the local part after ':'; the statements in between are not
 * visible in this listing - confirm.
 */
2938 xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) {
2940 static int deprecated = 0;
2942 xmlGenericError(xmlGenericErrorContext,
2943 "xmlNamespaceParseQName() deprecated function reached\n");
2948 xmlChar *ret = NULL;
2951 ret = xmlNamespaceParseNCName(ctxt);
2955 ret = xmlNamespaceParseNCName(ctxt);
2964 * xmlNamespaceParseNSDef:
2965 * @ctxt: an XML parser context
2967 * parse a namespace prefix declaration
2969 * TODO: this seems not in use anymore, the namespace handling is done on
2970 * top of the SAX interfaces, i.e. not on raw input.
2972 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2974 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2976 * Returns the namespace name
/*
 * Deprecated namespace-declaration reader. It emits a deprecation message;
 * the visible body tests the input for the leading characters "xmln" and
 * reads a name with xmlNamespaceParseNCName().
 * NOTE(review): the rest of the test and the surrounding statements are
 * not visible in this listing.
 */
2980 xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
2981 static int deprecated = 0;
2983 xmlGenericError(xmlGenericErrorContext,
2984 "xmlNamespaceParseNSDef() deprecated function reached\n");
2989 xmlChar *name = NULL;
2991 if ((RAW == 'x') && (NXT(1) == 'm') &&
2992 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2997 name = xmlNamespaceParseNCName(ctxt);
3005 * xmlParseQuotedString:
3006 * @ctxt: an XML parser context
3008 * Parse and return a string between quotes or doublequotes
3010 * TODO: Deprecated, to be removed at next drop of binary compatibility
3012 * Returns the string parsed or NULL.
/*
 * Deprecated quoted-string reader. It emits a deprecation message; the
 * visible body collects characters up to the closing quote into a heap
 * buffer that is grown with xmlRealloc(), and reports an unterminated
 * string as XML_ERR_STRING_NOT_CLOSED. Two loops are visible, one ending
 * on '"' and one ending on '\''.
 */
3015 xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3016 static int deprecated = 0;
3018 xmlGenericError(xmlGenericErrorContext,
3019 "xmlParseQuotedString() deprecated function reached\n");
3025 xmlChar *buf = NULL;
3027 int size = XML_PARSER_BUFFER_SIZE;
3030 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3032 xmlGenericError(xmlGenericErrorContext,
3033 "malloc of %d byte failed\n", size);
3036 xmlGenericError(xmlGenericErrorContext,
3037 "xmlParseQuotedString: reached loop 4\n");
3041 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
/* Grow while fewer than five bytes of room remain. */
3042 if (len + 5 >= size) {
3044 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3046 xmlGenericError(xmlGenericErrorContext,
3047 "realloc of %d byte failed\n", size);
3051 COPY_BUF(l,buf,len,c);
3056 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3058 ctxt->sax->error(ctxt->userData,
3059 "String not closed \"%.50s\"\n", buf);
3060 ctxt->wellFormed = 0;
3061 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3065 } else if (RAW == '\''){
3068 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
/*
 * NOTE(review): this loop grows at len + 1 while the double-quote loop
 * uses len + 5; if the copy here can also write several bytes per
 * character the smaller margin may be insufficient - confirm.
 */
3069 if (len + 1 >= size) {
3071 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3073 xmlGenericError(xmlGenericErrorContext,
3074 "realloc of %d byte failed\n", size);
3083 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
3084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3085 ctxt->sax->error(ctxt->userData,
3086 "String not closed \"%.50s\"\n", buf);
3087 ctxt->wellFormed = 0;
3088 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3098 * xmlParseNamespace:
3099 * @ctxt: an XML parser context
3101 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3103 * This is what the older xml-name Working Draft specified, a bunch of
3104 * other stuff may still rely on it, so support is still here as
3105 * if it was declared on the root of the Tree:-(
3107 * TODO: remove from library
3109 * To be removed at next drop of binary compatibility
/*
 * Deprecated reader for the old namespace processing instruction. It emits
 * a deprecation message; the visible body scans attribute-like tokens
 * until '>', storing the quoted value of "ns" or "href" in href and of
 * "prefix" or "AS" in prefix, then passes both to the SAX
 * globalNamespace callback and frees them.
 */
3113 xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3114 static int deprecated = 0;
3116 xmlGenericError(xmlGenericErrorContext,
3117 "xmlParseNamespace() deprecated function reached\n");
3122 xmlChar *href = NULL;
3123 xmlChar *prefix = NULL;
3127 * We just skipped "namespace" or "xml:namespace"
3131 xmlGenericError(xmlGenericErrorContext,
3132 "xmlParseNamespace: reached loop 5\n");
3133 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
3135 * We can have "ns" or "prefix" attributes
3136 * Old encoding as 'href' or 'AS' attributes is still supported
3138 if ((RAW == 'n') && (NXT(1) == 's')) {
/* Without an '=' the loop simply moves on to the next iteration. */
3143 if (RAW != '=') continue;
3147 href = xmlParseQuotedString(ctxt);
3149 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
3150 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3155 if (RAW != '=') continue;
3159 href = xmlParseQuotedString(ctxt);
3161 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
3162 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3163 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3168 if (RAW != '=') continue;
3172 prefix = xmlParseQuotedString(ctxt);
3174 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3179 if (RAW != '=') continue;
3183 prefix = xmlParseQuotedString(ctxt);
/* "?>" is tested as its own case, separate from the garbage path below. */
3185 } else if ((RAW == '?') && (NXT(1) == '>')) {
3190 * Found garbage when parsing the namespace
3193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3194 ctxt->sax->error(ctxt->userData,
3195 "xmlParseNamespace found garbage\n");
3197 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3198 ctxt->wellFormed = 0;
3199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3204 MOVETO_ENDTAG(CUR_PTR);
/* Hand the collected pair to the SAX layer when a handler is installed. */
3210 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3211 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
/* Both strings are freed here after the callback. */
3214 if (prefix != NULL) xmlFree(prefix);
3215 if (href != NULL) xmlFree(href);
3221 * @ctxt: an XML parser context
3223 * Trickery: parse an XML name but without consuming the input flow
3224 * Needed for rollback cases. Used only when parsing entities references.
3226 * TODO: seems deprecated now, only used in the default part of
3227 * xmlParserHandleReference
3229 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3230 * CombiningChar | Extender
3232 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3234 * [6] Names ::= Name (S Name)*
3236 * Returns the Name parsed or NULL
/*
 * Deprecated name scanner. It emits a deprecation message; the visible
 * body reads name characters through NXT(len) look-ahead into a fixed
 * stack buffer and returns a copy made with xmlStrndup().
 * NOTE(review): buf holds exactly XML_MAX_NAMELEN bytes and the limit test
 * follows the store; confirm the order of the missing statements keeps
 * buf[len] in bounds.
 */
3240 xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3241 static int deprecated = 0;
3243 xmlGenericError(xmlGenericErrorContext,
3244 "xmlScanName() deprecated function reached\n");
3250 xmlChar buf[XML_MAX_NAMELEN];
/* The first character is tested against letter and '_' before scanning. */
3254 if (!IS_LETTER(RAW) && (RAW != '_') &&
3260 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3261 (NXT(len) == '.') || (NXT(len) == '-') ||
3262 (NXT(len) == '_') || (NXT(len) == ':') ||
3263 (IS_COMBINING(NXT(len))) ||
3264 (IS_EXTENDER(NXT(len)))) {
3266 buf[len] = NXT(len);
/* Once the length limit is reached it is reported and a second scan loop runs. */
3268 if (len >= XML_MAX_NAMELEN) {
3269 xmlGenericError(xmlGenericErrorContext,
3270 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3271 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3272 (IS_DIGIT(NXT(len))) ||
3273 (NXT(len) == '.') || (NXT(len) == '-') ||
3274 (NXT(len) == '_') || (NXT(len) == ':') ||
3275 (IS_COMBINING(NXT(len))) ||
3276 (IS_EXTENDER(NXT(len))))
/* The caller receives a newly allocated copy of the scanned bytes. */
3281 return(xmlStrndup(buf, len));
3286 * xmlParserHandleReference:
3287 * @ctxt: the parser context
3289 * TODO: Remove, now deprecated ... the test is done directly in the
3293 * [67] Reference ::= EntityRef | CharRef
3295 * [68] EntityRef ::= '&' Name ';'
3297 * [ WFC: Entity Declared ]
3298 * the Name given in the entity reference must match that in an entity
3299 * declaration, except that well-formed documents need not declare any
3300 * of the following entities: amp, lt, gt, apos, quot.
3302 * [ WFC: Parsed Entity ]
3303 * An entity reference must not contain the name of an unparsed entity
3305 * [66] CharRef ::= '&#' [0-9]+ ';' |
3306 * '&#x' [0-9a-fA-F]+ ';'
3308 * A PEReference may have been detected in the current input stream
3309 * the handling is done accordingly to
3310 * http://www.w3.org/TR/REC-xml#entproc
/*
 * Deprecated entry point; the only visible behavior is the deprecation
 * message sent through xmlGenericError().
 */
3313 xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) {
3314 static int deprecated = 0;
3316 xmlGenericError(xmlGenericErrorContext,
3317 "xmlParserHandleReference() deprecated function reached\n");
3326 * @ctxt: an XML parser context
3327 * @entity: an XML entity pointer.
3329 * Default handling of defined entities, when should we define a new input
3330 * stream ? When do we just handle that as a set of chars ?
3332 * OBSOLETE: to be removed at some point.
/*
 * Obsolete entity handler. It emits a deprecation message; the visible
 * body rejects entities without content, pushes longer content as a new
 * input stream, and delivers content of length <= 2 straight to the SAX
 * characters callback.
 */
3336 xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) {
3337 static int deprecated = 0;
3339 xmlGenericError(xmlGenericErrorContext,
3340 "xmlHandleEntity() deprecated function reached\n");
3346 xmlParserInputPtr input;
/* An entity with no content is reported as an internal error. */
3348 if (entity->content == NULL) {
3349 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3353 ctxt->wellFormed = 0;
3354 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
3357 len = xmlStrlen(entity->content);
/* Content of length <= 2 skips the input-stream path. */
3358 if (len <= 2) goto handle_as_char;
3361 * Redefine its content as an input stream.
3363 input = xmlNewEntityInputStream(ctxt, entity);
3364 xmlPushInput(ctxt, input);
3369 * Just handle the content as a set of chars.
/* Delivered only when SAX is enabled and a characters handler is installed. */
3371 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3372 (ctxt->sax->characters != NULL))
3373 ctxt->sax->characters(ctxt->userData, entity->content, len);
3379 * @doc: the document carrying the namespace
3380 * @href: the URI associated
3381 * @prefix: the prefix for the namespace
3383 * Creation of a Namespace, the old way using PI and without scoping
3385 * It now creates a namespace on the root element of the document if found.
3386 * Returns NULL; this functionality has been removed.
/*
 * Deprecated global-namespace constructor. It emits a deprecation message;
 * the visible body creates the namespace on the document root via
 * xmlNewNs(), or otherwise allocates an XML_GLOBAL_NAMESPACE record and
 * links it into the doc->oldNs list.
 * NOTE(review): all parameters are marked ATTRIBUTE_UNUSED, so the body
 * may be compiled out; the preprocessor lines are not visible - confirm.
 */
3389 xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED,
3390 const xmlChar *prefix ATTRIBUTE_UNUSED) {
3391 static int deprecated = 0;
3393 xmlGenericError(xmlGenericErrorContext,
3394 "xmlNewGlobalNs() deprecated function reached\n");
/* Look up the document root; the namespace is created on it below. */
3403 root = xmlDocGetRootElement(doc);
3405 return(xmlNewNs(root, href, prefix));
3408 * if there is no root element yet, create an old Namespace type
3409 * and it will be moved to the root at save time.
3411 cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3413 xmlGenericError(xmlGenericErrorContext,
3414 "xmlNewGlobalNs : malloc failed\n");
/* Start from an all-zero record, then fill in type and copied strings. */
3417 memset(cur, 0, sizeof(xmlNs));
3418 cur->type = XML_GLOBAL_NAMESPACE;
3421 cur->href = xmlStrdup(href);
3423 cur->prefix = xmlStrdup(prefix);
3426 * Add it at the end to preserve parsing order ...
3429 if (doc->oldNs == NULL) {
3432 xmlNsPtr prev = doc->oldNs;
/* Walk to the last element of the existing list. */
3434 while (prev->next != NULL) prev = prev->next;
3445 * @doc: a document pointer
3447 * Upgrade old style Namespaces (PI) and move them to the root of the document.
3451 xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) {
3452 static int deprecated = 0;
3454 xmlGenericError(xmlGenericErrorContext,
3455 "xmlUpgradeOldNs() deprecated function reached\n");
3461 if ((doc == NULL) || (doc->oldNs == NULL)) return;
3462 if (doc->children == NULL) {
3464 xmlGenericError(xmlGenericErrorContext,
3465 "xmlUpgradeOldNs: failed no root !\n");
3471 while (cur->next != NULL) {
3472 cur->type = XML_LOCAL_NAMESPACE;
3475 cur->type = XML_LOCAL_NAMESPACE;
3476 cur->next = doc->children->nsDef;
3477 doc->children->nsDef = doc->oldNs;