2 * encoding.c : implements the encoding conversion functions needed for XML
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
20 * UTF8 string routines from:
21 * "William M. Brack" <wbrack@mmm.com.hk>
23 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
37 #ifdef LIBXML_ICONV_ENABLED
42 #include <libxml/encoding.h>
43 #include <libxml/xmlmemory.h>
44 #ifdef LIBXML_HTML_ENABLED
45 #include <libxml/HTMLparser.h>
47 #include <libxml/globals.h>
48 #include <libxml/xmlerror.h>
50 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
51 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
54 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
55 struct _xmlCharEncodingAlias {
60 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
61 static int xmlCharEncodingAliasesNb = 0;
62 static int xmlCharEncodingAliasesMax = 0;
64 #ifdef LIBXML_ICONV_ENABLED
66 #define DEBUG_ENCODING /* Define this to get encoding traces */
69 #ifdef LIBXML_ISO8859X_ENABLED
70 static void xmlRegisterCharEncodingHandlersISO8859x (void);
74 static int xmlLittleEndian = 1;
76 /************************************************************************
78 * Generic UTF8 handling routines *
80 * From rfc2044: encoding of the Unicode values on UTF-8: *
82 * UCS-4 range (hex.) UTF-8 octet sequence (binary) *
83 * 0000 0000-0000 007F 0xxxxxxx *
84 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
85 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
87 * I hope we won't use values > 0xFFFF anytime soon ! *
89 ************************************************************************/
93 * @utf: pointer to the UTF8 character
95 * returns the number of bytes in the character, -1 on format error
98 xmlUTF8Size(const xmlChar *utf) {
106 /* check valid UTF8 character */
109 /* determine number of bytes in char */
111 for (mask=0x20; mask != 0; mask>>=1) {
121 * @utf1: pointer to first UTF8 char
122 * @utf2: pointer to second UTF8 char
124 * returns result of comparing the two UCS4 values
128 xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
135 return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
140 * @utf: a sequence of UTF-8 encoded bytes
142 * Compute the length of a UTF-8 string; it does not perform a full UTF-8
143 * validity check of the content of the string.
145 * Returns the number of characters in the string or -1 in case of error
148 xmlUTF8Strlen(const xmlChar *utf) {
156 if ((utf[1] & 0xc0) != 0x80)
158 if ((utf[0] & 0xe0) == 0xe0) {
159 if ((utf[2] & 0xc0) != 0x80)
161 if ((utf[0] & 0xf0) == 0xf0) {
162 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
181 * @utf: a sequence of UTF-8 encoded bytes
182 * @len: a pointer to @bytes len
184 * Read one UTF8 Char from @utf
186 * Returns the char value or -1 in case of error and update @len with the
187 * number of bytes used
190 xmlGetUTF8Char(const unsigned char *utf, int *len) {
204 if ((utf[1] & 0xc0) != 0x80)
206 if ((c & 0xe0) == 0xe0) {
209 if ((utf[2] & 0xc0) != 0x80)
211 if ((c & 0xf0) == 0xf0) {
214 if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
218 c = (utf[0] & 0x7) << 18;
219 c |= (utf[1] & 0x3f) << 12;
220 c |= (utf[2] & 0x3f) << 6;
225 c = (utf[0] & 0xf) << 12;
226 c |= (utf[1] & 0x3f) << 6;
232 c = (utf[0] & 0x1f) << 6;
248 * @utf: Pointer to putative utf-8 encoded string.
250 * Checks @utf for being valid utf-8. @utf is assumed to be
251 * null-terminated. This function is not super-strict, as it will
252 * allow longer utf-8 sequences than necessary. Note that Java is
253 * capable of producing these sequences if provoked. Also note, this
254 * routine checks for the 4-byte maximum size, but does not check for
255 * 0x10ffff maximum value.
257 * Return value: true if @utf is valid.
260 xmlCheckUTF8(const unsigned char *utf)
265 for (ix = 0; (c = utf[ix]);) {
267 if ((utf[ix + 1] & 0xc0) != 0x80)
269 if ((c & 0xe0) == 0xe0) {
270 if ((utf[ix + 2] & 0xc0) != 0x80)
272 if ((c & 0xf0) == 0xf0) {
273 if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
292 * @utf: a sequence of UTF-8 encoded bytes
293 * @len: the number of characters in the array
295 * storage size of an UTF8 string
297 * Returns the storage size of
298 * the first 'len' characters of ARRAY
303 xmlUTF8Strsize(const xmlChar *utf, int len) {
304 const xmlChar *ptr=utf;
313 if ( (ch = *ptr++) & 0x80)
314 while ( (ch<<=1) & 0x80 )
323 * @utf: the input UTF8 *
324 * @len: the len of @utf (in chars)
326 * a strndup for array of UTF8's
328 * Returns a new UTF8 * or NULL
331 xmlUTF8Strndup(const xmlChar *utf, int len) {
335 if ((utf == NULL) || (len < 0)) return(NULL);
336 i = xmlUTF8Strsize(utf, len);
337 ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
339 xmlGenericError(xmlGenericErrorContext,
340 "malloc of %ld byte failed\n",
341 (len + 1) * (long)sizeof(xmlChar));
344 memcpy(ret, utf, i * sizeof(xmlChar));
351 * @utf: the input UTF8 *
352 * @pos: the position of the desired UTF8 char (in chars)
354 * a function to provide the equivalent of fetching a
355 * character from a string array
357 * Returns a pointer to the UTF8 character or NULL
360 xmlUTF8Strpos(const xmlChar *utf, int pos) {
363 if (utf == NULL) return(NULL);
364 if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) )
367 if ((ch=*utf++) == 0) return(NULL);
369 /* if not simple ascii, verify proper format */
370 if ( (ch & 0xc0) != 0xc0 )
372 /* then skip over remaining bytes for this char */
373 while ( (ch <<= 1) & 0x80 )
374 if ( (*utf++ & 0xc0) != 0x80 )
378 return((xmlChar *)utf);
383 * @utf: the input UTF8 *
384 * @utfchar: the UTF8 character to be found
386 * a function to provide relative location of a UTF8 char
388 * Returns the relative character position of the desired char
392 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
396 if (utf==NULL || utfchar==NULL) return -1;
397 size = xmlUTF8Strsize(utfchar, 1);
398 for(i=0; (ch=*utf) != 0; i++) {
399 if (xmlStrncmp(utf, utfchar, size)==0)
403 /* if not simple ascii, verify proper format */
404 if ( (ch & 0xc0) != 0xc0 )
406 /* then skip over remaining bytes for this char */
407 while ( (ch <<= 1) & 0x80 )
408 if ( (*utf++ & 0xc0) != 0x80 )
417 * @utf: a sequence of UTF-8 encoded bytes
418 * @start: relative pos of first char
419 * @len: total number to copy
421 * Note: positions are given in units of UTF-8 chars
423 * Returns a pointer to a newly created string
424 * or NULL if any problem
428 xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
432 if (utf == NULL) return(NULL);
433 if (start < 0) return(NULL);
434 if (len < 0) return(NULL);
437 * Skip over any leading chars
439 for (i = 0;i < start;i++) {
440 if ((ch=*utf++) == 0) return(NULL);
442 /* if not simple ascii, verify proper format */
443 if ( (ch & 0xc0) != 0xc0 )
445 /* then skip over remaining bytes for this char */
446 while ( (ch <<= 1) & 0x80 )
447 if ( (*utf++ & 0xc0) != 0x80 )
452 return(xmlUTF8Strndup(utf, len));
455 /************************************************************************
457 * Conversions To/From UTF8 encoding *
459 ************************************************************************/
463 * @out: a pointer to an array of bytes to store the result
464 * @outlen: the length of @out
465 * @in: a pointer to an array of ASCII chars
466 * @inlen: the length of @in
468 * Take a block of ASCII chars in and try to convert it to an UTF-8
469 * block of chars out.
470 * Returns 0 if success, or -1 otherwise
471 * The value of @inlen after return is the number of octets consumed
472 * if the return value is non-negative, else unpredictable.
473 * The value of @outlen after return is the number of octets produced.
476 asciiToUTF8(unsigned char* out, int *outlen,
477 const unsigned char* in, int *inlen) {
478 unsigned char* outstart = out;
479 const unsigned char* base = in;
480 const unsigned char* processed = in;
481 unsigned char* outend = out + *outlen;
482 const unsigned char* inend;
486 inend = in + (*inlen);
487 while ((in < inend) && (out - outstart + 5 < *outlen)) {
490 /* assertion: c is a single UCS-4 value */
493 if (c < 0x80) { *out++= c; bits= -6; }
495 *outlen = out - outstart;
496 *inlen = processed - base;
500 for ( ; bits >= 0; bits-= 6) {
503 *out++= ((c >> bits) & 0x3F) | 0x80;
505 processed = (const unsigned char*) in;
507 *outlen = out - outstart;
508 *inlen = processed - base;
514 * @out: a pointer to an array of bytes to store the result
515 * @outlen: the length of @out
516 * @in: a pointer to an array of UTF-8 chars
517 * @inlen: the length of @in
519 * Take a block of UTF-8 chars in and try to convert it to an ASCII
520 * block of chars out.
522 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
523 * The value of @inlen after return is the number of octets consumed
524 * if the return value is non-negative, else unpredictable.
525 * The value of @outlen after return is the number of octets produced.
528 UTF8Toascii(unsigned char* out, int *outlen,
529 const unsigned char* in, int *inlen) {
530 const unsigned char* processed = in;
531 const unsigned char* outend;
532 const unsigned char* outstart = out;
533 const unsigned char* instart = in;
534 const unsigned char* inend;
540 * initialization nothing to do
546 inend = in + (*inlen);
547 outend = out + (*outlen);
550 if (d < 0x80) { c= d; trailing= 0; }
552 /* trailing byte in leading position */
553 *outlen = out - outstart;
554 *inlen = processed - instart;
556 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
557 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
558 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
560 /* no chance for this in Ascii */
561 *outlen = out - outstart;
562 *inlen = processed - instart;
566 if (inend - in < trailing) {
570 for ( ; trailing; trailing--) {
571 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
577 /* assertion: c is a single UCS-4 value */
583 /* no chance for this in Ascii */
584 *outlen = out - outstart;
585 *inlen = processed - instart;
590 *outlen = out - outstart;
591 *inlen = processed - instart;
597 * @out: a pointer to an array of bytes to store the result
598 * @outlen: the length of @out
599 * @in: a pointer to an array of ISO Latin 1 chars
600 * @inlen: the length of @in
602 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
603 * block of chars out.
604 * Returns 0 if success, or -1 otherwise
605 * The value of @inlen after return is the number of octets consumed
606 * if the return value is non-negative, else unpredictable.
607 * The value of @outlen after return is the number of octets produced.
610 isolat1ToUTF8(unsigned char* out, int *outlen,
611 const unsigned char* in, int *inlen) {
612 unsigned char* outstart = out;
613 const unsigned char* base = in;
614 unsigned char* outend = out + *outlen;
615 const unsigned char* inend;
616 const unsigned char* instop;
619 inend = in + (*inlen);
622 while (in < inend && out < outend - 1) {
624 *out++= ((c >> 6) & 0x1F) | 0xC0;
625 *out++= (c & 0x3F) | 0x80;
629 if (instop - in > outend - out) instop = in + (outend - out);
630 while (c < 0x80 && in < instop) {
636 if (in < inend && out < outend && c < 0x80) {
640 *outlen = out - outstart;
647 * @out: a pointer to an array of bytes to store the result
648 * @outlen: the length of @out
649 * @inb: a pointer to an array of UTF-8 chars
650 * @inlenb: the length of @inb in bytes
652 * No op copy operation for UTF8 handling.
654 * Returns the number of byte written, or -1 by lack of space, or -2
655 * if the transcoding fails (for *in is not valid utf16 string)
656 * The value of *inlenb after return is the number of octets consumed
657 * if the return value is non-negative, else unpredictable.
660 UTF8ToUTF8(unsigned char* out, int *outlen,
661 const unsigned char* inb, int *inlenb)
665 if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
667 if (*outlen > *inlenb) {
675 memcpy(out, inb, len);
685 * @out: a pointer to an array of bytes to store the result
686 * @outlen: the length of @out
687 * @in: a pointer to an array of UTF-8 chars
688 * @inlen: the length of @in
690 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
691 * block of chars out.
693 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
694 * The value of @inlen after return is the number of octets consumed
695 * if the return value is non-negative, else unpredictable.
696 * The value of @outlen after return is the number of octets produced.
699 UTF8Toisolat1(unsigned char* out, int *outlen,
700 const unsigned char* in, int *inlen) {
701 const unsigned char* processed = in;
702 const unsigned char* outend;
703 const unsigned char* outstart = out;
704 const unsigned char* instart = in;
705 const unsigned char* inend;
711 * initialization nothing to do
717 inend = in + (*inlen);
718 outend = out + (*outlen);
721 if (d < 0x80) { c= d; trailing= 0; }
723 /* trailing byte in leading position */
724 *outlen = out - outstart;
725 *inlen = processed - instart;
727 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
728 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
729 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
731 /* no chance for this in IsoLat1 */
732 *outlen = out - outstart;
733 *inlen = processed - instart;
737 if (inend - in < trailing) {
741 for ( ; trailing; trailing--) {
744 if (((d= *in++) & 0xC0) != 0x80) {
745 *outlen = out - outstart;
746 *inlen = processed - instart;
753 /* assertion: c is a single UCS-4 value */
759 /* no chance for this in IsoLat1 */
760 *outlen = out - outstart;
761 *inlen = processed - instart;
766 *outlen = out - outstart;
767 *inlen = processed - instart;
773 * @out: a pointer to an array of bytes to store the result
774 * @outlen: the length of @out
775 * @inb: a pointer to an array of UTF-16LE passed as a byte array
776 * @inlenb: the length of @inb in bytes
778 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
779 * block of chars out. This function assume the endian property
780 * is the same between the native type of this machine and the
783 * Returns the number of bytes written, or -1 for lack of space, or -2
784 * if the transcoding fails (i.e. *inb is not a valid UTF-16 string)
785 * The value of *inlenb after return is the number of octets consumed
786 * if the return value is non-negative, else unpredictable.
789 UTF16LEToUTF8(unsigned char* out, int *outlen,
790 const unsigned char* inb, int *inlenb)
792 unsigned char* outstart = out;
793 const unsigned char* processed = inb;
794 unsigned char* outend = out + *outlen;
795 unsigned short* in = (unsigned short*) inb;
796 unsigned short* inend;
797 unsigned int c, d, inlen;
801 if ((*inlenb % 2) == 1)
805 while ((in < inend) && (out - outstart + 5 < *outlen)) {
806 if (xmlLittleEndian) {
809 tmp = (unsigned char *) in;
811 c = c | (((unsigned int)*tmp) << 8);
814 if ((c & 0xFC00) == 0xD800) { /* surrogates */
815 if (in >= inend) { /* (in > inend) shouldn't happens */
818 if (xmlLittleEndian) {
821 tmp = (unsigned char *) in;
823 d = d | (((unsigned int)*tmp) << 8);
826 if ((d & 0xFC00) == 0xDC00) {
833 *outlen = out - outstart;
834 *inlenb = processed - inb;
839 /* assertion: c is a single UCS-4 value */
842 if (c < 0x80) { *out++= c; bits= -6; }
843 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
844 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
845 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
847 for ( ; bits >= 0; bits-= 6) {
850 *out++= ((c >> bits) & 0x3F) | 0x80;
852 processed = (const unsigned char*) in;
854 *outlen = out - outstart;
855 *inlenb = processed - inb;
861 * @outb: a pointer to an array of bytes to store the result
862 * @outlen: the length of @outb
863 * @in: a pointer to an array of UTF-8 chars
864 * @inlen: the length of @in
866 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
867 * block of chars out.
869 * Returns the number of bytes written, or -1 for lack of space, or -2
870 * if the transcoding failed.
873 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
874 const unsigned char* in, int *inlen)
876 unsigned short* out = (unsigned short*) outb;
877 const unsigned char* processed = in;
878 const unsigned char *const instart = in;
879 unsigned short* outstart= out;
880 unsigned short* outend;
881 const unsigned char* inend= in+*inlen;
885 unsigned short tmp1, tmp2;
889 * initialization, add the Byte Order Mark
896 #ifdef DEBUG_ENCODING
897 xmlGenericError(xmlGenericErrorContext,
898 "Added FFFE Byte Order Mark\n");
906 outend = out + (*outlen / 2);
909 if (d < 0x80) { c= d; trailing= 0; }
911 /* trailing byte in leading position */
912 *outlen = (out - outstart) * 2;
913 *inlen = processed - instart;
915 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
916 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
917 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
919 /* no chance for this in UTF-16 */
920 *outlen = (out - outstart) * 2;
921 *inlen = processed - instart;
925 if (inend - in < trailing) {
929 for ( ; trailing; trailing--) {
930 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
936 /* assertion: c is a single UCS-4 value */
940 if (xmlLittleEndian) {
943 tmp = (unsigned char *) out;
945 *(tmp + 1) = c >> 8 ;
949 else if (c < 0x110000) {
953 if (xmlLittleEndian) {
954 *out++ = 0xD800 | (c >> 10);
955 *out++ = 0xDC00 | (c & 0x03FF);
957 tmp1 = 0xD800 | (c >> 10);
958 tmp = (unsigned char *) out;
959 *tmp = (unsigned char) tmp1;
960 *(tmp + 1) = tmp1 >> 8;
963 tmp2 = 0xDC00 | (c & 0x03FF);
964 tmp = (unsigned char *) out;
965 *tmp = (unsigned char) tmp2;
966 *(tmp + 1) = tmp2 >> 8;
974 *outlen = (out - outstart) * 2;
975 *inlen = processed - instart;
981 * @out: a pointer to an array of bytes to store the result
982 * @outlen: the length of @out
983 * @inb: a pointer to an array of UTF-16 passed as a byte array
984 * @inlenb: the length of @inb in bytes
986 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
987 * block of chars out. This function assume the endian property
988 * is the same between the native type of this machine and the
991 * Returns the number of bytes written, or -1 for lack of space, or -2
992 * if the transcoding fails (i.e. *inb is not a valid UTF-16 string)
993 * The value of *inlenb after return is the number of octets consumed
994 * if the return value is non-negative, else unpredictable.
997 UTF16BEToUTF8(unsigned char* out, int *outlen,
998 const unsigned char* inb, int *inlenb)
1000 unsigned char* outstart = out;
1001 const unsigned char* processed = inb;
1002 unsigned char* outend = out + *outlen;
1003 unsigned short* in = (unsigned short*) inb;
1004 unsigned short* inend;
1005 unsigned int c, d, inlen;
1009 if ((*inlenb % 2) == 1)
1011 inlen = *inlenb / 2;
1013 while (in < inend) {
1014 if (xmlLittleEndian) {
1015 tmp = (unsigned char *) in;
1018 c = c | (unsigned int) *tmp;
1023 if ((c & 0xFC00) == 0xD800) { /* surrogates */
1024 if (in >= inend) { /* (in > inend) shouldn't happens */
1025 *outlen = out - outstart;
1026 *inlenb = processed - inb;
1029 if (xmlLittleEndian) {
1030 tmp = (unsigned char *) in;
1033 d = d | (unsigned int) *tmp;
1038 if ((d & 0xFC00) == 0xDC00) {
1045 *outlen = out - outstart;
1046 *inlenb = processed - inb;
1051 /* assertion: c is a single UCS-4 value */
1054 if (c < 0x80) { *out++= c; bits= -6; }
1055 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
1056 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
1057 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
1059 for ( ; bits >= 0; bits-= 6) {
1062 *out++= ((c >> bits) & 0x3F) | 0x80;
1064 processed = (const unsigned char*) in;
1066 *outlen = out - outstart;
1067 *inlenb = processed - inb;
1073 * @outb: a pointer to an array of bytes to store the result
1074 * @outlen: the length of @outb
1075 * @in: a pointer to an array of UTF-8 chars
1076 * @inlen: the length of @in
1078 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
1079 * block of chars out.
1081 * Returns the number of bytes written, or -1 for lack of space, or -2
1082 * if the transcoding failed.
1085 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
1086 const unsigned char* in, int *inlen)
1088 unsigned short* out = (unsigned short*) outb;
1089 const unsigned char* processed = in;
1090 const unsigned char *const instart = in;
1091 unsigned short* outstart= out;
1092 unsigned short* outend;
1093 const unsigned char* inend= in+*inlen;
1097 unsigned short tmp1, tmp2;
1101 * initialization, add the Byte Order Mark
1108 #ifdef DEBUG_ENCODING
1109 xmlGenericError(xmlGenericErrorContext,
1110 "Added FEFF Byte Order Mark\n");
1118 outend = out + (*outlen / 2);
1119 while (in < inend) {
1121 if (d < 0x80) { c= d; trailing= 0; }
1122 else if (d < 0xC0) {
1123 /* trailing byte in leading position */
1124 *outlen = out - outstart;
1125 *inlen = processed - instart;
1127 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
1128 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
1129 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
1131 /* no chance for this in UTF-16 */
1132 *outlen = out - outstart;
1133 *inlen = processed - instart;
1137 if (inend - in < trailing) {
1141 for ( ; trailing; trailing--) {
1142 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break;
1147 /* assertion: c is a single UCS-4 value */
1149 if (out >= outend) break;
1150 if (xmlLittleEndian) {
1151 tmp = (unsigned char *) out;
1159 else if (c < 0x110000) {
1160 if (out+1 >= outend) break;
1162 if (xmlLittleEndian) {
1163 tmp1 = 0xD800 | (c >> 10);
1164 tmp = (unsigned char *) out;
1166 *(tmp + 1) = (unsigned char) tmp1;
1169 tmp2 = 0xDC00 | (c & 0x03FF);
1170 tmp = (unsigned char *) out;
1172 *(tmp + 1) = (unsigned char) tmp2;
1175 *out++ = 0xD800 | (c >> 10);
1176 *out++ = 0xDC00 | (c & 0x03FF);
1183 *outlen = (out - outstart) * 2;
1184 *inlen = processed - instart;
1188 /************************************************************************
1190 * Generic encoding handling routines *
1192 ************************************************************************/
1195 * xmlDetectCharEncoding:
1196 * @in: a pointer to the first bytes of the XML entity, must be at least 4 bytes long
1198 * @len: the length of the buffer
1200 * Guess the encoding of the entity using the first bytes of the entity content
1201 * according to the non-normative appendix F of the XML-1.0 recommendation.
1203 * Returns one of the XML_CHAR_ENCODING_... values.
1206 xmlDetectCharEncoding(const unsigned char* in, int len)
1209 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1210 (in[2] == 0x00) && (in[3] == 0x3C))
1211 return(XML_CHAR_ENCODING_UCS4BE);
1212 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
1213 (in[2] == 0x00) && (in[3] == 0x00))
1214 return(XML_CHAR_ENCODING_UCS4LE);
1215 if ((in[0] == 0x00) && (in[1] == 0x00) &&
1216 (in[2] == 0x3C) && (in[3] == 0x00))
1217 return(XML_CHAR_ENCODING_UCS4_2143);
1218 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
1219 (in[2] == 0x00) && (in[3] == 0x00))
1220 return(XML_CHAR_ENCODING_UCS4_3412);
1221 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
1222 (in[2] == 0xA7) && (in[3] == 0x94))
1223 return(XML_CHAR_ENCODING_EBCDIC);
1224 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
1225 (in[2] == 0x78) && (in[3] == 0x6D))
1226 return(XML_CHAR_ENCODING_UTF8);
1230 * Errata on XML-1.0 June 20 2001
1231 * We now allow an UTF8 encoded BOM
1233 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
1235 return(XML_CHAR_ENCODING_UTF8);
1238 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1239 return(XML_CHAR_ENCODING_UTF16BE);
1240 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1241 return(XML_CHAR_ENCODING_UTF16LE);
1243 return(XML_CHAR_ENCODING_NONE);
1247 * xmlCleanupEncodingAliases:
1249 * Unregisters all aliases
1252 xmlCleanupEncodingAliases(void) {
1255 if (xmlCharEncodingAliases == NULL)
1258 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1259 if (xmlCharEncodingAliases[i].name != NULL)
1260 xmlFree((char *) xmlCharEncodingAliases[i].name);
1261 if (xmlCharEncodingAliases[i].alias != NULL)
1262 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1264 xmlCharEncodingAliasesNb = 0;
1265 xmlCharEncodingAliasesMax = 0;
1266 xmlFree(xmlCharEncodingAliases);
1267 xmlCharEncodingAliases = NULL;
1271 * xmlGetEncodingAlias:
1272 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1274 * Lookup an encoding name for the given alias.
1276 * Returns NULL if not found, otherwise the original name
1279 xmlGetEncodingAlias(const char *alias) {
1286 if (xmlCharEncodingAliases == NULL)
1289 for (i = 0;i < 99;i++) {
1290 upper[i] = toupper(alias[i]);
1291 if (upper[i] == 0) break;
1296 * Walk down the list looking for a definition of the alias
1298 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1299 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1300 return(xmlCharEncodingAliases[i].name);
1307 * xmlAddEncodingAlias:
1308 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1309 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1311 * Registers an alias @alias for an encoding named @name. An existing alias
1312 * will be overwritten.
1314 * Returns 0 in case of success, -1 in case of error
1317 xmlAddEncodingAlias(const char *name, const char *alias) {
1321 if ((name == NULL) || (alias == NULL))
1324 for (i = 0;i < 99;i++) {
1325 upper[i] = toupper(alias[i]);
1326 if (upper[i] == 0) break;
1330 if (xmlCharEncodingAliases == NULL) {
1331 xmlCharEncodingAliasesNb = 0;
1332 xmlCharEncodingAliasesMax = 20;
1333 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1334 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1335 if (xmlCharEncodingAliases == NULL)
1337 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1338 xmlCharEncodingAliasesMax *= 2;
1339 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1340 xmlRealloc(xmlCharEncodingAliases,
1341 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1344 * Walk down the list looking for a definition of the alias
1346 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1347 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1349 * Replace the definition.
1351 xmlFree((char *) xmlCharEncodingAliases[i].name);
1352 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1357 * Add the definition
1359 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1360 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1361 xmlCharEncodingAliasesNb++;
1366 * xmlDelEncodingAlias:
1367 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1369 * Unregisters an encoding alias @alias
1371 * Returns 0 in case of success, -1 in case of error
1374 xmlDelEncodingAlias(const char *alias) {
1380 if (xmlCharEncodingAliases == NULL)
1383 * Walk down the list looking for a definition of the alias
1385 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1386 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1387 xmlFree((char *) xmlCharEncodingAliases[i].name);
1388 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1389 xmlCharEncodingAliasesNb--;
1390 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1391 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1399 * xmlParseCharEncoding:
1400 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1402 * Compare the string to the known encoding schemes. Note
1403 * that the comparison is case insensitive, in accordance with section
1404 * [XML] 4.3.3 Character Encoding in Entities.
1406 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1407 * if not recognized.
1410 xmlParseCharEncoding(const char* name)
1417 return(XML_CHAR_ENCODING_NONE);
1420 * Do the alias resolution
1422 alias = xmlGetEncodingAlias(name);
1426 for (i = 0;i < 499;i++) {
1427 upper[i] = toupper(name[i]);
1428 if (upper[i] == 0) break;
1432 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1433 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1434 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1437 * NOTE: if we were able to parse this, the endianness of UTF16 is
1438 * already found and in use
1440 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1441 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1443 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1444 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1445 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1448 * NOTE: if we were able to parse this, the endianness of UCS4 is
1449 * already found and in use
1451 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1452 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1453 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1456 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1457 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1458 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1460 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1461 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1462 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1464 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1465 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1466 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1467 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1468 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1469 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1470 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1472 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1473 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1474 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1476 #ifdef DEBUG_ENCODING
1477 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1479 return(XML_CHAR_ENCODING_ERROR);
1483 * xmlGetCharEncodingName:
1484 * @enc: the encoding
1486 * The "canonical" name for XML encoding.
1487 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1488 * Section 4.3.3 Character Encoding in Entities
1490 * Returns the canonical name for the given encoding
1494 xmlGetCharEncodingName(xmlCharEncoding enc) {
1496 case XML_CHAR_ENCODING_ERROR:
1498 case XML_CHAR_ENCODING_NONE:
1500 case XML_CHAR_ENCODING_UTF8:
1502 case XML_CHAR_ENCODING_UTF16LE:
1504 case XML_CHAR_ENCODING_UTF16BE:
1506 case XML_CHAR_ENCODING_EBCDIC:
1508 case XML_CHAR_ENCODING_UCS4LE:
1509 return("ISO-10646-UCS-4");
1510 case XML_CHAR_ENCODING_UCS4BE:
1511 return("ISO-10646-UCS-4");
1512 case XML_CHAR_ENCODING_UCS4_2143:
1513 return("ISO-10646-UCS-4");
1514 case XML_CHAR_ENCODING_UCS4_3412:
1515 return("ISO-10646-UCS-4");
1516 case XML_CHAR_ENCODING_UCS2:
1517 return("ISO-10646-UCS-2");
1518 case XML_CHAR_ENCODING_8859_1:
1519 return("ISO-8859-1");
1520 case XML_CHAR_ENCODING_8859_2:
1521 return("ISO-8859-2");
1522 case XML_CHAR_ENCODING_8859_3:
1523 return("ISO-8859-3");
1524 case XML_CHAR_ENCODING_8859_4:
1525 return("ISO-8859-4");
1526 case XML_CHAR_ENCODING_8859_5:
1527 return("ISO-8859-5");
1528 case XML_CHAR_ENCODING_8859_6:
1529 return("ISO-8859-6");
1530 case XML_CHAR_ENCODING_8859_7:
1531 return("ISO-8859-7");
1532 case XML_CHAR_ENCODING_8859_8:
1533 return("ISO-8859-8");
1534 case XML_CHAR_ENCODING_8859_9:
1535 return("ISO-8859-9");
1536 case XML_CHAR_ENCODING_2022_JP:
1537 return("ISO-2022-JP");
1538 case XML_CHAR_ENCODING_SHIFT_JIS:
1539 return("Shift-JIS");
1540 case XML_CHAR_ENCODING_EUC_JP:
1542 case XML_CHAR_ENCODING_ASCII:
1548 /************************************************************************
1550 * Char encoding handlers *
1552 ************************************************************************/
/*
 * Registry of the encoding handlers: an array of MAX_ENCODING_HANDLERS
 * slots, of which the first nbCharEncodingHandler are in use.
 * The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, which is also the default used for the
 * parser internals, so the default encoding handler is NULL.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1568 * xmlNewCharEncodingHandler:
1569 * @name: the encoding name, in UTF-8 format (ASCII actually)
1570 * @input: the xmlCharEncodingInputFunc to read that encoding
1571 * @output: the xmlCharEncodingOutputFunc to write that encoding
1573 * Create and registers an xmlCharEncodingHandler.
1575 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1577 xmlCharEncodingHandlerPtr
1578 xmlNewCharEncodingHandler(const char *name,
1579 xmlCharEncodingInputFunc input,
1580 xmlCharEncodingOutputFunc output) {
1581 xmlCharEncodingHandlerPtr handler;
1588 * Do the alias resolution
1590 alias = xmlGetEncodingAlias(name);
1595 * Keep only the uppercase version of the encoding.
1598 xmlGenericError(xmlGenericErrorContext,
1599 "xmlNewCharEncodingHandler : no name !\n");
1602 for (i = 0;i < 499;i++) {
1603 upper[i] = toupper(name[i]);
1604 if (upper[i] == 0) break;
1607 up = xmlMemStrdup(upper);
1609 xmlGenericError(xmlGenericErrorContext,
1610 "xmlNewCharEncodingHandler : out of memory !\n");
1615 * allocate and fill-up an handler block.
1617 handler = (xmlCharEncodingHandlerPtr)
1618 xmlMalloc(sizeof(xmlCharEncodingHandler));
1619 if (handler == NULL) {
1620 xmlGenericError(xmlGenericErrorContext,
1621 "xmlNewCharEncodingHandler : out of memory !\n");
1624 handler->input = input;
1625 handler->output = output;
1628 #ifdef LIBXML_ICONV_ENABLED
1629 handler->iconv_in = NULL;
1630 handler->iconv_out = NULL;
1631 #endif /* LIBXML_ICONV_ENABLED */
1634 * registers and returns the handler.
1636 xmlRegisterCharEncodingHandler(handler);
1637 #ifdef DEBUG_ENCODING
1638 xmlGenericError(xmlGenericErrorContext,
1639 "Registered encoding handler for %s\n", name);
1645 * xmlInitCharEncodingHandlers:
1647 * Initialize the char encoding support, it registers the default
1648 * encoding supported.
1649 * NOTE: while public, this function usually doesn't need to be called
1650 * in normal processing.
1653 xmlInitCharEncodingHandlers(void) {
1654 unsigned short int tst = 0x1234;
1655 unsigned char *ptr = (unsigned char *) &tst;
1657 if (handlers != NULL) return;
1659 handlers = (xmlCharEncodingHandlerPtr *)
1660 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1662 if (*ptr == 0x12) xmlLittleEndian = 0;
1663 else if (*ptr == 0x34) xmlLittleEndian = 1;
1664 else xmlGenericError(xmlGenericErrorContext,
1665 "Odd problem at endianness detection\n");
1667 if (handlers == NULL) {
1668 xmlGenericError(xmlGenericErrorContext,
1669 "xmlInitCharEncodingHandlers : out of memory !\n");
1672 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1674 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1676 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1677 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1678 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1679 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1680 #ifdef LIBXML_HTML_ENABLED
1681 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1683 #ifndef LIBXML_ICONV_ENABLED
1684 #ifdef LIBXML_ISO8859X_ENABLED
1685 xmlRegisterCharEncodingHandlersISO8859x ();
1692 * xmlCleanupCharEncodingHandlers:
1694 * Cleanup the memory allocated for the char encoding support, it
1695 * unregisters all the encoding handlers and the aliases.
1698 xmlCleanupCharEncodingHandlers(void) {
1699 xmlCleanupEncodingAliases();
1701 if (handlers == NULL) return;
1703 for (;nbCharEncodingHandler > 0;) {
1704 nbCharEncodingHandler--;
1705 if (handlers[nbCharEncodingHandler] != NULL) {
1706 if (handlers[nbCharEncodingHandler]->name != NULL)
1707 xmlFree(handlers[nbCharEncodingHandler]->name);
1708 xmlFree(handlers[nbCharEncodingHandler]);
1713 nbCharEncodingHandler = 0;
1714 xmlDefaultCharEncodingHandler = NULL;
1718 * xmlRegisterCharEncodingHandler:
1719 * @handler: the xmlCharEncodingHandlerPtr handler block
1721 * Register the char encoding handler, surprising, isn't it ?
1724 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1725 if (handlers == NULL) xmlInitCharEncodingHandlers();
1726 if (handler == NULL) {
1727 xmlGenericError(xmlGenericErrorContext,
1728 "xmlRegisterCharEncodingHandler: NULL handler !\n");
1732 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1733 xmlGenericError(xmlGenericErrorContext,
1734 "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1735 xmlGenericError(xmlGenericErrorContext,
1736 "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1739 handlers[nbCharEncodingHandler++] = handler;
1743 * xmlGetCharEncodingHandler:
1744 * @enc: an xmlCharEncoding value.
1746 * Search in the registered set the handler able to read/write that encoding.
1748 * Returns the handler or NULL if not found
1750 xmlCharEncodingHandlerPtr
1751 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1752 xmlCharEncodingHandlerPtr handler;
1754 if (handlers == NULL) xmlInitCharEncodingHandlers();
1756 case XML_CHAR_ENCODING_ERROR:
1758 case XML_CHAR_ENCODING_NONE:
1760 case XML_CHAR_ENCODING_UTF8:
1762 case XML_CHAR_ENCODING_UTF16LE:
1763 return(xmlUTF16LEHandler);
1764 case XML_CHAR_ENCODING_UTF16BE:
1765 return(xmlUTF16BEHandler);
1766 case XML_CHAR_ENCODING_EBCDIC:
1767 handler = xmlFindCharEncodingHandler("EBCDIC");
1768 if (handler != NULL) return(handler);
1769 handler = xmlFindCharEncodingHandler("ebcdic");
1770 if (handler != NULL) return(handler);
1772 case XML_CHAR_ENCODING_UCS4BE:
1773 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1774 if (handler != NULL) return(handler);
1775 handler = xmlFindCharEncodingHandler("UCS-4");
1776 if (handler != NULL) return(handler);
1777 handler = xmlFindCharEncodingHandler("UCS4");
1778 if (handler != NULL) return(handler);
1780 case XML_CHAR_ENCODING_UCS4LE:
1781 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1782 if (handler != NULL) return(handler);
1783 handler = xmlFindCharEncodingHandler("UCS-4");
1784 if (handler != NULL) return(handler);
1785 handler = xmlFindCharEncodingHandler("UCS4");
1786 if (handler != NULL) return(handler);
1788 case XML_CHAR_ENCODING_UCS4_2143:
1790 case XML_CHAR_ENCODING_UCS4_3412:
1792 case XML_CHAR_ENCODING_UCS2:
1793 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1794 if (handler != NULL) return(handler);
1795 handler = xmlFindCharEncodingHandler("UCS-2");
1796 if (handler != NULL) return(handler);
1797 handler = xmlFindCharEncodingHandler("UCS2");
1798 if (handler != NULL) return(handler);
1802 * We used to keep ISO Latin encodings native in the
1803 * generated data. This led to so many problems that
1804 * this has been removed. One can still change this
1805 * back by registering no-ops encoders for those
1807 case XML_CHAR_ENCODING_8859_1:
1808 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1809 if (handler != NULL) return(handler);
1811 case XML_CHAR_ENCODING_8859_2:
1812 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1813 if (handler != NULL) return(handler);
1815 case XML_CHAR_ENCODING_8859_3:
1816 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1817 if (handler != NULL) return(handler);
1819 case XML_CHAR_ENCODING_8859_4:
1820 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1821 if (handler != NULL) return(handler);
1823 case XML_CHAR_ENCODING_8859_5:
1824 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1825 if (handler != NULL) return(handler);
1827 case XML_CHAR_ENCODING_8859_6:
1828 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1829 if (handler != NULL) return(handler);
1831 case XML_CHAR_ENCODING_8859_7:
1832 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1833 if (handler != NULL) return(handler);
1835 case XML_CHAR_ENCODING_8859_8:
1836 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1837 if (handler != NULL) return(handler);
1839 case XML_CHAR_ENCODING_8859_9:
1840 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1841 if (handler != NULL) return(handler);
1845 case XML_CHAR_ENCODING_2022_JP:
1846 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1847 if (handler != NULL) return(handler);
1849 case XML_CHAR_ENCODING_SHIFT_JIS:
1850 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1851 if (handler != NULL) return(handler);
1852 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1853 if (handler != NULL) return(handler);
1854 handler = xmlFindCharEncodingHandler("Shift_JIS");
1855 if (handler != NULL) return(handler);
1857 case XML_CHAR_ENCODING_EUC_JP:
1858 handler = xmlFindCharEncodingHandler("EUC-JP");
1859 if (handler != NULL) return(handler);
1865 #ifdef DEBUG_ENCODING
1866 xmlGenericError(xmlGenericErrorContext,
1867 "No handler found for encoding %d\n", enc);
1873 * xmlFindCharEncodingHandler:
1874 * @name: a string describing the char encoding.
1876 * Search in the registered set the handler able to read/write that encoding.
1878 * Returns the handler or NULL if not found
1880 xmlCharEncodingHandlerPtr
1881 xmlFindCharEncodingHandler(const char *name) {
1884 xmlCharEncoding alias;
1885 #ifdef LIBXML_ICONV_ENABLED
1886 xmlCharEncodingHandlerPtr enc;
1887 iconv_t icv_in, icv_out;
1888 #endif /* LIBXML_ICONV_ENABLED */
1892 if (handlers == NULL) xmlInitCharEncodingHandlers();
1893 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1894 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1897 * Do the alias resolution
1900 nalias = xmlGetEncodingAlias(name);
1905 * Check first for directly registered encoding names
1907 for (i = 0;i < 99;i++) {
1908 upper[i] = toupper(name[i]);
1909 if (upper[i] == 0) break;
1913 for (i = 0;i < nbCharEncodingHandler; i++)
1914 if (!strcmp(upper, handlers[i]->name)) {
1915 #ifdef DEBUG_ENCODING
1916 xmlGenericError(xmlGenericErrorContext,
1917 "Found registered handler for encoding %s\n", name);
1919 return(handlers[i]);
1922 #ifdef LIBXML_ICONV_ENABLED
1923 /* check whether iconv can handle this */
1924 icv_in = iconv_open("UTF-8", name);
1925 icv_out = iconv_open(name, "UTF-8");
1926 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1927 enc = (xmlCharEncodingHandlerPtr)
1928 xmlMalloc(sizeof(xmlCharEncodingHandler));
1930 iconv_close(icv_in);
1931 iconv_close(icv_out);
1934 enc->name = xmlMemStrdup(name);
1937 enc->iconv_in = icv_in;
1938 enc->iconv_out = icv_out;
1939 #ifdef DEBUG_ENCODING
1940 xmlGenericError(xmlGenericErrorContext,
1941 "Found iconv handler for encoding %s\n", name);
1944 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1945 xmlGenericError(xmlGenericErrorContext,
1946 "iconv : problems with filters for '%s'\n", name);
1948 #endif /* LIBXML_ICONV_ENABLED */
1950 #ifdef DEBUG_ENCODING
1951 xmlGenericError(xmlGenericErrorContext,
1952 "No handler found for encoding %s\n", name);
1956 * Fallback using the canonical names
1958 alias = xmlParseCharEncoding(norig);
1959 if (alias != XML_CHAR_ENCODING_ERROR) {
1961 canon = xmlGetCharEncodingName(alias);
1962 if ((canon != NULL) && (strcmp(name, canon))) {
1963 return(xmlFindCharEncodingHandler(canon));
1968 * If nothing was found and it is "UTF-16" then use the Little indian
1971 if ((xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF-16")) ||
1972 (xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF16")))
1973 return(xmlUTF16LEHandler);
1978 /************************************************************************
1980 * ICONV based generic conversion functions *
1982 ************************************************************************/
1984 #ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert, or NULL to reset
 *       the converter state
 * @inlen:  the length of @in
 *
 * Run iconv() once and translate its outcome into the error codes used
 * by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space, or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if the last bytes of the input can't form a complete character
 *        (also used for any other failure).
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd,
    unsigned char *out, int *outlen,
    const unsigned char *in, int *inlen) {

    size_t icv_inlen = *inlen, icv_outlen = *outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;
    int err;

    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* keep errno before anything else gets a chance to overwrite it */
    err = errno;

    /*
     * Report what was really written, including for a reset call
     * (@in == NULL) which may emit a shift sequence.
     */
    *outlen -= (int) icv_outlen;
    if (in != NULL)
        *inlen -= (int) icv_inlen;
    else
        *inlen = 0;

    /* iconv() returns a size_t: failure is (size_t) -1 */
    if (ret == (size_t) -1) {
#ifdef EILSEQ
        if (err == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (err == E2BIG)
            return -1;
#endif
        /* EINVAL (truncated input sequence) and anything else */
        return -3;
    }
    /* successful call which nevertheless left input behind */
    if ((in != NULL) && (icv_inlen != 0))
        return -3;
    return 0;
}
2043 #endif /* LIBXML_ICONV_ENABLED */
2045 /************************************************************************
2047 * The real API used by libxml for on-the-fly conversion *
2049 ************************************************************************/
2052 * xmlCharEncFirstLine:
2053 * @handler: char enconding transformation data structure
2054 * @out: an xmlBuffer for the output.
2055 * @in: an xmlBuffer for the input
2057 * Front-end for the encoding handler input function, but handle only
2058 * the very first line, i.e. limit itself to 45 chars.
2060 * Returns the number of byte written if success, or
2062 * -2 if the transcoding fails (for *in is not valid utf8 string or
2063 * the result of transformation can't fit into the encoding we want), or
2066 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2072 if (handler == NULL) return(-1);
2073 if (out == NULL) return(-1);
2074 if (in == NULL) return(-1);
2076 written = out->size - out->use;
2078 if (toconv * 2 >= written) {
2079 xmlBufferGrow(out, toconv);
2080 written = out->size - out->use - 1;
2084 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2085 * 45 chars should be sufficient to reach the end of the encoding
2086 * declaration without going too far inside the document content.
2090 if (handler->input != NULL) {
2091 ret = handler->input(&out->content[out->use], &written,
2092 in->content, &toconv);
2093 xmlBufferShrink(in, toconv);
2094 out->use += written;
2095 out->content[out->use] = 0;
2097 #ifdef LIBXML_ICONV_ENABLED
2098 else if (handler->iconv_in != NULL) {
2099 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2100 &written, in->content, &toconv);
2101 xmlBufferShrink(in, toconv);
2102 out->use += written;
2103 out->content[out->use] = 0;
2104 if (ret == -1) ret = -3;
2106 #endif /* LIBXML_ICONV_ENABLED */
2107 #ifdef DEBUG_ENCODING
2110 xmlGenericError(xmlGenericErrorContext,
2111 "converted %d bytes to %d bytes of input\n",
2115 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2116 toconv, written, in->use);
2119 xmlGenericError(xmlGenericErrorContext,
2120 "input conversion failed due to input error\n");
2123 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2124 toconv, written, in->use);
2127 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2129 #endif /* DEBUG_ENCODING */
2131 * Ignore when input buffer is not on a boundary
2133 if (ret == -3) ret = 0;
2134 if (ret == -1) ret = 0;
2140 * @handler: char encoding transformation data structure
2141 * @out: an xmlBuffer for the output.
2142 * @in: an xmlBuffer for the input
2144 * Generic front-end for the encoding handler input function
2146 * Returns the number of byte written if success, or
2148 * -2 if the transcoding fails (for *in is not valid utf8 string or
2149 * the result of transformation can't fit into the encoding we want), or
2152 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2159 if (handler == NULL)
2169 written = out->size - out->use;
2170 if (toconv * 2 >= written) {
2171 xmlBufferGrow(out, out->size + toconv * 2);
2172 written = out->size - out->use - 1;
2174 if (handler->input != NULL) {
2175 ret = handler->input(&out->content[out->use], &written,
2176 in->content, &toconv);
2177 xmlBufferShrink(in, toconv);
2178 out->use += written;
2179 out->content[out->use] = 0;
2181 #ifdef LIBXML_ICONV_ENABLED
2182 else if (handler->iconv_in != NULL) {
2183 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2184 &written, in->content, &toconv);
2185 xmlBufferShrink(in, toconv);
2186 out->use += written;
2187 out->content[out->use] = 0;
2191 #endif /* LIBXML_ICONV_ENABLED */
2194 #ifdef DEBUG_ENCODING
2195 xmlGenericError(xmlGenericErrorContext,
2196 "converted %d bytes to %d bytes of input\n",
2201 #ifdef DEBUG_ENCODING
2202 xmlGenericError(xmlGenericErrorContext,
2203 "converted %d bytes to %d bytes of input, %d left\n",
2204 toconv, written, in->use);
2208 #ifdef DEBUG_ENCODING
2209 xmlGenericError(xmlGenericErrorContext,
2210 "converted %d bytes to %d bytes of input, %d left\n",
2211 toconv, written, in->use);
2215 xmlGenericError(xmlGenericErrorContext,
2216 "input conversion failed due to input error\n");
2217 xmlGenericError(xmlGenericErrorContext,
2218 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2219 in->content[0], in->content[1],
2220 in->content[2], in->content[3]);
2223 * Ignore when input buffer is not on a boundary
2231 * xmlCharEncOutFunc:
2232 * @handler: char enconding transformation data structure
2233 * @out: an xmlBuffer for the output.
2234 * @in: an xmlBuffer for the input
2236 * Generic front-end for the encoding handler output function
2237 * a first call with @in == NULL has to be made firs to initiate the
2238 * output in case of non-stateless encoding needing to initiate their
2239 * state or the output (like the BOM in UTF16).
2240 * In case of UTF8 sequence conversion errors for the given encoder,
2241 * the content will be automatically remapped to a CharRef sequence.
2243 * Returns the number of byte written if success, or
2245 * -2 if the transcoding fails (for *in is not valid utf8 string or
2246 * the result of transformation can't fit into the encoding we want), or
2249 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2257 if (handler == NULL) return(-1);
2258 if (out == NULL) return(-1);
2262 written = out->size - out->use;
2265 written--; /* Gennady: count '/0' */
2268 * First specific handling of in = NULL, i.e. the initialization call
2272 if (handler->output != NULL) {
2273 ret = handler->output(&out->content[out->use], &written,
2275 if (ret >= 0) { /* Gennady: check return value */
2276 out->use += written;
2277 out->content[out->use] = 0;
2280 #ifdef LIBXML_ICONV_ENABLED
2281 else if (handler->iconv_out != NULL) {
2282 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2283 &written, NULL, &toconv);
2284 out->use += written;
2285 out->content[out->use] = 0;
2287 #endif /* LIBXML_ICONV_ENABLED */
2288 #ifdef DEBUG_ENCODING
2289 xmlGenericError(xmlGenericErrorContext,
2290 "initialized encoder\n");
2296 * Conversion itself.
2301 if (toconv * 2 >= written) {
2302 xmlBufferGrow(out, toconv * 2);
2303 written = out->size - out->use - 1;
2305 if (handler->output != NULL) {
2306 ret = handler->output(&out->content[out->use], &written,
2307 in->content, &toconv);
2308 xmlBufferShrink(in, toconv);
2309 out->use += written;
2310 writtentot += written;
2311 out->content[out->use] = 0;
2313 #ifdef LIBXML_ICONV_ENABLED
2314 else if (handler->iconv_out != NULL) {
2315 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2316 &written, in->content, &toconv);
2317 xmlBufferShrink(in, toconv);
2318 out->use += written;
2319 writtentot += written;
2320 out->content[out->use] = 0;
2324 * Can be a limitation of iconv
2331 #endif /* LIBXML_ICONV_ENABLED */
2333 xmlGenericError(xmlGenericErrorContext,
2334 "xmlCharEncOutFunc: no output function !\n");
2338 if (ret >= 0) output += ret;
2341 * Attempt to handle error cases
2345 #ifdef DEBUG_ENCODING
2346 xmlGenericError(xmlGenericErrorContext,
2347 "converted %d bytes to %d bytes of output\n",
2352 #ifdef DEBUG_ENCODING
2353 xmlGenericError(xmlGenericErrorContext,
2354 "output conversion failed by lack of space\n");
2358 #ifdef DEBUG_ENCODING
2359 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2360 toconv, written, in->use);
2365 const xmlChar *utf = (const xmlChar *) in->content;
2368 cur = xmlGetUTF8Char(utf, &len);
2370 xmlChar charref[20];
2372 #ifdef DEBUG_ENCODING
2373 xmlGenericError(xmlGenericErrorContext,
2374 "handling output conversion error\n");
2375 xmlGenericError(xmlGenericErrorContext,
2376 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2377 in->content[0], in->content[1],
2378 in->content[2], in->content[3]);
2381 * Removes the UTF8 sequence, and replace it by a charref
2382 * and continue the transcoding phase, hoping the error
2383 * did not mangle the encoder state.
2385 snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
2386 xmlBufferShrink(in, len);
2387 xmlBufferAddHead(in, charref, -1);
2391 xmlGenericError(xmlGenericErrorContext,
2392 "output conversion failed due to conv error\n");
2393 xmlGenericError(xmlGenericErrorContext,
2394 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2395 in->content[0], in->content[1],
2396 in->content[2], in->content[3]);
2397 in->content[0] = ' ';
2406 * xmlCharEncCloseFunc:
2407 * @handler: char enconding transformation data structure
2409 * Generic front-end for encoding handler close function
2411 * Returns 0 if success, or -1 in case of error
2414 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2416 if (handler == NULL) return(-1);
2417 if (handler->name == NULL) return(-1);
2418 #ifdef LIBXML_ICONV_ENABLED
2420 * Iconv handlers can be used only once, free the whole block.
2421 * and the associated icon resources.
2423 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2424 if (handler->name != NULL)
2425 xmlFree(handler->name);
2426 handler->name = NULL;
2427 if (handler->iconv_out != NULL) {
2428 if (iconv_close(handler->iconv_out))
2430 handler->iconv_out = NULL;
2432 if (handler->iconv_in != NULL) {
2433 if (iconv_close(handler->iconv_in))
2435 handler->iconv_in = NULL;
2439 #endif /* LIBXML_ICONV_ENABLED */
2440 #ifdef DEBUG_ENCODING
2442 xmlGenericError(xmlGenericErrorContext,
2443 "failed to close the encoding handler\n");
2445 xmlGenericError(xmlGenericErrorContext,
2446 "closed the encoding handler\n");
2452 #ifndef LIBXML_ICONV_ENABLED
2453 #ifdef LIBXML_ISO8859X_ENABLED
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every character produces exactly one byte; the
 * conversion stops, without error, once @out is full.
 *
 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* inend;
    const unsigned char* instart = in;

    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned char d;

        /* no room left for the byte this character would produce */
        if (out >= outend)
            break;
        d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = in - instart - 1;
            return(-2);
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                *outlen = out - outstart;
                *inlen = in - instart - 1;
                return(-2);
            }
            c = *in++;
            /* UTF-8 continuation bytes look like 10xxxxxx */
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                *outlen = out - outstart;
                *inlen = in - instart - 2;
                return(-2);
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level: lead byte -> block; second level: block -> char */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = in - instart - 2;
                return(-2);
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                *outlen = out - outstart;
                *inlen = in - instart - 1;
                return(-2);
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                *outlen = out - outstart;
                *inlen = in - instart - 2;
                return(-2);
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                *outlen = out - outstart;
                *inlen = in - instart - 2;
                return(-2);
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 + xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = in - instart - 3;
                return(-2);
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            *outlen = out - outstart;
            *inlen = in - instart - 1;
            return(-2);
        }
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(0);
}
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code points of the 128 upper characters (0x80-0xFF),
 *                 0 marking an undefined one
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops, without error, in front of
 * the first character which does not fit completely into @out.
 *
 * Returns 0 if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if (in == NULL) {
        /* initialization call: nothing to do */
        *outlen = 0;
        *inlen = 0;
        return (0);
    }
    outend = out + *outlen;
    inend = in + *inlen;

    /* a byte is read only after checking that it lies inside the input */
    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                if (outend - out < 2)
                    break;
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* three bytes are needed here, not two */
                if (outend - out < 3)
                    break;
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (0);
}
2631 /************************************************************************
2632 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2633 ************************************************************************/
/*
 * ISO-8859-2 to Unicode: entry i holds the code point of byte 0x80 + i,
 * as indexed by ISO8859xToUTF8() (which treats 0 as undefined).
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
/*
 * Unicode to ISO-8859-2, in the 2-level layout read by UTF8ToISO8859x():
 * 48 index bytes (32 for the lead bytes of 2-byte sequences, then 16 for
 * the lead bytes of 3-byte sequences) followed by 6 blocks of 64 bytes.
 * A 0 result means "not in the character set".
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    /* index bytes */
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 1 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    /* block 2 */
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    /* block 3 */
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* block 4 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    /* block 5 */
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
/*
 * ISO-8859-3 to Unicode: entry i holds the code point of byte 0x80 + i,
 * as indexed by ISO8859xToUTF8(); 0x0000 marks a byte which that
 * function rejects as an undefined code point.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2703 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2704 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2709 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2710 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2711 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2712 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2713 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2714 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2715 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2717 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2718 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2719 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2720 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2723 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2724 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2731 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2732 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2733 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2734 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2737 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2738 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2739 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2740 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2741 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2742 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2743 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2744 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2745 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2746 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2747 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2748 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2749 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2750 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2751 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2752 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2753 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2756 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2757 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2761 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2762 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2763 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2764 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2765 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2766 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2767 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2768 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2769 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2770 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2771 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2772 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2773 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2774 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2775 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2776 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2777 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2780 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2781 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2782 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2783 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2786 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2787 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2788 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2789 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2790 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2791 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2792 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2793 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2794 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2795 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2796 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2797 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2798 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2799 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2800 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2801 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2802 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2805 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2806 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2807 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2808 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2811 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2812 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2813 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2814 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2815 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2817 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2818 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2819 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2820 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2821 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2822 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2823 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2824 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2825 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2826 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2827 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2829 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2830 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2832 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2835 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2836 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2837 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2838 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2839 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2840 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2841 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2842 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2843 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2844 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2845 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2846 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2847 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2848 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2849 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2850 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2851 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2854 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2855 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2862 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2863 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2864 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2865 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2866 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2867 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2868 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2869 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2870 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2871 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2872 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2873 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2874 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2875 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2876 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2881 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2882 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2883 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2884 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2885 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2886 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2887 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2888 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2889 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2890 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2891 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2892 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2893 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2894 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2895 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2896 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2899 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2900 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2901 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2902 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2903 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2908 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2909 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2910 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2911 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2912 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2913 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2916 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2918 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2919 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2920 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2921 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2922 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2923 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2924 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2925 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2926 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2927 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2928 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2929 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2930 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2934 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2935 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2936 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2937 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2938 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2939 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2940 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2941 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2942 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2943 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2944 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2945 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2946 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2947 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2948 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2949 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2952 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2953 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2961 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2962 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2963 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2964 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2965 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2966 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2967 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2968 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2969 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2970 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2971 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2972 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2973 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2974 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2975 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2976 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2977 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2978 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2982 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2983 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2987 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2988 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2989 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2990 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2991 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2992 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2993 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2994 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2995 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2996 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2997 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2998 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2999 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3000 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3001 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3002 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3005 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3006 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3013 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3014 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3015 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3016 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3017 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3018 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3019 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3020 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3021 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3023 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3025 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3026 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3027 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3028 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3032 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3033 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3034 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3035 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3036 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3037 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3038 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3039 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3040 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3041 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3042 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3043 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3044 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3045 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3046 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3047 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3050 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3051 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3053 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3054 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3055 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3056 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3059 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3060 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3061 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3063 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3064 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3065 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3066 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3068 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3069 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3070 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3071 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3079 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3080 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3081 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3084 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3085 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3086 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3087 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3088 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3089 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3090 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3091 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3092 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3093 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3094 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3095 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3096 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3097 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3098 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3099 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3100 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3103 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3104 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3106 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3112 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3113 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3116 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3117 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3118 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3119 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3120 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3121 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3122 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3123 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3128 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3134 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3135 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3136 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3137 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3138 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3139 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3140 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3141 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3142 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3143 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3144 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3145 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3146 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3147 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3148 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3149 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3152 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3153 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3161 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3162 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3163 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3164 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3170 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3173 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3174 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3175 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3176 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3177 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3178 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3179 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3180 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3181 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3182 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3183 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit code values, written 8 per row.
 *
 * NOTE(review): going by the name and the values, entry i is presumably the
 * Unicode code point for ISO-8859-14 byte 0x80 + i. The code that indexes
 * this table is not in this part of the file, so confirm against it.
 *
 * The first 32 entries (0x0080-0x009f) are equal to 0x80 plus their index.
 * From the fifth row on, some entries hold values above 0x00ff
 * (0x010a-0x0178 and 0x1e02-0x1ef3).
 */
3186 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3187 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3188 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3189 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3190 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
/* entries from index 32 on: no longer a plain 0x80 + index sequence */
3191 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3192 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3193 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3194 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3195 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3196 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3197 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3198 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3199 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3200 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3201 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3202 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
/*
 * xmltranscodetable_ISO8859_14:
 *
 * 688 bytes (48 + 10 * 64), given as 43 adjacent string literals of
 * 16 bytes each, which the compiler concatenates into one initializer.
 * The concatenated literal is exactly as long as the array, so there is no
 * room for the implicit terminating NUL and C leaves it out. Adding or
 * removing a single byte changes that: one byte more overflows the
 * declared size, one byte less silently zero-fills the tail.
 *
 * NOTE(review): the size expression suggests a 48-byte leading part
 * followed by ten 64-byte groups. The leading 48 bytes only hold values
 * 0..9, which would fit group numbers, and 0x00 is the dominant filler.
 * Presumably this is the reverse (Unicode to ISO-8859-14) lookup for
 * xmlunicodetable_ISO8859_14. The routine that walks it is outside this
 * chunk, so confirm the exact indexing there before editing any byte.
 */
3205 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
/* leading 48 bytes (3 rows) */
3206 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/* remaining 10 * 64 bytes: 4 rows per 64-byte group */
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3214 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3215 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3216 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3219 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3221 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3223 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3226 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3241 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3246 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3247 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3248 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3251 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3252 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3253 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3254 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3255 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3256 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3257 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3258 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3259 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3260 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,