2 * The contents of this file are subject to the Mozilla Public
3 * License Version 1.1 (the "License"); you may not use this file
4 * except in compliance with the License. You may obtain a copy of
5 * the License at http://www.mozilla.org/MPL/
7 * Software distributed under the License is distributed on an "AS
8 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
9 * implied. See the License for the specific language governing
10 * rights and limitations under the License.
12 * The Original Code is the Sablotron XSLT Processor.
14 * The Initial Developer of the Original Code is Ginger Alliance Ltd.
15 * Portions created by Ginger Alliance are Copyright (C) 2000-2002
16 * Ginger Alliance Ltd. All Rights Reserved.
20 * Alternatively, the contents of this file may be used under the
21 * terms of the GNU General Public License Version 2 or later (the
22 * "GPL"), in which case the provisions of the GPL are applicable
23 * instead of those above. If you wish to allow use of your
24 * version of this file only under the terms of the GPL and not to
25 * allow others to use your version of this file under the MPL,
26 * indicate your decision by deleting the provisions above and
27 * replace them with the notice and other provisions required by
28 * the GPL. If you do not delete the provisions above, a recipient
29 * may use your version of this file under either the MPL or the
38 // #include "shandler.h"
41 ******************************
42 * internal recoding functions
43 * (shall be replaced with a call to the sabconv library function)
44 ******************************
47 // encoding tables for the functions
// Decoding table for windows-1250 (CP1250).
// Entry [b - 0x80] holds the Unicode code point for input byte b in the
// range 0x80..0xFF; -1 marks a byte that the code page leaves unassigned
// (0x81, 0x83, 0x88, 0x90 and 0x98). Bytes below 0x80 are not listed.
// Every code point in the table is below 0x8000, so it fits in a short.
short EncTable1250[] =
{
    /* 0x80 */ 0x20ac,     -1, 0x201a,     -1, 0x201e, 0x2026, 0x2020, 0x2021,
    /* 0x88 */     -1, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179,
    /* 0x90 */     -1, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
    /* 0x98 */     -1, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a,
    /* 0xA0 */ 0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7,
    // 0xAC is U+00AC NOT SIGN in CP1250 (it was wrongly marked unassigned)
    /* 0xA8 */ 0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b,
    /* 0xB0 */ 0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    /* 0xB8 */ 0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c,
    /* 0xC0 */ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    /* 0xC8 */ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    /* 0xD0 */ 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    /* 0xD8 */ 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    /* 0xE0 */ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    /* 0xE8 */ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    /* 0xF0 */ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    /* 0xF8 */ 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9
};
// Decoding table for ISO-8859-2 (Latin-2).
// Entry [b - 0x80] holds the Unicode code point for input byte b in the
// range 0x80..0xFF. The first 32 entries (bytes 0x80..0x9F) are -1, i.e.
// those bytes have no mapping in this table. Bytes below 0x80 are not listed.
short EncTableLatin2[] =
{
    /* 0x80 */     -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
    /* 0x88 */     -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
    /* 0x90 */     -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
    /* 0x98 */     -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
    /* 0xA0 */ 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    /* 0xA8 */ 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    /* 0xB0 */ 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    /* 0xB8 */ 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    /* 0xC0 */ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    /* 0xC8 */ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    /* 0xD0 */ 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    /* 0xD8 */ 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    /* 0xE0 */ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    /* 0xE8 */ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    /* 0xF0 */ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    /* 0xF8 */ 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9
};
89 void* encInternalOpen(const Str& enc, Bool toUTF8)
93 if (enc.eqNoCase("ISO-8859-2"))
94 return EncTableLatin2;
95 else if (enc.eqNoCase("windows-1250"))
97 // more builtin conversions can come here
98 else return (void*)-1;
// Closes a descriptor obtained from encInternalOpen.
// There is nothing to release here, so the function ignores its argument and
// always reports success.
// Returns 0, the value iconv_close uses for OK.
int encInternalClose(void* intCD)
{
    (void)intCD; // intentionally unused
    return 0; // iconv's value for OK
}
106 // the following only does 1-byte encodings for which an EncTable is defined
// Converts input bytes to UTF-8 using the lookup table passed as intCD.
// intCD is treated as an array of short indexed by (byte - 0x80).
// NOTE(review): several statements of this function are not present in this
// listing; the comments below describe only the statements shown.
108 EncResult encInternalConv(void *intCD, const char** inbuf,
110 char **outbuf, size_t *outbytesleft)
// the descriptor must be a real table: neither NULL nor the (void*)-1 value
112 assert(intCD && intCD != (void*)-1);
116 unsigned char thischar;
// keep going while unread input remains
117 for (; *inbytesleft > 0; )
// table entry 0 corresponds to byte 0x80
127 val = ((short*)intCD)[thischar - 0x80];
// presumably writes the UTF-8 form of val into charbuf and returns its
// length in bytes -- confirm against utf8FromCharCode
130 len = utf8FromCharCode(charbuf, val);
// the character is copied out only when it fits in the remaining output
131 if (len <= *outbytesleft)
132 memcpy(*outbuf, charbuf, len);
137 *outbytesleft -= len;
155 Bool Recoder::handledByExpat(const Str& enc) const
157 return (enc.eqNoCase("UTF-8") ||
158 enc.eqNoCase("UTF-16") ||
159 enc.eqNoCase("ISO-8859-1") ||
160 enc.eqNoCase("US-ASCII"));
// Visits every entry of `items` and afterwards calls items.freeall(FALSE).
// NOTE(review): the statement guarded by the `if` below is not present in
// this listing; presumably it closes the entry's descriptor -- confirm.
163 void Recoder::clear(Sit S)
165 for (int i = 0; i < items.number(); i++)
// only entries that exist and carry a non-null physCD are acted upon
167 if (items[i] && items[i] -> physCD)
// NOTE(review): see the list class for the meaning of the FALSE argument
170 items.freeall(FALSE);
175 // can't clear here (have no situation)
// Opens a conversion between `enc` and UTF-8 and hands the new descriptor
// back through `cd`. toUTF8 selects the direction: TRUE converts from `enc`
// to UTF-8, FALSE from UTF-8 to `enc`.
// Three mechanisms appear below: iconv, the built-in tables
// (encInternalOpen), and the processor's encoding handler "as a last resort".
// The one that succeeds is recorded in newitem's `method`.
// If none does, E1_UNKNOWN_ENC is raised for `enc`.
// NOTE(review): the branching between the three attempts and any
// preprocessor guards are not present in this listing.
178 eFlag Recoder::open(Sit S, const Str& enc, Bool toUTF8, CDesc& cd)
// start with an empty record: no method chosen, no descriptor
180 GP(ConvInfo) newitem = new ConvInfo;
181 (*newitem).method = ENC_NONE;
182 (*newitem).physCD = NULL;
// iconv_open takes (tocode, fromcode), hence the swapped argument order
184 iconv_t icd = toUTF8 ? iconv_open("UTF-8", enc) : iconv_open(enc, "UTF-8");
185 // switch off transliteration in iconv:
186 // sadly non-standard, only works in windows port
188 // iconvctl(icd, ICONV_SET_TRANSLITERATE, &val);
// iconv_open signals failure with (iconv_t)-1
189 if (icd != (iconv_t) -1)
191 (*newitem).method = ENC_ICONV;
192 (*newitem).physCD = (void *) icd;
197 // try to open internal recode
199 physcd = encInternalOpen(enc, toUTF8);
// encInternalOpen returns (void*)-1 when it has no table for `enc`
200 if (physcd != (void*)-1)
202 (*newitem).method = ENC_INTERNAL;
203 (*newitem).physCD = physcd;
207 // try the encoding handler as a last resort
208 void* enchlrUD = NULL;
209 EncHandler *enchlr = NULL;
// the handler is only looked up when the situation has a processor
210 if (S.getProcessor())
211 enchlr = S.getProcessor() -> getEncHandler(&enchlrUD);
// NOTE(review): enchlr is still NULL if no processor or handler exists; a
// null check before this call is not visible here -- confirm
214 void *physcd = enchlr -> open(enchlrUD, S.getProcessor(), toUTF8 ? EH_TO_UTF8 : EH_FROM_UTF8, enc);
215 if (physcd != (void*) -1)
217 (*newitem).method = ENC_HANDLER;
218 (*newitem).physCD = physcd;
// on success, newitem.keep() is stored in cd and appended to `items`
223 if ((*newitem).method != ENC_NONE)
224 items.append(cd = newitem.keep());
226 Err1(S, E1_UNKNOWN_ENC, enc);
230 eFlag Recoder::openFromUTF8(Sit S, const Str& enc, CDesc& cd)
232 return open(S, enc, FALSE, cd);
235 eFlag Recoder::openToUTF8(Sit S, const Str& enc, CDesc& cd)
237 return open(S, enc, TRUE, cd);
// Closes the conversion descriptor `cd`. Three closing calls appear below:
// iconv_close, encInternalClose and the encoding handler's close().
// NOTE(review): the dispatch that selects between them (presumably on
// cd->method) is not present in this listing.
240 eFlag Recoder::close(Sit S, CDesc cd)
// iconv descriptor: physCD holds the iconv_t
247 iconv_close((iconv_t)(cd -> physCD));
// built-in table descriptor
251 encInternalClose(cd -> physCD);
// handler descriptor: fetch the processor's encoding handler and its user data
255 void *enchlrUD = NULL;
256 EncHandler *enchlr = NULL;
257 if (S.getProcessor())
258 enchlr = S.getProcessor() -> getEncHandler(&enchlrUD);
// NOTE(review): enchlr is still NULL if no processor or handler exists; a
// null check before this call is not visible here -- confirm
260 enchlr -> close(enchlrUD, S.getProcessor(), cd -> physCD);
// Runs one conversion step on descriptor `cd`. The buffer pointers and
// remaining-byte counts are passed on by address to the underlying
// converter. The outcome is stored in `result` as one of the ENC_* values
// assigned below.
// NOTE(review): the dispatch between the three converters, the errno
// handling around iconv and parts of the switch below are not present in
// this listing.
269 eFlag Recoder::conv(Sit S, CDesc cd, const char *& inbuf, size_t &inbytesleft,
270 char *& outbuf, size_t &outbytesleft, EncResult& result)
// iconv path; the SABLOT_ICONV_CAST_OK guard affects how the input buffer
// argument is passed (the guarded lines are not visible here)
279 iconv((iconv_t)(cd -> physCD),
280 # ifdef SABLOT_ICONV_CAST_OK
285 &inbytesleft, &outbuf, &outbytesleft);
// failure kinds reported for the iconv path
289 result = ENC_EINVAL; break;
291 result = ENC_E2BIG; break;
293 result = ENC_EILSEQ; break;
// built-in table path
305 encInternalConv(cd -> physCD, &inbuf, &inbytesleft,
306 &outbuf, &outbytesleft);
// handler path: fetch the processor's encoding handler and its user data
311 void *enchlrUD = NULL;
312 EncHandler *enchlr = NULL;
313 if (S.getProcessor())
314 enchlr = S.getProcessor() -> getEncHandler(&enchlrUD);
// NOTE(review): enchlr is still NULL if no processor or handler exists; a
// null check before this call is not visible here -- confirm
317 switch(enchlr -> conv(enchlrUD, S.getProcessor(), cd -> physCD,
318 &inbuf, &inbytesleft, &outbuf, &outbytesleft))
// translate the handler's EH_* codes into ENC_* results
// NOTE(review): no `break` is visible after these cases -- confirm that
// they do not fall through to `default`
320 case EH_EINVAL: result = ENC_EINVAL;
322 case EH_E2BIG: result = ENC_E2BIG;
324 case EH_EILSEQ: result = ENC_EILSEQ;
326 default: result = ENC_OK;
337 void Recoder::report(Sit S, MsgType type, MsgCode code, const Str &arg1, const Str & arg2)
339 S.message(type, code, arg1, arg2);