2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
21 /* This file defines the conversion loop via Unicode as a pivot encoding. */
/* NOTE(review): every line of this listing carries an embedded line number,
   and that numbering skips values, so some lines of the original function
   (declarations, braces, labels, a few statements) are not shown here.
   The comments added below describe only what is visible. */
23 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
/* The visible code tries the following fallbacks in order, each one written
   through the output converter cd->ofuncs.xxx_wctomb:
     1. the Jamo decomposition of wc, only when cd->oflags has
        HAVE_HANGUL_JAMO set;
     2. a CJK variant of wc followed by U+303E;
     3. a substitute for the quotation marks U+2018..U+201A;
     4. the replacement sequence from the transliteration table.
   The multi-character attempts (1, 2, 4) first save cd->ostate, outptr and
   outleft; on success they return the number of bytes written
   (outptr - backup_outptr), and the statements after that return put the
   saved values back. */
24 static int unicode_transliterate (conv_t cd, ucs4_t wc,
25 unsigned char* outptr, size_t outleft)
27 if (cd->oflags & HAVE_HANGUL_JAMO) {
28 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30 (contained in Unicode only). */
/* ret is the number of entries filled into buf, or RET_ILUNI. */
32 int ret = johab_hangul_decompose(cd,buf,wc);
33 if (ret != RET_ILUNI) {
34 /* we know 1 <= ret <= 3 */
/* Snapshot the output state and position so a partially written
   sequence can be undone. */
35 state_t backup_state = cd->ostate;
36 unsigned char* backup_outptr = outptr;
37 size_t backup_outleft = outleft;
39 for (i = 0; i < ret; i++) {
/* NOTE(review): the condition guarding the next two statements is not
   visible in this listing (presumably "no output space left" - TODO
   confirm). */
41 sub_outcount = RET_TOOSMALL;
42 goto johab_hangul_failed;
44 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
/* Any result <= RET_ILUNI makes this attempt fail. */
45 if (sub_outcount <= RET_ILUNI)
46 goto johab_hangul_failed;
/* Sanity check: the converter must not report more bytes than were
   available. */
47 if (!(sub_outcount <= outleft)) abort();
48 outptr += sub_outcount; outleft -= sub_outcount;
/* All pieces were written: report the total number of bytes produced. */
50 return outptr-backup_outptr;
/* Undo the attempt (presumably reached through the johab_hangul_failed
   label, which is not visible here). */
52 cd->ostate = backup_state;
53 outptr = backup_outptr;
54 outleft = backup_outleft;
60 /* Try to use a variant, but postfix it with
61 U+303E IDEOGRAPHIC VARIATION INDICATOR
62 (cf. Ken Lunde's "CJKV information processing", p. 188). */
/* Selection of the starting index into cjk_variants; only part of the
   if/else chain is visible in this listing. */
66 else if (wc == 0x30f6)
/* Code points 0x4E00..0x9FFF look their index up in cjk_variants_indx. */
68 else if (wc >= 0x4e00 && wc < 0xa000)
69 indx = cjk_variants_indx[wc-0x4e00];
73 unsigned short variant = cjk_variants[indx];
/* Bit 0x8000 of the table entry is kept separately in 'last'.
   NOTE(review): presumably it marks the final variant of a run; the code
   that tests it is not visible here - TODO confirm. */
74 unsigned short last = variant & 0x8000;
/* Two-character candidate: the variant value followed by U+303E. */
77 buf[0] = variant; buf[1] = 0x303e;
/* Same snapshot / write / roll-back pattern as in the Hangul attempt. */
79 state_t backup_state = cd->ostate;
80 unsigned char* backup_outptr = outptr;
81 size_t backup_outleft = outleft;
83 for (i = 0; i < 2; i++) {
/* NOTE(review): the guard for this assignment and the statement that
   follows it are not visible in this listing. */
85 sub_outcount = RET_TOOSMALL;
88 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
/* NOTE(review): the statement controlled by this test is not visible. */
89 if (sub_outcount <= RET_ILUNI)
91 if (!(sub_outcount <= outleft)) abort();
92 outptr += sub_outcount; outleft -= sub_outcount;
94 return outptr-backup_outptr;
/* Undo the variant attempt. */
96 cd->ostate = backup_state;
97 outptr = backup_outptr;
98 outleft = backup_outleft;
107 if (wc >= 0x2018 && wc <= 0x201a) {
108 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
/* Choice of the substitute, depending on cd->oflags:
     HAVE_QUOTATION_MARKS: 0x201a becomes 0x2018, the others stay as is;
     else HAVE_ACCENTS:    0x2019 becomes 0x00b4, the others 0x0060;
     otherwise:            0x0027. */
110 (cd->oflags & HAVE_QUOTATION_MARKS
111 ? (wc == 0x201a ? 0x2018 : wc)
112 : (cd->oflags & HAVE_ACCENTS
113 ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114 : 0x0027 /* use apostrophe */
116 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
/* NOTE(review): the statement controlled by this test is not visible
   (presumably it returns outcount - TODO confirm). */
117 if (outcount != RET_ILUNI)
121 /* Use the transliteration table. */
122 int indx = translit_index(wc);
/* The element at translit_data[indx] is the number of replacement
   characters; after the increment cp points at the first of them. */
124 const unsigned short * cp = &translit_data[indx];
125 unsigned int num = *cp++;
/* Snapshot before writing the replacement sequence. */
126 state_t backup_state = cd->ostate;
127 unsigned char* backup_outptr = outptr;
128 size_t backup_outleft = outleft;
131 for (i = 0; i < num; i++) {
/* NOTE(review): the condition guarding the next two statements is not
   visible in this listing. */
133 sub_outcount = RET_TOOSMALL;
134 goto translit_failed;
136 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
137 if (sub_outcount <= RET_ILUNI)
138 goto translit_failed;
139 if (!(sub_outcount <= outleft)) abort();
140 outptr += sub_outcount; outleft -= sub_outcount;
/* Whole replacement sequence written: report the bytes produced. */
142 return outptr-backup_outptr;
/* Undo the table-based attempt. */
144 cd->ostate = backup_state;
145 outptr = backup_outptr;
146 outleft = backup_outleft;
/* NOTE(review): the statement controlled by this test and the end of the
   function are not visible in this listing. */
147 if (sub_outcount != RET_ILUNI)
/* unicode_loop_convert: decodes input through cd->ifuncs.xxx_mbtowc and
   encodes each resulting wide character through cd->ofuncs.xxx_wctomb.
   It works on local copies of the buffer pointers and remaining counts and
   stores them back into *inbuf, *inbytesleft, *outbuf and *outbytesleft at
   the end of the visible code.
   NOTE(review): every line of this listing carries an embedded line number
   and the numbering skips values; the loop header, several declarations,
   the goto targets and the return statement are not visible here. */
154 static size_t unicode_loop_convert (iconv_t icd,
155 const char* * inbuf, size_t *inbytesleft,
156 char* * outbuf, size_t *outbytesleft)
158 conv_t cd = (conv_t) icd;
160 const unsigned char* inptr = (const unsigned char*) *inbuf;
161 size_t inleft = *inbytesleft;
162 unsigned char* outptr = (unsigned char*) *outbuf;
163 size_t outleft = *outbytesleft;
/* Remember the input state before decoding; it is written back to
   cd->istate in several places below. */
165 state_t last_istate = cd->istate;
/* Decode one unit from the input. */
169 incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
171 if (incount == RET_ILSEQ) {
172 /* Case 1: invalid input */
173 if (cd->discard_ilseq) {
/* The input encodings are grouped into a UCS-4/UTF-32 family and a
   UCS-2/UTF-16 family.  NOTE(review): the statements executed for each
   group are not visible in this listing (presumably the amount of input
   to skip - TODO confirm). */
174 switch (cd->iindex) {
175 case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
176 case ei_utf32: case ei_utf32be: case ei_utf32le:
177 case ei_ucs4internal: case ei_ucs4swapped:
179 case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
180 case ei_utf16: case ei_utf16be: case ei_utf16le:
181 case ei_ucs2internal: case ei_ucs2swapped:
192 if (incount == RET_TOOFEW(0)) {
193 /* Case 2: not enough bytes available to detect anything */
198 /* Case 3: k bytes read, but only a shift sequence */
/* NOTE(review): presumably recovers k from a return value encoded as
   -2-k; the definition of RET_TOOFEW is not visible here - TODO confirm. */
199 incount = -2-incount;
201 /* Case 4: k bytes read, making up a wide character */
203 cd->istate = last_istate;
/* Encode the wide character.  When the converter answers RET_ILUNI the
   visible code goes on to the tag-character test, then to transliteration
   (only if cd->transliterate is set), then to the cd->discard_ilseq test,
   and finally tries U+FFFD.  NOTE(review): the statements controlled by
   the tests below are not visible in this listing. */
208 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
209 if (outcount != RET_ILUNI)
211 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
/* Comparing everything above the low 7 bits selects exactly the 128 code
   points of that range. */
212 if ((wc >> 7) == (0xe0000 >> 7))
214 /* Try transliteration. */
216 if (cd->transliterate) {
217 outcount = unicode_transliterate(cd,wc,outptr,outleft);
218 if (outcount != RET_ILUNI)
221 if (cd->discard_ilseq)
/* Last fallback visible here: try to emit U+FFFD instead of wc. */
223 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
224 if (outcount != RET_ILUNI)
/* Put back the input state saved before decoding. */
226 cd->istate = last_istate;
232 cd->istate = last_istate;
/* Sanity check, then advance the output position by the bytes written. */
237 if (!(outcount <= outleft)) abort();
238 outptr += outcount; outleft -= outcount;
/* Sanity check, then advance the input position by the bytes consumed. */
241 if (!(incount <= inleft)) abort();
242 inptr += incount; inleft -= incount;
/* Publish the updated positions and remaining counts to the caller. */
244 *inbuf = (const char*) inptr;
245 *inbytesleft = inleft;
246 *outbuf = (char*) outptr;
247 *outbytesleft = outleft;
/* unicode_loop_reset: reset handling for the converter.  The visible code
   contains three parts:
     - when outbuf or *outbuf is NULL, cd->istate and cd->ostate are zeroed;
     - when cd->ifuncs.xxx_flushwc exists and yields a wide character, that
       character is written to *outbuf through cd->ofuncs.xxx_wctomb, with
       transliteration and U+FFFD as fallbacks;
     - when cd->ofuncs.xxx_reset exists, its output is appended to *outbuf;
       afterwards both states are zeroed.
   NOTE(review): every line of this listing carries an embedded line number
   and the numbering skips values; the branching between these parts, the
   statements controlled by several tests, and the return statements are
   not visible here. */
251 static size_t unicode_loop_reset (iconv_t icd,
252 char* * outbuf, size_t *outbytesleft)
254 conv_t cd = (conv_t) icd;
255 if (outbuf == NULL || *outbuf == NULL) {
256 /* Reset the states. */
257 memset(&cd->istate,'\0',sizeof(state_t));
258 memset(&cd->ostate,'\0',sizeof(state_t));
/* The flush hook is optional; it is only called when the pointer is set. */
262 if (cd->ifuncs.xxx_flushwc) {
/* Saved so the input state can be written back below. */
263 state_t last_istate = cd->istate;
/* A nonzero result means a wide character was delivered in wc. */
265 if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
266 unsigned char* outptr = (unsigned char*) *outbuf;
267 size_t outleft = *outbytesleft;
/* Encode the flushed character; on RET_ILUNI the visible code continues
   with the tag-character test, transliteration (only if cd->transliterate
   is set), the cd->discard_ilseq test and finally U+FFFD. */
268 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
269 if (outcount != RET_ILUNI)
271 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
272 if ((wc >> 7) == (0xe0000 >> 7))
274 /* Try transliteration. */
276 if (cd->transliterate) {
277 outcount = unicode_transliterate(cd,wc,outptr,outleft);
278 if (outcount != RET_ILUNI)
281 if (cd->discard_ilseq)
/* Last fallback visible here: try to emit U+FFFD instead of wc. */
283 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
284 if (outcount != RET_ILUNI)
/* Put back the input state saved before the flush. */
286 cd->istate = last_istate;
291 cd->istate = last_istate;
/* Sanity check: the converter must not report more bytes than were
   available. */
295 if (!(outcount <= outleft)) abort();
/* Publish the output position and remaining space to the caller. */
299 *outbuf = (char*) outptr;
300 *outbytesleft = outleft;
/* The output reset hook is optional as well. */
303 if (cd->ofuncs.xxx_reset) {
304 unsigned char* outptr = (unsigned char*) *outbuf;
305 size_t outleft = *outbytesleft;
306 int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
/* Sanity check, then advance the caller's output position by the bytes
   the reset hook produced. */
311 if (!(outcount <= outleft)) abort();
312 *outbuf = (char*) (outptr + outcount);
313 *outbytesleft = outleft - outcount;
/* Both conversion states are zeroed here. */
315 memset(&cd->istate,'\0',sizeof(state_t));
316 memset(&cd->ostate,'\0',sizeof(state_t));