2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
26 #include "libcharset.h"
30 * Consider all system dependent encodings, for any system,
31 * and the extra encodings.
39 * Consider those system dependent encodings that are needed for the
48 #if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
54 * Data type for general conversion loop.
/* Conversion entry point of the loop; iconv() dispatches to it whenever it is
   given a non-NULL input buffer. */
57 size_t (*loop_convert) (iconv_t icd,
58 const char* * inbuf, size_t *inbytesleft,
59 char* * outbuf, size_t *outbytesleft);
/* Reset entry point; iconv() dispatches to it when inbuf or *inbuf is NULL,
   which is why it only takes the output side. */
60 size_t (*loop_reset) (iconv_t icd,
61 char* * outbuf, size_t *outbytesleft);
67 #include "converters.h"
70 * Transliteration tables.
72 #include "cjk_variants.h"
76 * Table of all supported encodings.
/* Per-encoding record: all_encodings[] holds one of these per encoding index,
   and iconv_open copies the fields into the conversion descriptor. */
79 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
80 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
81 int oflags; /* flags for unicode -> multibyte conversion */
/* Encoding indices (the ei_* names used to index all_encodings[]).
   NOTE(review): the replacement text of DEFENCODING is not visible in this
   view; presumably it emits one ei_<name> entry per DEFENCODING line of the
   .def files included below -- confirm against the full file. */
84 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
86 #include "encodings.def"
88 #include "encodings_aix.def"
91 #include "encodings_osf1.def"
94 #include "encodings_dos.def"
97 #include "encodings_extra.def"
99 #include "encodings_local.def"
/* Closing entry written without a trailing comma, as its name explains. */
101 ei_for_broken_compilers_that_dont_like_trailing_commas
/* Table of conversion functions and output flags, indexed by encoding index
   (iconv_open reads all_encodings[from_index] and all_encodings[to_index]).
   Each DEFENCODING entry of the included .def files becomes one initializer:
   the two ifuncs members, the two ofuncs members, then the oflags value. */
104 static struct encoding const all_encodings[] = {
105 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
106 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
107 #include "encodings.def"
109 #include "encodings_aix.def"
112 #include "encodings_osf1.def"
115 #include "encodings_dos.def"
118 #include "encodings_extra.def"
/* The entries of encodings_local.def use a different expansion: their oflags
   is the constant 0 instead of an ei_<name>_oflags value. */
121 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
122 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
123 #include "encodings_local.def"
133 * Alias lookup function.
135 * struct alias { const char* name; unsigned int encoding_index; };
136 * const struct alias * aliases_lookup (const char *str, unsigned int len);
137 * #define MAX_WORD_LENGTH ...
142 * System dependent alias lookup function.
144 * const struct alias * aliases2_lookup (const char *str);
/* Alias table for the system dependent encodings.  It only exists when at
   least one of USE_AIX, USE_OSF1, USE_DOS or USE_EXTRA is defined, and is
   filled from the per-system alias headers included below.  aliases2_lookup
   searches it linearly and iconvlist appends its entries to its listing. */
146 #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
147 static struct alias sysdep_aliases[] = {
149 #include "aliases_aix.h"
152 #include "aliases_osf1.h"
155 #include "aliases_dos.h"
158 #include "aliases_extra.h"
/* Searches sysdep_aliases for an entry whose name equals str exactly
   (strcmp, so the comparison is case sensitive). */
165 aliases2_lookup (register const char *str)
/* Linear scan: ptr walks the table while count runs down from the number of
   elements to zero. */
169 for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
170 if (!strcmp(str,ptr->name))
/* Macro form of the lookup: always yields NULL.  iconvlist tests
   "#ifndef aliases2_lookup" to find out whether sysdep_aliases exists, so
   this macro doubles as the "no system dependent table" marker. */
175 #define aliases2_lookup(str) NULL
179 /* Like !strcasecmp, except that both strings can be assumed to be ASCII
180 and the first string can be assumed to be in uppercase. */
181 static int strequal (const char* str1, const char* str2)
/* Fetch the next byte of each string as an unsigned char and advance both
   cursors. */
186 c1 = * (unsigned char *) str1++;
187 c2 = * (unsigned char *) str2++;
/* Only str2 is tested for lowercase ASCII letters; str1 needs no such test
   because it is assumed to be uppercase already. */
190 if (c2 >= 'a' && c2 <= 'z')
/* Creates a conversion descriptor for converting from the encoding named
   fromcode to the encoding named tocode.  Both names are resolved to an
   encoding index through the alias tables; the descriptor is then allocated
   with malloc and filled from all_encodings[].
   Returns (iconv_t)(-1) on the failure paths visible below. */
199 iconv_t iconv_open (const char* tocode, const char* fromcode)
201 struct conv_struct * cd;
/* Scratch copy of the encoding name.  The extra 10 bytes match the length of
   the longest suffix tested below ("//TRANSLIT"), plus one for the NUL. */
202 char buf[MAX_WORD_LENGTH+10+1];
205 const struct alias * ap;
207 unsigned int from_index;
209 unsigned int to_index;
/* Operation flags, off by default; stored into the descriptor at the end. */
211 int transliterate = 0;
212 int discard_ilseq = 0;
214 /* Before calling aliases_lookup, convert the input string to upper case,
215 * and check whether it's entirely ASCII (we call gperf with option "-7"
216 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
217 * or if it's too long, it is not a valid encoding name.
219 for (to_wchar = 0;;) {
220 /* Search tocode in the table. */
/* Walk tocode byte by byte alongside buf; count starts at the capacity of
   buf. */
221 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
222 unsigned char c = * (unsigned char *) cp;
225 if (c >= 'a' && c <= 'z')
/* Detect a trailing "//TRANSLIT" and then a trailing "//IGNORE" at the end
   of what was copied into buf (bp points just past the copied name). */
233 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
238 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
/* An empty name is replaced by the charset of the current locale. */
243 if (buf[0] == '\0') {
244 tocode = locale_charset();
245 /* Avoid an endless loop that could occur when using an older version
246 of localcharset.c. */
247 if (tocode[0] == '\0')
/* Name lookup: the main alias table first, then the system dependent one. */
251 ap = aliases_lookup(buf,bp-buf);
253 ap = aliases2_lookup(buf);
/* The ei_local_char pseudo encoding also resolves to the locale's charset. */
257 if (ap->encoding_index == ei_local_char) {
258 tocode = locale_charset();
259 /* Avoid an endless loop that could occur when using an older version
260 of localcharset.c. */
261 if (tocode[0] == '\0')
/* The ei_local_wchar_t pseudo encoding: when __STDC_ISO_10646__ is set, pick
   the internal encoding whose unit size equals sizeof(wchar_t). */
265 if (ap->encoding_index == ei_local_wchar_t) {
266 #if __STDC_ISO_10646__
267 if (sizeof(wchar_t) == 4) {
268 to_index = ei_ucs4internal;
271 if (sizeof(wchar_t) == 2) {
272 to_index = ei_ucs2internal;
275 if (sizeof(wchar_t) == 1) {
276 to_index = ei_iso8859_1;
282 tocode = locale_charset();
/* Any other alias: its encoding index is used as is. */
287 to_index = ap->encoding_index;
/* Same resolution steps again, this time for fromcode / from_index. */
290 for (from_wchar = 0;;) {
291 /* Search fromcode in the table. */
292 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
293 unsigned char c = * (unsigned char *) cp;
296 if (c >= 'a' && c <= 'z')
304 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
308 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
312 if (buf[0] == '\0') {
313 fromcode = locale_charset();
314 /* Avoid an endless loop that could occur when using an older version
315 of localcharset.c. */
316 if (fromcode[0] == '\0')
320 ap = aliases_lookup(buf,bp-buf);
322 ap = aliases2_lookup(buf);
326 if (ap->encoding_index == ei_local_char) {
327 fromcode = locale_charset();
328 /* Avoid an endless loop that could occur when using an older version
329 of localcharset.c. */
330 if (fromcode[0] == '\0')
334 if (ap->encoding_index == ei_local_wchar_t) {
335 #if __STDC_ISO_10646__
336 if (sizeof(wchar_t) == 4) {
337 from_index = ei_ucs4internal;
340 if (sizeof(wchar_t) == 2) {
341 from_index = ei_ucs2internal;
344 if (sizeof(wchar_t) == 1) {
345 from_index = ei_iso8859_1;
351 fromcode = locale_charset();
356 from_index = ap->encoding_index;
/* When from_wchar and to_wchar differ, the larger wchar_conv_struct is
   allocated; its additional state member is cleared further down. */
359 cd = (struct conv_struct *) malloc(from_wchar != to_wchar
360 ? sizeof(struct wchar_conv_struct)
361 : sizeof(struct conv_struct));
364 return (iconv_t)(-1);
/* Record both encoding indices and copy the matching function tables and
   output flags out of all_encodings[]. */
366 cd->iindex = from_index;
367 cd->ifuncs = all_encodings[from_index].ifuncs;
368 cd->oindex = to_index;
369 cd->ofuncs = all_encodings[to_index].ofuncs;
370 cd->oflags = all_encodings[to_index].oflags;
371 /* Initialize the loop functions. */
/* Four convert/reset pairs are available: wchar_id, wchar_to, wchar_from and
   unicode.  NOTE(review): the conditions choosing between them are not
   visible in this view; presumably they depend on from_wchar and to_wchar. */
376 cd->lfuncs.loop_convert = wchar_id_loop_convert;
377 cd->lfuncs.loop_reset = wchar_id_loop_reset;
381 cd->lfuncs.loop_convert = wchar_to_loop_convert;
382 cd->lfuncs.loop_reset = wchar_to_loop_reset;
389 cd->lfuncs.loop_convert = wchar_from_loop_convert;
390 cd->lfuncs.loop_reset = wchar_from_loop_reset;
394 cd->lfuncs.loop_convert = unicode_loop_convert;
395 cd->lfuncs.loop_reset = unicode_loop_reset;
398 /* Initialize the states. */
399 memset(&cd->istate,'\0',sizeof(state_t));
400 memset(&cd->ostate,'\0',sizeof(state_t));
401 /* Initialize the operation flags. */
402 cd->transliterate = transliterate;
403 cd->discard_ilseq = discard_ilseq;
404 /* Initialize additional fields. */
/* Only the larger wchar_conv_struct has the mbstate_t member cleared here. */
405 if (from_wchar != to_wchar) {
406 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
407 memset(&wcd->state,'\0',sizeof(mbstate_t));
/* Failure return value. */
413 return (iconv_t)(-1);
/* Runs one conversion step on the descriptor icd by dispatching to the loop
   functions stored in it; the return value is whatever the selected loop
   function returns. */
416 size_t iconv (iconv_t icd,
417 ICONV_CONST char* * inbuf, size_t *inbytesleft,
418 char* * outbuf, size_t *outbytesleft)
420 conv_t cd = (conv_t) icd;
/* No input buffer (inbuf or *inbuf is NULL): call the reset function, which
   only gets the output side. */
421 if (inbuf == NULL || *inbuf == NULL)
422 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
/* Otherwise convert.  The cast adapts inbuf to the const-qualified parameter
   type of loop_convert. */
424 return cd->lfuncs.loop_convert(icd,
425 (const char* *)inbuf,inbytesleft,
426 outbuf,outbytesleft);
/* Closes the conversion descriptor icd.
   NOTE(review): only the cast below is visible in this view; iconv_open
   obtains the descriptor from malloc, so presumably it is released here --
   confirm against the full file. */
429 int iconv_close (iconv_t icd)
/* View the opaque handle as the internal descriptor type. */
431 conv_t cd = (conv_t) icd;
436 #ifndef LIBICONV_PLUG
/* Queries or changes properties of the conversion descriptor icd.  request
   selects the operation; argument points to the int that is read or written
   by the requests visible below. */
438 int iconvctl (iconv_t icd, int request, void* argument)
440 conv_t cd = (conv_t) icd;
/* True when the unicode loop is used with the same encoding index on both
   sides, or when the wchar identity loop is used.
   NOTE(review): the case label and the target of this expression are not
   visible in this view. */
444 ((cd->lfuncs.loop_convert == unicode_loop_convert
445 && cd->iindex == cd->oindex)
446 || cd->lfuncs.loop_convert == wchar_id_loop_convert
/* Report the transliterate flag through *argument. */
449 case ICONV_GET_TRANSLITERATE:
450 *(int *)argument = cd->transliterate;
/* Set the transliterate flag; any non-zero input is stored as 1. */
452 case ICONV_SET_TRANSLITERATE:
453 cd->transliterate = (*(const int *)argument ? 1 : 0);
/* Report the discard_ilseq flag through *argument. */
455 case ICONV_GET_DISCARD_ILSEQ:
456 *(int *)argument = cd->discard_ilseq;
/* Set the discard_ilseq flag; any non-zero input is stored as 1. */
458 case ICONV_SET_DISCARD_ILSEQ:
459 cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
/* qsort comparison function for arrays of struct alias: orders the elements
   by ascending encoding_index.  iconvlist uses it to bring all aliases of
   one encoding next to each other. */
467 static int compare_by_index (const void * arg1, const void * arg2)
469 const struct alias * alias1 = (const struct alias *) arg1;
470 const struct alias * alias2 = (const struct alias *) arg2;
/* Both unsigned indices are converted to int before the subtraction, so the
   difference can be negative. */
471 return (int)alias1->encoding_index - (int)alias2->encoding_index;
/* qsort comparison function for arrays of C strings: the elements being
   compared are "const char *", so each argument points to a string pointer. */
474 static int compare_by_name (const void * arg1, const void * arg2)
476 const char * name1 = *(const char **)arg1;
477 const char * name2 = *(const char **)arg2;
478 /* Compare alphabetically, but put "CS" names at the end. */
479 int sign = strcmp(name1,name2);
/* The "CS" prefix difference (-1, 0 or +1) is weighted by 4 and therefore
   always outweighs the +1/-1 term derived from the strcmp result. */
481 sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S'))
482 * 4 + (sign >= 0 ? 1 : -1);
/* Enumerates the supported encodings: the callback do_one is invoked once
   per encoding with the number of names and the array of all names (aliases)
   of that encoding. */
487 void iconvlist (int (*do_one) (unsigned int namescount,
488 const char * const * names,
/* aliascount1 is the size of the main alias table; aliascount2 is the size
   of sysdep_aliases, or 0 when aliases2_lookup is a macro (no such table). */
492 #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
493 #ifndef aliases2_lookup
494 #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
496 #define aliascount2 0
498 #define aliascount (aliascount1+aliascount2)
/* Local work buffers with room for every alias of both tables. */
499 struct alias aliasbuf[aliascount];
500 const char * namesbuf[aliascount];
503 /* Put all existing aliases into a buffer. */
/* From the main table, entries with an empty name and the two pseudo
   encodings ei_local_char / ei_local_wchar_t are filtered out. */
507 for (i = 0; i < aliascount1; i++) {
508 const struct alias * p = &aliases[i];
509 if (p->name[0] != '\0'
510 && p->encoding_index != ei_local_char
511 && p->encoding_index != ei_local_wchar_t)
/* The system dependent aliases are appended without any filtering. */
514 #ifndef aliases2_lookup
515 for (i = 0; i < aliascount2; i++)
516 aliasbuf[j++] = sysdep_aliases[i];
520 /* Sort by encoding_index. */
522 qsort(aliasbuf, num_aliases, sizeof(struct alias), compare_by_index);
524 /* Process all aliases with the same encoding_index together. */
527 while (j < num_aliases) {
528 unsigned int ei = aliasbuf[j].encoding_index;
/* Collect the names of the consecutive entries that share the index ei. */
531 namesbuf[i++] = aliasbuf[j++].name;
532 while (j < num_aliases && aliasbuf[j].encoding_index == ei);
/* Order the names of this encoding; compare_by_name puts "CS" names last. */
534 qsort(namesbuf, i, sizeof(const char *), compare_by_name);
535 /* Call the callback. */
/* NOTE(review): the action taken on a non-zero callback result is not
   visible in this view; presumably the enumeration stops. */
536 if (do_one(i,namesbuf,data))
/* Library version as a variable with external linkage, initialized from the
   _LIBICONV_VERSION macro. */
545 int _libiconv_version = _LIBICONV_VERSION;