1 /*
2  * iconv implementation using Win32 API to convert.
3  *
4  * This file is placed in the public domain.
5  */
6 
7 /* for WC_NO_BEST_FIT_CHARS */
8 #ifndef WINVER
9 # define WINVER 0x0500
10 #endif
11 
12 #define STRICT
#include <windows.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
17 
18 #ifdef __GNUC__
19 #define UNUSED __attribute__((unused))
20 #else
21 #define UNUSED
22 #endif
23 
/* WORKAROUND: outside Windows CE builds, alias GetProcAddressA to GetProcAddress. */
25 #ifndef UNDER_CE
26 #define GetProcAddressA GetProcAddress
27 #endif
28 
29 #if 0
30 # define MAKE_EXE
31 # define MAKE_DLL
32 # define USE_LIBICONV_DLL
33 #endif
34 
35 #if !defined(DEFAULT_LIBICONV_DLL)
36 # define DEFAULT_LIBICONV_DLL ""
37 #endif
38 
39 #define MB_CHAR_MAX 16
40 
41 #define UNICODE_MODE_BOM_DONE   1
42 #define UNICODE_MODE_SWAPPED    2
43 
44 #define FLAG_USE_BOM            1
45 #define FLAG_TRANSLIT           2 /* //TRANSLIT */
46 #define FLAG_IGNORE             4 /* //IGNORE */
47 
48 typedef unsigned char uchar;
49 typedef unsigned short ushort;
50 typedef unsigned int uint;
51 
52 typedef void* iconv_t;
53 
54 iconv_t iconv_open(const char *tocode, const char *fromcode);
55 int iconv_close(iconv_t cd);
56 size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
57 
58 /* libiconv interface for vim */
59 #if defined(MAKE_DLL)
60 int
iconvctl(iconv_t cd,int request,void * argument)61 iconvctl (iconv_t cd, int request, void* argument)
62 {
63     /* not supported */
64     return 0;
65 }
66 #endif
67 
68 typedef struct compat_t compat_t;
69 typedef struct csconv_t csconv_t;
70 typedef struct rec_iconv_t rec_iconv_t;
71 
72 typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode);
73 typedef int (*f_iconv_close)(iconv_t cd);
74 typedef size_t (*f_iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
75 typedef int* (*f_errno)(void);
76 typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
77 typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
78 typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize);
79 typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize);
80 
81 #define COMPAT_IN   1
82 #define COMPAT_OUT  2
83 
84 /* unicode mapping for compatibility with other conversion table. */
/*
 * One entry of a Unicode compatibility table.  Tables are arrays of
 * these entries terminated by an entry whose `in` is 0.
 */
struct compat_t {
    uint in;    /* code point matched when encoding (COMPAT_OUT); substituted when decoding */
    uint out;   /* code point matched when decoding (COMPAT_IN); substituted when encoding */
    uint flag;  /* bitmask of COMPAT_IN / COMPAT_OUT selecting the directions that apply */
};
90 
/*
 * Converter for one side (source or destination) of a conversion.
 * The callbacks translate between the code page's byte sequences and
 * UTF-16 units held in a ushort buffer.
 */
struct csconv_t {
    int codepage;       /* code page number resolved from the encoding name */
    int flags;          /* bitmask of FLAG_* options */
    f_mbtowc mbtowc;    /* decode one character from bytes into UTF-16 units */
    f_wctomb wctomb;    /* encode UTF-16 units into bytes */
    f_mblen mblen;      /* optional (may be NULL) */
    f_flush flush;      /* optional (may be NULL); called when iconv() is given no input */
    DWORD mode;         /* converter state; saved and restored around failed conversions */
    compat_t *compat;   /* optional compatibility table, NULL when unused */
};
101 
/*
 * Object behind the public iconv_t handle.  The public entry points
 * forward to the callbacks stored here.
 */
struct rec_iconv_t {
    iconv_t cd;                 /* handle passed to the callbacks below */
    f_iconv_close iconv_close;  /* backend close routine */
    f_iconv iconv;              /* backend conversion routine */
    f_errno _errno;             /* returns the address of the backend's errno */
    csconv_t from;              /* source-side converter */
    csconv_t to;                /* destination-side converter */
#if defined(USE_LIBICONV_DLL)
    HMODULE hlibiconv;          /* released with FreeLibrary in iconv_close when non-NULL */
#endif
};
113 
114 static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode);
115 static int win_iconv_close(iconv_t cd);
116 static size_t win_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
117 
118 static int load_mlang(void);
119 static int make_csconv(const char *name, csconv_t *cv);
120 static int name_to_codepage(const char *name);
121 static uint utf16_to_ucs4(const ushort *wbuf);
122 static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);
123 static int mbtowc_flags(int codepage);
124 static int must_use_null_useddefaultchar(int codepage);
125 static char *strrstr(const char *str, const char *token);
126 static char *xstrndup(const char *s, size_t n);
127 static int seterror(int err);
128 
129 #if defined(USE_LIBICONV_DLL)
130 static int libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode);
131 static PVOID MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size);
132 static HMODULE find_imported_module_by_funcname(HMODULE hModule, const char *funcname);
133 
134 static HMODULE hwiniconv;
135 #endif
136 
137 static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
138 static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
139 static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
140 static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);
141 static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);
142 
143 static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
144 static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
145 static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
146 static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
147 static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
148 static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
149 static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
150 static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
151 static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
152 static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
153 static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize);
154 
/*
 * Encoding-name aliases and the Windows code page number each selects.
 * The list is terminated by a {0, NULL} entry.
 *
 * Some names occur more than once (for example "iso-2022-jp" and
 * "EUC-JP"); which entry is used depends on the lookup code, which is
 * not part of this table.
 */
static struct {
    int codepage;
    const char *name;
} codepage_alias[] = {
    {65001, "CP65001"},
    {65001, "UTF8"},
    {65001, "UTF-8"},

    {1200, "CP1200"},
    {1200, "UTF16LE"},
    {1200, "UTF-16LE"},
    {1200, "UCS2LE"},
    {1200, "UCS-2LE"},

    {1201, "CP1201"},
    {1201, "UTF16BE"},
    {1201, "UTF-16BE"},
    {1201, "UCS2BE"},
    {1201, "UCS-2BE"},
    {1201, "unicodeFFFE"},

    {12000, "CP12000"},
    {12000, "UTF32LE"},
    {12000, "UTF-32LE"},
    {12000, "UCS4LE"},
    {12000, "UCS-4LE"},

    {12001, "CP12001"},
    {12001, "UTF32BE"},
    {12001, "UTF-32BE"},
    {12001, "UCS4BE"},
    {12001, "UCS-4BE"},

#ifndef GLIB_COMPILATION
    /*
     * Default is big endian.
     * See rfc2781 4.3 Interpreting text labelled as UTF-16.
     */
    {1201, "UTF16"},
    {1201, "UTF-16"},
    {1201, "UCS2"},
    {1201, "UCS-2"},
    {12001, "UTF32"},
    {12001, "UTF-32"},
    {12001, "UCS-4"},
    {12001, "UCS4"},
#else
    /* Default is little endian, because the platform is */
    {1200, "UTF16"},
    {1200, "UTF-16"},
    {1200, "UCS2"},
    {1200, "UCS-2"},
    {12000, "UTF32"},
    {12000, "UTF-32"},
    {12000, "UCS4"},
    {12000, "UCS-4"},
#endif

    /* copy from libiconv `iconv -l` */
    /* !IsValidCodePage(367) */
    {20127, "ANSI_X3.4-1968"},
    {20127, "ANSI_X3.4-1986"},
    {20127, "ASCII"},
    {20127, "CP367"},
    {20127, "IBM367"},
    {20127, "ISO-IR-6"},
    {20127, "ISO646-US"},
    {20127, "ISO_646.IRV:1991"},
    {20127, "US"},
    {20127, "US-ASCII"},
    {20127, "CSASCII"},

    /*
     * !IsValidCodePage(819)
     * CP819 and IBM819 are aliases of ISO-8859-1, so they select the
     * same code page as the other ISO-8859-1 names (not Windows-1252,
     * which differs in 0x80-0x9F).
     */
    {28591, "CP819"},
    {28591, "IBM819"},
    {28591, "ISO-8859-1"},
    {28591, "ISO-IR-100"},
    {28591, "ISO8859-1"},
    {28591, "ISO_8859-1"},
    {28591, "ISO_8859-1:1987"},
    {28591, "L1"},
    {28591, "LATIN1"},
    {28591, "CSISOLATIN1"},

    {1250, "CP1250"},
    {1250, "MS-EE"},
    {1250, "WINDOWS-1250"},

    {1251, "CP1251"},
    {1251, "MS-CYRL"},
    {1251, "WINDOWS-1251"},

    {1252, "CP1252"},
    {1252, "MS-ANSI"},
    {1252, "WINDOWS-1252"},

    {1253, "CP1253"},
    {1253, "MS-GREEK"},
    {1253, "WINDOWS-1253"},

    {1254, "CP1254"},
    {1254, "MS-TURK"},
    {1254, "WINDOWS-1254"},

    {1255, "CP1255"},
    {1255, "MS-HEBR"},
    {1255, "WINDOWS-1255"},

    {1256, "CP1256"},
    {1256, "MS-ARAB"},
    {1256, "WINDOWS-1256"},

    {1257, "CP1257"},
    {1257, "WINBALTRIM"},
    {1257, "WINDOWS-1257"},

    {1258, "CP1258"},
    {1258, "WINDOWS-1258"},

    {850, "850"},
    {850, "CP850"},
    {850, "IBM850"},
    {850, "CSPC850MULTILINGUAL"},

    /* !IsValidCodePage(862) */
    {862, "862"},
    {862, "CP862"},
    {862, "IBM862"},
    {862, "CSPC862LATINHEBREW"},

    {866, "866"},
    {866, "CP866"},
    {866, "IBM866"},
    {866, "CSIBM866"},

    /* !IsValidCodePage(154) */
    {154, "CP154"},
    {154, "CYRILLIC-ASIAN"},
    {154, "PT154"},
    {154, "PTCP154"},
    {154, "CSPTCP154"},

    /* !IsValidCodePage(1133) */
    {1133, "CP1133"},
    {1133, "IBM-CP1133"},

    {874, "CP874"},
    {874, "WINDOWS-874"},

    /* !IsValidCodePage(51932) */
    {51932, "CP51932"},
    {51932, "MS51932"},
    {51932, "WINDOWS-51932"},
    {51932, "EUC-JP"},

    {932, "CP932"},
    {932, "MS932"},
    {932, "SHIFT_JIS"},
    {932, "SHIFT_JIS-MS"},
    /* Historical misspellings, kept so existing callers keep working. */
    {932, "SHIFFT_JIS"},
    {932, "SHIFFT_JIS-MS"},
    {932, "SJIS"},
    {932, "SJIS-MS"},
    {932, "SJIS-OPEN"},
    {932, "SJIS-WIN"},
    {932, "WINDOWS-31J"},
    {932, "WINDOWS-932"},
    {932, "CSWINDOWS31J"},

    {50221, "CP50221"},
    {50221, "ISO-2022-JP"},
    {50221, "ISO-2022-JP-MS"},
    {50221, "ISO2022-JP"},
    {50221, "ISO2022-JP-MS"},
    {50221, "MS50221"},
    {50221, "WINDOWS-50221"},

    {936, "CP936"},
    {936, "GBK"},
    {936, "MS936"},
    {936, "WINDOWS-936"},

    {950, "CP950"},
    {950, "BIG5"},
    {950, "BIG5HKSCS"},
    {950, "BIG5-HKSCS"},

    {949, "CP949"},
    {949, "UHC"},
    {949, "EUC-KR"},

    {1361, "CP1361"},
    {1361, "JOHAB"},

    {437, "437"},
    {437, "CP437"},
    {437, "IBM437"},
    {437, "CSPC8CODEPAGE437"},

    {737, "CP737"},

    {775, "CP775"},
    {775, "IBM775"},
    {775, "CSPC775BALTIC"},

    {852, "852"},
    {852, "CP852"},
    {852, "IBM852"},
    {852, "CSPCP852"},

    /* !IsValidCodePage(853) */
    {853, "CP853"},

    {855, "855"},
    {855, "CP855"},
    {855, "IBM855"},
    {855, "CSIBM855"},

    {857, "857"},
    {857, "CP857"},
    {857, "IBM857"},
    {857, "CSIBM857"},

    /* !IsValidCodePage(858) */
    {858, "CP858"},

    {860, "860"},
    {860, "CP860"},
    {860, "IBM860"},
    {860, "CSIBM860"},

    {861, "861"},
    {861, "CP-IS"},
    {861, "CP861"},
    {861, "IBM861"},
    {861, "CSIBM861"},

    {863, "863"},
    {863, "CP863"},
    {863, "IBM863"},
    {863, "CSIBM863"},

    {864, "CP864"},
    {864, "IBM864"},
    {864, "CSIBM864"},

    {865, "865"},
    {865, "CP865"},
    {865, "IBM865"},
    {865, "CSIBM865"},

    {869, "869"},
    {869, "CP-GR"},
    {869, "CP869"},
    {869, "IBM869"},
    {869, "CSIBM869"},

    /* !IsValidCodePage(1125) */
    {1125, "CP1125"},

    /*
     * Code Page Identifiers
     * http://msdn2.microsoft.com/en-us/library/ms776446.aspx
     */
    {37, "IBM037"}, /* IBM EBCDIC US-Canada */
    {437, "IBM437"}, /* OEM United States */
    {500, "IBM500"}, /* IBM EBCDIC International */
    {708, "ASMO-708"}, /* Arabic (ASMO 708) */
    /* 709    Arabic (ASMO-449+, BCON V4) */
    /* 710    Arabic - Transparent Arabic */
    {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */
    {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */
    {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */
    {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */
    {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */
    {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */
    {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */
    {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */
    {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */
    {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */
    {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */
    {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */
    {864, "IBM864"}, /* OEM Arabic; Arabic (864) */
    {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */
    {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */
    {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */
    {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */
    {874, "windows-874"}, /* ANSI/OEM Thai (ISO 8859-11); Thai (Windows) */
    {875, "cp875"}, /* IBM EBCDIC Greek Modern */
    {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */
    {932, "shift-jis"}, /* alternative name for it */
    {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */
    {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */
    {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */
    {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */
    {950, "big5-hkscs"}, /* alternative name for it */
    {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */
    {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */
    {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */
    {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */
    {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */
    {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */
    {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */
    {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */
    {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */
    {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */
    {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */
    {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */
    {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */
    {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */
    {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */
    {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */
    {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */
    {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */
    {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */
    {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */
    {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */
    {1361, "Johab"}, /* Korean (Johab) */
    {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */
    {10001, "x-mac-japanese"}, /* Japanese (Mac) */
    {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */
    {10003, "x-mac-korean"}, /* Korean (Mac) */
    {10004, "x-mac-arabic"}, /* Arabic (Mac) */
    {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */
    {10006, "x-mac-greek"}, /* Greek (Mac) */
    {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */
    {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */
    {10010, "x-mac-romanian"}, /* Romanian (Mac) */
    {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */
    {10021, "x-mac-thai"}, /* Thai (Mac) */
    {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */
    {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */
    {10081, "x-mac-turkish"}, /* Turkish (Mac) */
    {10082, "x-mac-croatian"}, /* Croatian (Mac) */
    {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */
    {20001, "x-cp20001"}, /* TCA Taiwan */
    {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */
    {20003, "x-cp20003"}, /* IBM5550 Taiwan */
    {20004, "x-cp20004"}, /* TeleText Taiwan */
    {20005, "x-cp20005"}, /* Wang Taiwan */
    {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */
    {20106, "x-IA5-German"}, /* IA5 German (7-bit) */
    {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */
    {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */
    {20127, "us-ascii"}, /* US-ASCII (7-bit) */
    {20261, "x-cp20261"}, /* T.61 */
    {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */
    {20273, "IBM273"}, /* IBM EBCDIC Germany */
    {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */
    {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */
    {20280, "IBM280"}, /* IBM EBCDIC Italy */
    {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */
    {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */
    {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */
    {20297, "IBM297"}, /* IBM EBCDIC France */
    {20420, "IBM420"}, /* IBM EBCDIC Arabic */
    {20423, "IBM423"}, /* IBM EBCDIC Greek */
    {20424, "IBM424"}, /* IBM EBCDIC Hebrew */
    {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */
    {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */
    {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */
    {20871, "IBM871"}, /* IBM EBCDIC Icelandic */
    {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */
    {20905, "IBM905"}, /* IBM EBCDIC Turkish */
    {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */
    {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0212-1990) */
    {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */
    {20949, "x-cp20949"}, /* Korean Wansung */
    {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */
    /* 21027  (deprecated) */
    {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */
    {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
    {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
    {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
    {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
    {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */
    {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */
    {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */
    {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */
    {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */
    {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */
    {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */
    {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */
    {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */
    {28597, "iso8859-7"}, /* ISO 8859-7 Greek */
    {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
    {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
    {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */
    {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */
    {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */
    {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */
    {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */
    {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */
    {29001, "x-Europa"}, /* Europa 3 */
    {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
    {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
    {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */
    {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */
    {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */
    {50225, "iso-2022-kr"}, /* ISO 2022 Korean */
    {50225, "iso2022-kr"}, /* ISO 2022 Korean */
    {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */
    /* 50229  ISO 2022 Traditional Chinese */
    /* 50930  EBCDIC Japanese (Katakana) Extended */
    /* 50931  EBCDIC US-Canada and Japanese */
    /* 50933  EBCDIC Korean Extended and Korean */
    /* 50935  EBCDIC Simplified Chinese Extended and Simplified Chinese */
    /* 50936  EBCDIC Simplified Chinese */
    /* 50937  EBCDIC US-Canada and Traditional Chinese */
    /* 50939  EBCDIC Japanese (Latin) Extended and Japanese */
    {51932, "euc-jp"}, /* EUC Japanese */
    {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */
    {51949, "euc-kr"}, /* EUC Korean */
    /* 51950  EUC Traditional Chinese */
    {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */
    {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */
    {57002, "x-iscii-de"}, /* ISCII Devanagari */
    {57003, "x-iscii-be"}, /* ISCII Bengali */
    {57004, "x-iscii-ta"}, /* ISCII Tamil */
    {57005, "x-iscii-te"}, /* ISCII Telugu */
    {57006, "x-iscii-as"}, /* ISCII Assamese */
    {57007, "x-iscii-or"}, /* ISCII Oriya */
    {57008, "x-iscii-ka"}, /* ISCII Kannada */
    {57009, "x-iscii-ma"}, /* ISCII Malayalam */
    {57010, "x-iscii-gu"}, /* ISCII Gujarati */
    {57011, "x-iscii-pa"}, /* ISCII Punjabi */

    {0, NULL}
};
582 
583 /*
584  * SJIS SHIFTJIS table              CP932 table
585  * ---- --------------------------- --------------------------------
586  *   5C U+00A5 YEN SIGN             U+005C REVERSE SOLIDUS
587  *   7E U+203E OVERLINE             U+007E TILDE
588  * 815C U+2014 EM DASH              U+2015 HORIZONTAL BAR
589  * 815F U+005C REVERSE SOLIDUS      U+FF3C FULLWIDTH REVERSE SOLIDUS
590  * 8160 U+301C WAVE DASH            U+FF5E FULLWIDTH TILDE
591  * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO
592  * 817C U+2212 MINUS SIGN           U+FF0D FULLWIDTH HYPHEN-MINUS
593  * 8191 U+00A2 CENT SIGN            U+FFE0 FULLWIDTH CENT SIGN
594  * 8192 U+00A3 POUND SIGN           U+FFE1 FULLWIDTH POUND SIGN
595  * 81CA U+00AC NOT SIGN             U+FFE2 FULLWIDTH NOT SIGN
596  *
597  * EUC-JP and ISO-2022-JP should be compatible with CP932.
598  *
599  * Kernel and MLang have different Unicode mapping table.  Make sure
600  * which API is used.
601  */
/*
 * Compatibility table: each entry is {in, out, flag}.  All entries are
 * COMPAT_OUT only, i.e. `in` is replaced by `out` before encoding and
 * nothing is remapped when decoding.  Terminated by an all-zero entry.
 */
static compat_t cp932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},   /* YEN SIGN -> REVERSE SOLIDUS */
    {0x203E, 0x007E, COMPAT_OUT},   /* OVERLINE -> TILDE */
    {0x2014, 0x2015, COMPAT_OUT},   /* EM DASH -> HORIZONTAL BAR */
    {0x301C, 0xFF5E, COMPAT_OUT},   /* WAVE DASH -> FULLWIDTH TILDE */
    {0x2016, 0x2225, COMPAT_OUT},   /* DOUBLE VERTICAL LINE -> PARALLEL TO */
    {0x2212, 0xFF0D, COMPAT_OUT},   /* MINUS SIGN -> FULLWIDTH HYPHEN-MINUS */
    {0x00A2, 0xFFE0, COMPAT_OUT},   /* CENT SIGN -> FULLWIDTH CENT SIGN */
    {0x00A3, 0xFFE1, COMPAT_OUT},   /* POUND SIGN -> FULLWIDTH POUND SIGN */
    {0x00AC, 0xFFE2, COMPAT_OUT},   /* NOT SIGN -> FULLWIDTH NOT SIGN */
    {0, 0, 0}
};
614 
/*
 * Compatibility table: each entry is {in, out, flag}.  The first three
 * entries match cp932_compat.  The remaining six have `in` and `out`
 * swapped relative to cp932_compat and apply in both directions
 * (COMPAT_OUT|COMPAT_IN).  Terminated by an all-zero entry.
 */
static compat_t cp20932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},
    {0x203E, 0x007E, COMPAT_OUT},
    {0x2014, 0x2015, COMPAT_OUT},
    {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN},
    {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN},
    {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN},
    {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN},
    {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN},
    {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN},
    {0, 0, 0}
};
627 
/*
 * Compatibility table selected for code page 51932 (going by the name;
 * the code that attaches it to a converter is not in view -- confirm).
 */
static compat_t *cp51932_compat = cp932_compat;

/*
 * Table for the 5022x code pages.  Which table is correct depends on the
 * API doing the conversion: cp20932_compat for kernel, cp932_compat for
 * mlang.  Currently set to the mlang variant.
 */
static compat_t *cp5022x_compat = cp932_compat;
632 
633 typedef HRESULT (WINAPI *CONVERTINETSTRING)(
634     LPDWORD lpdwMode,
635     DWORD dwSrcEncoding,
636     DWORD dwDstEncoding,
637     LPCSTR lpSrcStr,
638     LPINT lpnSrcSize,
639     LPBYTE lpDstStr,
640     LPINT lpnDstSize
641 );
642 typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)(
643     LPDWORD lpdwMode,
644     DWORD dwSrcEncoding,
645     LPCSTR lpSrcStr,
646     LPINT lpnMultiCharCount,
647     LPWSTR lpDstStr,
648     LPINT lpnWideCharCount
649 );
650 typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)(
651     LPDWORD lpdwMode,
652     DWORD dwEncoding,
653     LPCWSTR lpSrcStr,
654     LPINT lpnWideCharCount,
655     LPSTR lpDstStr,
656     LPINT lpnMultiCharCount
657 );
658 typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)(
659     DWORD dwSrcEncoding,
660     DWORD dwDstEncoding
661 );
662 typedef HRESULT (WINAPI *LCIDTORFC1766A)(
663     LCID Locale,
664     LPSTR pszRfc1766,
665     int nChar
666 );
667 typedef HRESULT (WINAPI *LCIDTORFC1766W)(
668     LCID Locale,
669     LPWSTR pszRfc1766,
670     int nChar
671 );
672 typedef HRESULT (WINAPI *RFC1766TOLCIDA)(
673     LCID *pLocale,
674     LPSTR pszRfc1766
675 );
676 typedef HRESULT (WINAPI *RFC1766TOLCIDW)(
677     LCID *pLocale,
678     LPWSTR pszRfc1766
679 );
/*
 * Entry points of mlang.dll, resolved at run time by load_mlang().
 * They have static storage and are therefore NULL until resolved;
 * load_mlang() uses a non-NULL ConvertINetString as its "already
 * loaded" marker.
 */
static CONVERTINETSTRING ConvertINetString;
static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode;
static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte;
static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable;
static LCIDTORFC1766A LcidToRfc1766A;
static RFC1766TOLCIDA Rfc1766ToLcidA;
686 
687 static int
load_mlang(void)688 load_mlang(void)
689 {
690     HMODULE h;
691     if (ConvertINetString != NULL)
692         return TRUE;
693     h = LoadLibrary(TEXT("mlang.dll"));
694     if (!h)
695         return FALSE;
696     ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString");
697     ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode");
698     ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte");
699     IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable");
700     LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A");
701     Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA");
702     return TRUE;
703 }
704 
705 iconv_t
iconv_open(const char * tocode,const char * fromcode)706 iconv_open(const char *tocode, const char *fromcode)
707 {
708     rec_iconv_t *cd;
709 
710     cd = (rec_iconv_t *)calloc(1, sizeof(rec_iconv_t));
711     if (cd == NULL)
712         return (iconv_t)(-1);
713 
714 #if defined(USE_LIBICONV_DLL)
715     errno = 0;
716     if (libiconv_iconv_open(cd, tocode, fromcode))
717         return (iconv_t)cd;
718 #endif
719 
720     /* reset the errno to prevent reporting wrong error code.
721      * 0 for unsorted error. */
722     errno = 0;
723     if (win_iconv_open(cd, tocode, fromcode))
724         return (iconv_t)cd;
725 
726     free(cd);
727 
728     return (iconv_t)(-1);
729 }
730 
731 int
iconv_close(iconv_t _cd)732 iconv_close(iconv_t _cd)
733 {
734     rec_iconv_t *cd = (rec_iconv_t *)_cd;
735     int r = cd->iconv_close(cd->cd);
736     int e = *(cd->_errno());
737 #if defined(USE_LIBICONV_DLL)
738     if (cd->hlibiconv != NULL)
739         FreeLibrary(cd->hlibiconv);
740 #endif
741     free(cd);
742     errno = e;
743     return r;
744 }
745 
746 size_t
iconv(iconv_t _cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)747 iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
748 {
749     rec_iconv_t *cd = (rec_iconv_t *)_cd;
750     size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft);
751     errno = *(cd->_errno());
752     return r;
753 }
754 
755 static int
win_iconv_open(rec_iconv_t * cd,const char * tocode,const char * fromcode)756 win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode)
757 {
758     if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to))
759         return FALSE;
760     cd->iconv_close = win_iconv_close;
761     cd->iconv = win_iconv;
762     cd->_errno = _errno;
763     cd->cd = (iconv_t)cd;
764     return TRUE;
765 }
766 
767 static int
win_iconv_close(iconv_t cd UNUSED)768 win_iconv_close(iconv_t cd UNUSED)
769 {
770     return 0;
771 }
772 
773 static size_t
win_iconv(iconv_t _cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)774 win_iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
775 {
776     rec_iconv_t *cd = (rec_iconv_t *)_cd;
777     ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */
778     int insize;
779     int outsize;
780     int wsize;
781     DWORD frommode;
782     DWORD tomode;
783     uint wc;
784     compat_t *cp;
785     int i;
786 
787     if (inbuf == NULL || *inbuf == NULL)
788     {
789         if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL)
790         {
791             tomode = cd->to.mode;
792             outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, (int)*outbytesleft);
793             if (outsize == -1)
794             {
795                 if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
796                 {
797                     outsize = 0;
798                 }
799                 else
800                 {
801                     cd->to.mode = tomode;
802                     return (size_t)(-1);
803                 }
804             }
805             *outbuf += outsize;
806             *outbytesleft -= outsize;
807         }
808         cd->from.mode = 0;
809         cd->to.mode = 0;
810         return 0;
811     }
812 
813     while (*inbytesleft != 0)
814     {
815         frommode = cd->from.mode;
816         tomode = cd->to.mode;
817         wsize = MB_CHAR_MAX;
818 
819         insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, (int)*inbytesleft, wbuf, &wsize);
820         if (insize == -1)
821         {
822             if (cd->to.flags & FLAG_IGNORE)
823             {
824                 cd->from.mode = frommode;
825                 insize = 1;
826                 wsize = 0;
827             }
828             else
829             {
830                 cd->from.mode = frommode;
831                 return (size_t)(-1);
832             }
833         }
834 
835         if (wsize == 0)
836         {
837             *inbuf += insize;
838             *inbytesleft -= insize;
839             continue;
840         }
841 
842         if (cd->from.compat != NULL)
843         {
844             wc = utf16_to_ucs4(wbuf);
845             cp = cd->from.compat;
846             for (i = 0; cp[i].in != 0; ++i)
847             {
848                 if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc)
849                 {
850                     ucs4_to_utf16(cp[i].in, wbuf, &wsize);
851                     break;
852                 }
853             }
854         }
855 
856         if (cd->to.compat != NULL)
857         {
858             wc = utf16_to_ucs4(wbuf);
859             cp = cd->to.compat;
860             for (i = 0; cp[i].in != 0; ++i)
861             {
862                 if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc)
863                 {
864                     ucs4_to_utf16(cp[i].out, wbuf, &wsize);
865                     break;
866                 }
867             }
868         }
869 
870         outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, (int)*outbytesleft);
871         if (outsize == -1)
872         {
873             if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
874             {
875                 cd->to.mode = tomode;
876                 outsize = 0;
877             }
878             else
879             {
880                 cd->from.mode = frommode;
881                 cd->to.mode = tomode;
882                 return (size_t)(-1);
883             }
884         }
885 
886         *inbuf += insize;
887         *outbuf += outsize;
888         *inbytesleft -= insize;
889         *outbytesleft -= outsize;
890     }
891 
892     return 0;
893 }
894 
895 static int
make_csconv(const char * _name,csconv_t * cv)896 make_csconv(const char *_name, csconv_t *cv)
897 {
898     CPINFO cpinfo;
899     int use_compat = TRUE;
900     int flag = 0;
901     char *name;
902     char *p;
903 
904     name = xstrndup(_name, strlen(_name));
905     if (name == NULL)
906         return FALSE;
907 
908     /* check for option "enc_name//opt1//opt2" */
909     while ((p = strrstr(name, "//")) != NULL)
910     {
911         if (_stricmp(p + 2, "nocompat") == 0)
912             use_compat = FALSE;
913         else if (_stricmp(p + 2, "translit") == 0)
914             flag |= FLAG_TRANSLIT;
915         else if (_stricmp(p + 2, "ignore") == 0)
916             flag |= FLAG_IGNORE;
917         *p = 0;
918     }
919 
920     cv->mode = 0;
921     cv->flags = flag;
922     cv->mblen = NULL;
923     cv->flush = NULL;
924     cv->compat = NULL;
925     cv->codepage = name_to_codepage(name);
926     if (cv->codepage == 1200 || cv->codepage == 1201)
927     {
928         cv->mbtowc = utf16_mbtowc;
929         cv->wctomb = utf16_wctomb;
930         if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 ||
931           _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0)
932             cv->flags |= FLAG_USE_BOM;
933     }
934     else if (cv->codepage == 12000 || cv->codepage == 12001)
935     {
936         cv->mbtowc = utf32_mbtowc;
937         cv->wctomb = utf32_wctomb;
938         if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 ||
939           _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0)
940             cv->flags |= FLAG_USE_BOM;
941     }
942     else if (cv->codepage == 65001)
943     {
944         cv->mbtowc = kernel_mbtowc;
945         cv->wctomb = kernel_wctomb;
946         cv->mblen = utf8_mblen;
947     }
948     else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang())
949     {
950         cv->mbtowc = iso2022jp_mbtowc;
951         cv->wctomb = iso2022jp_wctomb;
952         cv->flush = iso2022jp_flush;
953     }
954     else if (cv->codepage == 51932 && load_mlang())
955     {
956         cv->mbtowc = mlang_mbtowc;
957         cv->wctomb = mlang_wctomb;
958         cv->mblen = eucjp_mblen;
959     }
960     else if (IsValidCodePage(cv->codepage)
961 	     && GetCPInfo(cv->codepage, &cpinfo) != 0)
962     {
963         cv->mbtowc = kernel_mbtowc;
964         cv->wctomb = kernel_wctomb;
965         if (cpinfo.MaxCharSize == 1)
966             cv->mblen = sbcs_mblen;
967         else if (cpinfo.MaxCharSize == 2)
968             cv->mblen = dbcs_mblen;
969         else
970 	    cv->mblen = mbcs_mblen;
971     }
972     else
973     {
974         /* not supported */
975         free(name);
976         errno = EINVAL;
977         return FALSE;
978     }
979 
980     if (use_compat)
981     {
982         switch (cv->codepage)
983         {
984         case 932: cv->compat = cp932_compat; break;
985         case 20932: cv->compat = cp20932_compat; break;
986         case 51932: cv->compat = cp51932_compat; break;
987         case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break;
988         }
989     }
990 
991     free(name);
992 
993     return TRUE;
994 }
995 
996 static int
name_to_codepage(const char * name)997 name_to_codepage(const char *name)
998 {
999     int i;
1000 
1001     if (*name == '\0' ||
1002 	strcmp(name, "char") == 0)
1003         return GetACP();
1004     else if (strcmp(name, "wchar_t") == 0)
1005         return 1200;
1006     else if (_strnicmp(name, "cp", 2) == 0)
1007         return atoi(name + 2); /* CP123 */
1008     else if ('0' <= name[0] && name[0] <= '9')
1009         return atoi(name);     /* 123 */
1010     else if (_strnicmp(name, "xx", 2) == 0)
1011         return atoi(name + 2); /* XX123 for debug */
1012 
1013     for (i = 0; codepage_alias[i].name != NULL; ++i)
1014         if (_stricmp(name, codepage_alias[i].name) == 0)
1015             return codepage_alias[i].codepage;
1016     return -1;
1017 }
1018 
1019 /*
1020  * http://www.faqs.org/rfcs/rfc2781.html
1021  */
1022 static uint
utf16_to_ucs4(const ushort * wbuf)1023 utf16_to_ucs4(const ushort *wbuf)
1024 {
1025     uint wc = wbuf[0];
1026     if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1027         wc = ((wbuf[0] & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000;
1028     return wc;
1029 }
1030 
1031 static void
ucs4_to_utf16(uint wc,ushort * wbuf,int * wbufsize)1032 ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize)
1033 {
1034     if (wc < 0x10000)
1035     {
1036         wbuf[0] = wc;
1037         *wbufsize = 1;
1038     }
1039     else
1040     {
1041         wc -= 0x10000;
1042         wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF);
1043         wbuf[1] = 0xDC00 | (wc & 0x3FF);
1044         *wbufsize = 2;
1045     }
1046 }
1047 
1048 /*
1049  * Check if codepage is one of those for which the dwFlags parameter
1050  * to MultiByteToWideChar() must be zero. Return zero or
1051  * MB_ERR_INVALID_CHARS.  The docs in Platform SDK for for Windows
1052  * Server 2003 R2 claims that also codepage 65001 is one of these, but
1053  * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave
1054  * out 65001 (UTF-8), and that indeed seems to be the case on XP, it
1055  * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting
1056  * from UTF-8.
1057  */
1058 static int
mbtowc_flags(int codepage)1059 mbtowc_flags(int codepage)
1060 {
1061     return (codepage == 50220 || codepage == 50221 ||
1062 	    codepage == 50222 || codepage == 50225 ||
1063 	    codepage == 50227 || codepage == 50229 ||
1064 	    codepage == 52936 || codepage == 54936 ||
1065 	    (codepage >= 57002 && codepage <= 57011) ||
1066 	    codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS;
1067 }
1068 
/*
 * Return non-zero if codepage is one of those for which the
 * lpUsedDefaultChar parameter of WideCharToMultiByte() must be NULL.
 *
 * The Platform SDK docs for Windows Server 2003 R2 give the list used
 * below, while the MSDN docs for MSVS2008 claim that only 65000 (UTF-7)
 * and 65001 (UTF-8) are affected.  This time the earlier Platform SDK
 * seems to be correct, at least for XP.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    if (codepage >= 57002 && codepage <= 57011)
        return 1;

    switch (codepage)
    {
    case 65000:
    case 65001:
    case 50220:
    case 50221:
    case 50222:
    case 50225:
    case 50227:
    case 50229:
    case 52936:
    case 54936:
    case 42:
        return 1;
    default:
        return 0;
    }
}
1088 
/*
 * Find the last occurrence of token in str.
 *
 * Returns a pointer to the start of the match, or NULL when there is
 * none.  An empty token never matches.  The scan is index-based so that
 * no pointer before the start of str is ever formed, which a backwards
 * pointer walk would do for an empty str.
 */
static char *
strrstr(const char *str, const char *token)
{
    size_t toklen = strlen(token);
    size_t slen = strlen(str);
    size_t i;

    if (toklen == 0 || toklen > slen)
        return NULL;

    /* try every start position from the last possible one down to 0 */
    for (i = slen - toklen + 1; i-- > 0; )
        if (memcmp(str + i, token, toklen) == 0)
            return (char *)(str + i);
    return NULL;
}
1100 
/*
 * Return a freshly malloc()ed, NUL-terminated copy of the first n bytes
 * of s, or NULL when the allocation fails.  Exactly n bytes are copied,
 * so s must be at least n bytes long.  The caller frees the result.
 */
static char *
xstrndup(const char *s, size_t n)
{
    char *copy = (char *)malloc(n + 1);

    if (copy != NULL)
    {
        memcpy(copy, s, n);
        copy[n] = '\0';
    }
    return copy;
}
1113 
/*
 * Store err in errno and return -1, so a failing function can write
 * "return seterror(code);".
 */
static int
seterror(int err)
{
    return (errno = err, -1);
}
1120 
1121 #if defined(USE_LIBICONV_DLL)
/*
 * Try to open a conversion through an external libiconv DLL.
 *
 * The DLL candidates come from the environment variable
 * WINICONV_LIBICONV_DLL, or from DEFAULT_LIBICONV_DLL when the variable
 * is not set, as a comma separated list.  The first candidate that loads
 * and is not the same module as hwiniconv is used.
 *
 * On success the resolved entry points, the conversion descriptor and the
 * library handle are stored in cd and TRUE is returned.  On failure the
 * library (if any) is released and FALSE is returned; note that
 * cd->iconv_close, cd->iconv and cd->_errno may already have been
 * overwritten at that point.
 */
static int
libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode)
{
    HMODULE hlibiconv = NULL;
    HMODULE hmsvcrt = NULL;
    char *dllname;
    const char *p;
    const char *e;
    f_iconv_open _iconv_open;

    /*
     * always try to load dll, so that we can switch dll in runtime.
     */

    /* XXX: getenv() can't get variable set by SetEnvironmentVariable() */
    p = getenv("WINICONV_LIBICONV_DLL");
    if (p == NULL)
        p = DEFAULT_LIBICONV_DLL;
    /* parse comma separated value */
    /* p is the start of the current item, e its end (a ',' or the NUL);
     * the step expression skips over the ',' when there is one */
    for ( ; *p != 0; p = (*e == ',') ? e + 1 : e)
    {
        e = strchr(p, ',');
        if (p == e)
            continue;           /* empty item */
        else if (e == NULL)
            e = p + strlen(p);  /* last item: runs to the end of the string */
        dllname = xstrndup(p, e - p);
        if (dllname == NULL)
            return FALSE;
        hlibiconv = LoadLibraryA(dllname);
        free(dllname);
        if (hlibiconv != NULL)
        {
            /* presumably hwiniconv is this module's own handle, so this
             * avoids delegating to ourselves -- TODO confirm */
            if (hlibiconv == hwiniconv)
            {
                FreeLibrary(hlibiconv);
                hlibiconv = NULL;
                continue;
            }
            break;
        }
    }

    if (hlibiconv == NULL)
        goto failed;

    /* the module from which the DLL imports _errno supplies the errno
     * accessor stored in cd->_errno below */
    hmsvcrt = find_imported_module_by_funcname(hlibiconv, "_errno");
    if (hmsvcrt == NULL)
        goto failed;

    /* each entry point is looked up under its "lib"-prefixed name first,
     * then under the plain name */
    _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "libiconv_open");
    if (_iconv_open == NULL)
        _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "iconv_open");
    cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "libiconv_close");
    if (cd->iconv_close == NULL)
        cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "iconv_close");
    cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "libiconv");
    if (cd->iconv == NULL)
        cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "iconv");
    cd->_errno = (f_errno)GetProcAddressA(hmsvcrt, "_errno");
    if (_iconv_open == NULL || cd->iconv_close == NULL
            || cd->iconv == NULL || cd->_errno == NULL)
        goto failed;

    cd->cd = _iconv_open(tocode, fromcode);
    if (cd->cd == (iconv_t)(-1))
        goto failed;

    cd->hlibiconv = hlibiconv;
    return TRUE;

failed:
    if (hlibiconv != NULL)
        FreeLibrary(hlibiconv);
    /* do not free hmsvcrt which is obtained by GetModuleHandle() */
    return FALSE;
}
1199 
1200 /*
1201  * Reference:
1202  * http://forums.belution.com/ja/vc/000/234/78s.shtml
1203  * http://nienie.com/~masapico/api_ImageDirectoryEntryToData.html
1204  *
1205  * The formal way is
1206  *   imagehlp.h or dbghelp.h
1207  *   imagehlp.lib or dbghelp.lib
1208  *   ImageDirectoryEntryToData()
1209  */
1210 #define TO_DOS_HEADER(base) ((PIMAGE_DOS_HEADER)(base))
1211 #define TO_NT_HEADERS(base) ((PIMAGE_NT_HEADERS)((LPBYTE)(base) + TO_DOS_HEADER(base)->e_lfanew))
1212 static PVOID
MyImageDirectoryEntryToData(LPVOID Base,BOOLEAN MappedAsImage,USHORT DirectoryEntry,PULONG Size)1213 MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size)
1214 {
1215     /* TODO: MappedAsImage? */
1216     PIMAGE_DATA_DIRECTORY p;
1217     p = TO_NT_HEADERS(Base)->OptionalHeader.DataDirectory + DirectoryEntry;
1218     if (p->VirtualAddress == 0) {
1219       *Size = 0;
1220       return NULL;
1221     }
1222     *Size = p->Size;
1223     return (PVOID)((LPBYTE)Base + p->VirtualAddress);
1224 }
1225 
/*
 * Find which module hModule imports the function funcname from.
 *
 * Walks the import directory of the loaded image hModule and, for every
 * import that is made by name, compares that name with funcname.  On the
 * first match the handle of the exporting module is obtained with
 * GetModuleHandleA() and returned.  Returns NULL when the image has no
 * import directory or no import by that name.
 */
static HMODULE
find_imported_module_by_funcname(HMODULE hModule, const char *funcname)
{
    DWORD_PTR Base;
    ULONG Size;
    PIMAGE_IMPORT_DESCRIPTOR Imp;
    PIMAGE_THUNK_DATA Name;         /* Import Name Table */
    PIMAGE_IMPORT_BY_NAME ImpName;

    /* the module handle is used as the base address for all offsets */
    Base = (DWORD_PTR)hModule;
    Imp = (PIMAGE_IMPORT_DESCRIPTOR)MyImageDirectoryEntryToData(
            (LPVOID)Base,
            TRUE,
            IMAGE_DIRECTORY_ENTRY_IMPORT,
            &Size);
    if (Imp == NULL)
        return NULL;
    /* one descriptor per imported module; a zero OriginalFirstThunk
     * ends the list */
    for ( ; Imp->OriginalFirstThunk != 0; ++Imp)
    {
        Name = (PIMAGE_THUNK_DATA)(Base + Imp->OriginalFirstThunk);
        /* one thunk per imported symbol; a zero entry ends the list */
        for ( ; Name->u1.Ordinal != 0; ++Name)
        {
            /* imports by ordinal carry no name to compare */
            if (!IMAGE_SNAP_BY_ORDINAL(Name->u1.Ordinal))
            {
                ImpName = (PIMAGE_IMPORT_BY_NAME)
                    (Base + (DWORD_PTR)Name->u1.AddressOfData);
                if (strcmp((char *)ImpName->Name, funcname) == 0)
                    return GetModuleHandleA((char *)(Base + Imp->Name));
            }
        }
    }
    return NULL;
}
1259 #endif
1260 
/*
 * mblen callback for single-byte codepages: every character is exactly
 * one byte long, so none of the arguments are inspected.
 */
static int
sbcs_mblen(csconv_t *cv UNUSED, const uchar *buf UNUSED, int bufsize UNUSED)
{
    return 1;
}
1266 
1267 static int
dbcs_mblen(csconv_t * cv,const uchar * buf,int bufsize)1268 dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1269 {
1270     int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1;
1271     if (bufsize < len)
1272         return seterror(EINVAL);
1273     return len;
1274 }
1275 
1276 static int
mbcs_mblen(csconv_t * cv,const uchar * buf,int bufsize)1277 mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1278 {
1279     int len = 0;
1280 
1281     if (cv->codepage == 54936) {
1282 	if (buf[0] <= 0x7F) len = 1;
1283 	else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1284 		 bufsize >= 2 &&
1285 		 ((buf[1] >= 0x40 && buf[1] <= 0x7E) ||
1286 		  (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2;
1287 	else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1288 		 bufsize >= 4 &&
1289 		 buf[1] >= 0x30 && buf[1] <= 0x39) len = 4;
1290 	else
1291 	    return seterror(EINVAL);
1292 	return len;
1293     }
1294     else
1295 	return seterror(EINVAL);
1296 }
1297 
1298 static int
utf8_mblen(csconv_t * cv UNUSED,const uchar * buf,int bufsize)1299 utf8_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize)
1300 {
1301     int len = 0;
1302 
1303     if (buf[0] < 0x80) len = 1;
1304     else if ((buf[0] & 0xE0) == 0xC0) len = 2;
1305     else if ((buf[0] & 0xF0) == 0xE0) len = 3;
1306     else if ((buf[0] & 0xF8) == 0xF0) len = 4;
1307     else if ((buf[0] & 0xFC) == 0xF8) len = 5;
1308     else if ((buf[0] & 0xFE) == 0xFC) len = 6;
1309 
1310     if (len == 0)
1311         return seterror(EILSEQ);
1312     else if (bufsize < len)
1313         return seterror(EINVAL);
1314     return len;
1315 }
1316 
1317 static int
eucjp_mblen(csconv_t * cv UNUSED,const uchar * buf,int bufsize)1318 eucjp_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize)
1319 {
1320     if (buf[0] < 0x80) /* ASCII */
1321         return 1;
1322     else if (buf[0] == 0x8E) /* JIS X 0201 */
1323     {
1324         if (bufsize < 2)
1325             return seterror(EINVAL);
1326         else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF))
1327             return seterror(EILSEQ);
1328         return 2;
1329     }
1330     else if (buf[0] == 0x8F) /* JIS X 0212 */
1331     {
1332         if (bufsize < 3)
1333             return seterror(EINVAL);
1334         else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE)
1335                 || !(0xA1 <= buf[2] && buf[2] <= 0xFE))
1336             return seterror(EILSEQ);
1337         return 3;
1338     }
1339     else /* JIS X 0208 */
1340     {
1341         if (bufsize < 2)
1342             return seterror(EINVAL);
1343         else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE)
1344                 || !(0xA1 <= buf[1] && buf[1] <= 0xFE))
1345             return seterror(EILSEQ);
1346         return 2;
1347     }
1348 }
1349 
1350 static int
kernel_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1351 kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1352 {
1353     int len;
1354 
1355     len = cv->mblen(cv, buf, bufsize);
1356     if (len == -1)
1357         return -1;
1358     *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),
1359             (const char *)buf, len, (wchar_t *)wbuf, *wbufsize);
1360     if (*wbufsize == 0)
1361         return seterror(EILSEQ);
1362     return len;
1363 }
1364 
1365 static int
kernel_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1366 kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1367 {
1368     BOOL usedDefaultChar = 0;
1369     BOOL *p = NULL;
1370     int flags = 0;
1371     int len;
1372 
1373     if (bufsize == 0)
1374         return seterror(E2BIG);
1375     if (!must_use_null_useddefaultchar(cv->codepage))
1376     {
1377         p = &usedDefaultChar;
1378 #ifdef WC_NO_BEST_FIT_CHARS
1379         if (!(cv->flags & FLAG_TRANSLIT))
1380             flags |= WC_NO_BEST_FIT_CHARS;
1381 #endif
1382     }
1383     len = WideCharToMultiByte(cv->codepage, flags,
1384             (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p);
1385     if (len == 0)
1386     {
1387         if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
1388             return seterror(E2BIG);
1389         return seterror(EILSEQ);
1390     }
1391     else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT))
1392         return seterror(EILSEQ);
1393     else if (cv->mblen(cv, buf, len) != len) /* validate result */
1394         return seterror(EILSEQ);
1395     return len;
1396 }
1397 
1398 /*
1399  * It seems that the mode (cv->mode) is fixnum.
1400  * For example, when converting iso-2022-jp(cp50221) to unicode:
1401  *      in ascii sequence: mode=0xC42C0000
1402  *   in jisx0208 sequence: mode=0xC42C0001
1403  * "C42C" is same for each convert session.
1404  * It should be: ((codepage-1)<<16)|state
1405  */
1406 static int
mlang_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1407 mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1408 {
1409     int len;
1410     int insize;
1411     HRESULT hr;
1412 
1413     len = cv->mblen(cv, buf, bufsize);
1414     if (len == -1)
1415         return -1;
1416     insize = len;
1417     hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage,
1418             (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize);
1419     if (hr != S_OK || insize != len)
1420         return seterror(EILSEQ);
1421     return len;
1422 }
1423 
1424 static int
mlang_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1425 mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1426 {
1427     char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */
1428     int tmpsize = MB_CHAR_MAX;
1429     int insize = wbufsize;
1430     HRESULT hr;
1431 
1432     hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage,
1433             (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize);
1434     if (hr != S_OK || insize != wbufsize)
1435         return seterror(EILSEQ);
1436     else if (bufsize < tmpsize)
1437         return seterror(E2BIG);
1438     else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize)
1439         return seterror(EILSEQ);
1440     memcpy(buf, tmpbuf, tmpsize);
1441     return tmpsize;
1442 }
1443 
1444 static int
utf16_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1445 utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1446 {
1447     int codepage = cv->codepage;
1448 
1449     /* swap endian: 1200 <-> 1201 */
1450     if (cv->mode & UNICODE_MODE_SWAPPED)
1451         codepage ^= 1;
1452 
1453     if (bufsize < 2)
1454         return seterror(EINVAL);
1455     if (codepage == 1200) /* little endian */
1456         wbuf[0] = (buf[1] << 8) | buf[0];
1457     else if (codepage == 1201) /* big endian */
1458         wbuf[0] = (buf[0] << 8) | buf[1];
1459 
1460     if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1461     {
1462         cv->mode |= UNICODE_MODE_BOM_DONE;
1463         if (wbuf[0] == 0xFFFE)
1464         {
1465             cv->mode |= UNICODE_MODE_SWAPPED;
1466             *wbufsize = 0;
1467             return 2;
1468         }
1469         else if (wbuf[0] == 0xFEFF)
1470         {
1471             *wbufsize = 0;
1472             return 2;
1473         }
1474     }
1475 
1476     if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF)
1477         return seterror(EILSEQ);
1478     if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1479     {
1480         if (bufsize < 4)
1481             return seterror(EINVAL);
1482         if (codepage == 1200) /* little endian */
1483             wbuf[1] = (buf[3] << 8) | buf[2];
1484         else if (codepage == 1201) /* big endian */
1485             wbuf[1] = (buf[2] << 8) | buf[3];
1486         if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF))
1487             return seterror(EILSEQ);
1488         *wbufsize = 2;
1489         return 4;
1490     }
1491     *wbufsize = 1;
1492     return 2;
1493 }
1494 
1495 static int
utf16_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1496 utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1497 {
1498     if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1499     {
1500         int r;
1501 
1502         cv->mode |= UNICODE_MODE_BOM_DONE;
1503         if (bufsize < 2)
1504             return seterror(E2BIG);
1505         if (cv->codepage == 1200) /* little endian */
1506             memcpy(buf, "\xFF\xFE", 2);
1507         else if (cv->codepage == 1201) /* big endian */
1508             memcpy(buf, "\xFE\xFF", 2);
1509 
1510         r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2);
1511         if (r == -1)
1512             return -1;
1513         return r + 2;
1514     }
1515 
1516     if (bufsize < 2)
1517         return seterror(E2BIG);
1518     if (cv->codepage == 1200) /* little endian */
1519     {
1520         buf[0] = (wbuf[0] & 0x00FF);
1521         buf[1] = (wbuf[0] & 0xFF00) >> 8;
1522     }
1523     else if (cv->codepage == 1201) /* big endian */
1524     {
1525         buf[0] = (wbuf[0] & 0xFF00) >> 8;
1526         buf[1] = (wbuf[0] & 0x00FF);
1527     }
1528     if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1529     {
1530         if (bufsize < 4)
1531             return seterror(E2BIG);
1532         if (cv->codepage == 1200) /* little endian */
1533         {
1534             buf[2] = (wbuf[1] & 0x00FF);
1535             buf[3] = (wbuf[1] & 0xFF00) >> 8;
1536         }
1537         else if (cv->codepage == 1201) /* big endian */
1538         {
1539             buf[2] = (wbuf[1] & 0xFF00) >> 8;
1540             buf[3] = (wbuf[1] & 0x00FF);
1541         }
1542         return 4;
1543     }
1544     return 2;
1545 }
1546 
1547 static int
utf32_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1548 utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1549 {
1550     int codepage = cv->codepage;
1551     uint wc;
1552 
1553     /* swap endian: 12000 <-> 12001 */
1554     if (cv->mode & UNICODE_MODE_SWAPPED)
1555         codepage ^= 1;
1556 
1557     if (bufsize < 4)
1558         return seterror(EINVAL);
1559     if (codepage == 12000) /* little endian */
1560         wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
1561     else if (codepage == 12001) /* big endian */
1562         wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
1563 
1564     if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1565     {
1566         cv->mode |= UNICODE_MODE_BOM_DONE;
1567         if (wc == 0xFFFE0000)
1568         {
1569             cv->mode |= UNICODE_MODE_SWAPPED;
1570             *wbufsize = 0;
1571             return 4;
1572         }
1573         else if (wc == 0x0000FEFF)
1574         {
1575             *wbufsize = 0;
1576             return 4;
1577         }
1578     }
1579 
1580     if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc)
1581         return seterror(EILSEQ);
1582     ucs4_to_utf16(wc, wbuf, wbufsize);
1583     return 4;
1584 }
1585 
1586 static int
utf32_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1587 utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1588 {
1589     uint wc;
1590 
1591     if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1592     {
1593         int r;
1594 
1595         cv->mode |= UNICODE_MODE_BOM_DONE;
1596         if (bufsize < 4)
1597             return seterror(E2BIG);
1598         if (cv->codepage == 12000) /* little endian */
1599             memcpy(buf, "\xFF\xFE\x00\x00", 4);
1600         else if (cv->codepage == 12001) /* big endian */
1601             memcpy(buf, "\x00\x00\xFE\xFF", 4);
1602 
1603         r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4);
1604         if (r == -1)
1605             return -1;
1606         return r + 4;
1607     }
1608 
1609     if (bufsize < 4)
1610         return seterror(E2BIG);
1611     wc = utf16_to_ucs4(wbuf);
1612     if (cv->codepage == 12000) /* little endian */
1613     {
1614         buf[0] = wc & 0x000000FF;
1615         buf[1] = (wc & 0x0000FF00) >> 8;
1616         buf[2] = (wc & 0x00FF0000) >> 16;
1617         buf[3] = (wc & 0xFF000000) >> 24;
1618     }
1619     else if (cv->codepage == 12001) /* big endian */
1620     {
1621         buf[0] = (wc & 0xFF000000) >> 24;
1622         buf[1] = (wc & 0x00FF0000) >> 16;
1623         buf[2] = (wc & 0x0000FF00) >> 8;
1624         buf[3] = wc & 0x000000FF;
1625     }
1626     return 4;
1627 }
1628 
1629 /*
1630  * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
1631  * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow
1632  *        1 byte Kana)
1633  * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte
1634  *        Kana - SO/SI)
1635  *
1636  * MultiByteToWideChar() and WideCharToMultiByte() behave differently
1637  * depending on Windows version.  On XP, WideCharToMultiByte() doesn't
1638  * terminate result sequence with ascii escape.  But Vista does.
1639  * Use MLang instead.
1640  */
1641 
/* Pack a charset id and a shift state into one mode word: the charset
 * id is placed in bits 8 and up, the shift state in the low 8 bits. */
#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift))
/* Extract the charset id (bits 8..15) from a mode word. */
#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF)
/* Extract the shift state (low 8 bits) from a mode word. */
#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF)

/* shift state values stored in the mode word */
#define ISO2022_SI  0
#define ISO2022_SO  1

/* shift in */
static const char iso2022_SI_seq[] = "\x0F";
/* shift out */
static const char iso2022_SO_seq[] = "\x0E";
1653 
/* One entry of an escape-sequence table (see iso2022jp_esc). */
typedef struct iso2022_esc_t iso2022_esc_t;
struct iso2022_esc_t {
    const char *esc;    /* bytes of the escape sequence */
    int esc_len;        /* number of bytes in esc */
    int len;            /* bytes per character while this charset is selected */
    int cs;             /* charset id (ISO2022JP_CS_*) the sequence selects */
};
1661 
1662 #define ISO2022JP_CS_ASCII            0
1663 #define ISO2022JP_CS_JISX0201_ROMAN   1
1664 #define ISO2022JP_CS_JISX0201_KANA    2
1665 #define ISO2022JP_CS_JISX0208_1978    3
1666 #define ISO2022JP_CS_JISX0208_1983    4
1667 #define ISO2022JP_CS_JISX0212         5
1668 
/*
 * Escape sequences recognized for ISO-2022-JP, terminated by a NULL esc.
 *
 * The table is searched by escape sequence, and iso2022jp_mbtowc() also
 * indexes it directly with a charset id (iesc[cs]), so the entry at
 * index N must describe charset N.  The JIS X 0208-1978 escape at index
 * 3 selects the 1983 charset id, so the 1978 id is never stored in a
 * mode word by this table.
 */
static iso2022_esc_t iso2022jp_esc[] = {
    {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII},
    {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN},
    {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA},
    {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */
    {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983},
    {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212},
    {NULL, 0, 0, 0}
};
1678 
/*
 * Convert one unit of ISO-2022-JP input to UTF-16.
 *
 * A unit is either a control sequence or one character:
 *   - A known escape sequence, or a shift-out / shift-in byte, only
 *     updates cv->mode; it produces no output (*wbufsize = 0) and its
 *     length is returned.
 *   - Otherwise one character of the currently selected charset is
 *     converted.  The character is copied into a temporary buffer behind
 *     the escape sequence of that charset and handed to
 *     ConvertINetMultiByteToUnicode() with a throwaway state, so the
 *     shift state tracked here in cv->mode is what selects the charset.
 *
 * Returns the number of input bytes consumed, or -1 with errno set to
 * EINVAL when the input ends inside an escape sequence or a character,
 * and to EILSEQ for an unknown escape sequence, a byte >= 0x80, or a
 * conversion failure.
 */
static int
iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
{
    iso2022_esc_t *iesc = iso2022jp_esc;
    char tmp[MB_CHAR_MAX];
    int insize;
    HRESULT hr;
    DWORD dummy = 0;
    int len;
    int esc_len;
    int cs;
    int shift;
    int i;

    if (buf[0] == 0x1B)
    {
        for (i = 0; iesc[i].esc != NULL; ++i)
        {
            esc_len = iesc[i].esc_len;
            if (bufsize < esc_len)
            {
                /* the input so far is a prefix of this escape sequence:
                 * report it as incomplete */
                if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0)
                    return seterror(EINVAL);
            }
            else
            {
                if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0)
                {
                    /* selecting a charset also resets the shift state */
                    cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI);
                    *wbufsize = 0;
                    return esc_len;
                }
            }
        }
        /* not supported escape sequence */
        return seterror(EILSEQ);
    }
    else if (buf[0] == iso2022_SO_seq[0])
    {
        /* shift out: keep the charset, change only the shift state */
        cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO);
        *wbufsize = 0;
        return 1;
    }
    else if (buf[0] == iso2022_SI_seq[0])
    {
        /* shift in: keep the charset, change only the shift state */
        cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI);
        *wbufsize = 0;
        return 1;
    }

    cs = ISO2022_MODE_CS(cv->mode);
    shift = ISO2022_MODE_SHIFT(cv->mode);

    /* reset the mode for informal sequence */
    if (buf[0] < 0x20)
    {
        cs = ISO2022JP_CS_ASCII;
        shift = ISO2022_SI;
    }

    /* the table is indexed by charset id here */
    len = iesc[cs].len;
    if (bufsize < len)
        return seterror(EINVAL);
    /* every byte of the character must be 7-bit */
    for (i = 0; i < len; ++i)
        if (!(buf[i] < 0x80))
            return seterror(EILSEQ);
    /* build "<escape>[<SO>]<character>" in tmp */
    esc_len = iesc[cs].esc_len;
    memcpy(tmp, iesc[cs].esc, esc_len);
    if (shift == ISO2022_SO)
    {
        memcpy(tmp + esc_len, iso2022_SO_seq, 1);
        esc_len += 1;
    }
    memcpy(tmp + esc_len, buf, len);

    if ((cv->codepage == 50220 || cv->codepage == 50221
                || cv->codepage == 50222) && shift == ISO2022_SO)
    {
        /* XXX: shift-out cannot be used for mbtowc (both kernel and
         * mlang) */
        /* rebuild tmp with the JIS X 0201 kana escape in place of the
         * charset escape plus SO */
        esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len;
        memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len);
        memcpy(tmp + esc_len, buf, len);
    }

    insize = len + esc_len;
    hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage,
            (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize);
    if (hr != S_OK || insize != len + esc_len)
        return seterror(EILSEQ);

    /* Check for conversion error.  Assuming defaultChar is 0x3F. */
    /* ascii should be converted from ascii */
    /* note: this test reads cv->mode, not the possibly reset cs/shift */
    if (wbuf[0] == buf[0]
            && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
        return seterror(EILSEQ);

    /* reset the mode for informal sequence */
    if (cv->mode != (DWORD)ISO2022_MODE(cs, shift))
        cv->mode = (DWORD)ISO2022_MODE(cs, shift);

    return len;
}
1782 
1783 static int
iso2022jp_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1784 iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1785 {
1786     iso2022_esc_t *iesc = iso2022jp_esc;
1787     char tmp[MB_CHAR_MAX];
1788     int tmpsize = MB_CHAR_MAX;
1789     int insize = wbufsize;
1790     HRESULT hr;
1791     DWORD dummy = 0;
1792     int len;
1793     int esc_len;
1794     int cs;
1795     int shift;
1796     int i;
1797 
1798     /*
1799      * MultiByte = [escape sequence] + character + [escape sequence]
1800      *
1801      * Whether trailing escape sequence is added depends on which API is
1802      * used (kernel or MLang, and its version).
1803      */
1804     hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage,
1805             (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize);
1806     if (hr != S_OK || insize != wbufsize)
1807         return seterror(EILSEQ);
1808     else if (bufsize < tmpsize)
1809         return seterror(E2BIG);
1810 
1811     if (tmpsize == 1)
1812     {
1813         cs = ISO2022JP_CS_ASCII;
1814         esc_len = 0;
1815     }
1816     else
1817     {
1818         for (i = 1; iesc[i].esc != NULL; ++i)
1819         {
1820             esc_len = iesc[i].esc_len;
1821             if (strncmp(tmp, iesc[i].esc, esc_len) == 0)
1822             {
1823                 cs = iesc[i].cs;
1824                 break;
1825             }
1826         }
1827         if (iesc[i].esc == NULL)
1828             /* not supported escape sequence */
1829             return seterror(EILSEQ);
1830     }
1831 
1832     shift = ISO2022_SI;
1833     if (tmp[esc_len] == iso2022_SO_seq[0])
1834     {
1835         shift = ISO2022_SO;
1836         esc_len += 1;
1837     }
1838 
1839     len = iesc[cs].len;
1840 
1841     /* Check for converting error.  Assuming defaultChar is 0x3F. */
1842     /* ascii should be converted from ascii */
1843     if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80))
1844         return seterror(EILSEQ);
1845     else if (tmpsize < esc_len + len)
1846         return seterror(EILSEQ);
1847 
1848     if (cv->mode == ISO2022_MODE(cs, shift))
1849     {
1850         /* remove escape sequence */
1851         if (esc_len != 0)
1852             memmove(tmp, tmp + esc_len, len);
1853         esc_len = 0;
1854     }
1855     else
1856     {
1857         if (cs == ISO2022JP_CS_ASCII)
1858         {
1859             esc_len = iesc[ISO2022JP_CS_ASCII].esc_len;
1860             memmove(tmp + esc_len, tmp, len);
1861             memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len);
1862         }
1863         if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO)
1864         {
1865             /* shift-in before changing to other mode */
1866             memmove(tmp + 1, tmp, len + esc_len);
1867             memcpy(tmp, iso2022_SI_seq, 1);
1868             esc_len += 1;
1869         }
1870     }
1871 
1872     if (bufsize < len + esc_len)
1873         return seterror(E2BIG);
1874     memcpy(buf, tmp, len + esc_len);
1875     cv->mode = ISO2022_MODE(cs, shift);
1876     return len + esc_len;
1877 }
1878 
1879 static int
iso2022jp_flush(csconv_t * cv,uchar * buf,int bufsize)1880 iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize)
1881 {
1882     iso2022_esc_t *iesc = iso2022jp_esc;
1883     int esc_len;
1884 
1885     if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
1886     {
1887         esc_len = 0;
1888         if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1889             esc_len += 1;
1890         if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1891             esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1892         if (bufsize < esc_len)
1893             return seterror(E2BIG);
1894 
1895         esc_len = 0;
1896         if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1897         {
1898             memcpy(buf, iso2022_SI_seq, 1);
1899             esc_len += 1;
1900         }
1901         if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1902         {
1903             memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc,
1904                     iesc[ISO2022JP_CS_ASCII].esc_len);
1905             esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1906         }
1907         return esc_len;
1908     }
1909     return 0;
1910 }
1911 
#if defined(MAKE_DLL) && defined(USE_LIBICONV_DLL)
/*
 * DLL entry point.  On process attach the module handle of this DLL is
 * stored in hwiniconv; every other notification is ignored.  Always
 * returns TRUE.
 */
BOOL WINAPI
DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
{
    (void)lpReserved;

    if (fdwReason == DLL_PROCESS_ATTACH)
        hwiniconv = (HMODULE)hinstDLL;

    /* thread attach/detach and process detach need no handling */
    return TRUE;
}
#endif
1929 
#if defined(MAKE_EXE)
#include <stdio.h>
#include <fcntl.h>
#include <io.h>
/*
 * Command-line front end: converts a file (or stdin) from one encoding
 * to another and writes the result to stdout or to the --output file.
 *
 *   -l             list the known encoding names and exit
 *   -f from-enc    source encoding (required)
 *   -t to-enc      destination encoding (required)
 *   -c             append "//IGNORE" to the destination encoding
 *   --output file  write to file instead of stdout
 *   file           input file; option parsing stops here
 *
 * Returns 0 on success or after printing the usage text, 1 on error.
 */
int
main(int argc, char **argv)
{
    char *fromcode = NULL;
    char *tocode = NULL;
    int i;
    int j;
    int needs_value;
    int missing_value = 0;
    char inbuf[BUFSIZ];
    char outbuf[BUFSIZ];
    const char *pin;    /* matches iconv()'s const char **inbuf */
    char *pout;
    size_t inbytesleft;
    size_t outbytesleft;
    size_t outsize;
    size_t rest = 0;
    iconv_t cd;
    size_t r;
    int conv_errno;
    FILE *in = stdin;
    FILE *out = stdout;
    int ignore = 0;
    char *p;

    _setmode(_fileno(stdin), _O_BINARY);
    _setmode(_fileno(stdout), _O_BINARY);

    for (i = 1; i < argc; ++i)
    {
        if (strcmp(argv[i], "-l") == 0)
        {
            for (j = 0; codepage_alias[j].name != NULL; ++j)
                printf("%s\n", codepage_alias[j].name);
            return 0;
        }

        /* options that consume the next argument must have one */
        needs_value = (strcmp(argv[i], "-f") == 0
                || strcmp(argv[i], "-t") == 0
                || strcmp(argv[i], "--output") == 0);
        if (needs_value && i + 1 >= argc)
        {
            missing_value = 1;
            break;
        }

        if (strcmp(argv[i], "-f") == 0)
            fromcode = argv[++i];
        else if (strcmp(argv[i], "-t") == 0)
            tocode = argv[++i];
        else if (strcmp(argv[i], "-c") == 0)
            ignore = 1;
        else if (strcmp(argv[i], "--output") == 0)
        {
            out = fopen(argv[++i], "wb");
            if(out == NULL)
            {
                fprintf(stderr, "cannot open %s\n", argv[i]);
                return 1;
            }
        }
        else
        {
            /* first non-option argument is the input file */
            in = fopen(argv[i], "rb");
            if (in == NULL)
            {
                fprintf(stderr, "cannot open %s\n", argv[i]);
                return 1;
            }
            break;
        }
    }

    if (missing_value || fromcode == NULL || tocode == NULL)
    {
        printf("usage: %s [-c] -f from-enc -t to-enc [file]\n", argv[0]);
        return 0;
    }

    if (ignore)
    {
        p = tocode;
        tocode = (char *)malloc(strlen(p) + strlen("//IGNORE") + 1);
        if (tocode == NULL)
        {
            perror("fatal error");
            return 1;
        }
        strcpy(tocode, p);
        strcat(tocode, "//IGNORE");
    }

    cd = iconv_open(tocode, fromcode);
    if (cd == (iconv_t)(-1))
    {
        perror("iconv_open error");
        return 1;
    }

    /*
     * "rest" bytes at the start of inbuf are input left unconverted by
     * the previous pass (output buffer full or incomplete sequence).
     */
    while ((inbytesleft = fread(inbuf + rest, 1, sizeof(inbuf) - rest, in)) != 0
            || rest != 0)
    {
        inbytesleft += rest;
        pin = inbuf;
        pout = outbuf;
        outbytesleft = sizeof(outbuf);
        r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft);
        /* fwrite() below may overwrite errno */
        conv_errno = errno;

        /* write what was converted even if iconv() reported an error */
        outsize = sizeof(outbuf) - outbytesleft;
        if (fwrite(outbuf, 1, outsize, out) != outsize)
        {
            perror("write error");
            return 1;
        }

        /*
         * E2BIG: output buffer was full, go on with the rest.
         * EINVAL: incomplete sequence; only fatal when no more input
         * can arrive (end of file or read error).
         */
        if (r == (size_t)(-1) && conv_errno != E2BIG
                && (conv_errno != EINVAL || feof(in) || ferror(in)))
        {
            errno = conv_errno;
            perror("conversion error");
            return 1;
        }
        memmove(inbuf, pin, inbytesleft);
        rest = inbytesleft;
    }

    /* flush the conversion state */
    pout = outbuf;
    outbytesleft = sizeof(outbuf);
    r = iconv(cd, NULL, NULL, &pout, &outbytesleft);
    conv_errno = errno;
    outsize = sizeof(outbuf) - outbytesleft;
    if (fwrite(outbuf, 1, outsize, out) != outsize)
    {
        perror("write error");
        return 1;
    }
    if (r == (size_t)(-1))
    {
        errno = conv_errno;
        perror("conversion error");
        return 1;
    }

    iconv_close(cd);

    /* report buffered write failures instead of exiting with success */
    if (fflush(out) != 0)
    {
        perror("write error");
        return 1;
    }

    return 0;
}
#endif
2051 
2052