1 /* w32-iconv.c - iconv implementation for Windows.
2 * Copyright (C) 2016 g10 Code GmbH
3 *
4 * This file is part of libgpg-error.
5 *
6 * libgpg-error is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public License
8 * as published by the Free Software Foundation; either version 2.1 of
9 * the License, or (at your option) any later version.
10 *
11 * libgpg-error is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this program; if not, see <https://www.gnu.org/licenses/>.
18 *
19 **************************************************************
 * This code is based on the file win_iconv.c as found
21 * at https://github.com/win-iconv/win-iconv with the commit id
22 * 8c23784e35327c9d85d22810b9e4a2cbd06ffe90, dated 2016-03-18.
23 * Yukihiro Nakadaira <yukihiro.nakadaira@gmail.com> is the
24 * original author. The file win_iconv.c carried this notice:
25 *--------------------------------------------------------------
26 * iconv implementation using Win32 API to convert.
27 *
28 * This file is placed in the public domain.
29 *---------------------------------------------------
30 */
31
32 #if HAVE_CONFIG_H
33 #include <config.h>
34 #endif
35 #if !defined (_WIN32) && !defined (__CYGWIN32__)
36 # error This module may only be build for Windows or Cygwin32
37 #endif
38
39 /* for WC_NO_BEST_FIT_CHARS */
40 #ifndef WINVER
41 # define WINVER 0x0500
42 #endif
43
44 #include <windows.h>
45 #include <errno.h>
46 #include <string.h>
47 #include <stdlib.h>
48
49 #include "gpgrt-int.h"
50
51 #undef USE_MLANG_DLL
52
/* Number of UTF-16 units in the scratch buffer that win_iconv uses to
   hold one decoded character between the mbtowc and wctomb steps.  */
#define MB_CHAR_MAX 16

/* Bits for csconv_t.mode.  NOTE(review): the code that sets and tests
   them is not in this part of the file; judging by the names they
   record that the BOM was already handled and that the byte order is
   swapped - confirm against the utf16/utf32 converters.  */
#define UNICODE_MODE_BOM_DONE 1
#define UNICODE_MODE_SWAPPED 2

/* Bits for csconv_t.flags.  FLAG_USE_BOM is set by make_csconv for
   names such as "UTF-16" or "UTF-32" that do not spell out a byte
   order; the other two come from the "//translit" and "//ignore"
   suffixes of the encoding name.  */
#define FLAG_USE_BOM 1
#define FLAG_TRANSLIT 2 /* //TRANSLIT */
#define FLAG_IGNORE 4 /* //IGNORE */
61
/* Short names for the unsigned types used throughout this file.  */
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;

typedef struct compat_t compat_t;
typedef struct csconv_t csconv_t;

/* Returns the address of the errno variable (win_iconv_open stores
   the C runtime's _errno here).  */
typedef int* (*f_errno)(void);
/* Decode one character from BUF into UTF-16 units in WBUF.  On entry
   *WBUFSIZE is the capacity of WBUF, on return the number of units
   stored (0 means "nothing to emit").  Returns the number of input
   bytes consumed or -1 on error.  */
typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
/* Encode the WBUFSIZE UTF-16 units in WBUF into BUF.  Returns the
   number of bytes written or -1 on error.  */
typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
/* NOTE(review): not called in this part of the file; presumably
   returns the byte length of the character at BUF - confirm.  */
typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize);
/* Write any pending output state into BUF.  Returns the number of
   bytes written or -1 on error.  */
typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize);
74
/* Direction bits for compat_t.flag.  win_iconv applies a row after
   decoding the input (replacing OUT by IN) when COMPAT_IN is set, and
   before encoding the output (replacing IN by OUT) when COMPAT_OUT is
   set.  */
#define COMPAT_IN 1
#define COMPAT_OUT 2

/* unicode mapping for compatibility with other conversion table. */
struct compat_t {
  uint in;    /* Code point on the "other table" side; 0 ends a table.  */
  uint out;   /* Code point on the Windows API side.  */
  uint flag;  /* Combination of COMPAT_IN and COMPAT_OUT.  */
};
84
/* One side (source or target) of a conversion, as set up by
   make_csconv.  */
struct csconv_t {
  int codepage;      /* Windows code page number from name_to_codepage.  */
  int flags;         /* FLAG_* bits.  */
  f_mbtowc mbtowc;   /* Decoder: bytes -> UTF-16.  */
  f_wctomb wctomb;   /* Encoder: UTF-16 -> bytes.  */
  f_mblen mblen;     /* Optional; NULL for the UTF-16/UTF-32 converters.  */
  f_flush flush;     /* Optional; NULL unless the converter needs a flush.  */
  DWORD mode;        /* Converter state; win_iconv saves and restores it
                        around a failed step and clears it on reset.  */
  compat_t *compat;  /* Optional compatibility table or NULL.  */
};
95
/* State behind a conversion descriptor: the source and target
   converters plus the errno accessor stored by win_iconv_open.  */
struct _gpgrt_w32_iconv_s {
  f_errno _errno;  /* Set to the C runtime's _errno.  */
  csconv_t from;   /* Decoder for the input encoding.  */
  csconv_t to;     /* Encoder for the output encoding.  */
};
101
/* Forward declarations of the file-local functions.  Everything
   guarded by USE_MLANG_DLL is only built when mlang.dll support is
   enabled.  */
#if USE_MLANG_DLL
static int load_mlang(void);
#endif /*USE_MLANG_DLL*/

/* Converter setup and small helpers.  */
static int make_csconv(const char *name, csconv_t *cv);
static int name_to_codepage(const char *name);
static uint utf16_to_ucs4(const ushort *wbuf);
static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);
static int mbtowc_flags(int codepage);
static int must_use_null_useddefaultchar(int codepage);
static char *strrstr(const char *str, const char *token);
static char *xstrndup(const char *s, size_t n);
static int seterror(int err);

/* Implementations of csconv_t.mblen.  */
static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);
#if USE_MLANG_DLL
static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);
#endif /*USE_MLANG_DLL*/

/* Converters selected by make_csconv for UTF-8 and for every code
   page accepted by IsValidCodePage and GetCPInfo.  */
static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);

#if USE_MLANG_DLL
static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
#endif /*USE_MLANG_DLL*/

/* Converters for code pages 1200/1201 (UTF-16) and 12000/12001
   (UTF-32).  */
static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);

#if USE_MLANG_DLL
static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize);
#endif /*USE_MLANG_DLL*/
142
/*
 * Table of encoding names and the Windows code page each one selects.
 * name_to_codepage() scans it from the top, compares the names
 * case-insensitively and returns the first hit, so when a name is
 * listed twice only the first row has an effect.  The table ends
 * with a row whose name is NULL.  It is read-only, hence const.
 */
static const struct {
  int codepage;
  const char *name;
} codepage_alias[] = {
  {65001, "CP65001"},
  {65001, "UTF8"},
  {65001, "UTF-8"},

  {1200, "CP1200"},
  {1200, "UTF16LE"},
  {1200, "UTF-16LE"},
  {1200, "UCS2LE"},
  {1200, "UCS-2LE"},
  {1200, "UCS-2-INTERNAL"},

  {1201, "CP1201"},
  {1201, "UTF16BE"},
  {1201, "UTF-16BE"},
  {1201, "UCS2BE"},
  {1201, "UCS-2BE"},
  {1201, "unicodeFFFE"},

  {12000, "CP12000"},
  {12000, "UTF32LE"},
  {12000, "UTF-32LE"},
  {12000, "UCS4LE"},
  {12000, "UCS-4LE"},

  {12001, "CP12001"},
  {12001, "UTF32BE"},
  {12001, "UTF-32BE"},
  {12001, "UCS4BE"},
  {12001, "UCS-4BE"},

  /*
   * Default is big endian.
   * See rfc2781 4.3 Interpreting text labelled as UTF-16.
   */
  {1201, "UTF16"},
  {1201, "UTF-16"},
  {1201, "UCS2"},
  {1201, "UCS-2"},
  {12001, "UTF32"},
  {12001, "UTF-32"},
  {12001, "UCS-4"},
  {12001, "UCS4"},

  /* copy from libiconv `iconv -l` */
  /* !IsValidCodePage(367) */
  {20127, "ANSI_X3.4-1968"},
  {20127, "ANSI_X3.4-1986"},
  {20127, "ASCII"},
  {20127, "CP367"},
  {20127, "IBM367"},
  {20127, "ISO-IR-6"},
  {20127, "ISO646-US"},
  {20127, "ISO_646.IRV:1991"},
  {20127, "US"},
  {20127, "US-ASCII"},
  {20127, "CSASCII"},

  /* !IsValidCodePage(819) */
  /* NOTE(review): these two rows select 1252 while the other Latin-1
     names below select 28591 - confirm that this is intended.  */
  {1252, "CP819"},
  {1252, "IBM819"},
  {28591, "ISO-8859-1"},
  {28591, "ISO-IR-100"},
  {28591, "ISO8859-1"},
  {28591, "ISO_8859-1"},
  {28591, "ISO_8859-1:1987"},
  {28591, "L1"},
  {28591, "LATIN1"},
  {28591, "CSISOLATIN1"},

  {1250, "CP1250"},
  {1250, "MS-EE"},
  {1250, "WINDOWS-1250"},

  {1251, "CP1251"},
  {1251, "MS-CYRL"},
  {1251, "WINDOWS-1251"},

  {1252, "CP1252"},
  {1252, "MS-ANSI"},
  {1252, "WINDOWS-1252"},

  {1253, "CP1253"},
  {1253, "MS-GREEK"},
  {1253, "WINDOWS-1253"},

  {1254, "CP1254"},
  {1254, "MS-TURK"},
  {1254, "WINDOWS-1254"},

  {1255, "CP1255"},
  {1255, "MS-HEBR"},
  {1255, "WINDOWS-1255"},

  {1256, "CP1256"},
  {1256, "MS-ARAB"},
  {1256, "WINDOWS-1256"},

  {1257, "CP1257"},
  {1257, "WINBALTRIM"},
  {1257, "WINDOWS-1257"},

  {1258, "CP1258"},
  {1258, "WINDOWS-1258"},

  {850, "850"},
  {850, "CP850"},
  {850, "IBM850"},
  {850, "CSPC850MULTILINGUAL"},

  /* !IsValidCodePage(862) */
  {862, "862"},
  {862, "CP862"},
  {862, "IBM862"},
  {862, "CSPC862LATINHEBREW"},

  {866, "866"},
  {866, "CP866"},
  {866, "IBM866"},
  {866, "CSIBM866"},

  /* !IsValidCodePage(154) */
  {154, "CP154"},
  {154, "CYRILLIC-ASIAN"},
  {154, "PT154"},
  {154, "PTCP154"},
  {154, "CSPTCP154"},

  /* !IsValidCodePage(1133) */
  {1133, "CP1133"},
  {1133, "IBM-CP1133"},

  {874, "CP874"},
  {874, "WINDOWS-874"},

  /* !IsValidCodePage(51932) */
  {51932, "CP51932"},
  {51932, "MS51932"},
  {51932, "WINDOWS-51932"},
  {51932, "EUC-JP"},

  {932, "CP932"},
  {932, "MS932"},
  /* The two "SHIFFT" spellings are historical typos.  They are kept
     so that existing users of those names keep working; the correct
     spellings follow.  */
  {932, "SHIFFT_JIS"},
  {932, "SHIFFT_JIS-MS"},
  {932, "SHIFT_JIS"},
  {932, "SHIFT_JIS-MS"},
  {932, "SJIS"},
  {932, "SJIS-MS"},
  {932, "SJIS-OPEN"},
  {932, "SJIS-WIN"},
  {932, "WINDOWS-31J"},
  {932, "WINDOWS-932"},
  {932, "CSWINDOWS31J"},

  {50221, "CP50221"},
  {50221, "ISO-2022-JP"},
  {50221, "ISO-2022-JP-MS"},
  {50221, "ISO2022-JP"},
  {50221, "ISO2022-JP-MS"},
  {50221, "MS50221"},
  {50221, "WINDOWS-50221"},

  {936, "CP936"},
  {936, "GBK"},
  {936, "MS936"},
  {936, "WINDOWS-936"},

  {950, "CP950"},
  {950, "BIG5"},
  {950, "BIG5HKSCS"},
  {950, "BIG5-HKSCS"},

  {949, "CP949"},
  {949, "UHC"},
  {949, "EUC-KR"},

  {1361, "CP1361"},
  {1361, "JOHAB"},

  {437, "437"},
  {437, "CP437"},
  {437, "IBM437"},
  {437, "CSPC8CODEPAGE437"},

  {737, "CP737"},

  {775, "CP775"},
  {775, "IBM775"},
  {775, "CSPC775BALTIC"},

  {852, "852"},
  {852, "CP852"},
  {852, "IBM852"},
  {852, "CSPCP852"},

  /* !IsValidCodePage(853) */
  {853, "CP853"},

  {855, "855"},
  {855, "CP855"},
  {855, "IBM855"},
  {855, "CSIBM855"},

  {857, "857"},
  {857, "CP857"},
  {857, "IBM857"},
  {857, "CSIBM857"},

  /* !IsValidCodePage(858) */
  {858, "CP858"},

  {860, "860"},
  {860, "CP860"},
  {860, "IBM860"},
  {860, "CSIBM860"},

  {861, "861"},
  {861, "CP-IS"},
  {861, "CP861"},
  {861, "IBM861"},
  {861, "CSIBM861"},

  {863, "863"},
  {863, "CP863"},
  {863, "IBM863"},
  {863, "CSIBM863"},

  {864, "CP864"},
  {864, "IBM864"},
  {864, "CSIBM864"},

  {865, "865"},
  {865, "CP865"},
  {865, "IBM865"},
  {865, "CSIBM865"},

  {869, "869"},
  {869, "CP-GR"},
  {869, "CP869"},
  {869, "IBM869"},
  {869, "CSIBM869"},

  /* !IsValidCodePage(1125) */
  {1125, "CP1125"},

  /*
   * Code Page Identifiers
   * https://msdn.microsoft.com/en-us/library/dd317756.aspx
   */
  {37, "IBM037"}, /* IBM EBCDIC US-Canada */
  {437, "IBM437"}, /* OEM United States */
  {500, "IBM500"}, /* IBM EBCDIC International */
  {708, "ASMO-708"}, /* Arabic (ASMO 708) */
  /* 709 Arabic (ASMO-449+, BCON V4) */
  /* 710 Arabic - Transparent Arabic */
  {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */
  {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */
  {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */
  {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */
  {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */
  {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */
  {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */
  {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */
  {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */
  {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */
  {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */
  {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */
  {864, "IBM864"}, /* OEM Arabic; Arabic (864) */
  {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */
  {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */
  {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */
  {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */
  {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */
  {875, "cp875"}, /* IBM EBCDIC Greek Modern */
  {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */
  {932, "shift-jis"}, /* alternative name for it */
  {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */
  {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */
  {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */
  {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */
  {950, "big5-hkscs"}, /* alternative name for it */
  {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */
  {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */
  {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */
  {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */
  {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */
  {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */
  {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */
  {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */
  {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */
  {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */
  {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */
  {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */
  {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */
  {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */
  {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */
  {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */
  {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */
  {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */
  {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */
  {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */
  {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */
  {1361, "Johab"}, /* Korean (Johab) */
  {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */
  {10001, "x-mac-japanese"}, /* Japanese (Mac) */
  {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */
  {10003, "x-mac-korean"}, /* Korean (Mac) */
  {10004, "x-mac-arabic"}, /* Arabic (Mac) */
  {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */
  {10006, "x-mac-greek"}, /* Greek (Mac) */
  {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */
  {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */
  {10010, "x-mac-romanian"}, /* Romanian (Mac) */
  {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */
  {10021, "x-mac-thai"}, /* Thai (Mac) */
  {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */
  {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */
  {10081, "x-mac-turkish"}, /* Turkish (Mac) */
  {10082, "x-mac-croatian"}, /* Croatian (Mac) */
  {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */
  {20001, "x-cp20001"}, /* TCA Taiwan */
  {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */
  {20003, "x-cp20003"}, /* IBM5550 Taiwan */
  {20004, "x-cp20004"}, /* TeleText Taiwan */
  {20005, "x-cp20005"}, /* Wang Taiwan */
  {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */
  {20106, "x-IA5-German"}, /* IA5 German (7-bit) */
  {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */
  {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */
  {20127, "us-ascii"}, /* US-ASCII (7-bit) */
  {20261, "x-cp20261"}, /* T.61 */
  {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */
  {20273, "IBM273"}, /* IBM EBCDIC Germany */
  {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */
  {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */
  {20280, "IBM280"}, /* IBM EBCDIC Italy */
  {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */
  {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */
  {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */
  {20297, "IBM297"}, /* IBM EBCDIC France */
  {20420, "IBM420"}, /* IBM EBCDIC Arabic */
  {20423, "IBM423"}, /* IBM EBCDIC Greek */
  {20424, "IBM424"}, /* IBM EBCDIC Hebrew */
  {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */
  {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */
  {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */
  {20871, "IBM871"}, /* IBM EBCDIC Icelandic */
  {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */
  {20905, "IBM905"}, /* IBM EBCDIC Turkish */
  {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */
  {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */
  {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */
  {20949, "x-cp20949"}, /* Korean Wansung */
  {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */
  /* 21027 (deprecated) */
  {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */
  {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
  {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
  {28591, "iso_8859-1"},
  {28591, "iso_8859_1"},
  {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
  {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
  {28592, "iso_8859-2"},
  {28592, "iso_8859_2"},
  {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */
  {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */
  {28593, "iso_8859-3"},
  {28593, "iso_8859_3"},
  {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */
  {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */
  {28594, "iso_8859-4"},
  {28594, "iso_8859_4"},
  {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */
  {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */
  {28595, "iso_8859-5"},
  {28595, "iso_8859_5"},
  {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */
  {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */
  {28596, "iso_8859-6"},
  {28596, "iso_8859_6"},
  {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */
  {28597, "iso8859-7"}, /* ISO 8859-7 Greek */
  {28597, "iso_8859-7"},
  {28597, "iso_8859_7"},
  {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
  {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
  {28598, "iso_8859-8"},
  {28598, "iso_8859_8"},
  {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */
  {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */
  {28599, "iso_8859-9"},
  {28599, "iso_8859_9"},
  {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */
  {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */
  {28603, "iso_8859-13"},
  {28603, "iso_8859_13"},
  {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */
  {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */
  {28605, "iso_8859-15"},
  {28605, "iso_8859_15"},
  {29001, "x-Europa"}, /* Europa 3 */
  {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
  {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
  {38598, "iso_8859-8-i"},
  {38598, "iso_8859_8-i"},
  {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */
  {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */
  {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */
  {50225, "iso-2022-kr"}, /* ISO 2022 Korean */
  {50225, "iso2022-kr"}, /* ISO 2022 Korean */
  {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */
  /* 50229 ISO 2022 Traditional Chinese */
  /* 50930 EBCDIC Japanese (Katakana) Extended */
  /* 50931 EBCDIC US-Canada and Japanese */
  /* 50933 EBCDIC Korean Extended and Korean */
  /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */
  /* 50936 EBCDIC Simplified Chinese */
  /* 50937 EBCDIC US-Canada and Traditional Chinese */
  /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */
  {51932, "euc-jp"}, /* EUC Japanese */
  {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */
  {51949, "euc-kr"}, /* EUC Korean */
  /* 51950 EUC Traditional Chinese */
  {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */
  {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */
  {57002, "x-iscii-de"}, /* ISCII Devanagari */
  {57003, "x-iscii-be"}, /* ISCII Bengali */
  {57004, "x-iscii-ta"}, /* ISCII Tamil */
  {57005, "x-iscii-te"}, /* ISCII Telugu */
  {57006, "x-iscii-as"}, /* ISCII Assamese */
  {57007, "x-iscii-or"}, /* ISCII Oriya */
  {57008, "x-iscii-ka"}, /* ISCII Kannada */
  {57009, "x-iscii-ma"}, /* ISCII Malayalam */
  {57010, "x-iscii-gu"}, /* ISCII Gujarati */
  {57011, "x-iscii-pa"}, /* ISCII Punjabi */

  {0, NULL}
};
583
584 /*
585 * SJIS SHIFTJIS table CP932 table
586 * ---- --------------------------- --------------------------------
587 * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS
588 * 7E U+203E OVERLINE U+007E TILDE
589 * 815C U+2014 EM DASH U+2015 HORIZONTAL BAR
590 * 815F U+005C REVERSE SOLIDUS U+FF3C FULLWIDTH REVERSE SOLIDUS
591 * 8160 U+301C WAVE DASH U+FF5E FULLWIDTH TILDE
592 * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO
593 * 817C U+2212 MINUS SIGN U+FF0D FULLWIDTH HYPHEN-MINUS
594 * 8191 U+00A2 CENT SIGN U+FFE0 FULLWIDTH CENT SIGN
595 * 8192 U+00A3 POUND SIGN U+FFE1 FULLWIDTH POUND SIGN
596 * 81CA U+00AC NOT SIGN U+FFE2 FULLWIDTH NOT SIGN
597 *
598 * EUC-JP and ISO-2022-JP should be compatible with CP932.
599 *
600 * Kernel and MLang have different Unicode mapping table. Make sure
601 * which API is used.
602 */
/* Compatibility rows for code page 932, applied on output only: the
   first code point is replaced by the second one before the text is
   encoded.  Character names are taken from the comparison table
   above.  A row with in == 0 ends the table.  */
static compat_t cp932_compat[] = {
  {0x00A5, 0x005C, COMPAT_OUT},  /* YEN SIGN -> REVERSE SOLIDUS */
  {0x203E, 0x007E, COMPAT_OUT},  /* OVERLINE -> TILDE */
  {0x2014, 0x2015, COMPAT_OUT},  /* EM DASH -> HORIZONTAL BAR */
  {0x301C, 0xFF5E, COMPAT_OUT},  /* WAVE DASH -> FULLWIDTH TILDE */
  {0x2016, 0x2225, COMPAT_OUT},  /* DOUBLE VERTICAL LINE -> PARALLEL TO */
  {0x2212, 0xFF0D, COMPAT_OUT},  /* MINUS SIGN -> FULLWIDTH HYPHEN-MINUS */
  {0x00A2, 0xFFE0, COMPAT_OUT},  /* CENT SIGN -> FULLWIDTH CENT SIGN */
  {0x00A3, 0xFFE1, COMPAT_OUT},  /* POUND SIGN -> FULLWIDTH POUND SIGN */
  {0x00AC, 0xFFE2, COMPAT_OUT},  /* NOT SIGN -> FULLWIDTH NOT SIGN */
  {0, 0, 0}
};

/* Compatibility rows for code page 20932.  Rows flagged with both
   COMPAT_OUT and COMPAT_IN are applied in both directions: first to
   second code point before encoding, second to first after
   decoding.  */
static compat_t cp20932_compat[] = {
  {0x00A5, 0x005C, COMPAT_OUT},            /* YEN SIGN -> REVERSE SOLIDUS */
  {0x203E, 0x007E, COMPAT_OUT},            /* OVERLINE -> TILDE */
  {0x2014, 0x2015, COMPAT_OUT},            /* EM DASH -> HORIZONTAL BAR */
  {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN},  /* FULLWIDTH TILDE <-> WAVE DASH */
  {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN},  /* PARALLEL TO <-> DOUBLE VERTICAL LINE */
  {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN},  /* FULLWIDTH HYPHEN-MINUS <-> MINUS SIGN */
  {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN},  /* FULLWIDTH CENT SIGN <-> CENT SIGN */
  {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN},  /* FULLWIDTH POUND SIGN <-> POUND SIGN */
  {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN},  /* FULLWIDTH NOT SIGN <-> NOT SIGN */
  {0, 0, 0}
};

/* Code page 51932 shares the rows of code page 932.  */
static compat_t *cp51932_compat = cp932_compat;

/* cp20932_compat for kernel. cp932_compat for mlang. */
/* NOTE(review): the table selected here is the one the comment above
   assigns to mlang, while USE_MLANG_DLL is undefined near the top of
   this file - confirm that cp932_compat is the intended choice.  */
static compat_t *cp5022x_compat = cp932_compat;
633
634 #if USE_MLANG_DLL
/* Function pointer types for entry points of mlang.dll.  The library
   is not linked; load_mlang resolves the functions by name at run
   time.  */
typedef HRESULT (WINAPI *CONVERTINETSTRING)(
    LPDWORD lpdwMode,
    DWORD dwSrcEncoding,
    DWORD dwDstEncoding,
    LPCSTR lpSrcStr,
    LPINT lpnSrcSize,
    LPBYTE lpDstStr,
    LPINT lpnDstSize
);
typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)(
    LPDWORD lpdwMode,
    DWORD dwSrcEncoding,
    LPCSTR lpSrcStr,
    LPINT lpnMultiCharCount,
    LPWSTR lpDstStr,
    LPINT lpnWideCharCount
);
typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)(
    LPDWORD lpdwMode,
    DWORD dwEncoding,
    LPCWSTR lpSrcStr,
    LPINT lpnWideCharCount,
    LPSTR lpDstStr,
    LPINT lpnMultiCharCount
);
typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)(
    DWORD dwSrcEncoding,
    DWORD dwDstEncoding
);
typedef HRESULT (WINAPI *LCIDTORFC1766A)(
    LCID Locale,
    LPSTR pszRfc1766,
    int nChar
);
/* The two W variants below are declared as types only; this file
   keeps no pointer for them.  */
typedef HRESULT (WINAPI *LCIDTORFC1766W)(
    LCID Locale,
    LPWSTR pszRfc1766,
    int nChar
);
typedef HRESULT (WINAPI *RFC1766TOLCIDA)(
    LCID *pLocale,
    LPSTR pszRfc1766
);
typedef HRESULT (WINAPI *RFC1766TOLCIDW)(
    LCID *pLocale,
    LPWSTR pszRfc1766
);
/* Resolved entry points; all NULL until load_mlang has run.  A
   non-NULL ConvertINetString doubles as the "already loaded" mark.  */
static CONVERTINETSTRING ConvertINetString;
static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode;
static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte;
static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable;
static LCIDTORFC1766A LcidToRfc1766A;
static RFC1766TOLCIDA Rfc1766ToLcidA;
688 #endif /*USE_MLANG_DLL*/
689
690 #if USE_MLANG_DLL
691 static int
load_mlang(void)692 load_mlang(void)
693 {
694 HMODULE h;
695 if (ConvertINetString != NULL)
696 return TRUE;
697 h = LoadLibrary(TEXT("mlang.dll"));
698 if (!h)
699 return FALSE;
700 ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString");
701 ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode");
702 ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte");
703 IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable");
704 LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A");
705 Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA");
706 return TRUE;
707 }
708 #endif
709
710
711 static int
win_iconv_open(gpgrt_w32_iconv_t cd,const char * tocode,const char * fromcode)712 win_iconv_open(gpgrt_w32_iconv_t cd, const char *tocode, const char *fromcode)
713 {
714 if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to))
715 return FALSE;
716 cd->_errno = _errno;
717 return TRUE;
718 }
719
720 static size_t
win_iconv(gpgrt_w32_iconv_t cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)721 win_iconv (gpgrt_w32_iconv_t cd,
722 const char **inbuf, size_t *inbytesleft,
723 char **outbuf, size_t *outbytesleft)
724 {
725 ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */
726 int insize;
727 int outsize;
728 int wsize;
729 DWORD frommode;
730 DWORD tomode;
731 uint wc;
732 compat_t *cp;
733 int i;
734
735 if (inbuf == NULL || *inbuf == NULL)
736 {
737 if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL)
738 {
739 tomode = cd->to.mode;
740 outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft);
741 if (outsize == -1)
742 {
743 if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
744 {
745 outsize = 0;
746 }
747 else
748 {
749 cd->to.mode = tomode;
750 return (size_t)(-1);
751 }
752 }
753 *outbuf += outsize;
754 *outbytesleft -= outsize;
755 }
756 cd->from.mode = 0;
757 cd->to.mode = 0;
758 return 0;
759 }
760
761 while (*inbytesleft != 0)
762 {
763 frommode = cd->from.mode;
764 tomode = cd->to.mode;
765 wsize = MB_CHAR_MAX;
766
767 insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize);
768 if (insize == -1)
769 {
770 if (cd->to.flags & FLAG_IGNORE)
771 {
772 cd->from.mode = frommode;
773 insize = 1;
774 wsize = 0;
775 }
776 else
777 {
778 cd->from.mode = frommode;
779 return (size_t)(-1);
780 }
781 }
782
783 if (wsize == 0)
784 {
785 *inbuf += insize;
786 *inbytesleft -= insize;
787 continue;
788 }
789
790 if (cd->from.compat != NULL)
791 {
792 wc = utf16_to_ucs4(wbuf);
793 cp = cd->from.compat;
794 for (i = 0; cp[i].in != 0; ++i)
795 {
796 if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc)
797 {
798 ucs4_to_utf16(cp[i].in, wbuf, &wsize);
799 break;
800 }
801 }
802 }
803
804 if (cd->to.compat != NULL)
805 {
806 wc = utf16_to_ucs4(wbuf);
807 cp = cd->to.compat;
808 for (i = 0; cp[i].in != 0; ++i)
809 {
810 if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc)
811 {
812 ucs4_to_utf16(cp[i].out, wbuf, &wsize);
813 break;
814 }
815 }
816 }
817
818 outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft);
819 if (outsize == -1)
820 {
821 if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
822 {
823 cd->to.mode = tomode;
824 outsize = 0;
825 }
826 else
827 {
828 cd->from.mode = frommode;
829 cd->to.mode = tomode;
830 return (size_t)(-1);
831 }
832 }
833
834 *inbuf += insize;
835 *outbuf += outsize;
836 *inbytesleft -= insize;
837 *outbytesleft -= outsize;
838 }
839
840 return 0;
841 }
842
843 static int
make_csconv(const char * _name,csconv_t * cv)844 make_csconv(const char *_name, csconv_t *cv)
845 {
846 CPINFO cpinfo;
847 int use_compat = TRUE;
848 int flag = 0;
849 char *name;
850 char *p;
851
852 name = xstrndup(_name, strlen(_name));
853 if (name == NULL)
854 return FALSE;
855
856 /* check for option "enc_name//opt1//opt2" */
857 while ((p = strrstr(name, "//")) != NULL)
858 {
859 if (_stricmp(p + 2, "nocompat") == 0)
860 use_compat = FALSE;
861 else if (_stricmp(p + 2, "translit") == 0)
862 flag |= FLAG_TRANSLIT;
863 else if (_stricmp(p + 2, "ignore") == 0)
864 flag |= FLAG_IGNORE;
865 *p = 0;
866 }
867
868 cv->mode = 0;
869 cv->flags = flag;
870 cv->mblen = NULL;
871 cv->flush = NULL;
872 cv->compat = NULL;
873 cv->codepage = name_to_codepage(name);
874 if (cv->codepage == 1200 || cv->codepage == 1201)
875 {
876 cv->mbtowc = utf16_mbtowc;
877 cv->wctomb = utf16_wctomb;
878 if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 ||
879 _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0 ||
880 _stricmp(name,"UCS-2-INTERNAL") == 0)
881 cv->flags |= FLAG_USE_BOM;
882 }
883 else if (cv->codepage == 12000 || cv->codepage == 12001)
884 {
885 cv->mbtowc = utf32_mbtowc;
886 cv->wctomb = utf32_wctomb;
887 if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 ||
888 _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0)
889 cv->flags |= FLAG_USE_BOM;
890 }
891 else if (cv->codepage == 65001)
892 {
893 cv->mbtowc = kernel_mbtowc;
894 cv->wctomb = kernel_wctomb;
895 cv->mblen = utf8_mblen;
896 }
897 #if USE_MLANG_DLL
898 else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang())
899 {
900 cv->mbtowc = iso2022jp_mbtowc;
901 cv->wctomb = iso2022jp_wctomb;
902 cv->flush = iso2022jp_flush;
903 }
904 else if (cv->codepage == 51932 && load_mlang())
905 {
906 cv->mbtowc = mlang_mbtowc;
907 cv->wctomb = mlang_wctomb;
908 cv->mblen = eucjp_mblen;
909 }
910 #endif /*USE_MLANG_DLL*/
911 else if (IsValidCodePage(cv->codepage)
912 && GetCPInfo(cv->codepage, &cpinfo) != 0)
913 {
914 cv->mbtowc = kernel_mbtowc;
915 cv->wctomb = kernel_wctomb;
916 if (cpinfo.MaxCharSize == 1)
917 cv->mblen = sbcs_mblen;
918 else if (cpinfo.MaxCharSize == 2)
919 cv->mblen = dbcs_mblen;
920 else
921 cv->mblen = mbcs_mblen;
922 }
923 else
924 {
925 /* not supported */
926 free(name);
927 errno = EINVAL;
928 return FALSE;
929 }
930
931 if (use_compat)
932 {
933 switch (cv->codepage)
934 {
935 case 932: cv->compat = cp932_compat; break;
936 case 20932: cv->compat = cp20932_compat; break;
937 case 51932: cv->compat = cp51932_compat; break;
938 case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break;
939 }
940 }
941
942 free(name);
943
944 return TRUE;
945 }
946
947 static int
name_to_codepage(const char * name)948 name_to_codepage(const char *name)
949 {
950 int i;
951
952 if (*name == '\0' ||
953 strcmp(name, "char") == 0)
954 return GetACP();
955 else if (strcmp(name, "wchar_t") == 0)
956 return 1200;
957 else if (_strnicmp(name, "cp", 2) == 0)
958 return atoi(name + 2); /* CP123 */
959 else if ('0' <= name[0] && name[0] <= '9')
960 return atoi(name); /* 123 */
961 else if (_strnicmp(name, "xx", 2) == 0)
962 return atoi(name + 2); /* XX123 for debug */
963
964 for (i = 0; codepage_alias[i].name != NULL; ++i)
965 if (_stricmp(name, codepage_alias[i].name) == 0)
966 return codepage_alias[i].codepage;
967 return -1;
968 }
969
/*
 * Return the code point stored at WBUF in UTF-16 form.  If the first
 * unit is a high surrogate (0xD800..0xDBFF) it is combined with the
 * second unit; the second unit is not checked for being a low
 * surrogate.  Any other first unit is returned unchanged.
 *
 * https://tools.ietf.org/html/rfc2781
 */
static uint
utf16_to_ucs4 (const ushort *wbuf)
{
  const uint lead = wbuf[0];

  if (lead < 0xD800 || lead > 0xDBFF)
    return lead;

  return ((lead & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000;
}
981
982 static void
ucs4_to_utf16(uint wc,ushort * wbuf,int * wbufsize)983 ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize)
984 {
985 if (wc < 0x10000)
986 {
987 wbuf[0] = wc;
988 *wbufsize = 1;
989 }
990 else
991 {
992 wc -= 0x10000;
993 wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF);
994 wbuf[1] = 0xDC00 | (wc & 0x3FF);
995 *wbufsize = 2;
996 }
997 }
998
999 /*
1000 * Check if codepage is one of those for which the dwFlags parameter
1001 * to MultiByteToWideChar() must be zero. Return zero or
1002 * MB_ERR_INVALID_CHARS. The docs in Platform SDK for Windows
1003 * Server 2003 R2 claims that also codepage 65001 is one of these, but
1004 * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave
1005 * out 65001 (UTF-8), and that indeed seems to be the case on XP, it
1006 * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting
1007 * from UTF-8.
1008 */
1009 static int
mbtowc_flags(int codepage)1010 mbtowc_flags(int codepage)
1011 {
1012 return (codepage == 50220 || codepage == 50221 ||
1013 codepage == 50222 || codepage == 50225 ||
1014 codepage == 50227 || codepage == 50229 ||
1015 codepage == 52936 || codepage == 54936 ||
1016 (codepage >= 57002 && codepage <= 57011) ||
1017 codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS;
1018 }
1019
/*
 * Return 1 if CODEPAGE is one of those for which the
 * lpUsedDefaultChar parameter to WideCharToMultiByte() must be NULL,
 * otherwise 0.
 *
 * The docs in Platform SDK for Windows Server 2003 R2 claim that this
 * is the list below, while the MSDN docs for MSVS2008 claim that it
 * is only for 65000 (UTF-7) and 65001 (UTF-8).  This time the earlier
 * Platform SDK seems to be correct, at least for XP.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    if (codepage >= 57002 && codepage <= 57011)
        return 1;

    switch (codepage)
    {
    case 42:
    case 50220:
    case 50221:
    case 50222:
    case 50225:
    case 50227:
    case 50229:
    case 52936:
    case 54936:
    case 65000:
    case 65001:
        return 1;
    default:
        return 0;
    }
}
1039
/*
 * Return a pointer to the last occurrence of TOKEN in STR, or NULL if
 * TOKEN does not occur.  An empty TOKEN never matches, so NULL is
 * returned for it.
 *
 * The scan uses an index instead of a decrementing pointer so that no
 * pointer before the start of STR is ever formed, and the token
 * length is kept as size_t rather than being narrowed to int.
 */
static char *
strrstr(const char *str, const char *token)
{
    const size_t toklen = strlen(token);
    size_t pos = strlen(str);

    while (pos-- > 0)
    {
        /* Cheap first-byte test before the full comparison. */
        if (str[pos] == token[0] && strncmp(str + pos, token, toklen) == 0)
            return (char *)(str + pos);
    }
    return NULL;
}
1051
/*
 * Return a newly malloc'ed, NUL-terminated copy of the first N bytes
 * of S.  Exactly N bytes are copied with memcpy(); the copy does not
 * stop at a NUL inside S, so S must provide at least N readable
 * bytes.
 *
 * Returns NULL if the allocation fails or if N is so large that N + 1
 * would wrap around to zero.  The caller releases the result with
 * free().
 */
static char *
xstrndup(const char *s, size_t n)
{
    char *copy;

    /* N + 1 would overflow and yield a zero-byte allocation. */
    if (n == (size_t)-1)
        return NULL;

    copy = malloc(n + 1);
    if (copy == NULL)
        return NULL;

    memcpy(copy, s, n);
    copy[n] = '\0';
    return copy;
}
1064
/*
 * Record ERR via _gpg_err_set_errno() and return -1, so that callers
 * can write "return seterror (EXXX);" in one statement.
 */
static int
seterror(int err)
{
    const int failure = -1;

    _gpg_err_set_errno (err);
    return failure;
}
1071
1072
1073 static int
sbcs_mblen(csconv_t * cv,const uchar * buf,int bufsize)1074 sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1075 {
1076 (void)cv;
1077 (void)buf;
1078 (void)bufsize;
1079 return 1;
1080 }
1081
1082 static int
dbcs_mblen(csconv_t * cv,const uchar * buf,int bufsize)1083 dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1084 {
1085 int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1;
1086 if (bufsize < len)
1087 return seterror(EINVAL);
1088 return len;
1089 }
1090
1091 static int
mbcs_mblen(csconv_t * cv,const uchar * buf,int bufsize)1092 mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1093 {
1094 int len = 0;
1095
1096 if (cv->codepage == 54936) {
1097 if (buf[0] <= 0x7F) len = 1;
1098 else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1099 bufsize >= 2 &&
1100 ((buf[1] >= 0x40 && buf[1] <= 0x7E) ||
1101 (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2;
1102 else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1103 bufsize >= 4 &&
1104 buf[1] >= 0x30 && buf[1] <= 0x39) len = 4;
1105 else
1106 return seterror(EINVAL);
1107 return len;
1108 }
1109 else
1110 return seterror(EINVAL);
1111 }
1112
1113 static int
utf8_mblen(csconv_t * cv,const uchar * buf,int bufsize)1114 utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1115 {
1116 int len = 0;
1117
1118 (void) cv;
1119
1120 if (buf[0] < 0x80) len = 1;
1121 else if ((buf[0] & 0xE0) == 0xC0) len = 2;
1122 else if ((buf[0] & 0xF0) == 0xE0) len = 3;
1123 else if ((buf[0] & 0xF8) == 0xF0) len = 4;
1124 else if ((buf[0] & 0xFC) == 0xF8) len = 5;
1125 else if ((buf[0] & 0xFE) == 0xFC) len = 6;
1126
1127 if (len == 0)
1128 return seterror(EILSEQ);
1129 else if (bufsize < len)
1130 return seterror(EINVAL);
1131 return len;
1132 }
1133
1134 #if USE_MLANG_DLL
1135 static int
eucjp_mblen(csconv_t * cv,const uchar * buf,int bufsize)1136 eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1137 {
1138 (void) cv;
1139
1140 if (buf[0] < 0x80) /* ASCII */
1141 return 1;
1142 else if (buf[0] == 0x8E) /* JIS X 0201 */
1143 {
1144 if (bufsize < 2)
1145 return seterror(EINVAL);
1146 else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF))
1147 return seterror(EILSEQ);
1148 return 2;
1149 }
1150 else if (buf[0] == 0x8F) /* JIS X 0212 */
1151 {
1152 if (bufsize < 3)
1153 return seterror(EINVAL);
1154 else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE)
1155 || !(0xA1 <= buf[2] && buf[2] <= 0xFE))
1156 return seterror(EILSEQ);
1157 return 3;
1158 }
1159 else /* JIS X 0208 */
1160 {
1161 if (bufsize < 2)
1162 return seterror(EINVAL);
1163 else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE)
1164 || !(0xA1 <= buf[1] && buf[1] <= 0xFE))
1165 return seterror(EILSEQ);
1166 return 2;
1167 }
1168 }
1169 #endif /*USE_MLANG_DLL*/
1170
1171 static int
kernel_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1172 kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1173 {
1174 int len;
1175
1176 len = cv->mblen(cv, buf, bufsize);
1177 if (len == -1)
1178 return -1;
1179 /* If converting from ASCII, reject 8bit
1180 * chars. MultiByteToWideChar() doesn't. Note that for ASCII we
1181 * know that the mblen function is sbcs_mblen() so len is 1.
1182 */
1183 if (cv->codepage == 20127 && buf[0] >= 0x80)
1184 return seterror(EILSEQ);
1185 *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),
1186 (const char *)buf, len, (wchar_t *)wbuf, *wbufsize);
1187 if (*wbufsize == 0)
1188 return seterror(EILSEQ);
1189 return len;
1190 }
1191
1192 static int
kernel_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1193 kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1194 {
1195 BOOL usedDefaultChar = 0;
1196 BOOL *p = NULL;
1197 int flags = 0;
1198 int len;
1199
1200 if (bufsize == 0)
1201 return seterror(E2BIG);
1202 if (!must_use_null_useddefaultchar(cv->codepage))
1203 {
1204 p = &usedDefaultChar;
1205 #ifdef WC_NO_BEST_FIT_CHARS
1206 if (!(cv->flags & FLAG_TRANSLIT))
1207 flags |= WC_NO_BEST_FIT_CHARS;
1208 #endif
1209 }
1210 len = WideCharToMultiByte(cv->codepage, flags,
1211 (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p);
1212 if (len == 0)
1213 {
1214 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
1215 return seterror(E2BIG);
1216 return seterror(EILSEQ);
1217 }
1218 else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT))
1219 return seterror(EILSEQ);
1220 else if (cv->mblen(cv, buf, len) != len) /* validate result */
1221 return seterror(EILSEQ);
1222 return len;
1223 }
1224
/*
 * The conversion mode (cv->mode) used by MLang appears to be a packed
 * integer.  For example, when converting iso-2022-jp (cp50221) to
 * Unicode:
 *   in an ASCII sequence:    mode=0xC42C0000
 *   in a JIS X 0208 sequence: mode=0xC42C0001
 * The upper half ("C42C") stays the same for the whole conversion
 * session, so the layout seems to be: ((codepage-1)<<16)|state
 */
1233 #if USE_MLANG_DLL
1234 static int
mlang_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1235 mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1236 {
1237 int len;
1238 int insize;
1239 HRESULT hr;
1240
1241 len = cv->mblen(cv, buf, bufsize);
1242 if (len == -1)
1243 return -1;
1244 insize = len;
1245 hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage,
1246 (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize);
1247 if (hr != S_OK || insize != len)
1248 return seterror(EILSEQ);
1249 return len;
1250 }
1251
1252 static int
mlang_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1253 mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1254 {
1255 char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */
1256 int tmpsize = MB_CHAR_MAX;
1257 int insize = wbufsize;
1258 HRESULT hr;
1259
1260 hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage,
1261 (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize);
1262 if (hr != S_OK || insize != wbufsize)
1263 return seterror(EILSEQ);
1264 else if (bufsize < tmpsize)
1265 return seterror(E2BIG);
1266 else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize)
1267 return seterror(EILSEQ);
1268 memcpy(buf, tmpbuf, tmpsize);
1269 return tmpsize;
1270 }
1271 #endif /*USE_MLANG_DLL*/
1272
1273 static int
utf16_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1274 utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1275 {
1276 int codepage = cv->codepage;
1277
1278 /* swap endian: 1200 <-> 1201 */
1279 if (cv->mode & UNICODE_MODE_SWAPPED)
1280 codepage ^= 1;
1281
1282 if (bufsize < 2)
1283 return seterror(EINVAL);
1284 if (codepage == 1200) /* little endian */
1285 wbuf[0] = (buf[1] << 8) | buf[0];
1286 else if (codepage == 1201) /* big endian */
1287 wbuf[0] = (buf[0] << 8) | buf[1];
1288
1289 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1290 {
1291 cv->mode |= UNICODE_MODE_BOM_DONE;
1292 if (wbuf[0] == 0xFFFE)
1293 {
1294 cv->mode |= UNICODE_MODE_SWAPPED;
1295 *wbufsize = 0;
1296 return 2;
1297 }
1298 else if (wbuf[0] == 0xFEFF)
1299 {
1300 *wbufsize = 0;
1301 return 2;
1302 }
1303 }
1304
1305 if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF)
1306 return seterror(EILSEQ);
1307 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1308 {
1309 if (bufsize < 4)
1310 return seterror(EINVAL);
1311 if (codepage == 1200) /* little endian */
1312 wbuf[1] = (buf[3] << 8) | buf[2];
1313 else if (codepage == 1201) /* big endian */
1314 wbuf[1] = (buf[2] << 8) | buf[3];
1315 if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF))
1316 return seterror(EILSEQ);
1317 *wbufsize = 2;
1318 return 4;
1319 }
1320 *wbufsize = 1;
1321 return 2;
1322 }
1323
1324 static int
utf16_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1325 utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1326 {
1327 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1328 {
1329 int r;
1330
1331 cv->mode |= UNICODE_MODE_BOM_DONE;
1332 if (bufsize < 2)
1333 return seterror(E2BIG);
1334 if (cv->codepage == 1200) /* little endian */
1335 memcpy(buf, "\xFF\xFE", 2);
1336 else if (cv->codepage == 1201) /* big endian */
1337 memcpy(buf, "\xFE\xFF", 2);
1338
1339 r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2);
1340 if (r == -1)
1341 return -1;
1342 return r + 2;
1343 }
1344
1345 if (bufsize < 2)
1346 return seterror(E2BIG);
1347 if (cv->codepage == 1200) /* little endian */
1348 {
1349 buf[0] = (wbuf[0] & 0x00FF);
1350 buf[1] = (wbuf[0] & 0xFF00) >> 8;
1351 }
1352 else if (cv->codepage == 1201) /* big endian */
1353 {
1354 buf[0] = (wbuf[0] & 0xFF00) >> 8;
1355 buf[1] = (wbuf[0] & 0x00FF);
1356 }
1357 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1358 {
1359 if (bufsize < 4)
1360 return seterror(E2BIG);
1361 if (cv->codepage == 1200) /* little endian */
1362 {
1363 buf[2] = (wbuf[1] & 0x00FF);
1364 buf[3] = (wbuf[1] & 0xFF00) >> 8;
1365 }
1366 else if (cv->codepage == 1201) /* big endian */
1367 {
1368 buf[2] = (wbuf[1] & 0xFF00) >> 8;
1369 buf[3] = (wbuf[1] & 0x00FF);
1370 }
1371 return 4;
1372 }
1373 return 2;
1374 }
1375
1376 static int
utf32_mbtowc(csconv_t * cv,const uchar * buf,int bufsize,ushort * wbuf,int * wbufsize)1377 utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1378 {
1379 int codepage = cv->codepage;
1380 uint wc = 0xD800;
1381
1382 /* swap endian: 12000 <-> 12001 */
1383 if (cv->mode & UNICODE_MODE_SWAPPED)
1384 codepage ^= 1;
1385
1386 if (bufsize < 4)
1387 return seterror(EINVAL);
1388 if (codepage == 12000) /* little endian */
1389 wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
1390 else if (codepage == 12001) /* big endian */
1391 wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
1392
1393 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1394 {
1395 cv->mode |= UNICODE_MODE_BOM_DONE;
1396 if (wc == 0xFFFE0000)
1397 {
1398 cv->mode |= UNICODE_MODE_SWAPPED;
1399 *wbufsize = 0;
1400 return 4;
1401 }
1402 else if (wc == 0x0000FEFF)
1403 {
1404 *wbufsize = 0;
1405 return 4;
1406 }
1407 }
1408
1409 if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc)
1410 return seterror(EILSEQ);
1411 ucs4_to_utf16(wc, wbuf, wbufsize);
1412 return 4;
1413 }
1414
1415 static int
utf32_wctomb(csconv_t * cv,ushort * wbuf,int wbufsize,uchar * buf,int bufsize)1416 utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1417 {
1418 uint wc;
1419
1420 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1421 {
1422 int r;
1423
1424 cv->mode |= UNICODE_MODE_BOM_DONE;
1425 if (bufsize < 4)
1426 return seterror(E2BIG);
1427 if (cv->codepage == 12000) /* little endian */
1428 memcpy(buf, "\xFF\xFE\x00\x00", 4);
1429 else if (cv->codepage == 12001) /* big endian */
1430 memcpy(buf, "\x00\x00\xFE\xFF", 4);
1431
1432 r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4);
1433 if (r == -1)
1434 return -1;
1435 return r + 4;
1436 }
1437
1438 if (bufsize < 4)
1439 return seterror(E2BIG);
1440 wc = utf16_to_ucs4(wbuf);
1441 if (cv->codepage == 12000) /* little endian */
1442 {
1443 buf[0] = wc & 0x000000FF;
1444 buf[1] = (wc & 0x0000FF00) >> 8;
1445 buf[2] = (wc & 0x00FF0000) >> 16;
1446 buf[3] = (wc & 0xFF000000) >> 24;
1447 }
1448 else if (cv->codepage == 12001) /* big endian */
1449 {
1450 buf[0] = (wc & 0xFF000000) >> 24;
1451 buf[1] = (wc & 0x00FF0000) >> 16;
1452 buf[2] = (wc & 0x0000FF00) >> 8;
1453 buf[3] = wc & 0x000000FF;
1454 }
1455 return 4;
1456 }
1457
/*
 * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
 * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow
 *        1 byte Kana)
 * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte
 *        Kana - SO/SI)
 *
 * MultiByteToWideChar() and WideCharToMultiByte() behave differently
 * depending on Windows version.  On XP, WideCharToMultiByte() doesn't
 * terminate result sequence with ascii escape.  But Vista does.
 * Use MLang instead.
 *
 * Note: USE_MLANG_DLL is #undef'ed near the top of this file, so the
 * MLang based code guarded by it is currently compiled out.
 */

/* The ISO-2022 conversion state packs the active character set id
 * into bits 8..15 and the shift state into bits 0..7. */
#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift))
/* Extract the character set id from a packed mode. */
#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF)
/* Extract the shift state from a packed mode. */
#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF)

/* Shift states: shifted in (the initial state) and shifted out. */
#define ISO2022_SI 0
#define ISO2022_SO 1

#if USE_MLANG_DLL
/* shift in */
static const char iso2022_SI_seq[] = "\x0F";
/* shift out */
static const char iso2022_SO_seq[] = "\x0E";

/* Description of one escape sequence that designates a character
 * set. */
typedef struct iso2022_esc_t iso2022_esc_t;
struct iso2022_esc_t {
    const char *esc;   /* The escape sequence itself.  */
    int esc_len;       /* Length of ESC in bytes.  */
    int len;           /* Bytes per character in this character set.  */
    int cs;            /* Character set id (ISO2022JP_CS_xxx).  */
};
#endif

/* Character set ids.  They are also used as index into the
 * iso2022jp_esc table. */
#define ISO2022JP_CS_ASCII 0
#define ISO2022JP_CS_JISX0201_ROMAN 1
#define ISO2022JP_CS_JISX0201_KANA 2
#define ISO2022JP_CS_JISX0208_1978 3
#define ISO2022JP_CS_JISX0208_1983 4
#define ISO2022JP_CS_JISX0212 5

#if USE_MLANG_DLL
/* Table of the supported escape sequences, terminated by an entry
 * with a NULL sequence.  The converters index this table directly
 * with a character set id, so the order of the entries must match
 * the ISO2022JP_CS_xxx values above. */
static iso2022_esc_t iso2022jp_esc[] = {
    {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII},
    {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN},
    {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA},
    {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */
    {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983},
    {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212},
    {NULL, 0, 0, 0}
};
#endif /*USE_MLANG_DLL*/
1511
1512 #if USE_MLANG_DLL
/*
 * Convert one ISO-2022-JP item from BUF to UTF-16.
 *
 * An item is either a control sequence (a supported escape sequence,
 * shift-out or shift-in), which only updates CV->mode and yields
 * *WBUFSIZE = 0, or one character of the currently selected character
 * set.  A character is converted by prefixing it with the escape
 * sequence of its character set in a scratch buffer and passing that
 * to ConvertINetMultiByteToUnicode() with a fresh state.
 *
 * Returns the number of input bytes consumed, or -1 with errno set to
 * EINVAL (incomplete input) or EILSEQ (unsupported or invalid input).
 */
static int
iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize,
                 ushort *wbuf, int *wbufsize)
{
    iso2022_esc_t *iesc = iso2022jp_esc;
    char tmp[MB_CHAR_MAX];
    int insize;
    HRESULT hr;
    DWORD dummy = 0;
    int len;
    int esc_len;
    int cs;
    int shift;
    int i;

    if (buf[0] == 0x1B)
    {
        /* Escape sequence: look it up in the table. */
        for (i = 0; iesc[i].esc != NULL; ++i)
        {
            esc_len = iesc[i].esc_len;
            if (bufsize < esc_len)
            {
                /* The input is a proper prefix of this sequence:
                 * report it as incomplete. */
                if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0)
                    return seterror(EINVAL);
            }
            else
            {
                if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0)
                {
                    /* Select the character set; this also resets the
                     * shift state to shift-in. */
                    cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI);
                    *wbufsize = 0;
                    return esc_len;
                }
            }
        }
        /* not supported escape sequence */
        return seterror(EILSEQ);
    }
    else if (buf[0] == iso2022_SO_seq[0])
    {
        /* Shift-out: keep the character set, change the shift state. */
        cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO);
        *wbufsize = 0;
        return 1;
    }
    else if (buf[0] == iso2022_SI_seq[0])
    {
        /* Shift-in: keep the character set, change the shift state. */
        cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI);
        *wbufsize = 0;
        return 1;
    }

    cs = ISO2022_MODE_CS(cv->mode);
    shift = ISO2022_MODE_SHIFT(cv->mode);

    /* reset the mode for informal sequence */
    if (buf[0] < 0x20)
    {
        cs = ISO2022JP_CS_ASCII;
        shift = ISO2022_SI;
    }

    /* The table is indexed by character set id here. */
    len = iesc[cs].len;
    if (bufsize < len)
        return seterror(EINVAL);
    /* All bytes of a character must be 7-bit. */
    for (i = 0; i < len; ++i)
        if (!(buf[i] < 0x80))
            return seterror(EILSEQ);
    /* Build "escape sequence [+ shift-out] + character" in TMP. */
    esc_len = iesc[cs].esc_len;
    memcpy(tmp, iesc[cs].esc, esc_len);
    if (shift == ISO2022_SO)
    {
        memcpy(tmp + esc_len, iso2022_SO_seq, 1);
        esc_len += 1;
    }
    memcpy(tmp + esc_len, buf, len);

    if ((cv->codepage == 50220 || cv->codepage == 50221
         || cv->codepage == 50222) && shift == ISO2022_SO)
    {
        /* XXX: shift-out cannot be used for mbtowc (both kernel and
         * mlang).  Rebuild TMP with the JIS X 0201 Kana escape
         * sequence instead. */
        esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len;
        memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len);
        memcpy(tmp + esc_len, buf, len);
    }

    /* DUMMY is a throw-away conversion state; the real state is kept
     * in CV->mode by this function. */
    insize = len + esc_len;
    hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage,
            (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize);
    if (hr != S_OK || insize != len + esc_len)
        return seterror(EILSEQ);

    /* Check for conversion error.  Assuming defaultChar is 0x3F. */
    /* ascii should be converted from ascii */
    if (wbuf[0] == buf[0]
        && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
        return seterror(EILSEQ);

    /* reset the mode for informal sequence */
    if (cv->mode != ISO2022_MODE(cs, shift))
        cv->mode = ISO2022_MODE(cs, shift);

    return len;
}
1617 #endif /*USE_MLANG_DLL*/
1618
1619
1620 #if USE_MLANG_DLL
/*
 * Convert the WBUFSIZE UTF-16 units at WBUF to ISO-2022-JP and write
 * the result to BUF (at most BUFSIZE bytes).
 *
 * The character is converted with ConvertINetUnicodeToMultiByte()
 * using a fresh state; the escape sequence it produced is then
 * removed or adjusted depending on the state kept in CV->mode, so
 * that a designation is only emitted when the character set or shift
 * state actually changes.  On success CV->mode is updated.
 *
 * Returns the number of bytes written, or -1 with errno set to EILSEQ
 * (conversion failed or unexpected output) or E2BIG (BUF too small).
 */
static int
iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
{
    iso2022_esc_t *iesc = iso2022jp_esc;
    char tmp[MB_CHAR_MAX];
    int tmpsize = MB_CHAR_MAX;
    int insize = wbufsize;
    HRESULT hr;
    DWORD dummy = 0;
    int len;
    int esc_len;
    int cs;
    int shift;
    int i;

    /*
     * MultiByte = [escape sequence] + character + [escape sequence]
     *
     * Whether trailing escape sequence is added depends on which API is
     * used (kernel or MLang, and its version).
     */
    hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage,
            (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize);
    if (hr != S_OK || insize != wbufsize)
        return seterror(EILSEQ);
    else if (bufsize < tmpsize)
        return seterror(E2BIG);

    if (tmpsize == 1)
    {
        /* A single output byte carries no escape sequence and is
         * treated as ASCII. */
        cs = ISO2022JP_CS_ASCII;
        esc_len = 0;
    }
    else
    {
        /* Identify the leading escape sequence.  The search starts at
         * index 1, i.e. the ASCII entry is skipped. */
        for (i = 1; iesc[i].esc != NULL; ++i)
        {
            esc_len = iesc[i].esc_len;
            if (strncmp(tmp, iesc[i].esc, esc_len) == 0)
            {
                cs = iesc[i].cs;
                break;
            }
        }
        if (iesc[i].esc == NULL)
            /* not supported escape sequence */
            return seterror(EILSEQ);
    }

    /* A shift-out directly behind the escape sequence counts as part
     * of the prefix. */
    shift = ISO2022_SI;
    if (tmp[esc_len] == iso2022_SO_seq[0])
    {
        shift = ISO2022_SO;
        esc_len += 1;
    }

    /* The table is indexed by character set id here. */
    len = iesc[cs].len;

    /* Check for converting error.  Assuming defaultChar is 0x3F. */
    /* ascii should be converted from ascii */
    if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80))
        return seterror(EILSEQ);
    else if (tmpsize < esc_len + len)
        return seterror(EILSEQ);

    if (cv->mode == ISO2022_MODE(cs, shift))
    {
        /* remove escape sequence */
        if (esc_len != 0)
            memmove(tmp, tmp + esc_len, len);
        esc_len = 0;
    }
    else
    {
        if (cs == ISO2022JP_CS_ASCII)
        {
            /* The API output had no escape sequence; insert the ASCII
             * designation in front of the character. */
            esc_len = iesc[ISO2022JP_CS_ASCII].esc_len;
            memmove(tmp + esc_len, tmp, len);
            memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len);
        }
        if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO)
        {
            /* shift-in before changing to other mode */
            memmove(tmp + 1, tmp, len + esc_len);
            memcpy(tmp, iso2022_SI_seq, 1);
            esc_len += 1;
        }
    }

    if (bufsize < len + esc_len)
        return seterror(E2BIG);
    memcpy(buf, tmp, len + esc_len);
    /* Remember the state that is now in effect in the output. */
    cv->mode = ISO2022_MODE(cs, shift);
    return len + esc_len;
}
1716 #endif /*USE_MLANG_DLL*/
1717
1718 #if USE_MLANG_DLL
1719 static int
iso2022jp_flush(csconv_t * cv,uchar * buf,int bufsize)1720 iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize)
1721 {
1722 iso2022_esc_t *iesc = iso2022jp_esc;
1723 int esc_len;
1724
1725 if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
1726 {
1727 esc_len = 0;
1728 if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1729 esc_len += 1;
1730 if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1731 esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1732 if (bufsize < esc_len)
1733 return seterror(E2BIG);
1734
1735 esc_len = 0;
1736 if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1737 {
1738 memcpy(buf, iso2022_SI_seq, 1);
1739 esc_len += 1;
1740 }
1741 if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1742 {
1743 memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc,
1744 iesc[ISO2022JP_CS_ASCII].esc_len);
1745 esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1746 }
1747 return esc_len;
1748 }
1749 return 0;
1750 }
1751 #endif /*USE_MLANG_DLL*/
1752
1753
1754 gpgrt_w32_iconv_t
gpgrt_w32_iconv_open(const char * tocode,const char * fromcode)1755 gpgrt_w32_iconv_open (const char *tocode, const char *fromcode)
1756 {
1757 gpgrt_w32_iconv_t cd;
1758
1759 cd = calloc(1, sizeof *cd);
1760 if (!cd)
1761 return (gpgrt_w32_iconv_t)(-1);
1762
1763 /* reset the errno to prevent reporting wrong error code.
1764 * 0 for unsorted error. */
1765 _gpg_err_set_errno (0);
1766 if (win_iconv_open(cd, tocode, fromcode))
1767 return cd;
1768
1769 free(cd);
1770
1771 return (gpgrt_w32_iconv_t)(-1);
1772 }
1773
1774 int
gpgrt_w32_iconv_close(gpgrt_w32_iconv_t cd)1775 gpgrt_w32_iconv_close (gpgrt_w32_iconv_t cd)
1776 {
1777 if (cd)
1778 {
1779 free (cd);
1780 }
1781
1782 return 0;
1783 }
1784
1785 size_t
gpgrt_w32_iconv(gpgrt_w32_iconv_t cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)1786 gpgrt_w32_iconv (gpgrt_w32_iconv_t cd,
1787 const char **inbuf, size_t *inbytesleft,
1788 char **outbuf, size_t *outbytesleft)
1789 {
1790 size_t r;
1791
1792 r = win_iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
1793 _gpg_err_set_errno (*(cd->_errno()));
1794 return r;
1795 }
1796