1 
2 #ifdef USE_ICONV_H
3 #include <iconv.h>
4 #include <windows.h>
5 #include <errno.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #else
9 #include "win_iconv.c"
10 #endif
11 
12 #include <stdio.h>
13 
/*
 * Format the first `size` bytes of `str` as uppercase hexadecimal, two
 * digits per byte with no separators.
 *
 * The result is stored in a static buffer that the next call overwrites, so
 * it must be consumed before tohex() is called again.  Input that does not
 * fit is truncated instead of being written past the end of the buffer.
 */
const char *
tohex(const char *str, int size)
{
    static char buf[BUFSIZ];
    char *pbuf = buf;
    int i;

    buf[0] = 0;
    for (i = 0; i < size; ++i)
    {
        /* Each byte needs two digits plus room for the terminating NUL. */
        if ((size_t)(pbuf - buf) + 3 > sizeof(buf))
            break;
        /* Mask to 0..255 so a negative char is not sign-extended. */
        pbuf += sprintf(pbuf, "%02X", str[i] & 0xFF);
    }
    return buf;
}
25 
/*
 * Return a printable name for an errno value.
 *
 * 0, EINVAL, EILSEQ and E2BIG map to fixed names.  Any other value is
 * formatted as a decimal number into a static buffer, which the next such
 * call overwrites.
 */
const char *
errstr(int errcode)
{
    static char buf[BUFSIZ];

    switch (errcode)
    {
    case 0: return "NOERROR";
    case EINVAL: return "EINVAL";
    case EILSEQ: return "EILSEQ";
    case E2BIG: return "E2BIG";
    default: break;
    }
    /* No trailing newline: callers print the result in the middle of a line. */
    sprintf(buf, "%d", errcode);
    return buf;
}
40 
#ifdef USE_LIBICONV_DLL
/* TRUE while conversions are expected to go through a libiconv DLL.
 * Written by setdll() and checked by test(). */
int use_dll;

/*
 * Set the WINICONV_LIBICONV_DLL environment variable to `dllpath` and probe
 * whether libiconv_iconv_open() succeeds with that setting.
 *
 * Returns TRUE and sets use_dll to TRUE when the probe succeeds; otherwise
 * returns FALSE and sets use_dll to FALSE.  A path too long for the local
 * buffer is rejected (FALSE) without touching the environment.
 */
int
setdll(const char *dllpath)
{
    static const char prefix[] = "WINICONV_LIBICONV_DLL=";
    char buf[BUFSIZ];
    rec_iconv_t cd;

    /* prefix + path + NUL must fit in buf. */
    if (strlen(dllpath) >= sizeof(buf) - (sizeof(prefix) - 1))
    {
        use_dll = FALSE;
        return FALSE;
    }

    sprintf(buf, "%s%s", prefix, dllpath);
    putenv(buf);
    if (libiconv_iconv_open(&cd, "ascii", "ascii"))
    {
        /* The probe only needed to know the open works; drop the handle. */
        FreeLibrary(cd.hlibiconv);
        use_dll = TRUE;
        return TRUE;
    }
    use_dll = FALSE;
    return FALSE;
}
#endif
62 
63 /*
64  * We can test the codepage that is installed in the system.
65  */
66 int
check_enc(const char * encname,int codepage)67 check_enc(const char *encname, int codepage)
68 {
69     iconv_t cd;
70     int cp;
71     cd = iconv_open("utf-8", encname);
72     if (cd == (iconv_t)(-1))
73     {
74         printf("%s(%d) IS NOT SUPPORTED: SKIP THE TEST\n", encname, codepage);
75         return FALSE;
76     }
77 #ifndef USE_ICONV_H
78     cp = ((rec_iconv_t *)cd)->from.codepage;
79     if (cp != codepage)
80     {
81         printf("%s(%d) ALIAS IS MAPPED TO DIFFERENT CODEPAGE (%d)\n", encname, codepage, cp);
82         exit(1);
83     }
84 #endif
85     iconv_close(cd);
86     return TRUE;
87 }
88 
89 void
test(const char * from,const char * fromstr,int fromsize,const char * to,const char * tostr,int tosize,int errcode,int bufsize,int line)90 test(const char *from, const char *fromstr, int fromsize, const char *to, const char *tostr, int tosize, int errcode, int bufsize, int line)
91 {
92     char outbuf[BUFSIZ];
93     const char *pin;
94     char *pout;
95     size_t inbytesleft;
96     size_t outbytesleft;
97     iconv_t cd;
98     size_t r;
99 #ifdef USE_LIBICONV_DLL
100     char dllpath[_MAX_PATH];
101 #endif
102 
103     cd = iconv_open(to, from);
104     if (cd == (iconv_t)(-1))
105     {
106         printf("%s -> %s: NG: INVALID ENCODING NAME: line=%d\n", from, to, line);
107         exit(1);
108     }
109 
110 #ifdef USE_LIBICONV_DLL
111     if (((rec_iconv_t *)cd)->hlibiconv != NULL)
112         GetModuleFileNameA(((rec_iconv_t *)cd)->hlibiconv, dllpath, sizeof(dllpath));
113 
114     if (use_dll && ((rec_iconv_t *)cd)->hlibiconv == NULL)
115     {
116         printf("%s: %s -> %s: NG: FAILED TO USE DLL: line=%d\n", dllpath, from, to, line);
117         exit(1);
118     }
119     else if (!use_dll && ((rec_iconv_t *)cd)->hlibiconv != NULL)
120     {
121         printf("%s: %s -> %s: NG: DLL IS LOADED UNEXPECTEDLY: line=%d\n", dllpath, from, to, line);
122         exit(1);
123     }
124 #endif
125 
126     errno = 0;
127 
128     pin = (char *)fromstr;
129     pout = outbuf;
130     inbytesleft = fromsize;
131     outbytesleft = bufsize;
132     r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft);
133     if (r != (size_t)(-1))
134         r = iconv(cd, NULL, NULL, &pout, &outbytesleft);
135     *pout = 0;
136 
137 #ifdef USE_LIBICONV_DLL
138     if (use_dll)
139         printf("%s: ", dllpath);
140 #endif
141     printf("%s(%s) -> ", from, tohex(fromstr, fromsize));
142     printf("%s(%s%s%s): ", to, tohex(tostr, tosize),
143             errcode == 0 ? "" : ":",
144             errcode == 0 ? "" : errstr(errcode));
145     if (strcmp(outbuf, tostr) == 0 && errno == errcode)
146         printf("OK\n");
147     else
148     {
149         printf("RESULT(%s:%s): ", tohex(outbuf, sizeof(outbuf) - outbytesleft),
150                 errstr(errno));
151         printf("NG: line=%d\n", line);
152         exit(1);
153     }
154 }
155 
/* Byte count of a string literal or char array, excluding the terminating
 * NUL.  Unlike strlen(), embedded NUL bytes are counted. */
#define STATIC_STRLEN(arr) (sizeof(arr) - 1)

/* Common expansion for the wrappers below: derives both lengths from the
 * literals and records the invoking source line. */
#define RUN_CASE(from, fromstr, to, tostr, err, bufsize) \
    test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), err, bufsize, __LINE__)

/* Expect the conversion to finish without error. */
#define success(from, fromstr, to, tostr) RUN_CASE(from, fromstr, to, tostr, 0, BUFSIZ)
/* Expect errno == EINVAL after the conversion. */
#define einval(from, fromstr, to, tostr) RUN_CASE(from, fromstr, to, tostr, EINVAL, BUFSIZ)
/* Expect errno == EILSEQ after the conversion. */
#define eilseq(from, fromstr, to, tostr) RUN_CASE(from, fromstr, to, tostr, EILSEQ, BUFSIZ)
/* Expect errno == E2BIG when only `bufsize` bytes of output space are offered. */
#define e2big(from, fromstr, to, tostr, bufsize) RUN_CASE(from, fromstr, to, tostr, E2BIG, bufsize)
162 
/*
 * Test driver: runs the conversion cases below in order.
 *
 * test() exits the process with status 1 on the first failing case, so
 * reaching the final return means every executed case passed.  A group
 * guarded by check_enc() is skipped when any of its encodings cannot be
 * opened.  argc and argv are not used.
 */
int
main(int argc, char **argv)
{
#ifdef USE_LIBICONV_DLL
    /* test use of dll if $DEFAULT_LIBICONV_DLL was defined. */
    if (setdll(""))
    {
        success("ascii", "ABC", "ascii", "ABC");
        success("ascii", "ABC", "utf-16be", "\x00\x41\x00\x42\x00\x43");
    }
    else
    {
        printf("\nDLL TEST IS SKIPPED\n\n");
    }

    /* Switch WINICONV_LIBICONV_DLL to "none" for the remaining cases
     * (presumably disables DLL use -- confirm against win_iconv.c). */
    setdll("none");
#endif

    if (check_enc("ascii", 20127))
    {
        success("ascii", "ABC", "ascii", "ABC");
        /* MSB is dropped.  Hmm... */
        success("ascii", "\x80\xFF", "ascii", "\x00\x7F");
    }

    /* unicode (CP1200 CP1201 CP12000 CP12001 CP65001) */
    if (check_enc("utf-8", 65001)
            && check_enc("utf-16be", 1201) && check_enc("utf-16le", 1200)
            && check_enc("utf-32be", 12001) && check_enc("utf-32le", 12000)
            )
    {
        /* Test the BOM behavior
         * 1. Remove the BOM when "fromcode" is utf-16 or utf-32.
         * 2. Add the BOM when "tocode" is utf-16 or utf-32.  */
        success("utf-16", "\xFE\xFF\x01\x02", "utf-16be", "\x01\x02");
        success("utf-16", "\xFF\xFE\x02\x01", "utf-16be", "\x01\x02");
        success("utf-32", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\x01\x02");
        success("utf-32", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\x01\x02");
        success("utf-16", "\xFE\xFF\x00\x01", "utf-8", "\x01");
        /* The expected BOM and byte order for plain "utf-16"/"utf-32" output
         * differ between the two build variants. */
#ifndef GLIB_COMPILATION
        success("utf-8", "\x01", "utf-16", "\xFE\xFF\x00\x01");
        success("utf-8", "\x01", "utf-32", "\x00\x00\xFE\xFF\x00\x00\x00\x01");
#else
        success("utf-8", "\x01", "utf-16", "\xFF\xFE\x01\x00");
        success("utf-8", "\x01", "utf-32", "\xFF\xFE\x00\x00\x01\x00\x00\x00");
#endif

        /* With an explicit byte order on the source side, a leading BOM is
         * expected to be carried through as an ordinary character. */
        success("utf-16be", "\xFE\xFF\x01\x02", "utf-16be", "\xFE\xFF\x01\x02");
        success("utf-16le", "\xFF\xFE\x02\x01", "utf-16be", "\xFE\xFF\x01\x02");
        success("utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02");
        success("utf-32le", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02");
        success("utf-16be", "\xFE\xFF\x00\x01", "utf-8", "\xEF\xBB\xBF\x01");
        success("utf-8", "\xEF\xBB\xBF\x01", "utf-8", "\xEF\xBB\xBF\x01");

        /* Byte-order swaps, surrogate pairs, and out-of-range / truncated input. */
        success("utf-16be", "\x01\x02", "utf-16le", "\x02\x01");
        success("utf-16le", "\x02\x01", "utf-16be", "\x01\x02");
        success("utf-16be", "\xFE\xFF", "utf-16le", "\xFF\xFE");
        success("utf-16le", "\xFF\xFE", "utf-16be", "\xFE\xFF");
        success("utf-32be", "\x00\x00\x03\x04", "utf-32le", "\x04\x03\x00\x00");
        success("utf-32le", "\x04\x03\x00\x00", "utf-32be", "\x00\x00\x03\x04");
        success("utf-32be", "\x00\x00\xFF\xFF", "utf-16be", "\xFF\xFF");
        success("utf-16be", "\xFF\xFF", "utf-32be", "\x00\x00\xFF\xFF");
        success("utf-32be", "\x00\x01\x00\x00", "utf-16be", "\xD8\x00\xDC\x00");
        success("utf-16be", "\xD8\x00\xDC\x00", "utf-32be", "\x00\x01\x00\x00");
        success("utf-32be", "\x00\x10\xFF\xFF", "utf-16be", "\xDB\xFF\xDF\xFF");
        success("utf-16be", "\xDB\xFF\xDF\xFF", "utf-32be", "\x00\x10\xFF\xFF");
        eilseq("utf-32be", "\x00\x11\x00\x00", "utf-16be", "");
        eilseq("utf-16be", "\xDB\xFF\xE0\x00", "utf-32be", "");
        success("utf-8", "\xE3\x81\x82", "utf-16be", "\x30\x42");
        einval("utf-8", "\xE3", "utf-16be", "");
    }

    /* Japanese (CP932 CP20932 CP50220 CP50221 CP50222 CP51932) */
    if (check_enc("cp932", 932)
            && check_enc("cp20932", 20932) && check_enc("euc-jp", 51932)
            && check_enc("cp50220", 50220) && check_enc("cp50221", 50221)
            && check_enc("cp50222", 50222) && check_enc("iso-2022-jp", 50221))
    {
        /* Test the compatibility for each other Japanese codepage.
         * And validate the escape sequence handling for iso-2022-jp. */
        success("utf-16be", "\xFF\x5E", "cp932", "\x81\x60");
        success("utf-16be", "\x30\x1C", "cp932", "\x81\x60");
        success("utf-16be", "\xFF\x5E", "cp932//nocompat", "\x81\x60");
        eilseq("utf-16be", "\x30\x1C", "cp932//nocompat", "");
        success("euc-jp", "\xA4\xA2", "utf-16be", "\x30\x42");
        einval("euc-jp", "\xA4\xA2\xA4", "utf-16be", "\x30\x42");
        eilseq("euc-jp", "\xA4\xA2\xFF\xFF", "utf-16be", "\x30\x42");
        success("cp932", "\x81\x60", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42");
        success("UTF-16BE", "\xFF\x5E", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42");
        eilseq("UTF-16BE", "\x30\x1C", "iso-2022-jp//nocompat", "");
        success("UTF-16BE", "\x30\x42\x30\x44", "iso-2022-jp", "\x1B\x24\x42\x24\x22\x24\x24\x1B\x28\x42");
        success("iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42", "UTF-16BE", "\xFF\x5E");
    }

    /*
     * test for //translit
     * U+FF41 (FULLWIDTH LATIN SMALL LETTER A) <-> U+0061 (LATIN SMALL LETTER A)
     */
    eilseq("UTF-16BE", "\xFF\x41", "iso-8859-1", "");
    success("UTF-16BE", "\xFF\x41", "iso-8859-1//translit", "a");

    /*
     * test for //translit
     * Some character, not in "to" encoding -> DEFAULT CHARACTER (maybe "?")
     */
    eilseq("UTF-16BE", "\x30\x42", "ascii", "");
    success("UTF-16BE", "\x30\x42", "ascii//translit", "?");

    /*
     * test for //ignore
     * Each case still expects EILSEQ, together with the output that remains
     * once the unconvertible input has been left out.
     */
    eilseq("UTF-8", "\xFF A \xFF B", "ascii//ignore", " A  B");
    eilseq("UTF-8", "\xEF\xBC\xA1 A \xEF\xBC\xA2 B", "ascii//ignore", " A  B");
    eilseq("UTF-8", "\xEF\x01 A \xEF\x02 B", "ascii//ignore", "\x01 A \x02 B");

    /*
     * TODO:
     * Test for state after iconv() failed.
     * Ensure iconv() error is safe and continuable.
     */

    return 0;
}
286 
287