1 2 #ifdef USE_ICONV_H 3 #include <iconv.h> 4 #include <windows.h> 5 #include <errno.h> 6 #include <string.h> 7 #include <stdlib.h> 8 #else 9 #include "win_iconv.c" 10 #endif 11 12 #include <stdio.h> 13 14 const char * 15 tohex(const char *str, int size) 16 { 17 static char buf[BUFSIZ]; 18 char *pbuf = buf; 19 int i; 20 buf[0] = 0; 21 for (i = 0; i < size; ++i) 22 pbuf += sprintf(pbuf, "%02X", str[i] & 0xFF); 23 return buf; 24 } 25 26 const char * 27 errstr(int errcode) 28 { 29 static char buf[BUFSIZ]; 30 switch (errcode) 31 { 32 case 0: return "NOERROR"; 33 case EINVAL: return "EINVAL"; 34 case EILSEQ: return "EILSEQ"; 35 case E2BIG: return "E2BIG"; 36 } 37 sprintf(buf, "%d\n", errcode); 38 return buf; 39 } 40 41 #ifdef USE_LIBICONV_DLL 42 int use_dll; 43 44 int 45 setdll(const char *dllpath) 46 { 47 char buf[BUFSIZ]; 48 rec_iconv_t cd; 49 50 sprintf(buf, "WINICONV_LIBICONV_DLL=%s", dllpath); 51 putenv(buf); 52 if (libiconv_iconv_open(&cd, "ascii", "ascii")) 53 { 54 FreeLibrary(cd.hlibiconv); 55 use_dll = TRUE; 56 return TRUE; 57 } 58 use_dll = FALSE; 59 return FALSE; 60 } 61 #endif 62 63 /* 64 * We can test the codepage that is installed in the system. 65 */ 66 int 67 check_enc(const char *encname, int codepage) 68 { 69 iconv_t cd; 70 int cp; 71 cd = iconv_open("utf-8", encname); 72 if (cd == (iconv_t)(-1)) 73 { 74 printf("%s(%d) IS NOT SUPPORTED: SKIP THE TEST\n", encname, codepage); 75 return FALSE; 76 } 77 #ifndef USE_ICONV_H 78 cp = ((rec_iconv_t *)cd)->from.codepage; 79 if (cp != codepage) 80 { 81 printf("%s(%d) ALIAS IS MAPPED TO DIFFERENT CODEPAGE (%d)\n", encname, codepage, cp); 82 exit(1); 83 } 84 #endif 85 iconv_close(cd); 86 return TRUE; 87 } 88 89 void 90 test(const char *from, const char *fromstr, int fromsize, const char *to, const char *tostr, int tosize, int errcode, int bufsize, int line) 91 { 92 char outbuf[BUFSIZ]; 93 const char *pin; 94 char *pout; 95 size_t inbytesleft; 96 size_t outbytesleft; 97 iconv_t cd; 98 size_t r; 99 #ifdef USE_LIBICONV_DLL 100 char dllpath[_MAX_PATH]; 101 #endif 102 103 cd = iconv_open(to, from); 104 if (cd == (iconv_t)(-1)) 105 { 106 printf("%s -> %s: NG: INVALID ENCODING NAME: line=%d\n", from, to, line); 107 exit(1); 108 } 109 110 #ifdef USE_LIBICONV_DLL 111 if (((rec_iconv_t *)cd)->hlibiconv != NULL) 112 GetModuleFileNameA(((rec_iconv_t *)cd)->hlibiconv, dllpath, sizeof(dllpath)); 113 114 if (use_dll && ((rec_iconv_t *)cd)->hlibiconv == NULL) 115 { 116 printf("%s: %s -> %s: NG: FAILED TO USE DLL: line=%d\n", dllpath, from, to, line); 117 exit(1); 118 } 119 else if (!use_dll && ((rec_iconv_t *)cd)->hlibiconv != NULL) 120 { 121 printf("%s: %s -> %s: NG: DLL IS LOADED UNEXPECTEDLY: line=%d\n", dllpath, from, to, line); 122 exit(1); 123 } 124 #endif 125 126 errno = 0; 127 128 pin = (char *)fromstr; 129 pout = outbuf; 130 inbytesleft = fromsize; 131 outbytesleft = bufsize; 132 r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); 133 if (r != (size_t)(-1)) 134 r = iconv(cd, NULL, NULL, &pout, &outbytesleft); 135 *pout = 0; 136 137 #ifdef USE_LIBICONV_DLL 138 if (use_dll) 139 printf("%s: ", dllpath); 140 #endif 141 printf("%s(%s) -> ", from, tohex(fromstr, fromsize)); 142 printf("%s(%s%s%s): ", to, tohex(tostr, tosize), 143 errcode == 0 ? "" : ":", 144 errcode == 0 ? "" : errstr(errcode)); 145 if (strcmp(outbuf, tostr) == 0 && errno == errcode) 146 printf("OK\n"); 147 else 148 { 149 printf("RESULT(%s:%s): ", tohex(outbuf, sizeof(outbuf) - outbytesleft), 150 errstr(errno)); 151 printf("NG: line=%d\n", line); 152 exit(1); 153 } 154 } 155 156 #define STATIC_STRLEN(arr) (sizeof(arr) - 1) 157 158 #define success(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), 0, BUFSIZ, __LINE__) 159 #define einval(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EINVAL, BUFSIZ, __LINE__) 160 #define eilseq(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EILSEQ, BUFSIZ, __LINE__) 161 #define e2big(from, fromstr, to, tostr, bufsize) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), E2BIG, bufsize, __LINE__) 162 163 int 164 main(int argc, char **argv) 165 { 166 #ifdef USE_LIBICONV_DLL 167 /* test use of dll if $DEFAULT_LIBICONV_DLL was defined. */ 168 if (setdll("")) 169 { 170 success("ascii", "ABC", "ascii", "ABC"); 171 success("ascii", "ABC", "utf-16be", "\x00\x41\x00\x42\x00\x43"); 172 } 173 else 174 { 175 printf("\nDLL TEST IS SKIPPED\n\n"); 176 } 177 178 setdll("none"); 179 #endif 180 181 if (check_enc("ascii", 20127)) 182 { 183 success("ascii", "ABC", "ascii", "ABC"); 184 /* MSB is dropped. Hmm... */ 185 success("ascii", "\x80\xFF", "ascii", "\x00\x7F"); 186 } 187 188 /* unicode (CP1200 CP1201 CP12000 CP12001 CP65001) */ 189 if (check_enc("utf-8", 65001) 190 && check_enc("utf-16be", 1201) && check_enc("utf-16le", 1200) 191 && check_enc("utf-32be", 12001) && check_enc("utf-32le", 12000) 192 ) 193 { 194 /* Test the BOM behavior 195 * 1. Remove the BOM when "fromcode" is utf-16 or utf-32. 196 * 2. Add the BOM when "tocode" is utf-16 or utf-32. */ 197 success("utf-16", "\xFE\xFF\x01\x02", "utf-16be", "\x01\x02"); 198 success("utf-16", "\xFF\xFE\x02\x01", "utf-16be", "\x01\x02"); 199 success("utf-32", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\x01\x02"); 200 success("utf-32", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\x01\x02"); 201 success("utf-16", "\xFE\xFF\x00\x01", "utf-8", "\x01"); 202 #ifndef GLIB_COMPILATION 203 success("utf-8", "\x01", "utf-16", "\xFE\xFF\x00\x01"); 204 success("utf-8", "\x01", "utf-32", "\x00\x00\xFE\xFF\x00\x00\x00\x01"); 205 #else 206 success("utf-8", "\x01", "utf-16", "\xFF\xFE\x01\x00"); 207 success("utf-8", "\x01", "utf-32", "\xFF\xFE\x00\x00\x01\x00\x00\x00"); 208 #endif 209 210 success("utf-16be", "\xFE\xFF\x01\x02", "utf-16be", "\xFE\xFF\x01\x02"); 211 success("utf-16le", "\xFF\xFE\x02\x01", "utf-16be", "\xFE\xFF\x01\x02"); 212 success("utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02"); 213 success("utf-32le", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02"); 214 success("utf-16be", "\xFE\xFF\x00\x01", "utf-8", "\xEF\xBB\xBF\x01"); 215 success("utf-8", "\xEF\xBB\xBF\x01", "utf-8", "\xEF\xBB\xBF\x01"); 216 217 success("utf-16be", "\x01\x02", "utf-16le", "\x02\x01"); 218 success("utf-16le", "\x02\x01", "utf-16be", "\x01\x02"); 219 success("utf-16be", "\xFE\xFF", "utf-16le", "\xFF\xFE"); 220 success("utf-16le", "\xFF\xFE", "utf-16be", "\xFE\xFF"); 221 success("utf-32be", "\x00\x00\x03\x04", "utf-32le", "\x04\x03\x00\x00"); 222 success("utf-32le", "\x04\x03\x00\x00", "utf-32be", "\x00\x00\x03\x04"); 223 success("utf-32be", "\x00\x00\xFF\xFF", "utf-16be", "\xFF\xFF"); 224 success("utf-16be", "\xFF\xFF", "utf-32be", "\x00\x00\xFF\xFF"); 225 success("utf-32be", "\x00\x01\x00\x00", "utf-16be", "\xD8\x00\xDC\x00"); 226 success("utf-16be", "\xD8\x00\xDC\x00", "utf-32be", "\x00\x01\x00\x00"); 227 success("utf-32be", "\x00\x10\xFF\xFF", "utf-16be", "\xDB\xFF\xDF\xFF"); 228 success("utf-16be", "\xDB\xFF\xDF\xFF", "utf-32be", "\x00\x10\xFF\xFF"); 229 eilseq("utf-32be", "\x00\x11\x00\x00", "utf-16be", ""); 230 eilseq("utf-16be", "\xDB\xFF\xE0\x00", "utf-32be", ""); 231 success("utf-8", "\xE3\x81\x82", "utf-16be", "\x30\x42"); 232 einval("utf-8", "\xE3", "utf-16be", ""); 233 } 234 235 /* Japanese (CP932 CP20932 CP50220 CP50221 CP50222 CP51932) */ 236 if (check_enc("cp932", 932) 237 && check_enc("cp20932", 20932) && check_enc("euc-jp", 51932) 238 && check_enc("cp50220", 50220) && check_enc("cp50221", 50221) 239 && check_enc("cp50222", 50222) && check_enc("iso-2022-jp", 50221)) 240 { 241 /* Test the compatibility for each other Japanese codepage. 242 * And validate the escape sequence handling for iso-2022-jp. */ 243 success("utf-16be", "\xFF\x5E", "cp932", "\x81\x60"); 244 success("utf-16be", "\x30\x1C", "cp932", "\x81\x60"); 245 success("utf-16be", "\xFF\x5E", "cp932//nocompat", "\x81\x60"); 246 eilseq("utf-16be", "\x30\x1C", "cp932//nocompat", ""); 247 success("euc-jp", "\xA4\xA2", "utf-16be", "\x30\x42"); 248 einval("euc-jp", "\xA4\xA2\xA4", "utf-16be", "\x30\x42"); 249 eilseq("euc-jp", "\xA4\xA2\xFF\xFF", "utf-16be", "\x30\x42"); 250 success("cp932", "\x81\x60", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42"); 251 success("UTF-16BE", "\xFF\x5E", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42"); 252 eilseq("UTF-16BE", "\x30\x1C", "iso-2022-jp//nocompat", ""); 253 success("UTF-16BE", "\x30\x42\x30\x44", "iso-2022-jp", "\x1B\x24\x42\x24\x22\x24\x24\x1B\x28\x42"); 254 success("iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42", "UTF-16BE", "\xFF\x5E"); 255 } 256 257 /* 258 * test for //translit 259 * U+FF41 (FULLWIDTH LATIN SMALL LETTER A) <-> U+0062 (LATIN SMALL LETTER A) 260 */ 261 eilseq("UTF-16BE", "\xFF\x41", "iso-8859-1", ""); 262 success("UTF-16BE", "\xFF\x41", "iso-8859-1//translit", "a"); 263 264 /* 265 * test for //translit 266 * Some character, not in "to" encoding -> DEFAULT CHARACTER (maybe "?") 267 */ 268 eilseq("UTF-16BE", "\x30\x42", "ascii", ""); 269 success("UTF-16BE", "\x30\x42", "ascii//translit", "?"); 270 271 /* 272 * test for //ignore 273 */ 274 eilseq("UTF-8", "\xFF A \xFF B", "ascii//ignore", " A B"); 275 eilseq("UTF-8", "\xEF\xBC\xA1 A \xEF\xBC\xA2 B", "ascii//ignore", " A B"); 276 eilseq("UTF-8", "\xEF\x01 A \xEF\x02 B", "ascii//ignore", "\x01 A \x02 B"); 277 278 /* 279 * TODO: 280 * Test for state after iconv() failed. 281 * Ensure iconv() error is safe and continuable. 282 */ 283 284 return 0; 285 } 286 287