1 
2 #include "wc.h"
3 #include "iso2022.h"
4 #include "sjis.h"
5 #include "hz.h"
6 #include "big5.h"
7 #include "hkscs.h"
8 #include "johab.h"
9 #include "gbk.h"
10 #include "gb18030.h"
11 #include "uhc.h"
12 #include "viet.h"
13 #include "priv.h"
14 #ifdef USE_UNICODE
15 #include "utf8.h"
16 #include "utf7.h"
17 #endif
18 
/* Character-set list for plain US-ASCII: one entry (US-ASCII in the
 * WC_C_G0_CS94 slot) followed by the all-zero terminator entry. */
19 static wc_gset gset_usascii[] = {
20     { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
21     { 0, 0, 0 },
22 };
23 
/* gset_iso8859(no) defines a static table named gset_iso8859<no> holding
 * US-ASCII in WC_C_G0_CS94 and WC_CCS_ISO_8859_<no> in WC_C_G1_CS96 with
 * 0x80 OR-ed into the second field, then the all-zero terminator.
 * NOTE(review): the 0x80 bit presumably marks the 8-bit (high) half --
 * confirm against the wc_gset definition.
 * Tables are instantiated for parts 1-11 and 13-16; no part 12 table exists. */
24 #define gset_iso8859(no) \
25 static wc_gset gset_iso8859##no[] = { \
26     { WC_CCS_US_ASCII,      WC_C_G0_CS94, 1 }, \
27     { WC_CCS_ISO_8859_##no, WC_C_G1_CS96 | 0x80, 1 }, \
28     { 0, 0, 0 }, \
29 }
30 gset_iso8859(1); gset_iso8859(2); gset_iso8859(3); gset_iso8859(4);
31 gset_iso8859(5); gset_iso8859(6); gset_iso8859(7); gset_iso8859(8);
32 gset_iso8859(9); gset_iso8859(10); gset_iso8859(11);
33 gset_iso8859(13); gset_iso8859(14); gset_iso8859(15); gset_iso8859(16);
34 
/* gset_priv1(ccs, ces) defines a static table named gset_<ces> holding
 * US-ASCII (second field 0) and WC_CCS_<ccs> (second field 0x80), then the
 * all-zero terminator.  gset_cp(no) is shorthand for
 * gset_priv1(CP<no>, cp<no>).
 * gset_cp is defined before gset_priv1; that is fine because a macro body is
 * only expanded where the macro is used, and every use is below both. */
35 #define gset_cp(no) gset_priv1(CP##no, cp##no)
36 #define gset_priv1(ccs, ces) \
37 static wc_gset gset_##ces[] = { \
38     { WC_CCS_US_ASCII, 0, 1 }, \
39     { WC_CCS_##ccs,    0x80, 1 }, \
40     { 0, 0, 0 }, \
41 }
/* One table per supported code page. */
42 gset_cp(437); gset_cp(737); gset_cp(775); gset_cp(850); gset_cp(852);
43 gset_cp(855); gset_cp(856); gset_cp(857); gset_cp(860); gset_cp(861);
44 gset_cp(862); gset_cp(863); gset_cp(864); gset_cp(865); gset_cp(866);
45 gset_cp(869); gset_cp(874); gset_cp(1006);
46 gset_cp(1250); gset_cp(1251); gset_cp(1252); gset_cp(1253); gset_cp(1254);
47 gset_cp(1255); gset_cp(1256); gset_cp(1257);
/* These names do not follow the CP<no> pattern, so gset_priv1 is used directly. */
48 gset_priv1(KOI8_R, koi8r);
49 gset_priv1(KOI8_U, koi8u);
50 gset_priv1(NEXTSTEP, nextstep);
51 
/* Character-set list for ISO-2022-JP: US-ASCII and JIS X 0208, both in the
 * WC_C_G0_CS94 slot.  Only the US-ASCII entry has third field 1.
 * NOTE(review): the third field presumably flags sets that are available
 * without an explicit designation -- confirm against wc_gset. */
52 static wc_gset gset_iso2022jp[] = {
53     { WC_CCS_US_ASCII,     WC_C_G0_CS94, 1 },
54     { WC_CCS_JIS_X_0208,   WC_C_G0_CS94, 0 },
55     { 0, 0, 0 },
56 };
/* Character-set list for ISO-2022-JP-2: US-ASCII, JIS X 0208, JIS X 0212,
 * GB 2312 and KS X 1001 in WC_C_G0_CS94, plus ISO-8859-1 and ISO-8859-7 in
 * WC_C_G2_CS96.  Only US-ASCII has third field 1. */
57 static wc_gset gset_iso2022jp2[] = {
58     { WC_CCS_US_ASCII,     WC_C_G0_CS94, 1 },
59     { WC_CCS_JIS_X_0208,   WC_C_G0_CS94, 0 },
60     { WC_CCS_JIS_X_0212,   WC_C_G0_CS94, 0 },
61     { WC_CCS_GB_2312,      WC_C_G0_CS94, 0 },
62     { WC_CCS_KS_X_1001,    WC_C_G0_CS94, 0 },
63     { WC_CCS_ISO_8859_1,   WC_C_G2_CS96, 0 },
64     { WC_CCS_ISO_8859_7,   WC_C_G2_CS96, 0 },
65     { 0, 0, 0 },
66 };
/* Character-set list for ISO-2022-JP-3: US-ASCII, JIS X 0208 and the two
 * JIS X 0213 sets (WC_CCS_JIS_X_0213_1 / _2), all in WC_C_G0_CS94.
 * Only US-ASCII has third field 1. */
67 static wc_gset gset_iso2022jp3[] = {
68     { WC_CCS_US_ASCII,     WC_C_G0_CS94, 1 },
69     { WC_CCS_JIS_X_0208,   WC_C_G0_CS94, 0 },
70     { WC_CCS_JIS_X_0213_1, WC_C_G0_CS94, 0 },
71     { WC_CCS_JIS_X_0213_2, WC_C_G0_CS94, 0 },
72     { 0, 0, 0 },
73 };
/* Character-set list for ISO-2022-CN.  US-ASCII sits in WC_C_G0_CS94;
 * GB 2312, ISO-IR-165 and CNS 11643 set 1 share WC_C_G1_CS94; CNS 11643
 * set 2 uses WC_C_G2_CS94; CNS 11643 sets 3-7 use WC_C_G3_CS94.
 * US-ASCII and GB 2312 are the only entries with third field 1. */
74 static wc_gset gset_iso2022cn[] = {
75     { WC_CCS_US_ASCII,    WC_C_G0_CS94, 1 },
76     { WC_CCS_GB_2312,     WC_C_G1_CS94, 1 },
77     { WC_CCS_ISO_IR_165,  WC_C_G1_CS94, 0 },
78     { WC_CCS_CNS_11643_1, WC_C_G1_CS94, 0 },
79     { WC_CCS_CNS_11643_2, WC_C_G2_CS94, 0 },
80     { WC_CCS_CNS_11643_3, WC_C_G3_CS94, 0 },
81     { WC_CCS_CNS_11643_4, WC_C_G3_CS94, 0 },
82     { WC_CCS_CNS_11643_5, WC_C_G3_CS94, 0 },
83     { WC_CCS_CNS_11643_6, WC_C_G3_CS94, 0 },
84     { WC_CCS_CNS_11643_7, WC_C_G3_CS94, 0 },
85     { 0, 0, 0 },
86 };
/* Character-set list for ISO-2022-KR: US-ASCII in WC_C_G0_CS94 and
 * KS X 1001 in WC_C_G1_CS94, both with third field 1. */
87 static wc_gset gset_iso2022kr[] = {
88     { WC_CCS_US_ASCII,  WC_C_G0_CS94, 1 },
89     { WC_CCS_KS_X_1001, WC_C_G1_CS94, 1 },
90     { 0, 0, 0 },
91 };
/* Four-element wc_uchar tables, one per ISO-2022 family.  The ces_iso2022
 * macro places the matching table in the fifth initializer of each
 * ISO-2022 entry; every other kind of entry passes NULL there.
 * Each table alternates a CS94 constant with a CS96 constant.
 * NOTE(review): the meaning of the four positions is not visible in this
 * file -- check the code that reads this field before editing. */
92 static wc_uchar gset_ext_iso2022jp[] = {
93     WC_C_G0_CS94, WC_C_G2_CS96, WC_C_G0_CS94, WC_C_G2_CS96
94 };
95 static wc_uchar gset_ext_iso2022cn[] = {
96     WC_C_G2_CS94, WC_C_G2_CS96, WC_C_G2_CS94, WC_C_G2_CS96
97 };
98 static wc_uchar gset_ext_iso2022kr[] = {
99     WC_C_G1_CS94, WC_C_G1_CS96, WC_C_G1_CS94, WC_C_G1_CS96
100 };
/* Character-set list for EUC-JP.  US-ASCII is in WC_C_G0_CS94; every other
 * entry has 0x80 OR-ed into its slot constant: JIS X 0208 and JIS X 0213
 * plane 1 in G1, JIS X 0201K in G2, JIS X 0213 plane 2 and JIS X 0212 in G3.
 * The two JIS X 0213 entries have third field 0; all others have 1. */
101 static wc_gset gset_eucjp[] = {
102     { WC_CCS_US_ASCII,     WC_C_G0_CS94, 1 },
103     { WC_CCS_JIS_X_0208,   WC_C_G1_CS94 | 0x80, 1 },
104     { WC_CCS_JIS_X_0201K,  WC_C_G2_CS94 | 0x80, 1 },
105     { WC_CCS_JIS_X_0213_1, WC_C_G1_CS94 | 0x80, 0 },
106     { WC_CCS_JIS_X_0213_2, WC_C_G3_CS94 | 0x80, 0 },
107     { WC_CCS_JIS_X_0212,   WC_C_G3_CS94 | 0x80, 1 },
108     { 0, 0, 0 },
109 };
/* Character-set list for EUC-CN: US-ASCII in WC_C_G0_CS94 and GB 2312 in
 * WC_C_G1_CS94 with 0x80 OR-ed in. */
110 static wc_gset gset_euccn[] = {
111     { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
112     { WC_CCS_GB_2312,  WC_C_G1_CS94 | 0x80, 1 },
113     { 0, 0, 0 },
114 };
/* Character-set list for EUC-TW: US-ASCII in WC_C_G0_CS94, CNS 11643 set 1
 * in WC_C_G1_CS94 | 0x80 and WC_CCS_CNS_11643_X in WC_C_G2_CS94 | 0x80.
 * NOTE(review): WC_CCS_CNS_11643_X presumably stands for "any CNS 11643
 * plane" -- confirm against its definition. */
115 static wc_gset gset_euctw[] = {
116     { WC_CCS_US_ASCII,     WC_C_G0_CS94, 1 },
117     { WC_CCS_CNS_11643_1,  WC_C_G1_CS94 | 0x80, 1 },
118     { WC_CCS_CNS_11643_X,  WC_C_G2_CS94 | 0x80, 1 },
119     { 0, 0, 0 },
120 };
/* Character-set list for EUC-KR: US-ASCII in WC_C_G0_CS94 and KS X 1001 in
 * WC_C_G1_CS94 with 0x80 OR-ed in. */
121 static wc_gset gset_euckr[] = {
122     { WC_CCS_US_ASCII,  WC_C_G0_CS94, 1 },
123     { WC_CCS_KS_X_1001, WC_C_G1_CS94 | 0x80, 1 },
124     { 0, 0, 0 },
125 };
/* Character-set list for Shift_JIS: US-ASCII (second field 0) followed by
 * JIS X 0208, JIS X 0201K and the three WC_CCS_SJIS_EXT* sets, each with
 * second field 0x80.  No ISO-2022 slot constant is used in this table. */
126 static wc_gset gset_sjis[] = {
127     { WC_CCS_US_ASCII,     0, 1 },
128     { WC_CCS_JIS_X_0208,   0x80, 1 },
129     { WC_CCS_JIS_X_0201K,  0x80, 1 },
130     { WC_CCS_SJIS_EXT_1,   0x80, 1 },
131     { WC_CCS_SJIS_EXT_2,   0x80, 1 },
132     { WC_CCS_SJIS_EXT,     0x80, 1 },
133     { 0, 0, 0 },
134 };
/* Character-set list for Shift_JISX0213: US-ASCII (second field 0) followed
 * by JIS X 0208, JIS X 0201K and the two JIS X 0213 sets, each with second
 * field 0x80. */
135 static wc_gset gset_sjisx0213[] = {
136     { WC_CCS_US_ASCII,     0, 1 },
137     { WC_CCS_JIS_X_0208,   0x80, 1 },
138     { WC_CCS_JIS_X_0201K,  0x80, 1 },
139     { WC_CCS_JIS_X_0213_1, 0x80, 1 },
140     { WC_CCS_JIS_X_0213_2, 0x80, 1 },
141     { 0, 0, 0 },
142 };
/* Character-set list for HZ-GB-2312: US-ASCII and GB 2312, both with second
 * field 0.  GB 2312 has third field 0, unlike the other non-ISO-2022 tables
 * in this file, where every entry has third field 1. */
143 static wc_gset gset_hz[] = {
144     { WC_CCS_US_ASCII, 0, 1 },
145     { WC_CCS_GB_2312,  0, 0 },
146     { 0, 0, 0 },
147 };
/* Character-set list for Big5: US-ASCII (second field 0) followed by
 * WC_CCS_BIG5_1, WC_CCS_BIG5_2 and WC_CCS_BIG5, each with second field 0x80. */
148 static wc_gset gset_big5[] = {
149     { WC_CCS_US_ASCII, 0, 1 },
150     { WC_CCS_BIG5_1,   0x80, 1 },
151     { WC_CCS_BIG5_2,   0x80, 1 },
152     { WC_CCS_BIG5,     0x80, 1 },
153     { 0, 0, 0 },
154 };
/* Character-set list for HKSCS: the same four entries as gset_big5, followed
 * by WC_CCS_HKSCS_1, WC_CCS_HKSCS_2 and WC_CCS_HKSCS (second field 0x80). */
155 static wc_gset gset_hkscs[] = {
156     { WC_CCS_US_ASCII, 0, 1 },
157     { WC_CCS_BIG5_1,   0x80, 1 },
158     { WC_CCS_BIG5_2,   0x80, 1 },
159     { WC_CCS_BIG5,     0x80, 1 },
160     { WC_CCS_HKSCS_1,  0x80, 1 },
161     { WC_CCS_HKSCS_2,  0x80, 1 },
162     { WC_CCS_HKSCS,    0x80, 1 },
163     { 0, 0, 0 },
164 };
/* Character-set list for Johab: US-ASCII (second field 0) followed by
 * WC_CCS_JOHAB_1, _2, _3 and WC_CCS_JOHAB, each with second field 0x80. */
165 static wc_gset gset_johab[] = {
166     { WC_CCS_US_ASCII, 0, 1 },
167     { WC_CCS_JOHAB_1,  0x80, 1 },
168     { WC_CCS_JOHAB_2,  0x80, 1 },
169     { WC_CCS_JOHAB_3,  0x80, 1 },
170     { WC_CCS_JOHAB,    0x80, 1 },
171     { 0, 0, 0 },
172 };
/* Character-set list for GBK: US-ASCII (second field 0) followed by GB 2312,
 * WC_CCS_GBK_80, WC_CCS_GBK_1, WC_CCS_GBK_2 and WC_CCS_GBK, each with second
 * field 0x80.  WC_CCS_GBK_80 appears here but not in gset_gb18030. */
173 static wc_gset gset_gbk[] = {
174     { WC_CCS_US_ASCII,  0, 1 },
175     { WC_CCS_GB_2312,   0x80, 1 },
176     { WC_CCS_GBK_80,    0x80, 1 },
177     { WC_CCS_GBK_1,     0x80, 1 },
178     { WC_CCS_GBK_2,     0x80, 1 },
179     { WC_CCS_GBK,       0x80, 1 },
180     { 0, 0, 0 },
181 };
/* Character-set list for GB18030: US-ASCII (second field 0) followed by
 * GB 2312, the WC_CCS_GBK* sets (without WC_CCS_GBK_80), the three
 * WC_CCS_GBK_EXT* sets and WC_CCS_GB18030, each with second field 0x80. */
182 static wc_gset gset_gb18030[] = {
183     { WC_CCS_US_ASCII,  0, 1 },
184     { WC_CCS_GB_2312,   0x80, 1 },
185     { WC_CCS_GBK_1,     0x80, 1 },
186     { WC_CCS_GBK_2,     0x80, 1 },
187     { WC_CCS_GBK,       0x80, 1 },
188     { WC_CCS_GBK_EXT_1, 0x80, 1 },
189     { WC_CCS_GBK_EXT_2, 0x80, 1 },
190     { WC_CCS_GBK_EXT,   0x80, 1 },
191     { WC_CCS_GB18030,   0x80, 1 },
192     { 0, 0, 0 },
193 };
/* Character-set list for UHC: US-ASCII (second field 0) followed by
 * KS X 1001, WC_CCS_UHC_1, WC_CCS_UHC_2 and WC_CCS_UHC, each with second
 * field 0x80. */
194 static wc_gset gset_uhc[] = {
195     { WC_CCS_US_ASCII,  0, 1 },
196     { WC_CCS_KS_X_1001, 0x80, 1 },
197     { WC_CCS_UHC_1,     0x80, 1 },
198     { WC_CCS_UHC_2,     0x80, 1 },
199     { WC_CCS_UHC,       0x80, 1 },
200     { 0, 0, 0 },
201 };
/* gset_priv2(ccs, ces) defines a static table named gset_<ces> holding
 * US-ASCII (second field 0) plus the pair WC_CCS_<ccs>_1 and WC_CCS_<ccs>_2
 * (second field 0x80), then the all-zero terminator.  It is instantiated for
 * CP1258, VISCII 1.1 and VPS. */
202 #define gset_priv2(ccs, ces) \
203 static wc_gset gset_##ces[] = { \
204     { WC_CCS_US_ASCII, 0, 1 }, \
205     { WC_CCS_##ccs##_1, 0x80, 1 }, \
206     { WC_CCS_##ccs##_2, 0x80, 1 }, \
207     { 0, 0, 0 }, \
208 }
209 gset_priv2(CP1258, cp1258);
210 gset_priv2(VISCII_11, viscii11);
211 gset_priv2(VPS, vps);
/* Character-set list for TCVN-5712.  Written out by hand rather than through
 * gset_priv2 because it has three non-ASCII sets (WC_CCS_TCVN_5712_1.._3)
 * where that macro emits only two. */
212 static wc_gset gset_tcvn5712[] = {
213     { WC_CCS_US_ASCII, 0, 1 },
214     { WC_CCS_TCVN_5712_1, 0x80, 1 },
215     { WC_CCS_TCVN_5712_2, 0x80, 1 },
216     { WC_CCS_TCVN_5712_3, 0x80, 1 },
217     { 0, 0, 0 },
218 };
219 
/* Character-set lists for UTF-8 and UTF-7, compiled only when USE_UNICODE is
 * defined.  The two tables have identical contents: US-ASCII (second field 0)
 * followed by WC_CCS_UCS2, WC_CCS_UCS4 and WC_CCS_UCS_TAG (second field 0x80). */
220 #ifdef USE_UNICODE
221 static wc_gset gset_utf8[] = {
222     { WC_CCS_US_ASCII,  0, 1 },
223     { WC_CCS_UCS2,      0x80, 1 },
224     { WC_CCS_UCS4,      0x80, 1 },
225     { WC_CCS_UCS_TAG,   0x80, 1 },
226     { 0, 0, 0 },
227 };
228 static wc_gset gset_utf7[] = {
229     { WC_CCS_US_ASCII,  0, 1 },
230     { WC_CCS_UCS2,      0x80, 1 },
231     { WC_CCS_UCS4,      0x80, 1 },
232     { WC_CCS_UCS_TAG,   0x80, 1 },
233     { 0, 0, 0 },
234 };
235 #endif
236 
/* Character-set list for the "Raw" 8-bit entry: US-ASCII (second field 0)
 * and WC_CCS_RAW (second field 0x80). */
237 static wc_gset gset_raw[] = {
238     { WC_CCS_US_ASCII, 0, 1 },
239     { WC_CCS_RAW,      0x80, 1 },
240     { 0, 0, 0 },
241 };
242 
/* Helper macros that each expand to one positional wc_ces_info initializer
 * with eight members: WC_CES_<id>, name string, description string, gset
 * table, extension table (or NULL) and three function pointers cast to
 * void *.  The three functions are, in order, a wc_conv_from_*, a
 * wc_push_to_* and a wc_char_conv_from_* routine.
 * NOTE(review): the wc_ces_info member names and the exact prototypes hidden
 * by the void * casts are not visible in this file -- check the header. */

/* US-ASCII: gset_usascii, ASCII input converter, ISO-8859 output routine. */
243 #define ces_ascii(id,name,desc) \
244     { WC_CES_##id, name, desc, gset_usascii, NULL, \
245 	(void *)wc_conv_from_ascii, (void *)wc_push_to_iso8859, \
246 	(void *)wc_char_conv_from_iso2022 }
/* ISO-8859-<no>: gset_iso8859<no>, ISO-2022 input converter, ISO-8859 output routine. */
247 #define ces_iso8859(id,name,desc,no) \
248     { WC_CES_##id, name, desc, gset_iso8859##no, NULL, \
249 	(void *)wc_conv_from_iso2022, (void *)wc_push_to_iso8859, \
250 	(void *)wc_char_conv_from_iso2022 }
/* Single-table 8-bit encodings: gset_<ces> with the shared *_priv1 routines. */
251 #define ces_priv1(id,name,desc,ces) \
252     { WC_CES_##id, name, desc, gset_##ces, NULL, \
253 	(void *)wc_conv_from_priv1, (void *)wc_push_to_priv1, \
254 	(void *)wc_char_conv_from_priv1 }
/* ISO-2022-<terr>: the only macro that supplies a non-NULL extension table
 * (gset_ext_iso2022<terr>). */
255 #define ces_iso2022(id,name,desc,terr) \
256     { WC_CES_##id, name, desc, gset_iso2022##terr, gset_ext_iso2022##terr, \
257 	(void *)wc_conv_from_iso2022, (void *)wc_push_to_iso2022, \
258 	(void *)wc_char_conv_from_iso2022 }
/* EUC-<terr>: ISO-2022 input converter with a per-territory output routine
 * wc_push_to_euc<terr>. */
259 #define ces_euc(id,name,desc,terr) \
260     { WC_CES_##id, name, desc, gset_euc##terr, NULL, \
261 	(void *)wc_conv_from_iso2022, (void *)wc_push_to_euc##terr, \
262 	(void *)wc_char_conv_from_iso2022 }
/* Encodings with their own routines: all three function names are built by
 * pasting <ces> onto wc_conv_from_, wc_push_to_ and wc_char_conv_from_. */
263 #define ces_priv2(id,name,desc,ces) \
264     { WC_CES_##id, name, desc, gset_##ces, NULL, \
265 	(void *)wc_conv_from_##ces, (void *)wc_push_to_##ces, \
266 	(void *)wc_char_conv_from_##ces }
267 
/* Aliases for names that the ces_* macros reference or build by token
 * pasting.  Each maps an encoding-specific name onto a shared table or
 * routine, so no separate definition is needed for that encoding. */

/* ISO-2022-JP-2 and -JP-3 reuse the ISO-2022-JP extension table. */
268 #define gset_ext_iso2022jp2	gset_ext_iso2022jp
269 #define gset_ext_iso2022jp3	gset_ext_iso2022jp
/* Output routines: EUC-KR and EUC-CN share wc_push_to_euc, the priv1 family
 * uses the ISO-8859 routine, and all four Vietnamese encodings share
 * wc_push_to_viet. */
270 #define wc_push_to_euckr	wc_push_to_euc
271 #define wc_push_to_euccn	wc_push_to_euc
272 #define wc_push_to_priv1	wc_push_to_iso8859
273 #define wc_push_to_cp1258	wc_push_to_viet
274 #define wc_push_to_tcvn5712	wc_push_to_viet
275 #define wc_push_to_viscii11	wc_push_to_viet
276 #define wc_push_to_vps		wc_push_to_viet
/* Input converters: CP1258 and Raw use the priv1 converter; the other three
 * Vietnamese encodings use the viet converter. */
277 #define wc_conv_from_cp1258	wc_conv_from_priv1
278 #define wc_conv_from_tcvn5712	wc_conv_from_viet
279 #define wc_conv_from_viscii11	wc_conv_from_viet
280 #define wc_conv_from_vps	wc_conv_from_viet
281 #define wc_conv_from_raw	wc_conv_from_priv1
/* Per-character converters: same grouping as the input converters, plus HZ,
 * which uses the ISO-2022 routine. */
282 #define wc_char_conv_from_hz	wc_char_conv_from_iso2022
283 #define wc_char_conv_from_cp1258	wc_char_conv_from_priv1
284 #define wc_char_conv_from_tcvn5712	wc_char_conv_from_viet
285 #define wc_char_conv_from_viscii11	wc_char_conv_from_viet
286 #define wc_char_conv_from_vps	wc_char_conv_from_viet
287 #define wc_char_conv_from_raw	wc_char_conv_from_priv1
288 
/* Global table of character encoding schemes: one wc_ces_info entry per
 * encoding, built with the ces_* macros, and closed by an entry whose id is 0
 * and whose pointers are all NULL.
 * Entries with NULL name, tables and routines (ISO-8859-12, and UTF-8/UTF-7
 * when USE_UNICODE is not defined) are placeholders for encodings that are
 * not available.
 * NOTE(review): the placeholders presumably keep each entry's array position
 * in step with its WC_CES_* value, which would make the entry order
 * significant -- confirm before reordering or removing entries. */
289 wc_ces_info WcCesInfo[] = {
290     ces_ascii(US_ASCII, "US-ASCII", "Latin (US-ASCII)"),
291 
292     ces_iso8859(ISO_8859_1,  "ISO-8859-1",  "Latin 1 (ISO-8859-1)",        1),
293     ces_iso8859(ISO_8859_2,  "ISO-8859-2",  "Latin 2 (ISO-8859-2)",        2),
294     ces_iso8859(ISO_8859_3,  "ISO-8859-3",  "Latin 3 (ISO-8859-3)",        3),
295     ces_iso8859(ISO_8859_4,  "ISO-8859-4",  "Latin 4 (ISO-8859-4)",        4),
296     ces_iso8859(ISO_8859_5,  "ISO-8859-5",  "Cyrillic (ISO-8859-5)",       5),
297     ces_iso8859(ISO_8859_6,  "ISO-8859-6",  "Arabic (ISO-8859-6)",         6),
298     ces_iso8859(ISO_8859_7,  "ISO-8859-7",  "Greek (ISO-8859-7)",          7),
299     ces_iso8859(ISO_8859_8,  "ISO-8859-8",  "Hebrew (ISO-8859-8)",         8),
300     ces_iso8859(ISO_8859_9,  "ISO-8859-9",  "Turkish (ISO-8859-9)",        9),
301     ces_iso8859(ISO_8859_10, "ISO-8859-10", "Nordic (ISO-8859-10)",        10),
302     ces_iso8859(ISO_8859_11, "ISO-8859-11", "Thai (ISO-8859-11, TIS-620)", 11),
    /* Placeholder: no gset_iso8859 table is defined for part 12. */
303     { WC_CES_ISO_8859_12, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
304     ces_iso8859(ISO_8859_13, "ISO-8859-13", "Baltic Rim (ISO-8859-13)",    13),
305     ces_iso8859(ISO_8859_14, "ISO-8859-14", "Celtic (ISO-8859-14)",        14),
306     ces_iso8859(ISO_8859_15, "ISO-8859-15", "Latin 9 (ISO-8859-15)",       15),
307     ces_iso8859(ISO_8859_16, "ISO-8859-16", "Romanian (ISO-8859-16)",      16),
308 
309     ces_iso2022(ISO_2022_JP,   "ISO-2022-JP",   "Japanese (ISO-2022-JP)",   jp),
310     ces_iso2022(ISO_2022_JP_2, "ISO-2022-JP-2", "Japanese (ISO-2022-JP-2)", jp2),
311     ces_iso2022(ISO_2022_JP_3, "ISO-2022-JP-3", "Japanese (ISO-2022-JP-3)", jp3),
312     ces_iso2022(ISO_2022_CN,   "ISO-2022-CN",   "Chinese (ISO-2022-CN)",    cn),
313     ces_iso2022(ISO_2022_KR,   "ISO-2022-KR",   "Korean (ISO-2022-KR)",     kr),
314 
315     ces_euc(EUC_JP, "EUC-JP", "Japanese (EUC-JP)",        jp),
316     ces_euc(EUC_CN, "EUC-CN", "Chinese (EUC-CN, GB2312)", cn),
317     ces_euc(EUC_TW, "EUC-TW", "Chinese Taiwan (EUC-TW)",  tw),
318     ces_euc(EUC_KR, "EUC-KR", "Korean (EUC-KR)",          kr),
319 
320     ces_priv1(CP437,    "CP437",    "Latin (CP437)",         cp437),
321     ces_priv1(CP737,    "CP737",    "Greek (CP737)",         cp737),
322     ces_priv1(CP775,    "CP775",    "Baltic Rim (CP775)",    cp775),
323     ces_priv1(CP850,    "CP850",    "Latin 1 (CP850)",       cp850),
324     ces_priv1(CP852,    "CP852",    "Latin 2 (CP852)",       cp852),
325     ces_priv1(CP855,    "CP855",    "Cyrillic (CP855)",      cp855),
326     ces_priv1(CP856,    "CP856",    "Hebrew (CP856)",        cp856),
327     ces_priv1(CP857,    "CP857",    "Turkish (CP857)",       cp857),
328     ces_priv1(CP860,    "CP860",    "Portuguese (CP860)",    cp860),
329     ces_priv1(CP861,    "CP861",    "Icelandic (CP861)",     cp861),
330     ces_priv1(CP862,    "CP862",    "Hebrew (CP862)",        cp862),
331     ces_priv1(CP863,    "CP863",    "Canada French (CP863)", cp863),
332     ces_priv1(CP864,    "CP864",    "Arabic (CP864)",        cp864),
333     ces_priv1(CP865,    "CP865",    "Nordic (CP865)",        cp865),
334     ces_priv1(CP866,    "CP866",    "Cyrillic (CP866)",      cp866),
335     ces_priv1(CP869,    "CP869",    "Greek 2 (CP869)",       cp869),
336     ces_priv1(CP874,    "CP874",    "Thai (CP874)",          cp874),
337     ces_priv1(CP1006,   "CP1006",   "Arabic (CP1006)",       cp1006),
338     ces_priv1(CP1250,   "CP1250",   "Latin 2 (CP1250)",      cp1250),
339     ces_priv1(CP1251,   "CP1251",   "Cyrillic (CP1251)",     cp1251),
340     ces_priv1(CP1252,   "CP1252",   "Latin 1 (CP1252)",      cp1252),
341     ces_priv1(CP1253,   "CP1253",   "Greek (CP1253)",        cp1253),
342     ces_priv1(CP1254,   "CP1254",   "Turkish (CP1254)",      cp1254),
343     ces_priv1(CP1255,   "CP1255",   "Hebrew (CP1255)",       cp1255),
344     ces_priv1(CP1256,   "CP1256",   "Arabic (CP1256)",       cp1256),
345     ces_priv1(CP1257,   "CP1257",   "Baltic Rim (CP1257)",   cp1257),
346     ces_priv1(KOI8_R,   "KOI8-R",   "Cyrillic (KOI8-R)",     koi8r),
347     ces_priv1(KOI8_U,   "KOI8-U",   "Ukrainian (KOI8-U)",    koi8u),
348     ces_priv1(NEXTSTEP, "NeXTSTEP", "NeXTSTEP",              nextstep),
349 
350     ces_priv2(RAW, "Raw", "8bit Raw", raw),
351 
352     ces_priv2(SHIFT_JIS,  "Shift_JIS",  "Japanese (Shift_JIS, CP932)", sjis),
353     ces_priv2(SHIFT_JISX0213, "Shift_JISX0213", "Japanese (Shift_JISX0213)", sjisx0213),
354     ces_priv2(GBK,        "GBK",        "Chinese (GBK, CP936)",    gbk),
355     ces_priv2(GB18030,    "GB18030",    "Chinese (GB18030)",       gb18030),
356     ces_priv2(HZ_GB_2312, "HZ-GB-2312", "Chinese (HZ-GB-2312)",    hz),
357     ces_priv2(BIG5,       "Big5",       "Chinese Taiwan (Big5, CP950)", big5),
358     ces_priv2(HKSCS,      "HKSCS",      "Chinese Hong Kong (HKSCS)", hkscs),
359     ces_priv2(UHC,        "UHC",        "Korean (UHC, CP949)",     uhc),
360     ces_priv2(JOHAB,      "Johab",      "Korean (Johab)",          johab),
361 
362     ces_priv2(CP1258,     "CP1258",     "Vietnamese (CP1258)",     cp1258),
363     ces_priv2(TCVN_5712,  "TCVN-5712",  "Vietnamese (TCVN-5712)",  tcvn5712),
364     ces_priv2(VISCII_11,  "VISCII-1.1", "Vietnamese (VISCII 1.1)", viscii11),
365     ces_priv2(VPS,        "VPS",        "Vietnamese (VPS)",        vps),
366 
    /* Without USE_UNICODE the two Unicode entries are still present, but
     * empty, so the entry count is the same in both configurations. */
367 #ifdef USE_UNICODE
368     ces_priv2(UTF_8, "UTF-8", "Unicode (UTF-8)", utf8),
369     ces_priv2(UTF_7, "UTF-7", "Unicode (UTF-7)", utf7),
370 #else
371     { WC_CES_UTF_8, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
372     { WC_CES_UTF_7, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
373 #endif
    /* End-of-table entry: id 0 and every pointer NULL. */
374     { 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
375 };
376