1 /*
 * conversion between BIG5 and Mule Internal Code (CNS 11643-1992
 * plane 1 and plane 2).
4  * This program is partially copied from lv(Multilingual file viewer)
5  * and slightly modified. lv is written and copyrighted by NARITA Tomio
6  * (nrt@web.ad.jp).
7  *
8  * 1999/1/15 Tatsuo Ishii
9  *
10  * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c
11  */
12 
13 /* can be used in either frontend or backend */
14 #include "postgres_fe.h"
15 
16 #include "mb/pg_wchar.h"
17 
/*
 * One entry of a range-mapping table.
 *
 * "code" is the first code point of a run in the source character set and
 * "peer" is the code point it corresponds to in the target set.  A peer of
 * zero marks a run that has no mapping (BinarySearchRange returns 0 for it).
 */
typedef struct
{
	unsigned short code;		/* start of the range, source encoding */
	unsigned short peer;		/* matching start in target encoding, or 0 */
} codes_t;
23 
24 /* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
25 static const codes_t big5Level1ToCnsPlane1[25] = {	/* range */
26 	{0xA140, 0x2121},
27 	{0xA1F6, 0x2258},
28 	{0xA1F7, 0x2257},
29 	{0xA1F8, 0x2259},
30 	{0xA2AF, 0x2421},
31 	{0xA3C0, 0x4221},
32 	{0xa3e1, 0x0000},
33 	{0xA440, 0x4421},
34 	{0xACFE, 0x5753},
35 	{0xacff, 0x0000},
36 	{0xAD40, 0x5323},
37 	{0xAFD0, 0x5754},
38 	{0xBBC8, 0x6B51},
39 	{0xBE52, 0x6B50},
40 	{0xBE53, 0x6F5C},
41 	{0xC1AB, 0x7536},
42 	{0xC2CB, 0x7535},
43 	{0xC2CC, 0x7737},
44 	{0xC361, 0x782E},
45 	{0xC3B9, 0x7865},
46 	{0xC3BA, 0x7864},
47 	{0xC3BB, 0x7866},
48 	{0xC456, 0x782D},
49 	{0xC457, 0x7962},
50 	{0xc67f, 0x0000}
51 };
52 
53 /* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
54 static const codes_t cnsPlane1ToBig5Level1[26] = {	/* range */
55 	{0x2121, 0xA140},
56 	{0x2257, 0xA1F7},
57 	{0x2258, 0xA1F6},
58 	{0x2259, 0xA1F8},
59 	{0x234f, 0x0000},
60 	{0x2421, 0xA2AF},
61 	{0x2571, 0x0000},
62 	{0x4221, 0xA3C0},
63 	{0x4242, 0x0000},
64 	{0x4421, 0xA440},
65 	{0x5323, 0xAD40},
66 	{0x5753, 0xACFE},
67 	{0x5754, 0xAFD0},
68 	{0x6B50, 0xBE52},
69 	{0x6B51, 0xBBC8},
70 	{0x6F5C, 0xBE53},
71 	{0x7535, 0xC2CB},
72 	{0x7536, 0xC1AB},
73 	{0x7737, 0xC2CC},
74 	{0x782D, 0xC456},
75 	{0x782E, 0xC361},
76 	{0x7864, 0xC3BA},
77 	{0x7865, 0xC3B9},
78 	{0x7866, 0xC3BB},
79 	{0x7962, 0xC457},
80 	{0x7d4c, 0x0000}
81 };
82 
83 /* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
84 static const codes_t big5Level2ToCnsPlane2[48] = {	/* range */
85 	{0xC940, 0x2121},
86 	{0xc94a, 0x0000},
87 	{0xC94B, 0x212B},
88 	{0xC96C, 0x214D},
89 	{0xC9BE, 0x214C},
90 	{0xC9BF, 0x217D},
91 	{0xC9ED, 0x224E},
92 	{0xCAF7, 0x224D},
93 	{0xCAF8, 0x2439},
94 	{0xD77A, 0x3F6A},
95 	{0xD77B, 0x387E},
96 	{0xDBA7, 0x3F6B},
97 	{0xDDFC, 0x4176},
98 	{0xDDFD, 0x4424},
99 	{0xE8A3, 0x554C},
100 	{0xE976, 0x5723},
101 	{0xEB5B, 0x5A29},
102 	{0xEBF1, 0x554B},
103 	{0xEBF2, 0x5B3F},
104 	{0xECDE, 0x5722},
105 	{0xECDF, 0x5C6A},
106 	{0xEDAA, 0x5D75},
107 	{0xEEEB, 0x642F},
108 	{0xEEEC, 0x6039},
109 	{0xF056, 0x5D74},
110 	{0xF057, 0x6243},
111 	{0xF0CB, 0x5A28},
112 	{0xF0CC, 0x6337},
113 	{0xF163, 0x6430},
114 	{0xF16B, 0x6761},
115 	{0xF16C, 0x6438},
116 	{0xF268, 0x6934},
117 	{0xF269, 0x6573},
118 	{0xF2C3, 0x664E},
119 	{0xF375, 0x6762},
120 	{0xF466, 0x6935},
121 	{0xF4B5, 0x664D},
122 	{0xF4B6, 0x6962},
123 	{0xF4FD, 0x6A4C},
124 	{0xF663, 0x6A4B},
125 	{0xF664, 0x6C52},
126 	{0xF977, 0x7167},
127 	{0xF9C4, 0x7166},
128 	{0xF9C5, 0x7234},
129 	{0xF9C6, 0x7240},
130 	{0xF9C7, 0x7235},
131 	{0xF9D2, 0x7241},
132 	{0xf9d6, 0x0000}
133 };
134 
135 /* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
136 static const codes_t cnsPlane2ToBig5Level2[49] = {	/* range */
137 	{0x2121, 0xC940},
138 	{0x212B, 0xC94B},
139 	{0x214C, 0xC9BE},
140 	{0x214D, 0xC96C},
141 	{0x217D, 0xC9BF},
142 	{0x224D, 0xCAF7},
143 	{0x224E, 0xC9ED},
144 	{0x2439, 0xCAF8},
145 	{0x387E, 0xD77B},
146 	{0x3F6A, 0xD77A},
147 	{0x3F6B, 0xDBA7},
148 	{0x4424, 0x0000},
149 	{0x4176, 0xDDFC},
150 	{0x4177, 0x0000},
151 	{0x4424, 0xDDFD},
152 	{0x554B, 0xEBF1},
153 	{0x554C, 0xE8A3},
154 	{0x5722, 0xECDE},
155 	{0x5723, 0xE976},
156 	{0x5A28, 0xF0CB},
157 	{0x5A29, 0xEB5B},
158 	{0x5B3F, 0xEBF2},
159 	{0x5C6A, 0xECDF},
160 	{0x5D74, 0xF056},
161 	{0x5D75, 0xEDAA},
162 	{0x6039, 0xEEEC},
163 	{0x6243, 0xF057},
164 	{0x6337, 0xF0CC},
165 	{0x642F, 0xEEEB},
166 	{0x6430, 0xF163},
167 	{0x6438, 0xF16C},
168 	{0x6573, 0xF269},
169 	{0x664D, 0xF4B5},
170 	{0x664E, 0xF2C3},
171 	{0x6761, 0xF16B},
172 	{0x6762, 0xF375},
173 	{0x6934, 0xF268},
174 	{0x6935, 0xF466},
175 	{0x6962, 0xF4B6},
176 	{0x6A4B, 0xF663},
177 	{0x6A4C, 0xF4FD},
178 	{0x6C52, 0xF664},
179 	{0x7166, 0xF9C4},
180 	{0x7167, 0xF977},
181 	{0x7234, 0xF9C5},
182 	{0x7235, 0xF9C7},
183 	{0x7240, 0xF9C6},
184 	{0x7241, 0xF9D2},
185 	{0x7245, 0x0000}
186 };
187 
/*
 * Big5 Level 1 codes that correspond to CNS 11643-1992 Plane 4.
 *
 * Column 0 is the Big5 code, column 1 the 7-bit CNS code.  The table is
 * scanned linearly and matched exactly; no range extrapolation is applied.
 */
static const unsigned short b1c4[][2] = {
	/* Big5,   CNS */
	{0xC879, 0x2123},
	{0xC87B, 0x2124},
	{0xC87D, 0x212A},
	{0xC8A2, 0x2152}
};
195 
/*
 * Big5 Level 2 codes that correspond to CNS 11643-1992 Plane 3.
 *
 * Column 0 is the Big5 code, column 1 the 7-bit CNS code.  The table is
 * scanned linearly and matched exactly; no range extrapolation is applied.
 */
static const unsigned short b2c3[][2] = {
	/* Big5,   CNS */
	{0xF9D6, 0x4337},
	{0xF9D7, 0x4F50},
	{0xF9D8, 0x444E},
	{0xF9D9, 0x504A},
	{0xF9DA, 0x2C5D},
	{0xF9DB, 0x3D7E},
	{0xF9DC, 0x4B5C}
};
206 
BinarySearchRange(const codes_t * array,int high,unsigned short code)207 static unsigned short BinarySearchRange
208 			(const codes_t *array, int high, unsigned short code)
209 {
210 	int			low,
211 				mid,
212 				distance,
213 				tmp;
214 
215 	low = 0;
216 	mid = high >> 1;
217 
218 	for (; low <= high; mid = (low + high) >> 1)
219 	{
220 		if ((array[mid].code <= code) && (array[mid + 1].code > code))
221 		{
222 			if (0 == array[mid].peer)
223 				return 0;
224 			if (code >= 0xa140U)
225 			{
226 				/* big5 to cns */
227 				tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
228 				high = code & 0x00ff;
229 				low = array[mid].code & 0x00ff;
230 
231 				/*
232 				 * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e,
233 				 * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is
234 				 * 0x9d.                     [region_low, region_high] We
235 				 * should remember big5 has two different regions (above).
236 				 * There is a bias for the distance between these regions.
237 				 * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is
238 				 * 1.) bias = - 0x22.
239 				 */
240 				distance = tmp * 0x9d + high - low +
241 					(high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22)
242 					 : (low >= 0xa1 ? +0x22 : 0));
243 
244 				/*
245 				 * NOTE: we have to convert the distance into a code point.
246 				 * The code point's low_byte is 0x21 plus mod_0x5e. In the
247 				 * first, we extract the mod_0x5e of the starting code point,
248 				 * subtracting 0x21, and add distance to it. Then we calculate
249 				 * again mod_0x5e of them, and restore the final codepoint,
250 				 * adding 0x21.
251 				 */
252 				tmp = (array[mid].peer & 0x00ff) + distance - 0x21;
253 				tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8)
254 					+ 0x21 + tmp % 0x5e;
255 				return tmp;
256 			}
257 			else
258 			{
259 				/* cns to big5 */
260 				tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
261 
262 				/*
263 				 * NOTE: ISO charsets ranges between 0x21-0xfe (94charset).
264 				 * Its radix is 0x5e. But there is no distance bias like big5.
265 				 */
266 				distance = tmp * 0x5e
267 					+ ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff));
268 
269 				/*
270 				 * NOTE: Similar to big5 to cns conversion, we extract
271 				 * mod_0x9d and restore mod_0x9d into a code point.
272 				 */
273 				low = array[mid].peer & 0x00ff;
274 				tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40);
275 				low = tmp % 0x9d;
276 				tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8)
277 					+ (low > 0x3e ? 0x62 : 0x40) + low;
278 				return tmp;
279 			}
280 		}
281 		else if (array[mid].code > code)
282 			high = mid - 1;
283 		else
284 			low = mid + 1;
285 	}
286 
287 	return 0;
288 }
289 
290 
291 unsigned short
BIG5toCNS(unsigned short big5,unsigned char * lc)292 BIG5toCNS(unsigned short big5, unsigned char *lc)
293 {
294 	unsigned short cns = 0;
295 	int			i;
296 
297 	if (big5 < 0xc940U)
298 	{
299 		/* level 1 */
300 
301 		for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
302 		{
303 			if (b1c4[i][0] == big5)
304 			{
305 				*lc = LC_CNS11643_4;
306 				return (b1c4[i][1] | 0x8080U);
307 			}
308 		}
309 
310 		if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
311 			*lc = LC_CNS11643_1;
312 	}
313 	else if (big5 == 0xc94aU)
314 	{
315 		/* level 2 */
316 		*lc = LC_CNS11643_1;
317 		cns = 0x4442;
318 	}
319 	else
320 	{
321 		/* level 2 */
322 		for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
323 		{
324 			if (b2c3[i][0] == big5)
325 			{
326 				*lc = LC_CNS11643_3;
327 				return (b2c3[i][1] | 0x8080U);
328 			}
329 		}
330 
331 		if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
332 			*lc = LC_CNS11643_2;
333 	}
334 
335 	if (0 == cns)
336 	{							/* no mapping Big5 to CNS 11643-1992 */
337 		*lc = 0;
338 		return (unsigned short) '?';
339 	}
340 
341 	return cns | 0x8080;
342 }
343 
344 unsigned short
CNStoBIG5(unsigned short cns,unsigned char lc)345 CNStoBIG5(unsigned short cns, unsigned char lc)
346 {
347 	int			i;
348 	unsigned int big5 = 0;
349 
350 	cns &= 0x7f7f;
351 
352 	switch (lc)
353 	{
354 		case LC_CNS11643_1:
355 			big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
356 			break;
357 		case LC_CNS11643_2:
358 			big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
359 			break;
360 		case LC_CNS11643_3:
361 			for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
362 			{
363 				if (b2c3[i][1] == cns)
364 					return (b2c3[i][0]);
365 			}
366 			break;
367 		case LC_CNS11643_4:
368 			for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
369 			{
370 				if (b1c4[i][1] == cns)
371 					return (b1c4[i][0]);
372 			}
373 		default:
374 			break;
375 	}
376 	return big5;
377 }
378