1 /* $XTermId: other.c,v 1.15 2013/01/30 01:27:54 tom Exp $ */
2 
3 /*
4 Copyright (c) 2002 by Tomohiro KUBOTA
5 
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15 
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 THE SOFTWARE.
23 */
24 
25 #include <other.h>
26 
/*
 * ISO 10646 code point of the Euro sign.  The GBK and HKSCS converters
 * in this file translate the single byte 128 (0x80) to and from it.
 */
#define EURO_10646 0x20AC
28 
29 int
init_gbk(OtherStatePtr s)30 init_gbk(OtherStatePtr s)
31 {
32     s->gbk.mapping = LookupMapping("gbk-0", us16BIT);
33     if (!s->gbk.mapping)
34 	return 0;
35 
36     s->gbk.reverse = LookupReverse(s->gbk.mapping);
37     if (!s->gbk.reverse)
38 	return 0;
39 
40     s->gbk.buf = -1;
41     return 1;
42 }
43 
44 unsigned int
mapping_gbk(unsigned int n,OtherStatePtr s)45 mapping_gbk(unsigned int n, OtherStatePtr s)
46 {
47     unsigned int r;
48     if (n < 128)
49 	return n;
50     if (n == 128)
51 	return EURO_10646;
52     r = MapCodeValue(n, s->gbk.mapping);
53     return r;
54 }
55 
56 unsigned int
reverse_gbk(unsigned int n,OtherStatePtr s)57 reverse_gbk(unsigned int n, OtherStatePtr s)
58 {
59     if (n < 128)
60 	return n;
61     if (n == EURO_10646)
62 	return 128;
63     return s->gbk.reverse->reverse(n, s->gbk.reverse->data);
64 }
65 
66 int
stack_gbk(unsigned c,OtherStatePtr s)67 stack_gbk(unsigned c, OtherStatePtr s)
68 {
69     if (s->gbk.buf < 0) {
70 	if (c < 129)
71 	    return (int) c;
72 	s->gbk.buf = (int) c;
73 	return -1;
74     } else {
75 	int b;
76 	if (c < 0x40 || c == 0x7F) {
77 	    s->gbk.buf = -1;
78 	    return (int) c;
79 	}
80 	if (s->gbk.buf < 0xFF && c < 0xFF)
81 	    b = (int) ((unsigned) (s->gbk.buf << 8) + c);
82 	else
83 	    b = -1;
84 	s->gbk.buf = -1;
85 	return b;
86     }
87 }
88 
89 int
init_utf8(OtherStatePtr s)90 init_utf8(OtherStatePtr s)
91 {
92     s->utf8.buf_ptr = 0;
93     return 1;
94 }
95 
96 unsigned int
mapping_utf8(unsigned int n,OtherStatePtr s GCC_UNUSED)97 mapping_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED)
98 {
99     return n;
100 }
101 
102 unsigned int
reverse_utf8(unsigned int n,OtherStatePtr s GCC_UNUSED)103 reverse_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED)
104 {
105     if (n < 0x80)
106 	return n;
107     if (n < 0x800)
108 	return 0xC080 + ((n & 0x7C0) << 2) + (n & 0x3F);
109     if (n < 0x10000)
110 	return 0xE08080 + ((n & 0xF000) << 4) + ((n & 0xFC0) << 2) + (n & 0x3F);
111     return 0xF0808080 + ((n & 0x1C0000) << 6) + ((n & 0x3F000) << 4) +
112 	((n & 0xFC0) << 2) + (n & 0x3F);
113 }
114 
115 int
stack_utf8(unsigned c,OtherStatePtr s)116 stack_utf8(unsigned c, OtherStatePtr s)
117 {
118     int u;
119 
120     if (c < 0x80) {
121 	s->utf8.buf_ptr = 0;
122 	return (int) c;
123     }
124     if (s->utf8.buf_ptr == 0) {
125 	if ((c & 0x40) == 0)
126 	    return -1;
127 	s->utf8.buf[s->utf8.buf_ptr++] = UChar(c);
128 	if ((c & 0x60) == 0x40)
129 	    s->utf8.len = 2;
130 	else if ((c & 0x70) == 0x60)
131 	    s->utf8.len = 3;
132 	else if ((c & 0x78) == 0x70)
133 	    s->utf8.len = 4;
134 	else
135 	    s->utf8.buf_ptr = 0;
136 	return -1;
137     }
138     if ((c & 0x40) != 0) {
139 	s->utf8.buf_ptr = 0;
140 	return -1;
141     }
142     s->utf8.buf[s->utf8.buf_ptr++] = UChar(c);
143     if (s->utf8.buf_ptr < s->utf8.len)
144 	return -1;
145     switch (s->utf8.len) {
146     case 2:
147 	u = ((s->utf8.buf[0] & 0x1F) << 6) | (s->utf8.buf[1] & 0x3F);
148 	s->utf8.buf_ptr = 0;
149 	if (u < 0x80)
150 	    return -1;
151 	else
152 	    return u;
153     case 3:
154 	u = ((s->utf8.buf[0] & 0x0F) << 12)
155 	    | ((s->utf8.buf[1] & 0x3F) << 6)
156 	    | (s->utf8.buf[2] & 0x3F);
157 	s->utf8.buf_ptr = 0;
158 	if (u < 0x800)
159 	    return -1;
160 	else
161 	    return u;
162     case 4:
163 	u = ((s->utf8.buf[0] & 0x03) << 18)
164 	    | ((s->utf8.buf[1] & 0x3F) << 12)
165 	    | ((s->utf8.buf[2] & 0x3F) << 6)
166 	    | ((s->utf8.buf[3] & 0x3F));
167 	s->utf8.buf_ptr = 0;
168 	if (u < 0x10000)
169 	    return -1;
170 	else
171 	    return u;
172     }
173     s->utf8.buf_ptr = 0;
174     return -1;
175 }
176 
/*
 * Constants used by the Shift_JIS converters below.
 *
 * HALFWIDTH_10646: code points at or above this value are handed to the
 * JIS X 0201 reverse mapping by reverse_sjis().
 * YEN_* and OVERLINE_*: the Shift_JIS bytes 0x5C and 0x7E are translated
 * to and from the ISO 10646 code points 0x00A5 and 0x203E instead of
 * being passed through unchanged.
 */
#define HALFWIDTH_10646 0xFF61
#define YEN_SJIS 0x5C
#define YEN_10646 0x00A5
#define OVERLINE_SJIS 0x7E
#define OVERLINE_10646 0x203E
182 
183 int
init_sjis(OtherStatePtr s)184 init_sjis(OtherStatePtr s)
185 {
186     s->sjis.x0208mapping = LookupMapping("jisx0208.1990-0", us16BIT);
187     if (!s->sjis.x0208mapping)
188 	return 0;
189 
190     s->sjis.x0208reverse = LookupReverse(s->sjis.x0208mapping);
191     if (!s->sjis.x0208reverse)
192 	return 0;
193 
194     s->sjis.x0201mapping = LookupMapping("jisx0201.1976-0", us16BIT);
195     if (!s->sjis.x0201mapping)
196 	return 0;
197 
198     s->sjis.x0201reverse = LookupReverse(s->sjis.x0201mapping);
199     if (!s->sjis.x0201reverse)
200 	return 0;
201 
202     s->sjis.buf = -1;
203     return 1;
204 }
205 
206 unsigned int
mapping_sjis(unsigned int n,OtherStatePtr s)207 mapping_sjis(unsigned int n, OtherStatePtr s)
208 {
209     unsigned int j1, j2, s1, s2;
210     if (n == YEN_SJIS)
211 	return YEN_10646;
212     if (n == OVERLINE_SJIS)
213 	return OVERLINE_10646;
214     if (n < 0x80)
215 	return n;
216     if (n >= 0xA0 && n <= 0xDF)
217 	return MapCodeValue(n, s->sjis.x0201mapping);
218     s1 = ((n >> 8) & 0xFF);
219     s2 = (n & 0xFF);
220     j1 = (s1 << 1)
221 	- (unsigned) (s1 <= 0x9F ? 0xE0 : 0x160)
222 	- (unsigned) (s2 < 0x9F ? 1 : 0);
223     j2 = s2
224 	- 0x1F
225 	- (unsigned) (s2 >= 0x7F ? 1 : 0)
226 	- (unsigned) (s2 >= 0x9F ? 0x5E : 0);
227     return MapCodeValue((j1 << 8) + j2, s->sjis.x0208mapping);
228 }
229 
230 unsigned int
reverse_sjis(unsigned int n,OtherStatePtr s)231 reverse_sjis(unsigned int n, OtherStatePtr s)
232 {
233     unsigned int j, j1, j2, s1, s2;
234     if (n == YEN_10646)
235 	return YEN_SJIS;
236     if (n == OVERLINE_10646)
237 	return OVERLINE_SJIS;
238     if (n < 0x80)
239 	return n;
240     if (n >= HALFWIDTH_10646)
241 	return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data);
242     j = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data);
243     j1 = ((j >> 8) & 0xFF);
244     j2 = (j & 0xFF);
245     s1 = ((j1 - 1) >> 1)
246 	+ (unsigned) ((j1 <= 0x5E) ? 0x71 : 0xB1);
247     s2 = j2
248 	+ (unsigned) ((j1 & 1) ? ((j2 < 0x60) ? 0x1F : 0x20) : 0x7E);
249     return (s1 << 8) + s2;
250 }
251 
252 int
stack_sjis(unsigned c,OtherStatePtr s)253 stack_sjis(unsigned c, OtherStatePtr s)
254 {
255     if (s->sjis.buf < 0) {
256 	if (c < 128 || (c >= 0xA0 && c <= 0xDF))
257 	    return (int) c;
258 	s->sjis.buf = (int) c;
259 	return -1;
260     } else {
261 	int b;
262 	if (c < 0x40 || c == 0x7F) {
263 	    s->sjis.buf = -1;
264 	    return (int) c;
265 	}
266 	if (s->sjis.buf < 0xFF && c < 0xFF)
267 	    b = (int) ((unsigned) (s->sjis.buf << 8) + c);
268 	else
269 	    b = -1;
270 	s->sjis.buf = -1;
271 	return b;
272     }
273 }
274 
275 int
init_hkscs(OtherStatePtr s)276 init_hkscs(OtherStatePtr s)
277 {
278     s->hkscs.mapping = LookupMapping("big5hkscs-0", us16BIT);
279     if (!s->hkscs.mapping)
280 	return 0;
281 
282     s->hkscs.reverse = LookupReverse(s->hkscs.mapping);
283     if (!s->hkscs.reverse)
284 	return 0;
285 
286     s->hkscs.buf = -1;
287     return 1;
288 }
289 
290 unsigned int
mapping_hkscs(unsigned int n,OtherStatePtr s)291 mapping_hkscs(unsigned int n, OtherStatePtr s)
292 {
293     unsigned int r;
294     if (n < 128)
295 	return n;
296     if (n == 128)
297 	return EURO_10646;
298     r = MapCodeValue(n, s->hkscs.mapping);
299     return r;
300 }
301 
302 unsigned int
reverse_hkscs(unsigned int n,OtherStatePtr s)303 reverse_hkscs(unsigned int n, OtherStatePtr s)
304 {
305     if (n < 128)
306 	return n;
307     if (n == EURO_10646)
308 	return 128;
309     return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data);
310 }
311 
312 int
stack_hkscs(unsigned c,OtherStatePtr s)313 stack_hkscs(unsigned c, OtherStatePtr s)
314 {
315     if (s->hkscs.buf < 0) {
316 	if (c < 129)
317 	    return (int) c;
318 	s->hkscs.buf = (int) c;
319 	return -1;
320     } else {
321 	int b;
322 	if (c < 0x40 || c == 0x7F) {
323 	    s->hkscs.buf = -1;
324 	    return (int) c;
325 	}
326 	if (s->hkscs.buf < 0xFF && c < 0xFF)
327 	    b = (int) ((unsigned) (s->hkscs.buf << 8) + c);
328 	else
329 	    b = -1;
330 	s->hkscs.buf = -1;
331 	return b;
332     }
333 }
334 
/*
 *  GB18030 is a variable-length encoding (1 to 4 bytes per character),
 *  so, besides Latin, the character set encoding is split into two
 *  subsets:
 *    - the 2-byte characters are defined in gb18030.2000-0
 *    - the 4-byte characters are defined in gb18030.2000-1
 *  Please note that the mapping in 2000-1 does not map a 4-byte sequence
 *  directly to a 2-byte value.
 *  To use 2000-1 we have to "linearize" the 4-byte sequence first and
 *  then look up the Unicode value with the resulting index.
 *
 *  For more information on the GB18030 standard, please see:
 *    http://oss.software.ibm.com/icu/docs/papers/gb18030.html
 *
 *  For more information on GB18030 implementation issues in XFree86,
 *  please see:
 *    http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1
 */
351 int
init_gb18030(OtherStatePtr s)352 init_gb18030(OtherStatePtr s)
353 {
354     s->gb18030.cs0_mapping = LookupMapping("gb18030.2000-0", us16BIT);
355     if (!s->gb18030.cs0_mapping)
356 	return 0;
357 
358     s->gb18030.cs0_reverse = LookupReverse(s->gb18030.cs0_mapping);
359     if (!s->gb18030.cs0_reverse)
360 	return 0;
361 
362     s->gb18030.cs1_mapping = LookupMapping("gb18030.2000-1", us16BIT);
363     if (!s->gb18030.cs1_mapping)
364 	return 0;
365 
366     s->gb18030.cs1_reverse = LookupReverse(s->gb18030.cs1_mapping);
367     if (!s->gb18030.cs1_reverse)
368 	return 0;
369 
370     s->gb18030.linear = 0;
371     s->gb18030.buf_ptr = 0;
372     return 1;
373 }
374 
375 unsigned int
mapping_gb18030(unsigned int n,OtherStatePtr s)376 mapping_gb18030(unsigned int n, OtherStatePtr s)
377 {
378     if (n <= 0x80)
379 	return n;		/* 0x80 is valid but unassigned codepoint */
380     if (n >= 0xFFFF)
381 	return '?';
382 
383     return MapCodeValue(n,
384 			((s->gb18030.linear)
385 			 ? s->gb18030.cs1_mapping
386 			 : s->gb18030.cs0_mapping));
387 }
388 
389 unsigned int
reverse_gb18030(unsigned int n,OtherStatePtr s)390 reverse_gb18030(unsigned int n, OtherStatePtr s)
391 {
392     /* when lookup in 2000-0 failed. */
393     /* lookup in 2000-1 and then try to unlinear'd */
394     unsigned int r;
395     if (n <= 0x80)
396 	return n;
397 
398     r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data);
399     if (r != 0)
400 	return r;
401 
402     r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data);
403     if (r != 0) {
404 	unsigned char bytes[4];
405 
406 	bytes[3] = UChar(0x30 + r % 10);
407 	r /= 10;
408 	bytes[2] = UChar(0x81 + r % 126);
409 	r /= 126;
410 	bytes[1] = UChar(0x30 + r % 10);
411 	r /= 10;
412 	bytes[0] = UChar(0x81 + r);
413 
414 	r = (unsigned int) bytes[0] << 24;
415 	r |= (unsigned int) bytes[1] << 16;
416 	r |= (unsigned int) bytes[2] << 8;
417 	r |= (unsigned int) bytes[3];
418     }
419     return r;
420 }
421 
422 int
stack_gb18030(unsigned c,OtherStatePtr s)423 stack_gb18030(unsigned c, OtherStatePtr s)
424 {
425     /* if set gb18030.linear => True. the return value is "linear'd" */
426     if (s->gb18030.buf_ptr == 0) {
427 	if (c <= 0x80)
428 	    return (int) c;
429 	if (c == 0xFF)
430 	    return -1;
431 	s->gb18030.linear = 0;
432 	s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
433 	return -1;
434     } else if (s->gb18030.buf_ptr == 1) {
435 	if (c >= 0x40) {
436 	    s->gb18030.buf_ptr = 0;
437 	    if ((c == 0x80) || (c == 0xFF))
438 		return -1;
439 	    else
440 		return (int) ((unsigned) (s->gb18030.buf[0] << 8) + c);
441 	} else if (c >= 30) {	/* 2Byte is (0x30 -> 0x39) */
442 	    s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
443 	    return -1;
444 	} else {
445 	    s->gb18030.buf_ptr = 0;
446 	    return (int) c;
447 	}
448     } else if (s->gb18030.buf_ptr == 2) {
449 	if ((c >= 0x81) && (c <= 0xFE)) {
450 	    s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
451 	    return -1;
452 	} else {
453 	    s->gb18030.buf_ptr = 0;
454 	    return (int) c;
455 	}
456     } else {
457 	int r = 0;
458 	s->gb18030.buf_ptr = 0;
459 	if ((c >= 0x30) && (c <= 0x39)) {
460 	    s->gb18030.linear = 1;
461 	    r = (((s->gb18030.buf[0] - 0x81) * 10
462 		  + (s->gb18030.buf[1] - 0x30)) * 126
463 		 + (s->gb18030.buf[2] - 0x81)) * 10
464 		+ ((int) c - 0x30);
465 	    return r;
466 	}
467 	return -1;
468     }
469 }
470