1 /* $XTermId: other.c,v 1.15 2013/01/30 01:27:54 tom Exp $ */
2
3 /*
4 Copyright (c) 2002 by Tomohiro KUBOTA
5
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 THE SOFTWARE.
23 */
24
25 #include <other.h>
26
/* ISO 10646 code point that the GBK and HKSCS mappings in this file use for byte value 128. */
#define EURO_10646 0x20AC
28
29 int
init_gbk(OtherStatePtr s)30 init_gbk(OtherStatePtr s)
31 {
32 s->gbk.mapping = LookupMapping("gbk-0", us16BIT);
33 if (!s->gbk.mapping)
34 return 0;
35
36 s->gbk.reverse = LookupReverse(s->gbk.mapping);
37 if (!s->gbk.reverse)
38 return 0;
39
40 s->gbk.buf = -1;
41 return 1;
42 }
43
44 unsigned int
mapping_gbk(unsigned int n,OtherStatePtr s)45 mapping_gbk(unsigned int n, OtherStatePtr s)
46 {
47 unsigned int r;
48 if (n < 128)
49 return n;
50 if (n == 128)
51 return EURO_10646;
52 r = MapCodeValue(n, s->gbk.mapping);
53 return r;
54 }
55
56 unsigned int
reverse_gbk(unsigned int n,OtherStatePtr s)57 reverse_gbk(unsigned int n, OtherStatePtr s)
58 {
59 if (n < 128)
60 return n;
61 if (n == EURO_10646)
62 return 128;
63 return s->gbk.reverse->reverse(n, s->gbk.reverse->data);
64 }
65
66 int
stack_gbk(unsigned c,OtherStatePtr s)67 stack_gbk(unsigned c, OtherStatePtr s)
68 {
69 if (s->gbk.buf < 0) {
70 if (c < 129)
71 return (int) c;
72 s->gbk.buf = (int) c;
73 return -1;
74 } else {
75 int b;
76 if (c < 0x40 || c == 0x7F) {
77 s->gbk.buf = -1;
78 return (int) c;
79 }
80 if (s->gbk.buf < 0xFF && c < 0xFF)
81 b = (int) ((unsigned) (s->gbk.buf << 8) + c);
82 else
83 b = -1;
84 s->gbk.buf = -1;
85 return b;
86 }
87 }
88
89 int
init_utf8(OtherStatePtr s)90 init_utf8(OtherStatePtr s)
91 {
92 s->utf8.buf_ptr = 0;
93 return 1;
94 }
95
96 unsigned int
mapping_utf8(unsigned int n,OtherStatePtr s GCC_UNUSED)97 mapping_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED)
98 {
99 return n;
100 }
101
102 unsigned int
reverse_utf8(unsigned int n,OtherStatePtr s GCC_UNUSED)103 reverse_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED)
104 {
105 if (n < 0x80)
106 return n;
107 if (n < 0x800)
108 return 0xC080 + ((n & 0x7C0) << 2) + (n & 0x3F);
109 if (n < 0x10000)
110 return 0xE08080 + ((n & 0xF000) << 4) + ((n & 0xFC0) << 2) + (n & 0x3F);
111 return 0xF0808080 + ((n & 0x1C0000) << 6) + ((n & 0x3F000) << 4) +
112 ((n & 0xFC0) << 2) + (n & 0x3F);
113 }
114
115 int
stack_utf8(unsigned c,OtherStatePtr s)116 stack_utf8(unsigned c, OtherStatePtr s)
117 {
118 int u;
119
120 if (c < 0x80) {
121 s->utf8.buf_ptr = 0;
122 return (int) c;
123 }
124 if (s->utf8.buf_ptr == 0) {
125 if ((c & 0x40) == 0)
126 return -1;
127 s->utf8.buf[s->utf8.buf_ptr++] = UChar(c);
128 if ((c & 0x60) == 0x40)
129 s->utf8.len = 2;
130 else if ((c & 0x70) == 0x60)
131 s->utf8.len = 3;
132 else if ((c & 0x78) == 0x70)
133 s->utf8.len = 4;
134 else
135 s->utf8.buf_ptr = 0;
136 return -1;
137 }
138 if ((c & 0x40) != 0) {
139 s->utf8.buf_ptr = 0;
140 return -1;
141 }
142 s->utf8.buf[s->utf8.buf_ptr++] = UChar(c);
143 if (s->utf8.buf_ptr < s->utf8.len)
144 return -1;
145 switch (s->utf8.len) {
146 case 2:
147 u = ((s->utf8.buf[0] & 0x1F) << 6) | (s->utf8.buf[1] & 0x3F);
148 s->utf8.buf_ptr = 0;
149 if (u < 0x80)
150 return -1;
151 else
152 return u;
153 case 3:
154 u = ((s->utf8.buf[0] & 0x0F) << 12)
155 | ((s->utf8.buf[1] & 0x3F) << 6)
156 | (s->utf8.buf[2] & 0x3F);
157 s->utf8.buf_ptr = 0;
158 if (u < 0x800)
159 return -1;
160 else
161 return u;
162 case 4:
163 u = ((s->utf8.buf[0] & 0x03) << 18)
164 | ((s->utf8.buf[1] & 0x3F) << 12)
165 | ((s->utf8.buf[2] & 0x3F) << 6)
166 | ((s->utf8.buf[3] & 0x3F));
167 s->utf8.buf_ptr = 0;
168 if (u < 0x10000)
169 return -1;
170 else
171 return u;
172 }
173 s->utf8.buf_ptr = 0;
174 return -1;
175 }
176
/* Threshold used by reverse_sjis(): code points from here on go to the JIS X 0201 reverse table. */
#define HALFWIDTH_10646 0xFF61
/* Shift_JIS byte 0x5C is translated to/from YEN_10646 instead of being passed through. */
#define YEN_SJIS 0x5C
#define YEN_10646 0x00A5
/* Shift_JIS byte 0x7E is translated to/from OVERLINE_10646 instead of being passed through. */
#define OVERLINE_SJIS 0x7E
#define OVERLINE_10646 0x203E
182
183 int
init_sjis(OtherStatePtr s)184 init_sjis(OtherStatePtr s)
185 {
186 s->sjis.x0208mapping = LookupMapping("jisx0208.1990-0", us16BIT);
187 if (!s->sjis.x0208mapping)
188 return 0;
189
190 s->sjis.x0208reverse = LookupReverse(s->sjis.x0208mapping);
191 if (!s->sjis.x0208reverse)
192 return 0;
193
194 s->sjis.x0201mapping = LookupMapping("jisx0201.1976-0", us16BIT);
195 if (!s->sjis.x0201mapping)
196 return 0;
197
198 s->sjis.x0201reverse = LookupReverse(s->sjis.x0201mapping);
199 if (!s->sjis.x0201reverse)
200 return 0;
201
202 s->sjis.buf = -1;
203 return 1;
204 }
205
206 unsigned int
mapping_sjis(unsigned int n,OtherStatePtr s)207 mapping_sjis(unsigned int n, OtherStatePtr s)
208 {
209 unsigned int j1, j2, s1, s2;
210 if (n == YEN_SJIS)
211 return YEN_10646;
212 if (n == OVERLINE_SJIS)
213 return OVERLINE_10646;
214 if (n < 0x80)
215 return n;
216 if (n >= 0xA0 && n <= 0xDF)
217 return MapCodeValue(n, s->sjis.x0201mapping);
218 s1 = ((n >> 8) & 0xFF);
219 s2 = (n & 0xFF);
220 j1 = (s1 << 1)
221 - (unsigned) (s1 <= 0x9F ? 0xE0 : 0x160)
222 - (unsigned) (s2 < 0x9F ? 1 : 0);
223 j2 = s2
224 - 0x1F
225 - (unsigned) (s2 >= 0x7F ? 1 : 0)
226 - (unsigned) (s2 >= 0x9F ? 0x5E : 0);
227 return MapCodeValue((j1 << 8) + j2, s->sjis.x0208mapping);
228 }
229
230 unsigned int
reverse_sjis(unsigned int n,OtherStatePtr s)231 reverse_sjis(unsigned int n, OtherStatePtr s)
232 {
233 unsigned int j, j1, j2, s1, s2;
234 if (n == YEN_10646)
235 return YEN_SJIS;
236 if (n == OVERLINE_10646)
237 return OVERLINE_SJIS;
238 if (n < 0x80)
239 return n;
240 if (n >= HALFWIDTH_10646)
241 return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data);
242 j = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data);
243 j1 = ((j >> 8) & 0xFF);
244 j2 = (j & 0xFF);
245 s1 = ((j1 - 1) >> 1)
246 + (unsigned) ((j1 <= 0x5E) ? 0x71 : 0xB1);
247 s2 = j2
248 + (unsigned) ((j1 & 1) ? ((j2 < 0x60) ? 0x1F : 0x20) : 0x7E);
249 return (s1 << 8) + s2;
250 }
251
252 int
stack_sjis(unsigned c,OtherStatePtr s)253 stack_sjis(unsigned c, OtherStatePtr s)
254 {
255 if (s->sjis.buf < 0) {
256 if (c < 128 || (c >= 0xA0 && c <= 0xDF))
257 return (int) c;
258 s->sjis.buf = (int) c;
259 return -1;
260 } else {
261 int b;
262 if (c < 0x40 || c == 0x7F) {
263 s->sjis.buf = -1;
264 return (int) c;
265 }
266 if (s->sjis.buf < 0xFF && c < 0xFF)
267 b = (int) ((unsigned) (s->sjis.buf << 8) + c);
268 else
269 b = -1;
270 s->sjis.buf = -1;
271 return b;
272 }
273 }
274
275 int
init_hkscs(OtherStatePtr s)276 init_hkscs(OtherStatePtr s)
277 {
278 s->hkscs.mapping = LookupMapping("big5hkscs-0", us16BIT);
279 if (!s->hkscs.mapping)
280 return 0;
281
282 s->hkscs.reverse = LookupReverse(s->hkscs.mapping);
283 if (!s->hkscs.reverse)
284 return 0;
285
286 s->hkscs.buf = -1;
287 return 1;
288 }
289
290 unsigned int
mapping_hkscs(unsigned int n,OtherStatePtr s)291 mapping_hkscs(unsigned int n, OtherStatePtr s)
292 {
293 unsigned int r;
294 if (n < 128)
295 return n;
296 if (n == 128)
297 return EURO_10646;
298 r = MapCodeValue(n, s->hkscs.mapping);
299 return r;
300 }
301
302 unsigned int
reverse_hkscs(unsigned int n,OtherStatePtr s)303 reverse_hkscs(unsigned int n, OtherStatePtr s)
304 {
305 if (n < 128)
306 return n;
307 if (n == EURO_10646)
308 return 128;
309 return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data);
310 }
311
312 int
stack_hkscs(unsigned c,OtherStatePtr s)313 stack_hkscs(unsigned c, OtherStatePtr s)
314 {
315 if (s->hkscs.buf < 0) {
316 if (c < 129)
317 return (int) c;
318 s->hkscs.buf = (int) c;
319 return -1;
320 } else {
321 int b;
322 if (c < 0x40 || c == 0x7F) {
323 s->hkscs.buf = -1;
324 return (int) c;
325 }
326 if (s->hkscs.buf < 0xFF && c < 0xFF)
327 b = (int) ((unsigned) (s->hkscs.buf << 8) + c);
328 else
329 b = -1;
330 s->hkscs.buf = -1;
331 return b;
332 }
333 }
334
/*
 * GB18030 is a multi-byte encoding that uses 1, 2 or 4 bytes per character.
 * Besides the single-byte (Latin) range, the character set is split into
 * two subsets:
 *   - the 2-byte characters are defined in gb18030.2000-0
 *   - the 4-byte characters are defined in gb18030.2000-1
 * Please note that the mapping in 2000-1 is not a direct
 * "4-byte sequence => 2-byte value" mapping.
 * To use 2000-1 we first need to "linearize" the 4-byte sequence into a
 * single index and then look up the Unicode value for that index.
 *
 * For more information on the GB18030 standard, please see:
 * http://oss.software.ibm.com/icu/docs/papers/gb18030.html
 *
 * For more information on GB18030 implementation issues in XFree86, please see:
 * http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1
 */
351 int
init_gb18030(OtherStatePtr s)352 init_gb18030(OtherStatePtr s)
353 {
354 s->gb18030.cs0_mapping = LookupMapping("gb18030.2000-0", us16BIT);
355 if (!s->gb18030.cs0_mapping)
356 return 0;
357
358 s->gb18030.cs0_reverse = LookupReverse(s->gb18030.cs0_mapping);
359 if (!s->gb18030.cs0_reverse)
360 return 0;
361
362 s->gb18030.cs1_mapping = LookupMapping("gb18030.2000-1", us16BIT);
363 if (!s->gb18030.cs1_mapping)
364 return 0;
365
366 s->gb18030.cs1_reverse = LookupReverse(s->gb18030.cs1_mapping);
367 if (!s->gb18030.cs1_reverse)
368 return 0;
369
370 s->gb18030.linear = 0;
371 s->gb18030.buf_ptr = 0;
372 return 1;
373 }
374
375 unsigned int
mapping_gb18030(unsigned int n,OtherStatePtr s)376 mapping_gb18030(unsigned int n, OtherStatePtr s)
377 {
378 if (n <= 0x80)
379 return n; /* 0x80 is valid but unassigned codepoint */
380 if (n >= 0xFFFF)
381 return '?';
382
383 return MapCodeValue(n,
384 ((s->gb18030.linear)
385 ? s->gb18030.cs1_mapping
386 : s->gb18030.cs0_mapping));
387 }
388
389 unsigned int
reverse_gb18030(unsigned int n,OtherStatePtr s)390 reverse_gb18030(unsigned int n, OtherStatePtr s)
391 {
392 /* when lookup in 2000-0 failed. */
393 /* lookup in 2000-1 and then try to unlinear'd */
394 unsigned int r;
395 if (n <= 0x80)
396 return n;
397
398 r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data);
399 if (r != 0)
400 return r;
401
402 r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data);
403 if (r != 0) {
404 unsigned char bytes[4];
405
406 bytes[3] = UChar(0x30 + r % 10);
407 r /= 10;
408 bytes[2] = UChar(0x81 + r % 126);
409 r /= 126;
410 bytes[1] = UChar(0x30 + r % 10);
411 r /= 10;
412 bytes[0] = UChar(0x81 + r);
413
414 r = (unsigned int) bytes[0] << 24;
415 r |= (unsigned int) bytes[1] << 16;
416 r |= (unsigned int) bytes[2] << 8;
417 r |= (unsigned int) bytes[3];
418 }
419 return r;
420 }
421
422 int
stack_gb18030(unsigned c,OtherStatePtr s)423 stack_gb18030(unsigned c, OtherStatePtr s)
424 {
425 /* if set gb18030.linear => True. the return value is "linear'd" */
426 if (s->gb18030.buf_ptr == 0) {
427 if (c <= 0x80)
428 return (int) c;
429 if (c == 0xFF)
430 return -1;
431 s->gb18030.linear = 0;
432 s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
433 return -1;
434 } else if (s->gb18030.buf_ptr == 1) {
435 if (c >= 0x40) {
436 s->gb18030.buf_ptr = 0;
437 if ((c == 0x80) || (c == 0xFF))
438 return -1;
439 else
440 return (int) ((unsigned) (s->gb18030.buf[0] << 8) + c);
441 } else if (c >= 30) { /* 2Byte is (0x30 -> 0x39) */
442 s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
443 return -1;
444 } else {
445 s->gb18030.buf_ptr = 0;
446 return (int) c;
447 }
448 } else if (s->gb18030.buf_ptr == 2) {
449 if ((c >= 0x81) && (c <= 0xFE)) {
450 s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c;
451 return -1;
452 } else {
453 s->gb18030.buf_ptr = 0;
454 return (int) c;
455 }
456 } else {
457 int r = 0;
458 s->gb18030.buf_ptr = 0;
459 if ((c >= 0x30) && (c <= 0x39)) {
460 s->gb18030.linear = 1;
461 r = (((s->gb18030.buf[0] - 0x81) * 10
462 + (s->gb18030.buf[1] - 0x30)) * 126
463 + (s->gb18030.buf[2] - 0x81)) * 10
464 + ((int) c - 0x30);
465 return r;
466 }
467 return -1;
468 }
469 }
470