1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
/*
 * The source code included in this file was separated from mbfilter_iso2022_jp_ms.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_iso2022jp_mobile.h"
32 #include "mbfilter_sjis_mobile.h"
33
34 #include "unicode_table_cp932_ext.h"
35 #include "unicode_table_jis.h"
36 #include "cp932_table.h"
37
38 static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter);
39 static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter);
40
41 static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL};
42
43 const mbfl_encoding mbfl_encoding_2022jp_kddi = {
44 mbfl_no_encoding_2022jp_kddi,
45 "ISO-2022-JP-MOBILE#KDDI",
46 "ISO-2022-JP",
47 mbfl_encoding_2022jp_kddi_aliases,
48 NULL,
49 MBFL_ENCTYPE_GL_UNSAFE,
50 &vtbl_2022jp_kddi_wchar,
51 &vtbl_wchar_2022jp_kddi
52 };
53
54 const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = {
55 mbfl_no_encoding_2022jp_kddi,
56 mbfl_no_encoding_wchar,
57 mbfl_filt_conv_common_ctor,
58 NULL,
59 mbfl_filt_conv_2022jp_mobile_wchar,
60 mbfl_filt_conv_2022jp_mobile_wchar_flush,
61 NULL,
62 };
63
64 const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = {
65 mbfl_no_encoding_wchar,
66 mbfl_no_encoding_2022jp_kddi,
67 mbfl_filt_conv_common_ctor,
68 NULL,
69 mbfl_filt_conv_wchar_2022jp_mobile,
70 mbfl_filt_conv_wchar_2022jp_mobile_flush,
71 NULL,
72 };
73
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function. Used to propagate failures of output calls.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
75
/*
 * Convert a two-byte JIS code (c1 = first byte, c2 = second byte) into the
 * corresponding Shift-JIS byte pair, stored in s1 and s2.
 *
 * c1 and c2 may be arbitrary expressions (each is evaluated more than once,
 * so they must be free of side effects); s1 and s2 must be assignable.
 */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		(s1) = (((c1) - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
		(s2) = (c2); \
		if ((c1) & 1) { \
			if ((c2) < 0x60) { \
				(s2)--; \
			} \
			(s2) += 0x20; \
		} else { \
			(s2) += 0x7e; \
		} \
	} while (0)
89
/*
 * Convert a Shift-JIS byte pair (c1 = lead byte, c2 = trail byte) into the
 * corresponding two-byte JIS code, stored in s1 (first byte) and s2 (second
 * byte).
 *
 * c1 and c2 may be arbitrary expressions (each is evaluated more than once,
 * so they must be free of side effects); s1 and s2 must be assignable.
 */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		if ((c1) < 0xa0) { \
			(s1) = (((c1) - 0x81) << 1) + 0x21; \
		} else { \
			(s1) = (((c1) - 0xc1) << 1) + 0x21; \
		} \
		(s2) = (c2); \
		if ((c2) < 0x9f) { \
			if ((c2) < 0x7f) { \
				(s2)++; \
			} \
			(s2) -= 0x20; \
		} else { \
			(s1)++; \
			(s2) -= 0x7e; \
		} \
	} while (0)
108
/*
 * (ku*94)+ten value -> two-byte JIS code.
 *
 * On entry s1 holds the zero-based (ku*94)+ten index. On exit:
 *   c1 = ku + 0x21, c2 = ten + 0x21,
 *   s1 = (c1 << 8) | c2,
 *   s2 = 1 (marks that the value in s1 was produced by this macro).
 *
 * Wrapped in do { } while (0) so that it acts as a single statement; invoke
 * it with a trailing semicolon.
 */
#define CODE2JIS(c1,c2,s1,s2) \
	do { \
		(c1) = (s1)/94+0x21; \
		(c2) = (s1)-94*((c1)-0x21)+0x21; \
		(s1) = ((c1) << 8) | (c2); \
		(s2) = 1; \
	} while (0)
115
/*
 * Character-set selectors kept in filter->status by the decoder
 * (mbfl_filt_conv_2022jp_mobile_wchar). Both leave the low four bits clear;
 * the decoder uses those bits for its position inside an escape sequence or
 * a two-byte character.
 */
#define JISX0201_KANA  0x20 /* selected by ESC ( I */
#define JISX0208_KANJI 0x80 /* selected by ESC $ @, ESC $ B, ESC $ ( @, ESC $ ( B */
118
/*
 * Decode one input byte of ISO-2022-JP-MOBILE#KDDI and pass the resulting
 * codepoint(s) to filter->output_function.
 *
 * filter->status combines two pieces of state:
 *   - the selected character set: 0 (ASCII / JIS X 0201 Roman),
 *     JISX0201_KANA or JISX0208_KANJI
 *   - in the low four bits, the position inside a multi-byte unit:
 *       0  at the start of a character
 *       1  first byte of a two-byte character is held in filter->cache
 *       2  ESC seen
 *       3  ESC $ seen
 *       4  ESC $ ( seen
 *       5  ESC ( seen
 *
 * Invalid bytes and invalid escape sequences produce MBFL_BAD_INPUT.
 *
 * Returns 0 on success, or -1 as soon as an output call returns a negative
 * value.
 */
int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, w, snd = 0;

	switch (filter->status & 0xF) {
	case 0:
		if (c == 0x1B) {
			filter->status += 2;
		} else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) {
			/* 7-bit kana: 0x21..0x5F -> U+FF61..U+FF9F */
			CK((*filter->output_function)(0xFF40 + c, filter->data));
		} else if (filter->status == JISX0208_KANJI && c > 0x20 && c < 0x80) {
			/* First byte of a two-byte character; wait for the second */
			filter->cache = c;
			filter->status += 1;
		} else if (c >= 0 && c < 0x80) { /* ASCII */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xA0 && c < 0xE0) { /* Kana */
			/* 8-bit kana: 0xA1..0xDF -> U+FF61..U+FF9F */
			CK((*filter->output_function)(0xFEC0 + c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* JISX 0208, second byte */
	case 1:
		w = 0;
		filter->status &= ~0xF;
		c1 = filter->cache;
		if (c > 0x20 && c < 0x7F) {
			/* Zero-based (row * 94) + column index */
			s = ((c1 - 0x21) * 94) + c - 0x21;

			/* Codepoints that take precedence over the table lookups below */
			switch (s) {
			case 31:
				w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
				break;
			case 32:
				w = 0xFF5E; /* FULLWIDTH TILDE */
				break;
			case 33:
				w = 0x2225; /* PARALLEL TO */
				break;
			case 60:
				w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
				break;
			case 80:
				w = 0xFFE0; /* FULLWIDTH CENT SIGN */
				break;
			case 81:
				w = 0xFFE1; /* FULLWIDTH POUND SIGN */
				break;
			case 137:
				w = 0xFFE2; /* FULLWIDTH NOT SIGN */
				break;
			default:
				break;
			}

			/* Rows with first byte 0x75..0x7B are looked up as KDDI emoji
			 * after shifting the index up by 22 rows */
			if (s >= (84 * 94) && s < (91 * 94)) {
				s += 22 * 94;
				w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
				if (w > 0 && snd > 0) {
					/* Two-codepoint emoji: snd is emitted first. Its result
					 * is checked like every other output call. */
					CK((*filter->output_function)(snd, filter->data));
				}
			}

			if (w == 0) {
				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
				} else if (s >= 0 && s < jisx0208_ucs_table_size) {
					w = jisx0208_ucs_table[s];
				}
			}

			if (w <= 0) {
				w = MBFL_BAD_INPUT;
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC */
	case 2:
		if (c == '$') {
			filter->status++;
		} else if (c == '(') {
			filter->status += 3;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC $ */
	case 3:
		if (c == '@' || c == 'B') {
			filter->status = JISX0208_KANJI;
		} else if (c == '(') {
			filter->status++;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC $ ( */
	case 4:
		if (c == '@' || c == 'B') {
			filter->status = JISX0208_KANJI;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC ( */
	case 5:
		if (c == 'B' || c == 'J') {
			filter->status = 0; /* ASCII mode */
		} else if (c == 'I') {
			filter->status = JISX0201_KANA;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	default:
		/* No other position value is ever stored by this function */
		break;
	}

	return 0;
}
240
/*
 * End-of-input handler for the ISO-2022-JP-MOBILE#KDDI decoder.
 *
 * If the input stopped inside an escape sequence or after the first byte of
 * a two-byte character (low four bits of filter->status non-zero), emits
 * MBFL_BAD_INPUT. Then invokes filter->flush_function when one is set.
 * Results of those calls are not inspected; always returns 0.
 */
static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter)
{
	int truncated = filter->status & 0xF;

	if (truncated) {
		filter->output_function(MBFL_BAD_INPUT, filter->data);
	}

	if (!filter->flush_function) {
		return 0;
	}

	filter->flush_function(filter->data);
	return 0;
}
253
/* Emit ESC followed by the two designator bytes b1 and b2.
 * Returns 0, or -1 as soon as an output call returns a negative value. */
static int mbfl_2022jp_mobile_put_escape(mbfl_convert_filter *filter, int b1, int b2)
{
	CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
	CK((*filter->output_function)(b1, filter->data));
	CK((*filter->output_function)(b2, filter->data));
	return 0;
}

/*
 * Encode one codepoint `c` as ISO-2022-JP-MOBILE#KDDI and pass the resulting
 * bytes to filter->output_function.
 *
 * The high byte of filter->status records which character set the output is
 * currently in: 0 for ASCII, 0x100 for JIS X 0201 kana, 0x200 for JIS X 0208.
 * An escape sequence is emitted whenever the character set has to change.
 *
 * Codepoints with no mapping are handed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on success, or -1 as soon as an output call returns a negative
 * value.
 */
int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
{
	int hi, lo, jis = 0, from_emoji = 0;
	int i;

	/* Unicode -> JIS lookup tables, each covering one codepoint range */
	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		jis = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		jis = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		jis = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		jis = ucs_r_jis_table[c - ucs_r_jis_table_min];
	}

	/* Fixed mappings for codepoints the tables did not resolve */
	if (jis <= 0) {
		switch (c) {
		case 0xA5: /* YEN SIGN */
			jis = 0x216F; /* FULLWIDTH YEN SIGN */
			break;
		case 0x203E: /* OVER LINE */
			jis = 0x2131; /* FULLWIDTH MACRON */
			break;
		case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
			jis = 0x2140;
			break;
		case 0xFF5E: /* FULLWIDTH TILDE */
			jis = 0x2141;
			break;
		case 0x2225: /* PARALLEL TO */
			jis = 0x2142;
			break;
		case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
			jis = 0x215d;
			break;
		case 0xFFE0: /* FULLWIDTH CENT SIGN */
			jis = 0x2171;
			break;
		case 0xFFE1: /* FULLWIDTH POUND SIGN */
			jis = 0x2172;
			break;
		case 0xFFE2: /* FULLWIDTH NOT SIGN */
			jis = 0x224c;
			break;
		default:
			break;
		}
	}

	/* When the emoji helper reports a match, `jis` holds a (ku*94)+ten
	 * index; turn it into a two-byte code and shift it down by 0x1600 */
	if (mbfilter_unicode2sjis_emoji_kddi(c, &jis, filter)) {
		CODE2JIS(hi, lo, jis, from_emoji);
		jis -= 0x1600;
	}

	if (filter->status == 1 && filter->cache) {
		/* We are just processing one of KDDI's special emoji for a phone keypad button */
		return 0;
	}

	if (jis <= 0 || (jis >= 0xa1a1 && from_emoji == 0)) { /* not found or X 0212 */
		/* Reverse search of the CP932 extension table */
		jis = -1;
		for (i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
			if (c == cp932ext1_ucs_table[i]) {
				jis = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
				break;
			}
		}

		if (c == 0) {
			jis = 0;
		}
	}

	if (jis < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (jis < 0x80) { /* ASCII */
		if (filter->status & 0xFF00) {
			CK(mbfl_2022jp_mobile_put_escape(filter, '(', 'B'));
		}
		CK((*filter->output_function)(jis, filter->data));
		filter->status = 0;
	} else if (jis > 0xA0 && jis < 0xE0) { /* Kana */
		if ((filter->status & 0xFF00) != 0x100) {
			CK(mbfl_2022jp_mobile_put_escape(filter, '(', 'I'));
		}
		filter->status = 0x100;
		CK((*filter->output_function)(jis & 0x7F, filter->data));
	} else if (jis < 0x7E7F) { /* JIS X 0208 */
		if ((filter->status & 0xFF00) != 0x200) {
			CK(mbfl_2022jp_mobile_put_escape(filter, '$', 'B'));
		}
		filter->status = 0x200;
		CK((*filter->output_function)((jis >> 8) & 0xFF, filter->data));
		CK((*filter->output_function)(jis & 0x7F, filter->data));
	}
	/* NOTE(review): a value >= 0x7E7F reaching this point produces no output */

	return 0;
}
347
/*
 * End-of-output handler for the ISO-2022-JP-MOBILE#KDDI encoder.
 *
 * - If the output is in a non-ASCII character set (high byte of
 *   filter->status set), emits ESC ( B to return to ASCII.
 * - If filter->status is exactly 1 and filter->cache holds '#' or a digit,
 *   emits that held character. Presumably this is a keypad character stashed
 *   by mbfilter_unicode2sjis_emoji_kddi while waiting for a possible
 *   combining codepoint - confirm against that helper.
 * - Invokes filter->flush_function when one is set.
 *
 * Results of the output calls are not inspected; always returns 0.
 */
static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
{
	int held;

	/* Go back to ASCII mode (so strings can be safely concatenated) */
	if ((filter->status & 0xFF00) != 0) {
		filter->output_function(0x1B, filter->data); /* ESC */
		filter->output_function('(', filter->data);
		filter->output_function('B', filter->data);
	}

	held = filter->cache;
	if (filter->status == 1) {
		int is_keypad_char = (held == '#') || (held >= '0' && held <= '9');

		if (is_keypad_char) {
			filter->output_function(held, filter->data);
		}
	}

	if (!filter->flush_function) {
		return 0;
	}

	filter->flush_function(filter->data);
	return 0;
}
368