1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
 * The source code included in this file was separated from mbfilter_iso2022_jp_ms.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_iso2022jp_mobile.h"
32 #include "mbfilter_sjis_mobile.h"
33 
34 #include "unicode_table_cp932_ext.h"
35 #include "unicode_table_jis.h"
36 #include "cp932_table.h"
37 
/* Forward declarations of the file-private flush handlers, so that the
 * conversion tables defined below can store pointers to them. */
static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter);
static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter);
40 
/* NULL-terminated list of alternative names accepted for the KDDI encoding. */
static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL};
42 
/*
 * Encoding descriptor for the KDDI mobile variant of ISO-2022-JP.
 *
 * NOTE(review): the initializer is positional and the mbfl_encoding
 * declaration is not visible here; the per-entry notes below are inferred
 * from the values and should be confirmed against that declaration.
 */
const mbfl_encoding mbfl_encoding_2022jp_kddi = {
	mbfl_no_encoding_2022jp_kddi,      /* identifier of this encoding */
	"ISO-2022-JP-MOBILE#KDDI",         /* presumably the canonical name */
	"ISO-2022-JP",                     /* presumably the MIME name */
	mbfl_encoding_2022jp_kddi_aliases, /* NULL-terminated alias list */
	NULL,
	MBFL_ENCTYPE_GL_UNSAFE,            /* encoding-type flag */
	&vtbl_2022jp_kddi_wchar,           /* conversion table: this encoding -> wchar */
	&vtbl_wchar_2022jp_kddi            /* conversion table: wchar -> this encoding */
};
53 
/*
 * Conversion table for decoding ISO-2022-JP-KDDI input into wchar.
 *
 * NOTE(review): positional initializer; the roles noted below are inferred
 * from the values and should be confirmed against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = {
	mbfl_no_encoding_2022jp_kddi,             /* source encoding */
	mbfl_no_encoding_wchar,                   /* target encoding */
	mbfl_filt_conv_common_ctor,               /* shared constructor */
	NULL,
	mbfl_filt_conv_2022jp_mobile_wchar,       /* per-byte decoder defined in this file */
	mbfl_filt_conv_2022jp_mobile_wchar_flush, /* end-of-input handler defined in this file */
	NULL,
};
63 
/*
 * Conversion table for encoding wchar into ISO-2022-JP-KDDI output.
 *
 * NOTE(review): positional initializer; the roles noted below are inferred
 * from the values and should be confirmed against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = {
	mbfl_no_encoding_wchar,                   /* source encoding */
	mbfl_no_encoding_2022jp_kddi,             /* target encoding */
	mbfl_filt_conv_common_ctor,               /* shared constructor */
	NULL,
	mbfl_filt_conv_wchar_2022jp_mobile,       /* per-code-point encoder defined in this file */
	mbfl_filt_conv_wchar_2022jp_mobile_flush, /* end-of-input handler defined in this file */
	NULL,
};
73 
/*
 * Evaluate `statement` once; when its value is negative, make the enclosing
 * function return -1 on the spot. Usable only inside functions returning int.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
75 
/*
 * Convert a two-byte JIS code (lead byte c1, trail byte c2) into the
 * equivalent Shift-JIS byte pair, stored into the lvalues s1 and s2.
 *
 * Two JIS rows share one Shift-JIS lead byte: odd rows land in the lower
 * half of the trail-byte range (skipping 0x7F), even rows in the upper half.
 *
 * Every parameter is parenthesized so that expression arguments such as
 * `b & 0x7F` are handled correctly. c1 and c2 are evaluated more than once,
 * so do not pass expressions with side effects.
 */
#define SJIS_ENCODE(c1,c2,s1,s2) \
		do { \
			(s1) = (((c1) - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
			(s2) = (c2); \
			if ((c1) & 1) { \
				if ((c2) < 0x60) { \
					(s2)--; \
				} \
				(s2) += 0x20; \
			} else { \
				(s2) += 0x7e; \
			} \
		} while (0)
89 
/*
 * Convert a Shift-JIS byte pair (lead byte c1, trail byte c2) into the
 * equivalent two-byte JIS code, stored into the lvalues s1 and s2.
 *
 * Each Shift-JIS lead byte covers two JIS rows; a trail byte of 0x9F or
 * above selects the second (even) row of the pair.
 *
 * Every parameter is parenthesized so that expression arguments such as
 * `w >> 8` or `w & 0xFF` are handled correctly. c1 and c2 are evaluated more
 * than once, so do not pass expressions with side effects.
 */
#define SJIS_DECODE(c1,c2,s1,s2) \
		do { \
			if ((c1) < 0xa0) { \
				(s1) = (((c1) - 0x81) << 1) + 0x21; \
			} else { \
				(s1) = (((c1) - 0xc1) << 1) + 0x21; \
			} \
			(s2) = (c2); \
			if ((c2) < 0x9f) { \
				if ((c2) < 0x7f) { \
					(s2)++; \
				} \
				(s2) -= 0x20; \
			} else { \
				(s1)++; \
				(s2) -= 0x7e; \
			} \
		} while (0)
108 
/*
 * (ku*94)+ten value -> two-byte JIS code.
 *
 * On entry s1 holds the linear index (row * 94 + cell, both zero-based).
 * On exit c1 and c2 hold the two bytes (each offset by 0x21), s1 holds them
 * packed as (c1 << 8) | c2, and s2 is set to 1.
 *
 * Wrapped in do { } while (0) so the macro acts as a single statement, e.g.
 * under an unbraced `if`. Invoke it with a trailing semicolon.
 */
#define CODE2JIS(c1,c2,s1,s2) \
	do { \
		(c1) = (s1)/94+0x21; \
		(c2) = (s1)-94*((c1)-0x21)+0x21; \
		(s1) = ((c1) << 8) | (c2); \
		(s2) = 1; \
	} while (0)
115 
/*
 * Character-set states that the decoder keeps in filter->status after an
 * escape sequence selects JIS X 0201 kana or JIS X 0208 kanji. The decoder
 * uses the low four bits of status separately, to track its position inside
 * an escape sequence or a two-byte character.
 */
#define JISX0201_KANA  0x20
#define JISX0208_KANJI 0x80
118 
/*
 * Decode one input byte of ISO-2022-JP (KDDI mobile variant) and hand the
 * resulting code point(s) to filter->output_function.
 *
 * filter->status carries two pieces of state:
 *   - the selected character set: 0 (ASCII), JISX0201_KANA or JISX0208_KANJI;
 *   - in the low four bits, the position inside a multi-byte unit:
 *       0  between characters
 *       1  first byte of a two-byte character is held in filter->cache
 *       2  ESC seen
 *       3  ESC $ seen
 *       4  ESC $ ( seen
 *       5  ESC ( seen
 *
 * An unrecognized byte or escape sequence is reported as MBFL_BAD_INPUT and
 * the low four bits are cleared, keeping the selected character set.
 *
 * Returns 0 on success, or -1 as soon as the output function returns a
 * negative value.
 */
int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, w, snd = 0;

	switch (filter->status & 0xF) {
	case 0:
		if (c == 0x1B) {
			/* Start of an escape sequence; the character-set bits are kept */
			filter->status += 2;
		} else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) {
			/* 7-bit kana -> halfwidth katakana block */
			CK((*filter->output_function)(0xFF40 + c, filter->data));
		} else if (filter->status == JISX0208_KANJI && c > 0x20 && c < 0x80) {
			/* First byte of a two-byte character; wait for the second */
			filter->cache = c;
			filter->status += 1;
		} else if (c >= 0 && c < 0x80) { /* ASCII */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xA0 && c < 0xE0) { /* Kana */
			CK((*filter->output_function)(0xFEC0 + c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* JISX 0208, second byte */
	case 1:
		w = 0;
		filter->status &= ~0xF;
		c1 = filter->cache;
		if (c > 0x20 && c < 0x7F) {
			/* Linear index: zero-based row * 94 + zero-based cell */
			s = ((c1 - 0x21) * 94) + c - 0x21;

			/* Symbols that are given fixed mappings ahead of the table lookup */
			switch (s) {
			case 31:
				w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
				break;
			case 32:
				w = 0xFF5E; /* FULLWIDTH TILDE */
				break;
			case 33:
				w = 0x2225; /* PARALLEL TO */
				break;
			case 60:
				w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
				break;
			case 80:
				w = 0xFFE0; /* FULLWIDTH CENT SIGN */
				break;
			case 81:
				w = 0xFFE1; /* FULLWIDTH POUND SIGN */
				break;
			case 137:
				w = 0xFFE2; /* FULLWIDTH NOT SIGN */
				break;
			default:
				break;
			}

			/* Rows in this window are shifted by 22 rows and looked up as KDDI emoji */
			if (s >= (84 * 94) && s < (91 * 94)) {
				s += 22 * 94;
				w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
				if (w > 0 && snd > 0) {
					/* The emoji expands to two code points; snd goes first.
					 * Checked like every other write so that an output
					 * failure stops the conversion here as well. */
					CK((*filter->output_function)(snd, filter->data));
				}
			}

			if (w == 0) {
				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
				} else if (s >= 0 && s < jisx0208_ucs_table_size) {
					w = jisx0208_ucs_table[s];
				}
			}

			if (w <= 0) {
				w = MBFL_BAD_INPUT;
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC */
	case 2:
		if (c == '$') {
			filter->status++;
		} else if (c == '(') {
			filter->status += 3;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC $ */
	case 3:
		if (c == '@' || c == 'B') {
			filter->status = JISX0208_KANJI;
		} else if (c == '(') {
			filter->status++;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC $ ( */
	case 4:
		if (c == '@' || c == 'B') {
			filter->status = JISX0208_KANJI;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC ( */
	case 5:
		if (c == 'B' || c == 'J') {
			filter->status = 0; /* ASCII mode */
		} else if (c == 'I') {
			filter->status = JISX0201_KANA;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	default:
		/* No other value is ever stored in the low four bits */
		break;
	}

	return 0;
}
240 
/*
 * End-of-input handler for the decoder.
 *
 * If the low four bits of filter->status are non-zero, the input stopped
 * inside an escape sequence or a two-byte character, so MBFL_BAD_INPUT is
 * emitted. The flush is then forwarded through filter->flush_function when
 * one is set. Return values of both callbacks are ignored; always returns 0.
 */
static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter)
{
	const int pending = filter->status & 0xF;

	if (pending != 0) {
		/* Truncated sequence */
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
	}

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);

	return 0;
}
253 
/*
 * Encode one Unicode code point as ISO-2022-JP (KDDI mobile variant) and
 * write the resulting bytes through filter->output_function.
 *
 * The high byte of filter->status records the character set currently
 * selected in the output: 0 (ASCII), 0x100 (JIS X 0201 kana) or 0x200
 * (JIS X 0208). An escape sequence is written only when the character set
 * has to change.
 *
 * Returns 0 on success, or -1 as soon as an output call returns a negative
 * value. Code points with no mapping go to mbfl_filt_conv_illegal_output().
 */
int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
{
	int row, cell, idx;
	int jis = 0, from_emoji = 0;

	/* Regular lookup in whichever Unicode -> JIS table covers the code point */
	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		jis = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		jis = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		jis = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		jis = ucs_r_jis_table[c - ucs_r_jis_table_min];
	}

	/* Fixed mappings for a few symbols when the tables gave nothing */
	if (jis <= 0) {
		switch (c) {
		case 0xA5: /* YEN SIGN */
			jis = 0x216F; /* FULLWIDTH YEN SIGN */
			break;
		case 0x203E: /* OVER LINE */
			jis = 0x2131; /* FULLWIDTH MACRON */
			break;
		case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
			jis = 0x2140;
			break;
		case 0xFF5E: /* FULLWIDTH TILDE */
			jis = 0x2141;
			break;
		case 0x2225: /* PARALLEL TO */
			jis = 0x2142;
			break;
		case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
			jis = 0x215d;
			break;
		case 0xFFE0: /* FULLWIDTH CENT SIGN */
			jis = 0x2171;
			break;
		case 0xFFE1: /* FULLWIDTH POUND SIGN */
			jis = 0x2172;
			break;
		case 0xFFE2: /* FULLWIDTH NOT SIGN */
			jis = 0x224c;
			break;
		default:
			break;
		}
	}

	/* Emoji lookup: on a hit, jis holds a linear index that is turned into a
	 * byte pair and moved down by 0x16 rows; from_emoji becomes non-zero. */
	if (mbfilter_unicode2sjis_emoji_kddi(c, &jis, filter)) {
		CODE2JIS(row, cell, jis, from_emoji);
		jis -= 0x1600;
	}

	if (filter->status == 1 && filter->cache) {
		/* We are just processing one of KDDI's special emoji for a phone keypad button */
		return 0;
	}

	/* not found or X 0212: fall back to a reverse search of the CP932 extension table */
	if (jis <= 0 || (jis >= 0xa1a1 && from_emoji == 0)) {
		jis = -1;
		idx = 0;
		while (idx < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min) {
			if (cp932ext1_ucs_table[idx] == c) {
				/* Table entries start at row 0x2D */
				jis = (((idx / 94) + 0x2D) << 8) + (idx % 94) + 0x21;
				break;
			}
			idx++;
		}

		if (c == 0) {
			jis = 0;
		}
	}

	if (jis < 0) {
		/* No mapping at all */
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (jis < 0x80) { /* ASCII */
		if ((filter->status & 0xFF00) != 0) {
			CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
			CK((*filter->output_function)('(', filter->data));
			CK((*filter->output_function)('B', filter->data));
		}
		CK((*filter->output_function)(jis, filter->data));
		filter->status = 0;
	} else if (jis > 0xA0 && jis < 0xE0) { /* Kana */
		if ((filter->status & 0xFF00) != 0x100) {
			CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
			CK((*filter->output_function)('(', filter->data));
			CK((*filter->output_function)('I', filter->data));
		}
		filter->status = 0x100;
		CK((*filter->output_function)(jis & 0x7F, filter->data));
	} else if (jis < 0x7E7F) { /* JIS X 0208 */
		if ((filter->status & 0xFF00) != 0x200) {
			CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
			CK((*filter->output_function)('$', filter->data));
			CK((*filter->output_function)('B', filter->data));
		}
		filter->status = 0x200;
		CK((*filter->output_function)((jis >> 8) & 0xFF, filter->data));
		CK((*filter->output_function)(jis & 0x7F, filter->data));
	}

	return 0;
}
347 
/*
 * End-of-input handler for the encoder.
 *
 * Writes ESC ( B when a non-ASCII character set is still selected, then
 * emits a '#' or digit that is still held in filter->cache (status == 1),
 * and finally forwards the flush through filter->flush_function when one is
 * set. Return values of the callbacks are ignored; always returns 0.
 */
static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
{
	int held;

	/* Go back to ASCII mode (so strings can be safely concatenated) */
	if ((filter->status & 0xFF00) != 0) {
		(*filter->output_function)(0x1B, filter->data); /* ESC */
		(*filter->output_function)('(', filter->data);
		(*filter->output_function)('B', filter->data);
	}

	held = filter->cache;
	if (filter->status == 1) {
		if (held == '#' || (held >= '0' && held <= '9')) {
			/* Held-back character that was never completed */
			(*filter->output_function)(held, filter->data);
		}
	}

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);

	return 0;
}
368