1 /*
2  * "streamable kanji code filter and converter"
3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4  *
5  * LICENSE NOTICES
6  *
7  * This file is part of "streamable kanji code filter and converter",
8  * which is distributed under the terms of GNU Lesser General Public
9  * License (version 2) as published by the Free Software Foundation.
10  *
11  * This software is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with "streamable kanji code filter and converter";
18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19  * Suite 330, Boston, MA  02111-1307  USA
20  *
21  * The author of this file:
22  *
23  */
24 /*
 * The source code included in this file was separated from mbfilter_sjis_open.c
26  * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27  *
28  */
29 
30 #include "mbfilter.h"
31 #include "mbfilter_sjis_mobile.h"
32 
33 #include "unicode_table_cp932_ext.h"
34 #include "unicode_table_jis.h"
35 
36 #include "emoji2uni.h"
37 
38 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
39 extern const unsigned char mblen_table_sjis[];
40 
41 static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter);
42 
/* Alternative names for each of the three mobile-carrier Shift-JIS variants.
 * Each list is NULL-terminated and is referenced from the matching
 * mbfl_encoding descriptor below. */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL};
static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL};
static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL};
46 
/* Encoding descriptors for the three carrier-specific Shift-JIS variants.
 * All three share the "Shift_JIS" string and the mblen_table_sjis table; they
 * differ in encoding id, name, alias list and conversion vtables.
 *
 * NOTE(review): the field layout comes from mbfl_encoding in mbfilter.h, which
 * is not visible here. The initializers look like: encoding id, name, MIME
 * name, aliases, byte-length table, flags, to-wchar vtable, from-wchar vtable
 * -- confirm against the struct definition. */

/* NTT DoCoMo */
const mbfl_encoding mbfl_encoding_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	"SJIS-Mobile#DOCOMO",
	"Shift_JIS",
	mbfl_encoding_sjis_docomo_aliases,
 	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_docomo_wchar,
	&vtbl_wchar_sjis_docomo
};

/* KDDI */
const mbfl_encoding mbfl_encoding_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	"SJIS-Mobile#KDDI",
	"Shift_JIS",
	mbfl_encoding_sjis_kddi_aliases,
 	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_kddi_wchar,
	&vtbl_wchar_sjis_kddi
};

/* SoftBank */
const mbfl_encoding mbfl_encoding_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	"SJIS-Mobile#SOFTBANK",
	"Shift_JIS",
	mbfl_encoding_sjis_sb_aliases,
 	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_sb_wchar,
	&vtbl_wchar_sjis_sb
};
79 
/* Conversion vtables, one pair per carrier.
 *
 * Every "<carrier> -> wchar" table uses mbfl_filt_conv_sjis_mobile_wchar with
 * mbfl_filt_conv_sjis_wchar_flush; every "wchar -> <carrier>" table uses
 * mbfl_filt_conv_wchar_sjis_mobile with mbfl_filt_conv_sjis_mobile_flush.
 * The shared filter functions tell the carriers apart by comparing
 * filter->from / filter->to with the encoding descriptors above.
 *
 * NOTE(review): slot order is defined by struct mbfl_convert_vtbl in
 * mbfilter.h (not visible here); it appears to be: source encoding id, target
 * encoding id, constructor, destructor, filter function, flush function,
 * copy function -- confirm. */

/* DoCoMo -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> DoCoMo */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* KDDI -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> KDDI */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* SoftBank -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
	mbfl_no_encoding_sjis_sb,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> SoftBank */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};
139 
/* National flag emoji, used when converting Unicode -> KDDI/SoftBank.
 *
 * nflags_s[i] is a two-letter country code; the rows are exactly 2 chars wide,
 * so no NUL terminator is stored and they must not be used as C strings.
 * nflags_code_kddi[i] and nflags_code_sb[i] are the carrier emoji codes that
 * mbfilter_unicode2sjis_emoji_kddi / _sb emit when the pair of Regional
 * Indicator codepoints spelling nflags_s[i] is seen. The three arrays are
 * parallel: index i refers to the same country in each. */
static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"};
static const int nflags_code_kddi[10] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7};
static const int nflags_code_sb[10] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03};
143 
/* Range-mapping tables in the {first, last, base} row format understood by
 * mbfilter_conv_map_tbl() and mbfilter_conv_r_map_tbl(): source values
 * first..last (inclusive) correspond to consecutive target values starting at
 * base.
 *
 * NOTE(review): none of these tables is referenced in this file. Judging by
 * the names they map carrier emoji codes to Unicode Private Use Area
 * codepoints for DoCoMo, KDDI (two variants) and SoftBank -- confirm against
 * the callers. */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{0x28c2, 0x292f, 0xe63e},
	{0x2930, 0x2934, 0xe6ac},
	{0x2935, 0x2951, 0xe6b1},
	{0x2952, 0x29db, 0xe6ce},
};

const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{0x26ec, 0x2838, 0xe468},
	{0x284c, 0x2863, 0xe5b5},
	{0x24b8, 0x24ca, 0xe5cd},
	{0x24cb, 0x2545, 0xea80},
	{0x2839, 0x284b, 0xeafb},
	{0x2546, 0x25c0, 0xeb0e},
	{0x25c1, 0x25c6, 0xeb89},
};

const unsigned short mbfl_sb2uni_pua[6][3] = {
	{0x27a9, 0x2802, 0xe101},
	{0x2808, 0x2861, 0xe201},
	{0x2921, 0x297a, 0xe001},
	{0x2980, 0x29cc, 0xe301},
	{0x2a99, 0x2ae4, 0xe401},
	{0x2af8, 0x2b35, 0xe501},
};

const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{0x24b8, 0x24f6, 0xec40},
	{0x24f7, 0x2573, 0xec80},
	{0x2574, 0x25b2, 0xed40},
	{0x25b3, 0x25c6, 0xed80},
	{0x26ec, 0x272a, 0xef40},
	{0x272b, 0x27a7, 0xef80},
	{0x27a8, 0x27e6, 0xf040},
	{0x27e7, 0x2863, 0xf080},
};
180 
/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF
 * These correspond to the letters A-Z
 * To display the flag emoji for a country, two unicode codepoints are combined,
 * which correspond to the two-letter code for that country
 * This macro converts uppercase ASCII values to Regional Indicator codepoints
 * (0x1F1A5 + 'A' == 0x1F1E6) */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Evaluate `statement` (in this file, a call to an output function); if the
 * result is negative, return -1 from the enclosing function */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
189 
/* Convert a two-byte JIS code (c1 = high byte, c2 = low byte) into the
 * corresponding Shift-JIS byte pair, stored in s1 (lead byte) and s2 (trail
 * byte).
 *
 * c1 and c2 are evaluated more than once, so pass expressions without side
 * effects; s1 and s2 must be assignable. Every argument is parenthesized so
 * that expression arguments (e.g. `x & 0xff`) expand correctly. */
#define SJIS_ENCODE(c1,c2,s1,s2) \
		do { \
			(s1) = (((c1) - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
			(s2) = (c2); \
			if ((c1) & 1) { \
				if ((c2) < 0x60) { \
					(s2)--; \
				} \
				(s2) += 0x20; \
			} else { \
				(s2) += 0x7e; \
			} \
		} while (0)
203 
/* Convert a Shift-JIS byte pair (c1 = lead byte, c2 = trail byte) into the
 * corresponding two-byte JIS code, stored in s1 (high byte) and s2 (low byte).
 *
 * c1 and c2 are evaluated more than once, so pass expressions without side
 * effects; s1 and s2 must be assignable. Every argument is parenthesized so
 * that expression arguments (e.g. `b & 0xff`) expand correctly. */
#define SJIS_DECODE(c1,c2,s1,s2) \
		do { \
			if ((c1) < 0xa0) { \
				(s1) = (((c1) - 0x81) << 1) + 0x21; \
			} else { \
				(s1) = (((c1) - 0xc1) << 1) + 0x21; \
			} \
			(s2) = (c2); \
			if ((c2) < 0x9f) { \
				if ((c2) < 0x7f) { \
					(s2)++; \
				} \
				(s2) -= 0x20; \
			} else { \
				(s1)++; \
				(s2) -= 0x7e; \
			} \
		} while (0)
222 
/* (ku*94)+ten value -> two-byte JIS code
 *
 * On entry s1 holds the (ku*94)+ten value. On exit c1 and c2 hold the high and
 * low JIS bytes, s1 holds them packed as (c1 << 8) | c2, and s2 is set to 1.
 * All four arguments must be assignable. The body is wrapped in
 * do { } while (0) so the macro behaves as a single statement, including
 * after an unbraced `if`. */
#define CODE2JIS(c1,c2,s1,s2) \
	do { \
		(c1) = (s1)/94+0x21; \
		(c2) = (s1)-94*((c1)-0x21)+0x21; \
		(s1) = ((c1) << 8) | (c2); \
		(s2) = 1; \
	} while (0)
229 
/* Forward lookup in a {first, last, base} range table.
 *
 * Scans the n rows of `map` in order. For the first row whose inclusive range
 * first..last contains c, stores base + (c - first) in *w and returns 1.
 * Returns 0 (leaving *w untouched) when no row matches. */
int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row = 0;

	while (row < n) {
		const unsigned short *range = map[row++];
		int first = range[0];
		int last = range[1];

		if (c < first || c > last) {
			continue;
		}
		*w = c - first + range[2];
		return 1;
	}
	return 0;
}
240 
/* Reverse lookup in a {first, last, base} range table.
 *
 * Each row maps source values first..last onto base..base+(last-first). For
 * the first of the n rows whose target range contains c, stores the matching
 * source value in *w and returns 1. Returns 0 (leaving *w untouched) when no
 * row matches. */
int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	for (int row = 0; row < n; row++) {
		int src_first = map[row][0];
		int src_last = map[row][1];
		int dst_first = map[row][2];
		int dst_last = dst_first - src_first + src_last;

		if (c >= dst_first && c <= dst_last) {
			*w = c + src_first - dst_first;
			return 1;
		}
	}
	return 0;
}
252 
/* number -> (ku*94)+ten value for telephone keypad character
 * (digits 1-9 are consecutive starting at 0x2966; 0 is separate at 0x296F) */
#define DOCOMO_KEYPAD(n) ((n) == 0 ? 0x296F : (0x2965 + (n)))
/* (ku*94)+ten value for the '#' keypad character */
#define DOCOMO_KEYPAD_HASH 0x2964

/* Store `c` in *snd and return 0x20E3 from the enclosing function.
 * Requires an `int *snd` to be in scope at the point of use. */
#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0)
258 
/* Expand a 16-bit emoji table entry to the full codepoint it stands for.
 *
 * The emoji tables only store 16 bits per entry, so the high bits have been
 * dropped:
 *   - entries above 0xF000 stand for emoji codepoints in the 0x1Fxxx range;
 *     0x10000 is added back
 *   - entries above 0xE000 (up to and including 0xF000) stand for Private Use
 *     codepoints in the 0xFExxx range, used for emoji which Unicode does not
 *     support; 0xF0000 is added back
 *   - anything else is returned unchanged */
static inline int convert_emoji_cp(int cp)
{
	if (cp <= 0xE000) {
		return cp;
	}
	return cp + (cp > 0xF000 ? 0x10000 : 0xF0000);
}
273 
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)274 int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
275 {
276 	/* All three mobile vendors had emoji for numbers on a telephone keypad
277 	 * Unicode doesn't have those, but it has a combining character which puts
278 	 * a 'keypad button' around the following character, making it look like
279 	 * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. */
280 	if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
281 		if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) {
282 			EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]));
283 		} else {
284 			*snd = 0;
285 			return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]);
286 		}
287 	}
288 	return 0;
289 }
290 
/* Store the Regional Indicator for the first letter of `country` in *snd and
 * return the Regional Indicator for the second letter from the enclosing
 * function. Requires an `int *snd` to be in scope at the point of use. */
#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0)

/* Country codes for the KDDI flag emoji at codes 0x2545..0x254A, in code
 * order (indexed by s - 0x2545). Rows are 2 chars wide: no NUL terminator. */
static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"};
294 
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)295 int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
296 {
297 	if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
298 		if (s == 0x24C0) { /* Spain */
299 			EMIT_FLAG_EMOJI("ES");
300 		} else if (s == 0x24C1) { /* Russia */
301 			EMIT_FLAG_EMOJI("RU");
302 		} else if (s >= 0x2545 && s <= 0x254A) {
303 			EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]);
304 		} else if (s == 0x25BC) {
305 			EMIT_KEYPAD_EMOJI('#');
306 		} else {
307 			*snd = 0;
308 			return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]);
309 		}
310 	} else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
311 		if (s == 0x2750) { /* Japan */
312 			EMIT_FLAG_EMOJI("JP");
313 		} else if (s >= 0x27A6 && s <= 0x27AE) {
314 			EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1');
315 		} else if (s == 0x27F7) { /* United States */
316 			EMIT_FLAG_EMOJI("US");
317 		} else if (s == 0x2830) {
318 			EMIT_KEYPAD_EMOJI('0');
319 		} else {
320 			*snd = 0;
321 			return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]);
322 		}
323 	}
324 	return 0;
325 }
326 
/* Country codes for the SoftBank flag emoji at codes 0x2B02..0x2B0B, in code
 * order (indexed by s - 0x2B02). Rows are 2 chars wide: no NUL terminator. */
static const char nflags_sb[10][2] = {"JP", "US", "FR", "DE", "IT", "GB", "ES", "RU", "CN", "KR"};
328 
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)329 int mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
330 {
331 	if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
332 		if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) {
333 			EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
334 		} else {
335 			*snd = 0;
336 			return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
337 		}
338 	} else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
339 		*snd = 0;
340 		return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]);
341 	} else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
342 		if (s >= 0x2B02 && s <= 0x2B0B) {
343 			EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]);
344 		} else {
345 			*snd = 0;
346 			return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]);
347 		}
348 	}
349 	return 0;
350 }
351 
352 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)353 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
354 {
355 	/* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji
356 	 * to a sequence of 2 codepoints, one of which is a combining character which
357 	 * adds the 'key' image around the other
358 	 *
359 	 * In the other direction, look for such sequences and convert them to a
360 	 * single emoji */
361 	if (filter->status == 1) {
362 		int c1 = filter->cache;
363 		filter->cache = filter->status = 0;
364 		if (c == 0x20E3) {
365 			if (c1 == '#') {
366 				*s1 = 0x2964;
367 			} else if (c1 == '0') {
368 				*s1 = 0x296F;
369 			} else { /* Previous character was '1'-'9' */
370 				*s1 = 0x2966 + (c1 - '1');
371 			}
372 			return 1;
373 		} else {
374 			/* This character wasn't combining character to make keypad symbol,
375 			 * so pass the previous character through... and proceed to process the
376 			 * current character as usual
377 			 * (Single-byte ASCII characters are valid in Shift-JIS...) */
378 			CK((*filter->output_function)(c1, filter->data));
379 		}
380 	}
381 
382 	if (c == '#' || (c >= '0' && c <= '9')) {
383 		filter->status = 1;
384 		filter->cache = c;
385 		return 0;
386 	}
387 
388 	if (c == 0xA9) { /* Copyright sign */
389 		*s1 = 0x29B5;
390 		return 1;
391 	} else if (c == 0x00AE) { /* Registered sign */
392 		*s1 = 0x29BA;
393 		return 1;
394 	} else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
395 		int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
396 		if (i >= 0) {
397 			*s1 = mb_tbl_uni_docomo2code2_value[i];
398 			return 1;
399 		}
400 	} else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
401 		int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
402 		if (i >= 0) {
403 			*s1 = mb_tbl_uni_docomo2code3_value[i];
404 			return 1;
405 		}
406 	} else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
407 		int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
408 		if (i >= 0) {
409 			*s1 = mb_tbl_uni_docomo2code5_val[i];
410 			return 1;
411 		}
412 	}
413 	return 0;
414 }
415 
/* Try to convert Unicode codepoint `c` to a KDDI emoji code.
 *
 * Returns 1 and stores the (ku*94)+ten code in *s1 when an emoji was matched;
 * returns 0 otherwise; returns -1 (via CK) if the output function fails.
 *
 * Two kinds of 2-codepoint sequences are folded into a single emoji, using
 * filter->cache to hold the first codepoint:
 *   status 1: '#' or '0'-'9' is held; 0x20E3 next makes it a keypad emoji
 *   status 2: a Regional Indicator is held; a second one next may complete
 *             one of the ten supported national flags
 * The function returns 0 whenever it has just stored a codepoint this way. */
int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
	int held = filter->cache;

	switch (filter->status) {
	case 1:
		filter->cache = filter->status = 0;
		if (c == 0x20E3) {
			if (held == '#') {
				*s1 = 0x25BC;
			} else if (held == '0') {
				*s1 = 0x2830;
			} else { /* held is '1'-'9' */
				*s1 = 0x27a6 + (held - '1');
			}
			return 1;
		}
		/* Not a keypad sequence; pass the held character through */
		CK((*filter->output_function)(held, filter->data));
		break;

	case 2:
		filter->cache = filter->status = 0;
		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int k = 0; k < 10; k++) {
				if (held == NFLAGS(nflags_s[k][0]) && c == NFLAGS(nflags_s[k][1])) {
					*s1 = nflags_code_kddi[k];
					return 1;
				}
			}
		}
		/* None of the KDDI national flag emoji matched, so there is no way
		 * to convert the held codepoint... */
		mbfl_filt_conv_illegal_output(held, filter);
		break;

	default:
		break;
	}

	if (c == '#' || (c >= '0' && c <= '9')) {
		filter->status = 1;
		filter->cache = c;
		return 0;
	}
	if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		filter->status = 2;
		filter->cache = c;
		return 0;
	}

	if (c == 0xA9) { /* Copyright sign */
		*s1 = 0x27DC;
		return 1;
	}
	if (c == 0xAE) { /* Registered sign */
		*s1 = 0x27DD;
		return 1;
	}

	int pos;
	if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
		pos = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
		if (pos < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code2_value[pos];
		return 1;
	}
	if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
		/* Keys are stored as 16 bits; drop the 0x10000 offset before searching */
		pos = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
		if (pos < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code3_value[pos];
		return 1;
	}
	if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
		/* Likewise, drop the 0xF0000 offset */
		pos = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
		if (pos < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code5_val[pos];
		return 1;
	}

	return 0;
}
487 
/* Try to convert Unicode codepoint `c` to a SoftBank emoji code.
 *
 * Returns 1 and stores the (ku*94)+ten code in *s1 when an emoji was matched;
 * returns 0 otherwise; returns -1 (via CK) if the output function fails.
 *
 * Two kinds of 2-codepoint sequences are folded into a single emoji, using
 * filter->cache to hold the first codepoint:
 *   status 1: '#' or '0'-'9' is held; 0x20E3 next makes it a keypad emoji
 *   status 2: a Regional Indicator is held; a second one next may complete
 *             one of the ten supported national flags
 * The function returns 0 whenever it has just stored a codepoint this way. */
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
{
	if (filter->status == 1) {
		int c1 = filter->cache;
		filter->cache = filter->status = 0;
		if (c == 0x20E3) {
			if (c1 == '#') {
				*s1 = 0x2817;
			} else if (c1 == '0') {
				*s1 = 0x282c;
			} else { /* Previous character was '1'-'9' */
				*s1 = 0x2823 + (c1 - '1');
			}
			return 1;
		} else {
			/* Not a keypad sequence; pass the held character through.
			 * The result is checked, as in the DoCoMo and KDDI variants,
			 * so an output failure is not silently dropped. */
			CK((*filter->output_function)(c1, filter->data));
		}
	} else if (filter->status == 2) {
		int c1 = filter->cache;
		filter->cache = filter->status = 0;
		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int i = 0; i < 10; i++) {
				if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
					*s1 = nflags_code_sb[i];
					return 1;
				}
			}
		}

		/* If none of the SoftBank national flag emoji matched, then we have no way
		 * to convert the previous codepoint... */
		mbfl_filt_conv_illegal_output(c1, filter);
	}

	if (c == '#' || (c >= '0' && c <= '9')) {
		filter->status = 1;
		filter->cache = c;
		return 0;
	} else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		filter->status = 2;
		filter->cache = c;
		return 0;
	}

	if (c == 0xA9) { /* Copyright sign */
		*s1 = 0x2855;
		return 1;
	} else if (c == 0xAE) { /* Registered sign */
		*s1 = 0x2856;
		return 1;
	} else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
		int i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
		if (i >= 0) {
			*s1 = mb_tbl_uni_sb2code2_value[i];
			return 1;
		}
	} else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
		/* Keys are stored as 16 bits; drop the 0x10000 offset before searching */
		int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
		if (i >= 0) {
			*s1 = mb_tbl_uni_sb2code3_value[i];
			return 1;
		}
	} else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
		/* Likewise, drop the 0xF0000 offset */
		int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
		if (i >= 0) {
			*s1 = mb_tbl_uni_sb2code5_val[i];
			return 1;
		}
	}
	return 0;
}
559 
/* Filter function: SJIS-Mobile (DoCoMo / KDDI / SoftBank) bytes -> wchar.
 *
 * Called with one input byte `c` at a time; decoded codepoints are passed to
 * filter->output_function. filter->from selects which carrier's emoji are
 * recognized. Returns 0, or -1 (via CK) if an output call returns a negative
 * value.
 *
 * filter->status values:
 *   0  between characters
 *   1  first byte of a 2-byte character is held in filter->cache
 *   2  ESC seen (SoftBank only)
 *   3  ESC $ seen
 *   4  ESC $ <letter> seen; the letter is held in filter->cache and every
 *      following byte selects one emoji, until 0x0F ends the sequence */
int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, s1, s2, w, snd = 0;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) { /* ASCII */
			if (filter->from == &mbfl_encoding_sjis_sb && c == 0x1B) {
				/* ESC; escape sequences were used on older SoftBank phones for emoji */
				filter->cache = c;
				filter->status = 2;
			} else {
				CK((*filter->output_function)(c, filter->data));
			}
		} else if (c > 0xA0 && c < 0xE0) { /* Kana */
			/* Bytes 0xA1..0xDF map to 0xFF61..0xFF9F */
			CK((*filter->output_function)(0xFEC0 + c, filter->data));
		} else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* Kanji, second byte */
		filter->status = 0;
		c1 = filter->cache;
		if (c >= 0x40 && c <= 0xFC && c != 0x7F) {
			w = 0;
			SJIS_DECODE(c1, c, s1, s2);
			/* s is the (ku*94)+ten value, which indexes all the tables below */
			s = ((s1 - 0x21) * 94) + s2 - 0x21;
			if (s <= 137) {
				/* Characters which are mapped differently from the JIS X 0208 table */
				if (s == 31) {
					w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
				} else if (s == 32) {
					w = 0xFF5E; /* FULLWIDTH TILDE */
				} else if (s == 33) {
					w = 0x2225; /* PARALLEL TO */
				} else if (s == 60) {
					w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
				} else if (s == 80) {
					w = 0xFFE0; /* FULLWIDTH CENT SIGN */
				} else if (s == 81) {
					w = 0xFFE1; /* FULLWIDTH POUND SIGN */
				} else if (s == 137) {
					w = 0xFFE2; /* FULLWIDTH NOT SIGN */
				}
			}
			if (w == 0) {
				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {		/* vendor ext1 (13ku) */
					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
				} else if (s >= 0 && s < jisx0208_ucs_table_size) {		/* X 0208 */
					w = jisx0208_ucs_table[s];
				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {		/* vendor ext2 (89ku - 92ku) */
					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
				}

				/* Emoji
				 * Within the carrier's emoji range, the emoji lookup replaces
				 * whatever the tables above produced. If the emoji is a
				 * 2-codepoint sequence, `snd` is the codepoint to output first. */
				if (filter->from == &mbfl_encoding_sjis_docomo && s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
					w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
					if (snd > 0) {
						CK((*filter->output_function)(snd, filter->data));
					}
				} else if (filter->from == &mbfl_encoding_sjis_kddi && s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) {
					w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
					if (snd > 0) {
						CK((*filter->output_function)(snd, filter->data));
					}
				} else if (filter->from == &mbfl_encoding_sjis_sb && s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) {
					w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
					if (snd > 0) {
						CK((*filter->output_function)(snd, filter->data));
					}
				}

				if (w == 0) {
					if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
						w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
					} else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
						w = s - (94*94) + 0xe000;
					}
				}
			}
			if (w <= 0) {
				w = MBFL_BAD_INPUT;
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC: Softbank Emoji */
	case 2:
		if (c == '$') {
			filter->cache = c;
			filter->status++;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
		}
		break;

	/* ESC $: Softbank Emoji */
	case 3:
		if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) {
			filter->cache = c;
			filter->status++;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
		}
		break;

	/* ESC $ [GEFOPQ]: Softbank Emoji */
	case 4:
		c1 = filter->cache;
		if (c == 0xF) { /* Terminate sequence of emoji */
			filter->status = filter->cache = 0;
			return 0;
		} else {
			/* The letter picks the emoji row (s1 = first code of that row);
			 * the byte `c` picks the position within it */
			if (c1 == 'G' && c >= 0x21 && c <= 0x7a) {
				s1 = (0x91 - 0x21) * 94;
			} else if (c1 == 'E' && c >= 0x21 && c <= 0x7A) {
				s1 = (0x8D - 0x21) * 94;
			} else if (c1 == 'F' && c >= 0x21 && c <= 0x7A) {
				s1 = (0x8E - 0x21) * 94;
			} else if (c1 == 'O' && c >= 0x21 && c <= 0x6D) {
				s1 = (0x92 - 0x21) * 94;
			} else if (c1 == 'P' && c >= 0x21 && c <= 0x6C) {
				s1 = (0x95 - 0x21) * 94;
			} else if (c1 == 'Q' && c >= 0x21 && c <= 0x5E) {
				s1 = (0x96 - 0x21) * 94;
			} else {
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
				filter->status = filter->cache = 0;
				return 0;
			}

			w = mbfilter_sjis_emoji_sb2unicode(s1 + c - 0x21, &snd);
			if (w > 0) {
				/* Success: status stays at 4, so more emoji may follow */
				if (snd > 0) {
					CK((*filter->output_function)(snd, filter->data));
				}
				CK((*filter->output_function)(w, filter->data));
			} else {
				CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
				filter->status = filter->cache = 0;
			}
		}
	}

	return 0;
}
714 
/* Flush function for the SJIS-Mobile -> wchar filters; called at end of input.
 *
 * If input stopped in the middle of a character or escape sequence (any
 * non-zero status other than 4), an error marker is emitted. Status 4 means
 * a SoftBank emoji sequence was left open without its terminating byte; that
 * is not reported as an error.
 *
 * The filter state is then cleared, so that a filter which is used again
 * after being flushed does not resume in the middle of a stale character.
 * Finally the flush is passed on to the next filter, if any.
 *
 * Always returns 0; the output function's return value is deliberately not
 * checked here, so the downstream flush always runs. */
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
{
	int truncated = filter->status && filter->status != 4;

	filter->cache = filter->status = 0;

	if (truncated) {
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
727 
/* Filter function: wchar -> SJIS-Mobile (DoCoMo / KDDI / SoftBank).
 *
 * Called with one Unicode codepoint `c` at a time; the resulting bytes are
 * passed to filter->output_function. filter->to selects which carrier's emoji
 * codes are produced. Codepoints which cannot be converted are handed to
 * mbfl_filt_conv_illegal_output().
 *
 * The emoji helpers may hold a codepoint back in filter->cache (non-zero
 * filter->status) while waiting to see whether it begins a 2-codepoint emoji
 * sequence; nothing is output for `c` in that case.
 *
 * Returns 0 on success, -1 if an output call or an emoji helper reports
 * failure. */
int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s1 = 0, s2 = 0;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xE000 && c < (0xE000 + 20*94)) {
		/* Private User Area (95ku - 114ku) */
		s1 = c - 0xE000;
		c1 = (s1 / 94) + 0x7F;
		c2 = (s1 % 94) + 0x21;
		s1 = (c1 << 8) | c2;
		/* Marks s1 as valid even though it is >= 0x8080 (see check below) */
		s2 = 1;
	}

	if (s1 <= 0) {
		if (c == 0xA5) { /* YEN SIGN */
			s1 = 0x216F; /* FULLWIDTH YEN SIGN */
		} else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */
			s1 = 0x2140;
		} else if (c == 0x2225) { /* PARALLEL TO */
			s1 = 0x2142;
		} else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
			s1 = 0x215D;
		} else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */
			s1 = 0x2171;
		} else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */
			s1 = 0x2172;
		} else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */
			s1 = 0x224C;
		}
	}

	if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {	/* not found or X 0212 */
		s1 = -1;

		/* CP932 vendor ext1 (13ku) */
		for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) {
			if (c == cp932ext1_ucs_table[c1]) {
				s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
				break;
			}
		}

		if (s1 <= 0) {
			/* CP932 vendor ext2 (89ku - 92ku) */
			for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) {
				if (c == cp932ext2_ucs_table[c1]) {
					s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21;
					break;
				}
			}
		}

		if (s1 <= 0) {
			/* CP932 vendor ext3 (115ku - 119ku) */
			for (c1 = 0; c1 < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; c1++) {
				if (c == cp932ext3_ucs_table[c1]) {
					s1 = (((c1 / 94) + 0x93) << 8) + (c1 % 94) + 0x21;
					break;
				}
			}
		}

		if (c == 0) {
			s1 = 0;
		}
	}

	/* Carrier-specific emoji. The helpers return 1 when they stored an emoji
	 * code in s1, 0 when they did not, and -1 when outputting a held-back
	 * character failed. A negative result must not be mistaken for a match:
	 * s1 would not hold an emoji code, and the failure has to be reported. */
	int emoji = 0;
	if (filter->to == &mbfl_encoding_sjis_docomo) {
		emoji = mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter);
	} else if (filter->to == &mbfl_encoding_sjis_kddi) {
		emoji = mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter);
	} else if (filter->to == &mbfl_encoding_sjis_sb) {
		emoji = mbfilter_unicode2sjis_emoji_sb(c, &s1, filter);
	}
	if (emoji < 0) {
		return -1;
	}
	if (emoji) {
		CODE2JIS(c1,c2,s1,s2);
	}

	if (filter->status) {
		/* `c` was held back as the possible start of an emoji sequence */
		return 0;
	}

	if (s1 >= 0) {
		if (s1 < 0x100) { /* Latin/Kana */
			CK((*filter->output_function)(s1, filter->data));
		} else { /* Kanji */
			c1 = (s1 >> 8) & 0xff;
			c2 = s1 & 0xff;
			SJIS_ENCODE(c1, c2, s1, s2);
			CK((*filter->output_function)(s1, filter->data));
			CK((*filter->output_function)(s2, filter->data));
		}
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return 0;
}
829 
/* Flush function for the wchar -> SJIS-Mobile filters; called at end of input.
 *
 * Deals with a codepoint which the emoji helpers were holding back in
 * filter->cache when the input ended:
 *   status 1: a '#' or digit which turned out not to start a keypad emoji
 *             is output as-is
 *   status 2: a lone Regional Indicator cannot be converted and is reported
 *             through mbfl_filt_conv_illegal_output()
 * In both cases the filter state is cleared before anything is output, so
 * that the held-back codepoint cannot be processed a second time. Finally the
 * flush is passed on to the next filter, if any.
 *
 * Returns 0, or -1 (via CK) if outputting the held-back character fails. */
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
{
	int c1 = filter->cache;
	if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
		filter->cache = filter->status = 0;
		CK((*filter->output_function)(c1, filter->data));
	} else if (filter->status == 2) {
		/* First of a pair of Regional Indicator codepoints came at the end of a string */
		filter->cache = filter->status = 0;
		mbfl_filt_conv_illegal_output(c1, filter);
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
847