1 /*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24 /*
 * The source code included in this file was separated from mbfilter_sjis_open.c
 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
27 *
28 */
29
30 #include "mbfilter.h"
31 #include "mbfilter_sjis_mobile.h"
32
33 #include "unicode_table_cp932_ext.h"
34 #include "unicode_table_jis.h"
35
36 #include "emoji2uni.h"
37
38 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
39 extern const unsigned char mblen_table_sjis[];
40
41 static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter);
42
/* Alias name lists, one per mobile carrier variant. Each list is terminated
 * by a NULL sentinel and is referenced from the matching encoding descriptor
 * defined later in this file. */
static const char *mbfl_encoding_sjis_docomo_aliases[] = {
	"SJIS-DOCOMO",
	"shift_jis-imode",
	"x-sjis-emoji-docomo",
	NULL
};

static const char *mbfl_encoding_sjis_kddi_aliases[] = {
	"SJIS-KDDI",
	"shift_jis-kddi",
	"x-sjis-emoji-kddi",
	NULL
};

static const char *mbfl_encoding_sjis_sb_aliases[] = {
	"SJIS-SOFTBANK",
	"shift_jis-softbank",
	"x-sjis-emoji-softbank",
	NULL
};
46
/*
 * Encoding descriptors for the three mobile-carrier Shift-JIS variants.
 *
 * All three share the string "Shift_JIS", the mblen_table_sjis length table
 * and the MBFL_ENCTYPE_GL_UNSAFE flag; they differ in their encoding id,
 * their display name, their alias list and the pair of conversion vtbls
 * (variant -> wchar, wchar -> variant).
 *
 * NOTE(review): the initialisers are positional; the member names live in
 * mbfilter.h, which is not visible here. The second string is presumably the
 * MIME name -- confirm against the struct declaration.
 */

/* DoCoMo variant */
const mbfl_encoding mbfl_encoding_sjis_docomo = {
	mbfl_no_encoding_sjis_docomo,
	"SJIS-Mobile#DOCOMO",
	"Shift_JIS",
	mbfl_encoding_sjis_docomo_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_docomo_wchar,
	&vtbl_wchar_sjis_docomo
};

/* KDDI variant */
const mbfl_encoding mbfl_encoding_sjis_kddi = {
	mbfl_no_encoding_sjis_kddi,
	"SJIS-Mobile#KDDI",
	"Shift_JIS",
	mbfl_encoding_sjis_kddi_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_kddi_wchar,
	&vtbl_wchar_sjis_kddi
};

/* SoftBank variant */
const mbfl_encoding mbfl_encoding_sjis_sb = {
	mbfl_no_encoding_sjis_sb,
	"SJIS-Mobile#SOFTBANK",
	"Shift_JIS",
	mbfl_encoding_sjis_sb_aliases,
	mblen_table_sjis,
	MBFL_ENCTYPE_GL_UNSAFE,
	&vtbl_sjis_sb_wchar,
	&vtbl_wchar_sjis_sb
};
79
/*
 * Conversion vtbls: one pair (variant -> wchar, wchar -> variant) per carrier.
 *
 * Every table lists, in order: the source encoding id, the destination
 * encoding id, mbfl_filt_conv_common_ctor, NULL, the per-character filter
 * function, the flush function, and a trailing NULL.
 *
 * All "-> wchar" tables share mbfl_filt_conv_sjis_mobile_wchar and
 * mbfl_filt_conv_sjis_wchar_flush; all "wchar ->" tables share
 * mbfl_filt_conv_wchar_sjis_mobile and mbfl_filt_conv_sjis_mobile_flush.
 * Those shared functions tell the carriers apart by comparing filter->from /
 * filter->to with the encoding descriptors.
 *
 * NOTE(review): the meaning of the two NULL slots is defined by
 * struct mbfl_convert_vtbl in mbfilter.h, which is not visible here.
 */

/* DoCoMo Shift-JIS -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = {
	mbfl_no_encoding_sjis_docomo,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> DoCoMo Shift-JIS */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_docomo,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* KDDI Shift-JIS -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = {
	mbfl_no_encoding_sjis_kddi,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> KDDI Shift-JIS */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_kddi,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};

/* SoftBank Shift-JIS -> wchar */
const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = {
	mbfl_no_encoding_sjis_sb,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_sjis_mobile_wchar,
	mbfl_filt_conv_sjis_wchar_flush,
	NULL,
};

/* wchar -> SoftBank Shift-JIS */
const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_sjis_sb,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_sjis_mobile,
	mbfl_filt_conv_sjis_mobile_flush,
	NULL,
};
139
/* Two-letter country codes of the ten national flags handled when converting
 * from Unicode. Each entry is exactly two chars with no NUL terminator.
 * The two code tables that follow are index-aligned with this list: entry i
 * holds the carrier's (ku*94)+ten emoji code for the flag of nflags_s[i]. */
static const char nflags_s[10][2] = {
	"CN", "DE", "ES", "FR", "GB",
	"IT", "JP", "KR", "RU", "US"
};

static const int nflags_code_kddi[10] = {
	0x2549, /* CN */
	0x2546, /* DE */
	0x24C0, /* ES */
	0x2545, /* FR */
	0x2548, /* GB */
	0x2547, /* IT */
	0x2750, /* JP */
	0x254A, /* KR */
	0x24C1, /* RU */
	0x27F7  /* US */
};

static const int nflags_code_sb[10] = {
	0x2B0A, /* CN */
	0x2B05, /* DE */
	0x2B08, /* ES */
	0x2B04, /* FR */
	0x2B07, /* GB */
	0x2B06, /* IT */
	0x2B02, /* JP */
	0x2B0B, /* KR */
	0x2B09, /* RU */
	0x2B03  /* US */
};
143
/*
 * Range tables relating carrier emoji codes to 16-bit values in the Unicode
 * Private Use Area.
 *
 * Each row is { first code, last code, value for the first code }. This is
 * the row shape read by mbfilter_conv_map_tbl() / mbfilter_conv_r_map_tbl().
 * NOTE(review): the tables are not used inside this file; presumably other
 * filters pass them to those two helpers -- confirm against the callers.
 *
 * Row order is significant for a first-match scan and is kept as-is.
 */
const unsigned short mbfl_docomo2uni_pua[4][3] = {
	{ 0x28C2, 0x292F, 0xE63E },
	{ 0x2930, 0x2934, 0xE6AC },
	{ 0x2935, 0x2951, 0xE6B1 },
	{ 0x2952, 0x29DB, 0xE6CE },
};

const unsigned short mbfl_kddi2uni_pua[7][3] = {
	{ 0x26EC, 0x2838, 0xE468 },
	{ 0x284C, 0x2863, 0xE5B5 },
	{ 0x24B8, 0x24CA, 0xE5CD },
	{ 0x24CB, 0x2545, 0xEA80 },
	{ 0x2839, 0x284B, 0xEAFB },
	{ 0x2546, 0x25C0, 0xEB0E },
	{ 0x25C1, 0x25C6, 0xEB89 },
};

const unsigned short mbfl_sb2uni_pua[6][3] = {
	{ 0x27A9, 0x2802, 0xE101 },
	{ 0x2808, 0x2861, 0xE201 },
	{ 0x2921, 0x297A, 0xE001 },
	{ 0x2980, 0x29CC, 0xE301 },
	{ 0x2A99, 0x2AE4, 0xE401 },
	{ 0x2AF8, 0x2B35, 0xE501 },
};

const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
	{ 0x24B8, 0x24F6, 0xEC40 },
	{ 0x24F7, 0x2573, 0xEC80 },
	{ 0x2574, 0x25B2, 0xED40 },
	{ 0x25B3, 0x25C6, 0xED80 },
	{ 0x26EC, 0x272A, 0xEF40 },
	{ 0x272B, 0x27A7, 0xEF80 },
	{ 0x27A8, 0x27E6, 0xF040 },
	{ 0x27E7, 0x2863, 0xF080 },
};
180
/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF
 * These correspond to the letters A-Z
 * To display the flag emoji for a country, two unicode codepoints are combined,
 * which correspond to the two-letter code for that country
 * This macro converts uppercase ASCII values to Regional Indicator codepoints */
#define NFLAGS(c) (0x1F1A5+(int)(c))

/* Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)

/* JIS X 0208 byte pair (c1 = row byte, c2 = cell byte, both 0x21-based)
 * -> Shift-JIS lead byte s1 and trail byte s2.
 * c1 and c2 may be arbitrary expressions; they are evaluated more than once,
 * so they must not have side effects. s1 and s2 must be lvalues. */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		(s1) = (((c1) - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
		(s2) = (c2); \
		if ((c1) & 1) { \
			if ((c2) < 0x60) { \
				(s2)--; \
			} \
			(s2) += 0x20; \
		} else { \
			(s2) += 0x7e; \
		} \
	} while (0)

/* Shift-JIS lead byte c1 and trail byte c2 -> JIS X 0208 byte pair
 * (s1 = row byte, s2 = cell byte). Inverse of SJIS_ENCODE.
 * c1 and c2 may be arbitrary expressions without side effects;
 * s1 and s2 must be lvalues. */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		if ((c1) < 0xa0) { \
			(s1) = (((c1) - 0x81) << 1) + 0x21; \
		} else { \
			(s1) = (((c1) - 0xc1) << 1) + 0x21; \
		} \
		(s2) = (c2); \
		if ((c2) < 0x9f) { \
			if ((c2) < 0x7f) { \
				(s2)++; \
			} \
			(s2) -= 0x20; \
		} else { \
			(s1)++; \
			(s2) -= 0x7e; \
		} \
	} while (0)

/* (ku*94)+ten value held in s1 -> packed two-byte JIS code.
 * On exit c1 is the row byte, c2 the cell byte, s1 is (c1 << 8) | c2 and
 * s2 is set to 1. The result is a JIS code, not yet Shift-JIS bytes; pass
 * the two bytes through SJIS_ENCODE to get those.
 * Wrapped in do/while so it expands to a single statement. */
#define CODE2JIS(c1,c2,s1,s2) \
	do { \
		(c1) = (s1)/94+0x21; \
		(c2) = (s1)-94*((c1)-0x21)+0x21; \
		(s1) = ((c1) << 8) | (c2); \
		(s2) = 1; \
	} while (0)
229
/*
 * Forward range-table lookup.
 *
 * `map` holds `n` rows of { first, last, base }. The rows are scanned in
 * order; for the first row with first <= c <= last, *w is set to
 * base + (c - first) and 1 is returned. If no row matches, *w is left
 * untouched and 0 is returned.
 */
int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	for (const unsigned short (*row)[3] = map; n > 0; row++, n--) {
		const int first = (*row)[0];
		const int last = (*row)[1];

		if (c < first || c > last) {
			continue;
		}
		*w = c - first + (*row)[2];
		return 1;
	}
	return 0;
}
240
/*
 * Reverse range-table lookup (inverse of mbfilter_conv_map_tbl).
 *
 * `map` holds `n` rows of { first, last, base }. For the first row with
 * base <= c <= base + (last - first), *w is set to first + (c - base) and 1
 * is returned. If no row matches, *w is left untouched and 0 is returned.
 */
int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row = 0;

	while (row < n) {
		const int src_first = map[row][0];
		const int span = map[row][1] - src_first;
		const int dst_first = map[row][2];

		if (c >= dst_first && c <= dst_first + span) {
			*w = src_first + (c - dst_first);
			return 1;
		}
		row++;
	}
	return 0;
}
252
/* (ku*94)+ten values of DoCoMo's telephone keypad emoji.
 * The '#' key has its own code; digit 0 is 0x296F and digits 1-9 are
 * 0x2966-0x296E. */
#define DOCOMO_KEYPAD_HASH 0x2964
#define DOCOMO_KEYPAD(n) ((n) != 0 ? (0x2965 + (n)) : 0x296F)

/* For use inside the xxx2unicode functions only: store `c` through the
 * `snd` out-parameter and return the combining keycap codepoint U+20E3
 * from the enclosing function. */
#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0)
258
/* The emoji tables store only 16 bits per entry, so the real codepoint has
 * to be reconstructed from the stored value:
 *
 *   stored value > 0xF000           -> add 0x10000 (emoji at U+1Fxxx)
 *   0xE000 < stored value <= 0xF000 -> add 0xF0000 (private-use codepoints
 *                                      at U+FExxx, for emoji which Unicode
 *                                      does not support)
 *   anything else                   -> returned unchanged */
static inline int convert_emoji_cp(int cp)
{
	int offset = 0;

	if (cp > 0xF000) {
		offset = 0x10000;
	} else if (cp > 0xE000) {
		offset = 0xF0000;
	}
	return cp + offset;
}
273
mbfilter_sjis_emoji_docomo2unicode(int s,int * snd)274 int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
275 {
276 /* All three mobile vendors had emoji for numbers on a telephone keypad
277 * Unicode doesn't have those, but it has a combining character which puts
278 * a 'keypad button' around the following character, making it look like
279 * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. */
280 if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
281 if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) {
282 EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]));
283 } else {
284 *snd = 0;
285 return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]);
286 }
287 }
288 return 0;
289 }
290
291 #define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0)
292
293 static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"};
294
mbfilter_sjis_emoji_kddi2unicode(int s,int * snd)295 int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
296 {
297 if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
298 if (s == 0x24C0) { /* Spain */
299 EMIT_FLAG_EMOJI("ES");
300 } else if (s == 0x24C1) { /* Russia */
301 EMIT_FLAG_EMOJI("RU");
302 } else if (s >= 0x2545 && s <= 0x254A) {
303 EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]);
304 } else if (s == 0x25BC) {
305 EMIT_KEYPAD_EMOJI('#');
306 } else {
307 *snd = 0;
308 return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]);
309 }
310 } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
311 if (s == 0x2750) { /* Japan */
312 EMIT_FLAG_EMOJI("JP");
313 } else if (s >= 0x27A6 && s <= 0x27AE) {
314 EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1');
315 } else if (s == 0x27F7) { /* United States */
316 EMIT_FLAG_EMOJI("US");
317 } else if (s == 0x2830) {
318 EMIT_KEYPAD_EMOJI('0');
319 } else {
320 *snd = 0;
321 return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]);
322 }
323 }
324 return 0;
325 }
326
327 static const char nflags_sb[10][2] = {"JP", "US", "FR", "DE", "IT", "GB", "ES", "RU", "CN", "KR"};
328
mbfilter_sjis_emoji_sb2unicode(int s,int * snd)329 int mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
330 {
331 if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) {
332 if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) {
333 EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
334 } else {
335 *snd = 0;
336 return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]);
337 }
338 } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) {
339 *snd = 0;
340 return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]);
341 } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) {
342 if (s >= 0x2B02 && s <= 0x2B0B) {
343 EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]);
344 } else {
345 *snd = 0;
346 return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]);
347 }
348 }
349 return 0;
350 }
351
352 int
mbfilter_unicode2sjis_emoji_docomo(int c,int * s1,mbfl_convert_filter * filter)353 mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
354 {
355 /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji
356 * to a sequence of 2 codepoints, one of which is a combining character which
357 * adds the 'key' image around the other
358 *
359 * In the other direction, look for such sequences and convert them to a
360 * single emoji */
361 if (filter->status == 1) {
362 int c1 = filter->cache;
363 filter->cache = filter->status = 0;
364 if (c == 0x20E3) {
365 if (c1 == '#') {
366 *s1 = 0x2964;
367 } else if (c1 == '0') {
368 *s1 = 0x296F;
369 } else { /* Previous character was '1'-'9' */
370 *s1 = 0x2966 + (c1 - '1');
371 }
372 return 1;
373 } else {
374 /* This character wasn't combining character to make keypad symbol,
375 * so pass the previous character through... and proceed to process the
376 * current character as usual
377 * (Single-byte ASCII characters are valid in Shift-JIS...) */
378 CK((*filter->output_function)(c1, filter->data));
379 }
380 }
381
382 if (c == '#' || (c >= '0' && c <= '9')) {
383 filter->status = 1;
384 filter->cache = c;
385 return 0;
386 }
387
388 if (c == 0xA9) { /* Copyright sign */
389 *s1 = 0x29B5;
390 return 1;
391 } else if (c == 0x00AE) { /* Registered sign */
392 *s1 = 0x29BA;
393 return 1;
394 } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
395 int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
396 if (i >= 0) {
397 *s1 = mb_tbl_uni_docomo2code2_value[i];
398 return 1;
399 }
400 } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
401 int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
402 if (i >= 0) {
403 *s1 = mb_tbl_uni_docomo2code3_value[i];
404 return 1;
405 }
406 } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
407 int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
408 if (i >= 0) {
409 *s1 = mb_tbl_uni_docomo2code5_val[i];
410 return 1;
411 }
412 }
413 return 0;
414 }
415
/*
 * Try to convert Unicode codepoint `c` to a KDDI emoji.
 *
 * Two kinds of two-codepoint sequences are folded into one emoji, so some
 * codepoints are held back in filter->cache until their successor is seen:
 *   filter->status == 1: '#' or '0'-'9' is held, waiting for keycap U+20E3
 *   filter->status == 2: a Regional Indicator is held, waiting for the
 *                        second half of a national flag
 *
 * Returns 1 with *s1 set to the emoji's (ku*94)+ten value on a match,
 * 0 if there is no match (which includes the case where `c` was only
 * buffered; filter->status is then non-zero), or -1 if writing a held-back
 * character failed.
 */
int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
	int held = filter->cache;

	switch (filter->status) {
	case 1: /* '#' or digit held back */
		filter->cache = filter->status = 0;
		if (c == 0x20E3) {
			if (held == '#') {
				*s1 = 0x25BC;
			} else if (held == '0') {
				*s1 = 0x2830;
			} else { /* held is '1'-'9' */
				*s1 = 0x27a6 + (held - '1');
			}
			return 1;
		}
		/* no keycap followed: emit the held-back ASCII character as-is */
		CK((*filter->output_function)(held, filter->data));
		break;

	case 2: /* Regional Indicator held back */
		filter->cache = filter->status = 0;
		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int i = 0; i < 10; i++) {
				if (held == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
					*s1 = nflags_code_kddi[i];
					return 1;
				}
			}
		}
		/* None of the KDDI national flag emoji matched, so there is no way
		 * to convert the held-back codepoint */
		mbfl_filt_conv_illegal_output(held, filter);
		break;

	default:
		break;
	}

	if (c == '#' || (c >= '0' && c <= '9')) {
		filter->cache = c;
		filter->status = 1;
		return 0;
	}
	if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		filter->cache = c;
		filter->status = 2;
		return 0;
	}

	if (c == 0xA9) { /* Copyright sign */
		*s1 = 0x27DC;
		return 1;
	}
	if (c == 0xAE) { /* Registered sign */
		*s1 = 0x27DD;
		return 1;
	}

	/* Only the first table whose range contains c is searched */
	if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
		int idx = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
		if (idx < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code2_value[idx];
		return 1;
	}
	if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
		int idx = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
		if (idx < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code3_value[idx];
		return 1;
	}
	if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
		int idx = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
		if (idx < 0) {
			return 0;
		}
		*s1 = mb_tbl_uni_kddi2code5_val[idx];
		return 1;
	}

	return 0;
}
487
/*
 * Try to convert Unicode codepoint `c` to a SoftBank emoji.
 *
 * Two kinds of two-codepoint sequences are folded into one emoji, so some
 * codepoints are held back in filter->cache until their successor is seen:
 *   filter->status == 1: '#' or '0'-'9' is held, waiting for keycap U+20E3
 *   filter->status == 2: a Regional Indicator is held, waiting for the
 *                        second half of a national flag
 *
 * Returns 1 with *s1 set to the emoji's (ku*94)+ten value on a match,
 * 0 if there is no match (which includes the case where `c` was only
 * buffered; filter->status is then non-zero), or -1 if writing a held-back
 * character failed.
 */
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
{
	if (filter->status == 1) {
		int c1 = filter->cache;
		filter->cache = filter->status = 0;
		if (c == 0x20E3) {
			if (c1 == '#') {
				*s1 = 0x2817;
			} else if (c1 == '0') {
				*s1 = 0x282c;
			} else { /* Previous character was '1'-'9' */
				*s1 = 0x2823 + (c1 - '1');
			}
			return 1;
		} else {
			/* No keycap followed, so pass the held-back ASCII character
			 * through. The result is checked, as in the DoCoMo and KDDI
			 * converters, so that an output failure is reported instead
			 * of being silently dropped. */
			CK((*filter->output_function)(c1, filter->data));
		}
	} else if (filter->status == 2) {
		int c1 = filter->cache;
		filter->cache = filter->status = 0;
		if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */
			for (int i = 0; i < 10; i++) {
				if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) {
					*s1 = nflags_code_sb[i];
					return 1;
				}
			}
		}

		/* If none of the SoftBank national flag emoji matched, then we have no way
		 * to convert the previous codepoint... */
		mbfl_filt_conv_illegal_output(c1, filter);
	}

	if (c == '#' || (c >= '0' && c <= '9')) {
		/* Possible start of a keycap sequence: hold it back */
		filter->status = 1;
		filter->cache = c;
		return 0;
	} else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */
		/* Possible first half of a national flag: hold it back */
		filter->status = 2;
		filter->cache = c;
		return 0;
	}

	if (c == 0xA9) { /* Copyright sign */
		*s1 = 0x2855;
		return 1;
	} else if (c == 0xAE) { /* Registered sign */
		*s1 = 0x2856;
		return 1;
	} else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
		int i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
		if (i >= 0) {
			*s1 = mb_tbl_uni_sb2code2_value[i];
			return 1;
		}
	} else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
		/* keys are stored as 16-bit values: strip the 0x10000 offset */
		int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
		if (i >= 0) {
			*s1 = mb_tbl_uni_sb2code3_value[i];
			return 1;
		}
	} else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
		/* keys are stored as 16-bit values: strip the 0xF0000 offset */
		int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
		if (i >= 0) {
			*s1 = mb_tbl_uni_sb2code5_val[i];
			return 1;
		}
	}
	return 0;
}
559
/*
 * Mobile Shift-JIS -> wchar filter function; called once per input byte `c`.
 * Decoded codepoints are passed to filter->output_function.
 *
 * filter->status:
 *   0  at the start of a character
 *   1  lead byte of a double-byte character is in filter->cache
 *   2  ESC seen (SoftBank only)
 *   3  ESC $ seen
 *   4  ESC $ <letter> seen, letter is in filter->cache; every following
 *      byte is one emoji until 0x0F terminates the sequence
 *
 * Returns 0, or -1 if the output function reported an error.
 */
int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, code, row, cell, w, snd = 0;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) { /* ASCII */
			if (c == 0x1B && filter->from == &mbfl_encoding_sjis_sb) {
				/* ESC; escape sequences were used on older SoftBank phones for emoji */
				filter->status = 2;
				filter->cache = c;
			} else {
				CK((*filter->output_function)(c, filter->data));
			}
		} else if (c > 0xA0 && c < 0xE0) { /* Kana */
			CK((*filter->output_function)(0xFEC0 + c, filter->data));
		} else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */
			filter->cache = c;
			filter->status = 1;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* Kanji, second byte */
		lead = filter->cache;
		filter->status = 0;

		if (c < 0x40 || c > 0xFC || c == 0x7F) {
			/* not a valid trail byte */
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		SJIS_DECODE(lead, c, row, cell);
		code = ((row - 0x21) * 94) + cell - 0x21;

		/* A few codes are mapped by hand rather than through the tables */
		switch (code) {
		case 31:  w = 0xFF3C; break; /* FULLWIDTH REVERSE SOLIDUS */
		case 32:  w = 0xFF5E; break; /* FULLWIDTH TILDE */
		case 33:  w = 0x2225; break; /* PARALLEL TO */
		case 60:  w = 0xFF0D; break; /* FULLWIDTH HYPHEN-MINUS */
		case 80:  w = 0xFFE0; break; /* FULLWIDTH CENT SIGN */
		case 81:  w = 0xFFE1; break; /* FULLWIDTH POUND SIGN */
		case 137: w = 0xFFE2; break; /* FULLWIDTH NOT SIGN */
		default:  w = 0;      break;
		}

		if (w == 0) {
			if (code >= cp932ext1_ucs_table_min && code < cp932ext1_ucs_table_max) {
				/* vendor ext1 (13ku) */
				w = cp932ext1_ucs_table[code - cp932ext1_ucs_table_min];
			} else if (code >= 0 && code < jisx0208_ucs_table_size) {
				/* X 0208 */
				w = jisx0208_ucs_table[code];
			} else if (code >= cp932ext2_ucs_table_min && code < cp932ext2_ucs_table_max) {
				/* vendor ext2 (89ku - 92ku) */
				w = cp932ext2_ucs_table[code - cp932ext2_ucs_table_min];
			}

			/* Emoji: within the carrier's emoji code range the emoji
			 * lookup result replaces whatever was found above. A non-zero
			 * snd is the first codepoint of a two-codepoint sequence and
			 * is written out immediately. */
			if (filter->from == &mbfl_encoding_sjis_docomo
					&& code >= mb_tbl_code2uni_docomo1_min && code <= mb_tbl_code2uni_docomo1_max) {
				w = mbfilter_sjis_emoji_docomo2unicode(code, &snd);
				if (snd > 0) {
					CK((*filter->output_function)(snd, filter->data));
				}
			} else if (filter->from == &mbfl_encoding_sjis_kddi
					&& code >= mb_tbl_code2uni_kddi1_min && code <= mb_tbl_code2uni_kddi2_max) {
				w = mbfilter_sjis_emoji_kddi2unicode(code, &snd);
				if (snd > 0) {
					CK((*filter->output_function)(snd, filter->data));
				}
			} else if (filter->from == &mbfl_encoding_sjis_sb
					&& code >= mb_tbl_code2uni_sb1_min && code <= mb_tbl_code2uni_sb3_max) {
				w = mbfilter_sjis_emoji_sb2unicode(code, &snd);
				if (snd > 0) {
					CK((*filter->output_function)(snd, filter->data));
				}
			}

			if (w == 0) {
				if (code >= cp932ext3_ucs_table_min && code < cp932ext3_ucs_table_max) {
					/* vendor ext3 (115ku - 119ku) */
					w = cp932ext3_ucs_table[code - cp932ext3_ucs_table_min];
				} else if (code >= (94*94) && code < (114*94)) {
					/* user (95ku - 114ku) */
					w = code - (94*94) + 0xe000;
				}
			}
		}

		if (w <= 0) {
			w = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	/* ESC: Softbank Emoji */
	case 2:
		if (c == '$') {
			filter->cache = c;
			filter->status = 3;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
		}
		break;

	/* ESC $: Softbank Emoji */
	case 3:
		if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) {
			filter->cache = c;
			filter->status = 4;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
		}
		break;

	/* ESC $ [GEFOPQ]: Softbank Emoji */
	case 4: {
		int page = 0;    /* selected by the letter after ESC $ */
		int last_col = 0; /* highest valid byte for that letter; 0 = none */

		if (c == 0xF) { /* Terminate sequence of emoji */
			filter->status = filter->cache = 0;
			return 0;
		}

		switch (filter->cache) {
		case 'G': page = 0x91; last_col = 0x7A; break;
		case 'E': page = 0x8D; last_col = 0x7A; break;
		case 'F': page = 0x8E; last_col = 0x7A; break;
		case 'O': page = 0x92; last_col = 0x6D; break;
		case 'P': page = 0x95; last_col = 0x6C; break;
		case 'Q': page = 0x96; last_col = 0x5E; break;
		default: break;
		}

		if (c < 0x21 || c > last_col) {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
			return 0;
		}

		w = mbfilter_sjis_emoji_sb2unicode((page - 0x21) * 94 + c - 0x21, &snd);
		if (w > 0) {
			/* status stays 4: further emoji may follow */
			if (snd > 0) {
				CK((*filter->output_function)(snd, filter->data));
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			filter->status = filter->cache = 0;
		}
		break;
	}

	default:
		break;
	}

	return 0;
}
714
/*
 * Flush function of the mobile Shift-JIS -> wchar filters.
 *
 * If the input ended in the middle of a character or escape sequence
 * (filter->status is non-zero), an error marker is emitted. Status 4 is the
 * exception: there a SoftBank emoji escape sequence was merely left
 * unterminated and nothing is pending.
 *
 * The filter state is then cleared, so that a filter which is used again
 * after being flushed does not treat its next input byte as the
 * continuation of the truncated character. Finally the downstream flush
 * function, if any, is called.
 *
 * Always returns 0; emitting the marker is best-effort, so that the
 * downstream flush is never skipped.
 */
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
{
	int truncated = filter->status && filter->status != 4;

	filter->status = filter->cache = 0;

	if (truncated) {
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
727
/*
 * wchar -> mobile Shift-JIS filter function; called once per codepoint `c`.
 *
 * Lookup order: the Unicode -> JIS tables, a few hand-mapped symbols, the
 * CP932 vendor extension tables, and finally the emoji converter of the
 * target carrier, whose result overrides everything found before.
 *
 * The emoji converters may hold a codepoint back (filter->status becomes
 * non-zero); in that case nothing is written for this call.
 *
 * Returns 0, or -1 if writing output failed.
 */
int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s1 = 0, s2 = 0;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xE000 && c < (0xE000 + 20*94)) {
		/* Private User Area (95ku - 114ku) */
		s1 = c - 0xE000;
		c1 = (s1 / 94) + 0x7F;
		c2 = (s1 % 94) + 0x21;
		s1 = (c1 << 8) | c2;
		/* s2 != 0 keeps this code from being taken for X 0212 below */
		s2 = 1;
	}

	if (s1 <= 0) {
		if (c == 0xA5) { /* YEN SIGN */
			s1 = 0x216F; /* FULLWIDTH YEN SIGN */
		} else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */
			s1 = 0x2140;
		} else if (c == 0x2225) { /* PARALLEL TO */
			s1 = 0x2142;
		} else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
			s1 = 0x215D;
		} else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */
			s1 = 0x2171;
		} else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */
			s1 = 0x2172;
		} else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */
			s1 = 0x224C;
		}
	}

	if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
		s1 = -1;

		/* CP932 vendor ext1 (13ku) */
		for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) {
			if (c == cp932ext1_ucs_table[c1]) {
				s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
				break;
			}
		}

		if (s1 <= 0) {
			/* CP932 vendor ext2 (89ku - 92ku) */
			for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) {
				if (c == cp932ext2_ucs_table[c1]) {
					s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21;
					break;
				}
			}
		}

		if (s1 <= 0) {
			/* CP932 vendor ext3 (115ku - 119ku) */
			for (c1 = 0; c1 < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; c1++) {
				if (c == cp932ext3_ucs_table[c1]) {
					s1 = (((c1 / 94) + 0x93) << 8) + (c1 % 94) + 0x21;
					break;
				}
			}
		}

		/* NUL is passed through unchanged */
		if (c == 0) {
			s1 = 0;
		}
	}

	/* Carrier-specific emoji. The converters return 1 on a match, 0 on no
	 * match and -1 when writing a held-back character failed. The error
	 * case must not be mistaken for a match: *s1 holds no emoji code then,
	 * and encoding it would emit garbage. */
	int emoji = 0;
	if (filter->to == &mbfl_encoding_sjis_docomo) {
		emoji = mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter);
	} else if (filter->to == &mbfl_encoding_sjis_kddi) {
		emoji = mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter);
	} else if (filter->to == &mbfl_encoding_sjis_sb) {
		emoji = mbfilter_unicode2sjis_emoji_sb(c, &s1, filter);
	}
	if (emoji < 0) {
		return -1;
	}
	if (emoji > 0) {
		/* s1 is a (ku*94)+ten value: turn it into a packed JIS code */
		CODE2JIS(c1,c2,s1,s2);
	}

	/* The emoji converter held this codepoint back, waiting for the next one */
	if (filter->status) {
		return 0;
	}

	if (s1 >= 0) {
		if (s1 < 0x100) { /* Latin/Kana */
			CK((*filter->output_function)(s1, filter->data));
		} else { /* Kanji */
			c1 = (s1 >> 8) & 0xff;
			c2 = s1 & 0xff;
			SJIS_ENCODE(c1, c2, s1, s2);
			CK((*filter->output_function)(s1, filter->data));
			CK((*filter->output_function)(s2, filter->data));
		}
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return 0;
}
829
/*
 * Flush function of the wchar -> mobile Shift-JIS filters.
 *
 * The emoji converters may have held back the last codepoint of the input:
 *   filter->status == 1: a '#' or digit which could have started a keycap
 *                        sequence; it is written out as plain ASCII
 *   filter->status == 2: the first of a pair of Regional Indicators; on its
 *                        own it cannot be converted and is reported as
 *                        illegal output
 *
 * In both cases the filter state is cleared before anything is emitted, so
 * that the held-back codepoint cannot be processed a second time.
 * Afterwards the downstream flush function, if any, is called.
 *
 * Returns 0, or -1 if writing the held-back character failed.
 */
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
{
	int c1 = filter->cache;
	if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
		filter->cache = filter->status = 0;
		CK((*filter->output_function)(c1, filter->data));
	} else if (filter->status == 2) {
		/* First of a pair of Regional Indicator codepoints came at the end of a string */
		filter->cache = filter->status = 0;
		mbfl_filt_conv_illegal_output(c1, filter);
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
847