1 /* -*- mode: C; mode: fold -*- */
2 /* slkanji.c --- Interface to use Japanese 2byte KANJI code.
3 * Copyright (c) 1995, 2000 Kazuhisa Yoshino(k-yosino@actweb.ne.jp)
4 * This file is part of the Japanized S-Lang library.
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Perl Artistic License.
8 */
9
10 #include "slinclud.h"
11
12 #if SLANG_HAS_KANJI_SUPPORT
13 #include <stdio.h>
14 #include <ctype.h>
15 #include "config.h"
16 #include "slang.h"
17 #include <fcntl.h>
18
19 #include "slang.h"
20 #include "_slang.h"
21 #include "slkanji.h"
22
/* Printable names of the kanji codes, indexed by kanji code number.
 * The trailing NULL terminates the list. */
static char *Kcode[] =
{
    "Ascii",
    "Euc",
    "Jis",
    "Sjis",
    /* "Binary", */
    /* "SLang", */
    NULL
};
31
32 #if 1
/*
 * Description of one character set of the extended EUC encoding.
 * kSLcode_data[] holds one entry per prefix byte 0x80 .. 0x9f.
 */
struct _kSLcode_data
{
    unsigned char *name;       /* character set name */
    char *pre_str;             /* preceding string (escape sequence), NULL when unset */
    unsigned char *func_name;
    /* unsigned char *(*convert_func)(); */ /* argument is 1? (or 2?). */
    int lenth;                 /* character byte length */
    int width;                 /* character width */
    int mode;                  /* 0: after here.  1: next word (1*lenth) only. */
    /* int enable; */          /* enable/disable */
};

struct _kSLcode_data *current_set = (struct _kSLcode_data *)NULL;

struct _kSLcode_data kSLcode_data[0x20] =
{
    /* 0x80 */ {"", NULL, NULL, 1, 1, 1},
    /* 0x81 */ {"jisx0201", "\x1b(B", NULL, 1, 1, 0},
    /* 0x82 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x83 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x84 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x85 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x86 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x87 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x88 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x89 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x8a */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x8b */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x8c */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x8d */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x8e */ {"euc-jp-ss2", NULL, NULL, 1, 1, 1},
    /* 0x8f */ {"euc-jp-ss3", NULL, NULL, 2, 2, 1},
    /* 0x90 */ {"jisx0208-1983", "\x1b$B", NULL, 2, 2, 0},
    /* 0x91 */ {"jisx0208-1978", "\x1b$@", NULL, 2, 2, 0},
    /* 0x92 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x93 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x94 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x95 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x96 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x97 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x98 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x99 */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x9a */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x9b */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x9c */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x9d */ {NULL, NULL, NULL, 0, 0, 0},
    /* 0x9e */ {"extended", NULL, NULL, -1, -1, -1},
    /* 0x9f */ {"extended", NULL, NULL, -1, -1, -1}
};
78
kSLset_code_data(unsigned char * name,char * pre,unsigned char * func,int len,int mod)79 int kSLset_code_data(unsigned char *name, char *pre, unsigned char *func, int len, int mod)
80 {
81 int i, n;
82
83 for (i=0 ; i<32 ; i++)
84 {
85 if (kSLcode_data[i].name == NULL && kSLcode_data[i].pre_str == NULL)
86 break;
87 }
88 if(i == 32) return -1; /* kSLcode_data table is full */
89 kSLcode_data[i].name = (unsigned char*)SLmalloc(strlen(name)+1);
90 strcpy(kSLcode_data[i].name, name);
91 kSLcode_data[i].pre_str = (char*)SLmalloc(strlen(pre)+1);
92 strcpy(kSLcode_data[i].pre_str, pre);
93 kSLcode_data[i].func_name = (char*)SLmalloc(strlen(func)+1);
94 strcpy(kSLcode_data[i].func_name, func);
95 kSLcode_data[i].lenth = len;
96 kSLcode_data[i].mode = mod;
97
98 return i;
99 }
100
kSLfind_code_data(unsigned char * name,char * pre)101 int kSLfind_code_data(unsigned char *name, char *pre)
102 {
103 int i, n;
104
105 for (i=0 ; i<0x20 ; i++)
106 {
107 if((name && !strcmp(name, kSLcode_data[i].name))
108 || (pre && !strcmp(pre, kSLcode_data[i].pre_str)))
109 return i;
110 }
111 return -1;
112 }
113
kSLget_code_data_member(int i)114 void kSLget_code_data_member(int i)
115 {
116
117 SLang_push_string(kSLcode_data[i].name);
118 SLang_push_string(kSLcode_data[i].pre_str);
119 SLang_push_string(kSLcode_data[i].func_name);
120 SLang_push_integer(kSLcode_data[i].lenth);
121 SLang_push_integer(kSLcode_data[i].mode);
122
123 }
124
125 /*
126 int convert_function(void (unsigned char *buf, int bufsize, *get_func)(void))
127 {
128
129
130 }
131 */
132
kSLstrlen(unsigned char * str)133 int kSLstrlen(unsigned char *str)
134 {
135 register int len, n=0;
136 register unsigned char *p = str;
137
138 if (!p) return 0;
139 while (*p)
140 {
141 if ((0x80 & *p) && (*p < 0xa0)) /* 0x80 <= *p < 0xa0 */
142 {
143 len = kSLcode_data[*p & 0x7f].lenth; /* kSLcode_data[*p - 0x80] */
144 n += len;
145 p += len;
146 }
147 else
148 n++;
149
150 p++;
151 }
152
153 return n;
154 }
155
156 #endif
157
158 int kSLcode = SLANG_DEFAULT_KANJI_CODE;
159 int kSLfile_code = SLANG_DEFAULT_KANJI_CODE, kSLinput_code = SLANG_DEFAULT_KANJI_CODE,
160 kSLdisplay_code = SLANG_DEFAULT_KANJI_CODE, kSLsystem_code = SLANG_DEFAULT_KANJI_CODE;
161
162 #ifdef IBMPC_SYSTEM
163 int kSLfiAuto = FALSE,
164 SKanaToDKana = FALSE;
165 #else
166 int kSLfiAuto = TRUE,
167 SKanaToDKana = TRUE;
168 #endif
169
170 int jp_nokanji = NOKANJI;
171 int ascii = ASCII;
172 int jp_euc = EUC;
173 int jp_jis = JIS;
174 int jp_sjis = SJIS;
175 int val_true = TRUE;
176 int val_false = FALSE;
177
178
IsKanji(int c,int code)179 int IsKanji(int c, int code) /*{{{*/
180 {
181 /* if(!code) return FALSE; */
182 c = (c & 0xff);
183 if(code == SJIS)
184 {
185 if((0x80 < c && c < 0xa0) || (0xe0 <= c && c <= 0xfc))
186 return TRUE;
187 }
188 else if(code == EUC)
189 {
190 if(0xa0 < c && c < 0xff)
191 return TRUE;
192 if(c == 0x8e) return TRUE; /* fake */
193 }
194 else if(code == JIS)
195 {
196 if(0x20 < c && c < 0x7f)
197 return TRUE;
198 }
199 return FALSE;
200 }
201
202 /*}}}*/
203
kSLiskanji(int * n)204 int kSLiskanji(int *n) /*{{{*/
205 {
206 return (IsKanji(*n, kSLcode));
207 }
208
209 /*}}}*/
210
211 /*
212 * distinguish KANJI code of pointed position in string
213 * argment:
214 * beg: begin of string
215 * pos: position of string
216 * return:
217 * 0: ASCII
218 * 1: KANJI 1st byte
219 * 2: KANJI 2nd byte
220 */
221
kanji_pos(unsigned char * beg,unsigned char * pos)222 int kanji_pos(unsigned char *beg, unsigned char *pos) /*{{{*/
223 {
224 int ret = 0;
225 unsigned char *p = beg;
226
227 if((beg == pos) || !iskanji(*(pos-1)))
228 {
229 if (iskanji(*pos))
230 return 1; /* KNAJI 1st byte */
231 else
232 return ASCII; /* ASCII: 0 */
233 }
234
235 while(p < pos)
236 {
237 if (iskanji(*p)) p++;
238 p++;
239 }
240
241 if(p != pos) return (p - pos +1);
242 if(iskanji(*p)) return 1;
243
244 return ASCII;
245 }
246
247 /*}}}*/
248
249
250 #define CHAR_MASK 0x000000FF
251 #define ALT_CHAR_FLAG 0x80 /* defined in slsmg.c */
252 #define SHORT_ALT_CHAR_FLAG(p) ((ALT_CHAR_FLAG << 8) & p)
253
short_kanji_pos(unsigned short * beg,unsigned short * pos)254 int short_kanji_pos(unsigned short *beg, unsigned short *pos) /*{{{*/
255 {
256 int ret = 0;
257 unsigned short *p = beg;
258
259 if (SHORT_ALT_CHAR_FLAG(*pos))
260 return ASCII;
261 if ((beg == pos) || !iskanji(*(pos-1) & CHAR_MASK) || SHORT_ALT_CHAR_FLAG(*(pos-1)))
262 {
263 if (iskanji(*pos))
264 return 1; /* KANJI 1st byte */
265 else
266 return ASCII; /* ASCII: 0 */
267 }
268
269 while(p < pos)
270 {
271 if (!SHORT_ALT_CHAR_FLAG(*p) && iskanji(*p & CHAR_MASK)) p++;
272 p++;
273 }
274
275 if (p != pos) return ((p - pos) +1);
276 if (!SHORT_ALT_CHAR_FLAG(*p) && iskanji(*p & CHAR_MASK)) return 1;
277
278 return ASCII;
279 }
280
281 /*}}}*/
282
iskanji2nd(char * str,int col)283 int iskanji2nd(char *str, int col)
284 {
285 int j;
286
287 if(!col || !iskanji(str[col-1])) return FALSE;
288
289 for( j=0 ; j < col ; j++ )
290 {
291 if (iskanji(str[j])) j++;
292 }
293 if( j == col ) return FALSE;
294 else return TRUE;
295 }
296
kcode_to_str(int n)297 char *kcode_to_str(int n)
298 {
299 int i=0;
300 while(Kcode[i])
301 {
302 if(i == n) return Kcode[n];
303 i++;
304 }
305 return Kcode[ASCII];
306 }
307
308 #ifdef REAL_UNIX_SYSTEM
/*
 * Case-insensitive string comparison.
 *
 * Returns 0 when the strings are equal ignoring case, otherwise the
 * difference between the first pair of upper-cased characters that
 * differ.  The terminators take part in the comparison, so a string
 * never compares equal to a longer string that merely starts with it.
 */
int Stricmp(char *src, char *dst)
{
    int a, b;

    do
    {
        /* toupper() needs a value representable as unsigned char. */
        a = toupper((unsigned char)*src++);
        b = toupper((unsigned char)*dst++);
    }
    while (a == b && a != '\0');

    return a - b;
}
321 #endif
322
323
str_to_kcode(char * s)324 int str_to_kcode(char *s)
325 {
326 int i;
327 for(i=0 ; Kcode[i] ; i++)
328 {
329 if(!Stricmp(Kcode[i], s)) return i;
330 }
331
332 return (int)NULL;
333 }
334
/*
 * Convert the two-byte Shift-JIS character at src to JIS at dst: go
 * through EUC, then clear the top bit of both bytes.
 */
void sjistojis(char *src, char *dst)
{
    sjistoeuc(src, dst);
    dst[0] &= 0x7f;
    dst[1] &= 0x7f;
}
367
/*
 * Convert the two-byte JIS character at src to Shift-JIS at dst.
 * Both input bytes are read before anything is written, so src and dst
 * may be the same buffer.
 */
void jistosjis(char *src, char *dst)
{
    int row = src[0] & 0x7f;
    int cell = src[1] & 0x7f;

    /* Two JIS rows share one Shift-JIS lead byte; odd rows take the
     * lower half of the trail byte range and even rows the upper. */
    cell += (row & 1) ? 0x1f : 0x7d;
    if (cell >= 0x7f)
        cell++;             /* 0x7f is not used as a trail byte */

    row = ((row - 0x21) >> 1) + 0x81;
    if (row > 0x9f)
        row += 0x40;        /* continue in the 0xe0.. lead byte range */

    dst[0] = (char)row;
    dst[1] = (char)cell;
}
388
/*
 * Convert the two-byte EUC character at src to Shift-JIS at dst by
 * way of JIS; the JIS intermediate is converted in place inside dst.
 */
void euctosjis(char *src, char *dst)
{
    euctojis(src, dst);
    jistosjis(dst, dst);
}
414
/*
 * Convert the two-byte Shift-JIS character at src to EUC at dst.
 * Both input bytes are read before anything is written, so src and dst
 * may be the same buffer.
 */
void sjistoeuc(char *src, char *dst)
{
    unsigned int row = (unsigned char)src[0];
    unsigned int cell = (unsigned char)src[1];

    /* Lead bytes come from two ranges; bring both to a common base. */
    row -= (row <= 0x9f) ? 0x71 : 0xb1;
    row = row * 2 + 1;

    if (cell > 0x7f)
        cell--;
    if (cell >= 0x9e)
    {
        /* Upper half of the trail range: the following row. */
        cell -= 0x7d;
        row++;
    }
    else
        cell -= 0x1f;

    dst[0] = (char)(row | 0x80);
    dst[1] = (char)(cell | 0x80);
}
442
/* Convert a two-byte EUC character to JIS: clear the top bit of each
 * byte. */
void euctojis(char *src, char *dst)
{
    int k;

    for (k = 0; k < 2; k++)
        dst[k] = src[k] & 0x7f;
}
448
/* Convert a two-byte JIS character to EUC: set the top bit of each
 * byte. */
void jistoeuc(char *src, char *dst)
{
    int k;

    for (k = 0; k < 2; k++)
        dst[k] = (src[k] | 0x80);
}
454
/* Identity conversion: copy the two bytes at src to dst unchanged. */
void notconv(char *src, char *dst)
{
    int k;

    for (k = 0; k < 2; k++)
        dst[k] = src[k];
}
460
461 void (*kSLcodeconv[NCODE][NCODE])() =
462 {{notconv, notconv, notconv, notconv},
463 {notconv, notconv, euctojis, euctosjis},
464 {notconv, jistoeuc, notconv, jistosjis},
465 {notconv, sjistoeuc, sjistojis, notconv}};
466
displaycode_to_SLang(char * src,char * dst)467 void displaycode_to_SLang(char *src, char *dst)
468 {
469 int in = kSLdisplay_code, out = kSLcode;
470
471 if (in < 0 || NCODE <= in) in = ASCII;
472 if (out < 0 || NCODE <= out) out = ASCII;
473 kSLcodeconv[in][out](src, dst);
474 }
475
476 #define ISMARU(c) (0xca <= (c & 0xff) && (c & 0xff) <= 0xce)
477 #define ISNIGORI(c) ((0xb6 <= (c & 0xff) && (c & 0xff) <= 0xc4)\
478 || (0xca <= (c & 0xff) && (c & 0xff) <= 0xce)\
479 || (0xb3 == (c & 0xff)))
han2zen(in,out,lin,lout,code)480 void han2zen(in, out, lin, lout, code) /*{{{*/
481 unsigned char *in, *out;
482 int *lin, *lout, code;
483 {
484 int maru = FALSE, nigori = FALSE;
485 unsigned char ch1, ch2 = '\0';
486 int mtable[][2] = {
487 {129,66},{129,117},{129,118},{129,65},{129,69},{131,146},{131,64},{131,66},
488 {131,68},{131,70},{131,72},{131,131},{131,133},{131,135},{131,98},{129,91},
489 {131,65},{131,67},{131,69},{131,71},{131,73},{131,74},{131,76},{131,78},
490 {131,80},{131,82},{131,84},{131,86},{131,88},{131,90},{131,92},{131,94},
491 {131,96},{131,99},{131,101},{131,103},{131,105},{131,106},{131,107},{131,108},
492 {131,109},{131,110},{131,113},{131,116},{131,119},{131,122},{131,125},{131,126},
493 {131,128},{131,129},{131,130},{131,132},{131,134},{131,136},{131,137},{131,138},
494 {131,139},{131,140},{131,141},{131,143},{131,147},{129,74},{129,75}
495 };
496
497 if(code == EUC)
498 {
499 ch1 = in[1];
500 if (SKanaToDKana <= 0)
501 if (in[2] == SS2) ch2 = in[3];
502 }
503 else if(code == JIS)
504 {
505 ch1 = (in[0] | 0x80);
506 ch2 = (in[1] | 0x80);
507 }
508 else
509 {
510 ch1 = in[0];
511 ch2 = in[1];
512 }
513
514 if( ch1 == 0xa0 )
515 {
516 out[0] = ' ';
517 out[1] = '\0';
518 *lin = *lout = 1;
519 if(code == EUC) *lin = 2;
520 }
521 else
522 {
523 if (SKanaToDKana <= 0)
524 {
525 if(ch2 == 0xde && ISNIGORI(ch1))
526 nigori = TRUE;
527 else if(ch2 == 0xdf && ISMARU(ch1))
528 maru = TRUE;
529 }
530
531 out[0] = mtable[ch1 - 0xa1][0];
532 out[1] = mtable[ch1 - 0xa1][1];
533 if(nigori)
534 {
535 if((0x4a <= out[1] && out[1] <= 0x67) || (0x6e <= out[1] && out[1] <= 0x7a))
536 out[1]++;
537 else if(out[0] == 0x83 && out[1] == 0x45)
538 out[1] = 0x94;
539 }
540 else if(maru && 0x6e <= out[1] && out[1] <= 0x7a)
541 out[1] += 2;
542
543 if(nigori || maru) *lin = 2;
544 else *lin = 1;
545 if(code == EUC) *lin *= 2;
546 *lout = 2;
547 }
548 }
549
550 /*}}}*/
551
552 /*
553 * Not check, if src[n-1] is KANJI first byte, or if src[n-1] is in JIS ESC sequence,
554 * it return more bigger number. understand?
555 *
556 * if you want change "src" string from Kanji *incode to Kanji *outcode,
557 * this function return to need byte for Code Convert.
558 *
559 * htoz: hankaku to zenkaku flag (TRUE or FALSE)
560 */
kSLCheckLineNum(unsigned char * src,int n,int incode,int outcode,int htoz)561 int kSLCheckLineNum(unsigned char *src, int n, int incode, int outcode, int htoz)
562 {
563 int i, siz=0;
564 int kflg = FALSE, hflg = FALSE;
565 int okflg = FALSE, ohflg = FALSE;
566
567 for (i=0 ; i<n ; )
568 {
569 if (incode == JIS && src[i] == ESC )
570 {
571 if (src[i+1] == '$')
572 {
573 if ((src[i+2] == '@') || (src[i+2] == 'B'))
574 {
575 i += 3;
576 kflg = TRUE;
577 hflg = FALSE;
578 }
579 else
580 {
581 i += 2;
582 siz += 2;
583 }
584 }
585 else if (src[i+1] == '(')
586 {
587 if ((src[i+2] == 'J') || (src[i+2] == 'B') || (src[i+2] == 'H'))
588 {
589 i += 3;
590 kflg = hflg = FALSE;
591 }
592 else if (src[i+2] == 'I')
593 {
594 i += 3;
595 kflg = FALSE;
596 hflg = TRUE;
597 }
598 else
599 {
600 i += 2;
601 siz += 2;
602 }
603 }
604 else
605 {
606 i++;
607 siz++;
608 }
609 }
610 else if ((incode == JIS && kflg && isjiskanji(src[i]))
611 || (incode == EUC && iseuckanji(src[i]))
612 || (incode == SJIS && issjiskanji(src[i])))
613 {
614 i += 2;
615 siz += 2;
616 if (outcode == JIS && !okflg)
617 {
618 siz += 3;
619 okflg = TRUE;
620 ohflg = FALSE;
621 }
622 }
623 else if ((incode == JIS && hflg) || (incode == EUC && src[i] == SS2)
624 || (incode == SJIS && ishkana(src[i])))
625 {
626 if (htoz)
627 {
628 int sc, dc;
629 unsigned char p[2];
630
631 /* But &dst[o] is only SJIS code */
632 han2zen(&src[i], p, &sc, &dc, incode);
633 i += sc;
634 siz += dc;
635 if (outcode == JIS && !okflg)
636 {
637 siz += 3;
638 okflg = TRUE;
639 ohflg = FALSE;
640 }
641 }
642 else
643 {
644 i++; siz++;
645 if (incode == EUC) i++;
646 if (outcode == EUC) siz++;
647 if (outcode == JIS && !ohflg)
648 {
649 siz += 3;
650 okflg = FALSE;
651 ohflg = TRUE;
652 }
653 }
654 }
655 else
656 {
657 i++;
658 siz++;
659 if (outcode == JIS && (okflg || ohflg))
660 {
661 siz += 3;
662 okflg = ohflg = FALSE;
663 }
664 }
665 }
666
667 if (outcode == JIS && (okflg || ohflg))
668 {
669 siz += 3;
670 okflg = ohflg = FALSE;
671 }
672
673 return siz;
674 }
675
676
kSLCodeConv(unsigned char * src,int * siz,int incode,int outcode,int KanaChgFlag)677 unsigned char * kSLCodeConv(unsigned char *src, int *siz, int incode, int outcode, int KanaChgFlag) /*{{{*/
678 {
679 int dstsiz;
680 unsigned char *dst, tmp[2];
681 static int kflg = FALSE, hflg = FALSE;
682 static int okflg = FALSE, ohflg = FALSE;
683 int i, o;
684 void (*kcodeto)(char *, char *);
685 void (*kanakcodeto)(char *, char *);
686 char *jiskanji = "\033$B",
687 *jiskana = "\033(I",
688 *jisascii = "\033(B";
689 static char kanji_char[2] = {'\0', '\0'}; /* If last charctor of "src" string is KANJI 1st byte,
690 * it charctor(KANJI 1st byte) is set this variable.
691 *
692 * And, if last charctor of "src" is KANJI 1st byte
693 * when it used to be this function,
694 * you must set to this variable.
695 */
696
697 if (incode < 0 || NCODE <= incode) incode = ASCII;
698 if (outcode < 0 || NCODE <= outcode) outcode = ASCII;
699
700 if (!kSLcode || (incode == ASCII) || (outcode == ASCII) || !src) return src;
701 else if (incode == outcode)
702 {
703 if (KanaChgFlag == FALSE) return src;
704 }
705 kcodeto = kSLcodeconv[incode][outcode];
706 kanakcodeto = kSLcodeconv[SJIS][outcode];
707
708 dstsiz = kSLCheckLineNum (src, *siz, incode, outcode, KanaChgFlag);
709 if (kanji_char[0])
710 {
711 dstsiz++;
712 if (outcode == JIS) dstsiz += 3;
713 }
714 if ((dst = (unsigned char*)SLmalloc(dstsiz + 1)) == NULL)
715 {
716 /* error message */
717 return src;
718 }
719
720 for (i=0,o=0 ; i<*siz ; )
721 {
722 if (incode == JIS && src[i] == ESC )
723 {
724 if (src[i+1] == '$')
725 {
726 if ((src[i+2] == '@') || (src[i+2] == 'B'))
727 {
728 i += 3;
729 kflg = TRUE;
730 hflg = FALSE;
731 }
732 else
733 {
734 dst[o++] = src[i++];
735 }
736 }
737 else if (src[i+1] == '(')
738 {
739 if ((src[i+2] == 'J') || (src[i+2] == 'B'))
740 {
741 i += 3;
742 kflg = hflg = FALSE;
743 }
744 else if (src[i+2] == 'I')
745 {
746 i += 3;
747 kflg = FALSE;
748 hflg = TRUE;
749 }
750 else {
751 dst[o++] = src[i++];
752 }
753 }
754 else
755 {
756 dst[o++] = src[i++];
757 }
758 }
759 else if ((incode == JIS && kflg && isjiskanji(src[i]))
760 || (incode == EUC && iseuckanji(src[i]))
761 || (incode == SJIS && issjiskanji(src[i])) || kanji_char[0])
762 {
763 if (i == (*siz -1) && !kanji_char[0])
764 {
765 kanji_char[0] = src[i];
766 i++;
767 }
768 else
769 {
770 if (outcode == JIS && !okflg)
771 {
772 strcpy (&dst[o], jiskanji);
773 o += strlen (jiskanji);
774 okflg = TRUE;
775 ohflg = FALSE;
776 }
777 if (kanji_char[0])
778 {
779 kanji_char[1] = src[i];
780 kcodeto(kanji_char, &dst[o]);
781 kanji_char[0] = '\0';
782 i--;
783 }
784 else
785 kcodeto (&src[i], &dst[o]);
786 i += 2;
787 o += 2;
788 }
789 }
790 else if ((incode == JIS && hflg) || (incode == EUC && src[i] == SS2)
791 || (incode == SJIS && ishkana(src[i])))
792 {
793 if (KanaChgFlag)
794 {
795 int sc, dc;
796
797 if (outcode == JIS && !okflg)
798 {
799 strcpy (&dst[o], jiskanji);
800 o += strlen (jiskanji);
801 okflg = TRUE;
802 ohflg = FALSE;
803 }
804 /* But &dst[o] is only SJIS code */
805 han2zen (&src[i], &dst[o], &sc, &dc, incode);
806 kanakcodeto (&dst[o], &dst[o]);
807 i += sc;
808 o += dc;
809 }
810 else
811 {
812 if (outcode == JIS && !ohflg)
813 {
814 strcpy (&dst[o], jiskana);
815 o += strlen (jiskana);
816 okflg = FALSE;
817 ohflg = TRUE;
818 }
819 if (incode == EUC) i++;
820 if (outcode == EUC) dst[o++] = SS2;
821 dst[o] = src[i];
822 if (outcode == JIS) dst[o] &= 0x7f;
823 else dst[o] |= 0x80;
824 i++; o++;
825 }
826 }
827 else
828 {
829 if (outcode == JIS && (okflg || ohflg))
830 {
831 strcpy (&dst[o], jisascii);
832 o += strlen (jisascii);
833 okflg = ohflg = FALSE;
834 }
835 dst[o++] = src[i++];
836 }
837 }
838
839 if (outcode == JIS && (okflg || ohflg))
840 {
841 strcpy (&dst[o], jisascii);
842 o += strlen (jisascii);
843 okflg = ohflg = FALSE;
844 }
845
846 dst[o] = '\0';
847 *siz = o;
848
849 return dst;
850 }
851
852 /*}}}*/
853
854 #if 0
855 void kSLset_kanji_filecode(int *n)
856 {
857 kSLfile_code = *n;
858 }
859
860 void kSLrot_kanji_filecode()
861 {
862 kSLfile_code++;
863 if(BINARY < kSLfile_code) kSLfile_code = ASCII;
864 }
865
866 int kSLget_kanji_filecode()
867 {
868 return kSLfile_code;
869 }
870 #if 0
871 char get_1st_kanji_filecode()
872 {
873 return *Kcode[kSLfile_code];
874 }
875 #endif
876
877 void kSLset_kanji_inputcode(int *n)
878 {
879 kSLinput_code = *n;
880 }
881
882 void kSLrot_kanji_inputcode()
883 {
884 kSLinput_code++;
885 if(SJIS < kSLinput_code) kSLinput_code = ASCII;
886 }
887
888 int kSLget_kanji_inputcode()
889 {
890 return kSLinput_code;
891 }
892
893 #if 0
894 char get_1st_kanji_inputcode()
895 {
896 return *Kcode[kSLinput_code];
897 }
898 #endif
899
900 void kSLset_kanji_displaycode(int *n)
901 {
902 kSLdisplay_code = *n;
903 }
904
905 void kSLrot_kanji_displaycode()
906 {
907 kSLdisplay_code++;
908 if(BINARY < kSLdisplay_code) kSLdisplay_code = ASCII;
909 }
910
911 int kSLget_kanji_displaycode()
912 {
913 return kSLdisplay_code;
914 }
915 #if 0
916 char get_1st_kanji_displaycode()
917 {
918 return *Kcode[kSLdisplay_code];
919 }
920 #endif
921
922 void kSLset_kanji_systemcode(int *n)
923 {
924 jscode = *n;
925 }
926
927 #if 0
928 void kSLrot_kanji_systemcode()
929 {
930 jscode++;
931 if(SJIS < jscode) jscode = ASCII;
932 }
933
934 char *get_kanji_systemcode()
935 {
936 return Kcode[jscode];
937 }
938 #if 0
939 char get_1st_kanji_systemcode()
940 {
941 return *Kcode[jscode];
942 }
943 #endif
944 #endif
945 #endif
946
set_kanji_kSLcode(int * n)947 void set_kanji_kSLcode(int *n)
948 {
949 kSLcode = *n;
950 if(kSLis_kanji_code() == FALSE) kSLcode = ASCII;
951
952 }
953
954 #if 0
955 void rot_kanji_kSLcode()
956 {
957 kSLcode++;
958 if(kSLis_kanji_code() == FALSE) kSLcode = ASCII;
959 }
960 #endif
961
get_kanji_kSLcode(void)962 char *get_kanji_kSLcode(void)
963 {
964 return Kcode[kSLcode];
965 }
966
967 #if 0
968 char get_1st_kanji_jedcode()
969 {
970 return *Kcode[kSLcode];
971 }
972 #endif
973
kSLis_kanji_code(void)974 int kSLis_kanji_code(void)
975 {
976 if(kSLcode == EUC || /* kSLcode == JIS || */ kSLcode == SJIS)
977 return TRUE;
978 else
979 return FALSE;
980 }
981
file_kanji_autocode(char * fname)982 char *file_kanji_autocode(char *fname)
983 {
984 return Kcode[kSLfile_code];
985 }
986
987 /* i: TRUE or FALSE */
han_to_zen(int * i)988 void han_to_zen(int *i)
989 {
990 SKanaToDKana = *i;
991 }
992
993
994
995 int DetectLevel = 2;
996 /*
997 * flag
998 * 0: return
999 * 1: 100lines test
1000 * 2: if first KANJI code find, it's end.
1001 * 3: file's last made test suru.
1002 */
1003 #define NLINES 1024
kcode_detect(char * filename)1004 int kcode_detect(char *filename) /*{{{*/
1005 {
1006 int code = ASCII;
1007 FILE *fp;
1008 unsigned char buf[NLINES], *s;
1009 int EightBit=0;
1010 int cnt = -1;
1011 int cod_cnt[4] = {0,0,0,0};
1012
1013 if(!kSLis_kanji_code()) return ASCII;
1014 if(!DetectLevel) return kSLfile_code;
1015 if(DetectLevel == 1) cnt = 100;
1016
1017 if((fp = fopen(filename, "rb")) == NULL) return kSLfile_code;
1018 while (((!code && cnt) || DetectLevel==3) && (s = (char*)fgets((char*)buf, NLINES, fp)) != NULL)
1019 {
1020 code = IsKcode(buf, strlen(buf), &EightBit);
1021 if (0 < cnt) cnt--;
1022 if (code)
1023 {
1024 (cod_cnt[code])++;
1025 cnt = 0;
1026 }
1027 }
1028 fclose(fp);
1029
1030 for (cnt = 1 ; cnt < 4 ; cnt++) if (cod_cnt[cnt]) code = cnt;
1031 if (cod_cnt[EUC] && cod_cnt[SJIS]) code = BINARY;
1032 if (!code && EightBit) code = EUC;
1033 if (!code) code = kSLfile_code;
1034 return code;
1035 }
1036
1037 /*}}}*/
1038
1039 #define issjis2ndkanji(c) ((0x40 <= (unsigned char)(c&0xff) && (unsigned char)(c&0xff) <= 0x7e) \
1040 || (0x80 <= (unsigned char)(c&0xff) && (unsigned char)(c&0xff) <= 0xfc))
1041
IsKcode(buf,len,EightBit)1042 int IsKcode(buf, len, EightBit)
1043 unsigned char *buf;
1044 int len, *EightBit;
1045 {
1046 int code;
1047 int i;
1048 code = ASCII;
1049 for (i=0 ; (i < len) && (code == ASCII) ; )
1050 {
1051
1052 if (*EightBit==0 && buf[i] == ESC)
1053 {
1054 if ((buf[i+1] == '$' && (buf[i+2] == '@' || buf[i+2] == 'B'))
1055 || (buf[i+1] == '(' && (buf[i+2] == 'J' || buf[i+2] == 'B' || buf[i+2] == 'I')))
1056 {
1057 code = JIS;
1058 }
1059 else
1060 {
1061 i++;
1062 }
1063 }
1064 else if( (buf[i] & 0x80) == 0 )
1065 i++;
1066 else
1067 {
1068 *EightBit = 1;
1069
1070 if(buf[i] == SS2)
1071 {
1072 if(!ishkana(buf[i+1]))
1073 {
1074 code = SJIS;
1075 }
1076 else if(!issjis2ndkanji(buf[i+1]))
1077 {
1078 code = EUC;
1079 }
1080 else
1081 {
1082 i += 2;
1083 }
1084 }
1085 /* else if(ishkana(buf[i]))
1086 {
1087 if(!iseuckanji(buf[i]) || !iseuckanji(buf[i+1]))
1088 {
1089 code = SJIS;
1090 }
1091 else
1092 {
1093 code = EUC;
1094 i += 2;
1095 }
1096 } */
1097 else if(issjiskanji(buf[i]))
1098 {
1099 if(!iseuckanji(buf[i]) || !iseuckanji(buf[i+1]))
1100 {
1101 code = SJIS;
1102 }
1103 else if(!issjis2ndkanji(buf[i+1]))
1104 {
1105 code = EUC;
1106 }
1107 else
1108 {
1109 i += 2;
1110 }
1111 }
1112 else if(!iseuckanji(buf[i]) || !iseuckanji(buf[i+1]))
1113 {
1114 code = 5;
1115 }
1116 else
1117 {
1118 code = EUC;
1119 }
1120 }
1121 }
1122 return code;
1123 }
1124
1125
1126 #define BUFSIZE 4
1127 #define PENDING 10
1128
kSLsys_getkey(void)1129 unsigned int kSLsys_getkey(void) /*{{{*/
1130 {
1131 static unsigned char buf[BUFSIZE], dst[BUFSIZ], nxtchar = '\0';
1132 static int ikflg = FALSE, ihflg = FALSE;
1133 static int okflg = FALSE, ohflg = FALSE;
1134 int ishankana = FALSE, iszenkaku = FALSE;
1135 unsigned int ret;
1136 void (*kcodeto)(char *, char *);
1137 void (*kanakcodeto)(char *, char *);
1138 char *jiskanji = "\033$@",
1139 *jiskana = "\033(I",
1140 *jisascii = "\033(B";
1141 int incode = kSLinput_code, outcode = kSLcode;
1142
1143 if (incode < 0 || NCODE <= incode) incode = ASCII;
1144 if (outcode < 0 || NCODE <= outcode) outcode = ASCII;
1145
1146 if (!SKanaToDKana && kSLinput_code == kSLcode) return _SLsys_getkey();
1147
1148 kcodeto = kSLcodeconv[kSLinput_code][kSLcode];
1149 kanakcodeto = kSLcodeconv[SJIS][kSLcode];
1150 if (kcodeto == notconv) return _SLsys_getkey();
1151
1152 if(nxtchar)
1153 {
1154 ret = buf[0] = nxtchar;
1155 nxtchar = '\0';
1156 }
1157 else
1158 ret = buf[0] = _SLsys_getkey();
1159 buf[1] = '\0';
1160
1161
1162 while(kSLinput_code == JIS && buf[0] == ESC)
1163 {
1164 if(_SLsys_input_pending(PENDING))
1165 {
1166 buf[1] = _SLsys_getkey();
1167 if(_SLsys_input_pending(PENDING))
1168 {
1169 buf[2] = _SLsys_getkey();
1170 }
1171 else
1172 {
1173 SLang_ungetkey_string(&buf[1], 1);
1174 return ret;
1175 }
1176 }
1177 else return ret;
1178
1179 if(buf[1] == '$' && (buf[2] == '@' || buf[2] == 'B'))
1180 {
1181 ikflg = TRUE;
1182 ihflg = FALSE;
1183 }
1184 else if(buf[1] == '(' && buf[2] == 'I')
1185 {
1186 ikflg = FALSE;
1187 ihflg = TRUE;
1188 }
1189 else if(buf[1] == '(' && (buf[2] == 'B' || buf[2] == 'J'))
1190 {
1191 ikflg = ihflg = FALSE;
1192 }
1193 else
1194 {
1195 SLang_ungetkey_string(&buf[1], 2);
1196 return ret;
1197 }
1198 ret = buf[0] = _SLsys_getkey();
1199 }
1200
1201
1202 if((kSLinput_code == JIS && ikflg) || (kSLinput_code == EUC && iseuckanji(ret))
1203 || (kSLinput_code == SJIS && issjiskanji(ret)))
1204 {
1205 buf[1] = _SLsys_getkey();
1206 kcodeto(buf, dst);
1207 ret = dst[0];
1208 iszenkaku = TRUE;
1209 }
1210 else if((kSLinput_code == JIS && ihflg) || (kSLinput_code == EUC && ret == SS2)
1211 || (kSLinput_code == SJIS && ishkana(ret)))
1212 {
1213 if(kSLinput_code == EUC)
1214 ret = buf[0] = _SLsys_getkey();
1215 else if(kSLinput_code == JIS)
1216 ret = buf[0] = (ret | 0x80);
1217 /* else if(kSLinput_code == SJIS) */
1218
1219 if(kSLinput_code != EUC && SKanaToDKana && ISNIGORI(ret) && _SLsys_input_pending(PENDING))
1220 {
1221 nxtchar = buf[1] = _SLsys_getkey();
1222 if(kSLinput_code == JIS && nxtchar != ESC &&
1223 (nxtchar == 0x5e || (nxtchar == 0x5f && ISMARU(ret))))
1224 nxtchar = buf[1] = (nxtchar | 0x80);
1225 if(buf[1] == 222 || (buf[1] == 223 && ISMARU(ret)))
1226 {
1227 nxtchar = '\0';
1228 }
1229 }
1230 ishankana = TRUE;
1231 }
1232
1233 if(ishankana)
1234 {
1235 if(SKanaToDKana)
1236 {
1237 int dummy;
1238 buf[0] = (unsigned char)ret;
1239 han2zen(buf, dst, &dummy, &dummy, SJIS);
1240 kanakcodeto(dst, dst);
1241 ret = dst[0];
1242 ishankana = FALSE;
1243 iszenkaku = TRUE;
1244 }
1245 else
1246 {
1247 if(kSLcode == JIS && !ohflg)
1248 {
1249 SLang_ungetkey_string(buf, 1);
1250 SLang_ungetkey_string(jiskana+1, 2);
1251 ohflg = TRUE;
1252 okflg = FALSE;
1253 ret = ESC;
1254 }
1255 else if(kSLcode == EUC)
1256 {
1257 SLang_ungetkey_string(buf, 1);
1258 ret = SS2;
1259 }
1260 }
1261 }
1262
1263 if(iszenkaku)
1264 {
1265 SLang_ungetkey_string(&dst[1], 1);
1266
1267 if(kSLcode == JIS && !okflg)
1268 {
1269 SLang_ungetkey_string(dst, 1);
1270 SLang_ungetkey_string(jiskanji+1, 2);
1271 okflg = TRUE;
1272 ohflg = FALSE;
1273 ret = ESC;
1274 }
1275 }
1276 else if(/* !iszenkaku && */ !ishankana)
1277 {
1278 if(kSLcode == JIS && (okflg || ohflg))
1279 {
1280 if(kSLcode == JIS && !okflg)
1281 {
1282 SLang_ungetkey_string(buf, 1);
1283 SLang_ungetkey_string(jisascii+1, 2);
1284 okflg = ohflg = FALSE;
1285 ret = ESC;
1286 }
1287 }
1288 }
1289
1290 return ret;
1291 }
1292
1293 /*}}}*/
1294
1295 static SLang_Intrin_Fun_Type SLKanji_ITable[] = /*{{{*/
1296 {
1297 MAKE_INTRINSIC_I("iskanji", kSLiskanji, SLANG_INT_TYPE),
1298
1299 #if 0
1300 MAKE_INTRINSIC_SS("sjis_to_slang", notconv, SLANG_VOID_TYPE),
1301 MAKE_INTRINSIC_SS("jis_to_slang", jistosjis, SLANG_VOID_TYPE),
1302 MAKE_INTRINSIC_SS("euc_to_slang", euctosjis, SLANG_VOID_TYPE),
1303 MAKE_INTRINSIC_SS("to_sjis", notconv, SLANG_VOID_TYPE),
1304 MAKE_INTRINSIC_SS("to_euc", sjistoeuc, SLANG_VOID_TYPE),
1305 MAKE_INTRINSIC_SS("to_jis", sjistojis, SLANG_VOID_TYPE),
1306 #endif
1307 SLANG_END_TABLE
1308 };
1309
1310 /*}}}*/
1311
1312 static SLang_Intrin_Var_Type SLKanji_Vars[] = /*{{{*/
1313 {
1314 MAKE_VARIABLE("NOKANJI", &jp_nokanji, SLANG_INT_TYPE, 1),
1315 MAKE_VARIABLE("ASCII", &ascii, SLANG_INT_TYPE, 1),
1316 MAKE_VARIABLE("EUC", &jp_euc, SLANG_INT_TYPE, 1),
1317 MAKE_VARIABLE("JIS", &jp_jis, SLANG_INT_TYPE, 1),
1318 MAKE_VARIABLE("SJIS", &jp_sjis, SLANG_INT_TYPE, 1),
1319 MAKE_VARIABLE("TRUE", &val_true, SLANG_INT_TYPE, 1),
1320 MAKE_VARIABLE("FALSE", &val_false, SLANG_INT_TYPE, 1),
1321
1322 MAKE_VARIABLE("kfile_code", &kSLfile_code, SLANG_INT_TYPE, 0),
1323 MAKE_VARIABLE("kinput_code", &kSLinput_code, SLANG_INT_TYPE, 0),
1324 MAKE_VARIABLE("kdisplay_code", &kSLdisplay_code, SLANG_INT_TYPE, 0),
1325 MAKE_VARIABLE("KfioAuto", &kSLfiAuto, SLANG_INT_TYPE, 0), /* for compatibility */
1326 /* use "kanji_filecode_detect" variable */
1327 MAKE_VARIABLE("kanji_filecode_detect", &kSLfiAuto, SLANG_INT_TYPE, 0),
1328 MAKE_VARIABLE("han_to_zen", &SKanaToDKana, SLANG_INT_TYPE, 0),
1329 MAKE_VARIABLE("SLang_code", &kSLcode, SLANG_INT_TYPE, 0),
1330 MAKE_VARIABLE("KANJI_DETECT", &DetectLevel, SLANG_INT_TYPE, 0),
1331 SLANG_END_TABLE
1332 };
1333
1334 /*}}}*/
1335
kSLinit_kanji(void)1336 int kSLinit_kanji(void) /*{{{*/
1337 {
1338 int ret;
1339
1340 if (-1 == SLadd_intrin_fun_table(SLKanji_ITable, NULL)
1341 || (-1 == SLadd_intrin_var_table (SLKanji_Vars, NULL)))
1342 return -1;
1343
1344 return 0;
1345 }
1346
1347 /*}}}*/
1348
1349 #endif /* SLANG_HAS_KANJI_SUPPORT */
1350