1 /*
2 * KAKASI (Kanji Kana Simple inversion program)
3 * $Id: kanjiio.c,v 1.17 2013-02-07 07:26:18 knok Exp $
4 * Copyright (C) 1992
5 * Hironobu Takahashi (takahasi@tiny.or.jp)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either versions 2, or (at your option)
10 * any later version.
11 *
12 * This program is distributed in the hope that it will be useful
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with KAKASI, see the file COPYING. If not, write to the Free
19 * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22 /*
23 Modified by NOKUBI Takatsugu
24 1999/03/04
25 Rename PERLMOD macro to LIBRARY
26 1999/01/08
27 Add PERLMOD macro.
28 */
29
30 #ifdef HAVE_CONFIG_H
31 # include <config.h>
32 #endif
33
34 #include <stdio.h>
35 #ifdef HAVE_STRING_H
36 # include <string.h>
37 #else
38 # include <strings.h>
39 #endif
40 #ifdef HAVE_MALLOC_H
41 # include <malloc.h>
42 #endif
43 #include <stdlib.h>
44 #include "kakasi.h"
45 #ifdef LIBRARY
46 # include "libkakasi.h"
47 #endif
48
49 #ifdef KAKASI_SUPPORT_UTF8
50 #include <iconv.h>
/* iconv descriptors declared here but not defined in this file.
   getkanji() opens fromutf8 as UTF-8 -> EUC-JP and pututf8() opens toutf8
   as EUC-JP -> UTF-8 when they still hold (iconv_t) -1. */
extern iconv_t fromutf8;
extern iconv_t toutf8;
/* Flag used by getkanji(): set to 1 while the bytes it pushed back are
   being re-read, so that the recursive call skips the UTF-8 branch; it is
   reset to 0 by that recursive call. */
int utf8converted = 0;
/* Forward declaration; pututf8() is defined near the end of this file. */
void pututf8(int f, int s);
55
56 /*
57 The macro UTF8_COMPUTE was imported from gutf8.c in glib.
58 You can get the original source code from ftp://ftp.gtk.org.
59 */
/*
 * UTF8_COMPUTE(Char, Mask, Len): classify Char as the first byte of a
 * UTF-8 sequence.  Len receives the total sequence length (1 to 6) and
 * Mask the bit mask matching that length.  When Char matches none of the
 * first-byte patterns, Len is set to -1 and Mask is left untouched.
 * The expansion is a bare if/else chain, so use it only as a complete
 * statement.
 */
#define UTF8_COMPUTE(Char, Mask, Len) \
  if (Char < 128) \
    { \
      Len = 1; \
      Mask = 0x7f; \
    } \
  else if ((Char & 0xe0) == 0xc0) \
    { \
      Len = 2; \
      Mask = 0x1f; \
    } \
  else if ((Char & 0xf0) == 0xe0) \
    { \
      Len = 3; \
      Mask = 0x0f; \
    } \
  else if ((Char & 0xf8) == 0xf0) \
    { \
      Len = 4; \
      Mask = 0x07; \
    } \
  else if ((Char & 0xfc) == 0xf8) \
    { \
      Len = 5; \
      Mask = 0x03; \
    } \
  else if ((Char & 0xfe) == 0xfc) \
    { \
      Len = 6; \
      Mask = 0x01; \
    } \
  else \
    Len = -1;
93
94 #endif /* KAKASI_SUPPORT_UTF8 */
95
96
/* Fall back to bcopy() when neither HAVE_MEMMOVE nor a memmove macro is
   defined; note that bcopy() takes source and destination in the
   opposite order. */
#if ! defined HAVE_MEMMOVE && ! defined memmove
# define memmove(d, s, n) bcopy ((s), (d), (n))
#endif
100
/* Input side state: the selected (or auto-detected) terminal type, which
   G slot is currently invoked into GL and GR, and the character set
   designated to each slot.  The array index is a SETGn value. */
int input_term_type = UNKNOWN;

int input_GL = SETG0;
int input_GR = SETG3;
int input_G[5] = {ASCII, KATAKANA, KATAKANA, JIS83, SJKANA};

/* Output side state, laid out exactly like the input side. */
int output_term_type = UNKNOWN;

int output_GL = SETG0;
int output_GR = SETG3;
int output_G[5] = {ASCII, KATAKANA, KATAKANA, JIS83, SJKANA};

#ifdef LIBRARY
/* Read cursor into the NUL-terminated input string that get1byte() reads
   from in library builds; set by setcharbuffer(). */
unsigned char *getcharbuffer;
#endif /* LIBRARY */
116
/* One character buffer: holds the Character handed to ungetkanji() until
   the next getkanji() call returns it. */

static Character kanji_buf={OTHER, 0, 0};
/* Non-zero while kanji_buf holds a pushed-back character. */
static int kanji_buf_set = 0;
121
122 void
ungetkanji(c)123 ungetkanji(c)
124 Character *c;
125 {
126 kanji_buf.type = c->type;
127 kanji_buf.c1 = c->c1;
128 kanji_buf.c2 = c->c2;
129 kanji_buf_set = 1;
130 }
131
/* One input of a character */

/* LIFO stack of pushed-back input bytes: unget1byte() pushes onto it and
   get1byte() pops from it before reading any fresh input. */
static unsigned char input_stack[1024];
/* Number of bytes currently held in input_stack. */
static int input_stack_depth = 0;
136
137 #ifdef LIBRARY
138
/* Set to 1 by putcharpbuf() when one of its allocations fails. */
int pbuf_error = 0;

/* Capacity in bytes of each output chunk. */
#define PBSTRSIZE (4096)
/* In library builds every putchar() in this file appends to the
   in-memory output list instead of writing to stdout. */
#ifdef putchar
#undef putchar
#endif
#define putchar(x) putcharpbuf(x)

/* One chunk of buffered output.  size is the number of bytes stored in
   str, or -1 while the chunk has no storage attached yet. */
typedef struct pbuf_t {
    char *str;
    long size;
    struct pbuf_t *next;
} pbuf;

/* Head chunk of the output list; statically allocated and initially
   without storage. */
pbuf pcbuf = {
    NULL, -1, NULL
};

/* Chunk that putcharpbuf() is currently appending to. */
pbuf *pcbuf_tail = &pcbuf;
158
159 void
setcharbuffer(s)160 setcharbuffer(s)
161 unsigned char *s;
162 {
163 getcharbuffer = s;
164 }
165
166 void
putcharpbuf(c)167 putcharpbuf(c)
168 int c;
169 {
170 pbuf *pb = pcbuf_tail;
171
172 if (pb->size == PBSTRSIZE) {
173 pb = pb->next;
174 pcbuf_tail = pb;
175 }
176 if (pb->size < 0) {
177 pbuf *npb;
178 if ((pb->str = (char *) malloc(PBSTRSIZE)) == NULL) {
179 pbuf_error = 1;
180 return;
181 }
182 pb->size = 0;
183 if ((npb = (void *) malloc(sizeof(pbuf))) == NULL) {
184 pbuf_error = 1;
185 return;
186 }
187 npb->str = NULL;
188 npb->size = -1;
189 npb->next = NULL;
190 pb->next = npb;
191 }
192 pb->str[pb->size] = (char) c;
193 pb->size ++;
194 }
195
196 char *
getpbstr()197 getpbstr() {
198 char *ret, *tmp;
199 long tsize = 0;
200 pbuf *pb = &pcbuf;
201 while (pb->next != NULL) {
202 tsize += pb->size;
203 pb = pb->next;
204 }
205 if (tsize <= 0)
206 return NULL;
207 pb = &pcbuf;
208 tmp = ret = (char *) malloc(tsize + 1);
209 if (ret == NULL)
210 return NULL;
211 while (pb->next != NULL) {
212 memmove(tmp, pb->str, pb->size);
213 tmp += pb->size;
214 pb = pb->next;
215 }
216 ret[tsize] = '\0';
217 pb = &pcbuf;
218 free(pb->str);
219 pb = pb->next;
220 if (pb->next != NULL) {
221 pbuf *opb = pb;
222 pb = pb->next;
223 while (pb != NULL) {
224 free(opb->str);
225 free(opb);
226 opb = pb;
227 pb = pb->next;
228 }
229 } else {
230 free(pb);
231 }
232 pcbuf.str = NULL;
233 pcbuf.size = -1;
234 pcbuf.next = NULL;
235 pcbuf_tail = &pcbuf;
236 return ret;
237 }
238 #endif /* LIBRARY */
239
240 static int
get1byte()241 get1byte()
242 {
243 if (input_stack_depth == 0)
244 #ifdef LIBRARY
245 {
246 if (*getcharbuffer == '\0') return EOF;
247 return (int) *getcharbuffer ++;
248 }
249 #else
250 return getchar();
251 #endif /* LIBRARY */
252 else
253 return input_stack[-- input_stack_depth];
254 }
255
256 static void
unget1byte(c)257 unget1byte(c)
258 int c;
259 {
260 input_stack[input_stack_depth ++] = c;
261 }
262
263 static int
getc0set1(gn)264 getc0set1(gn)
265 int gn;
266 {
267 int c3;
268 int set;
269
270 switch(c3 = get1byte()) {
271 case 'B':
272 set = ASCII; break;
273 case 'J':
274 set = JISROMAN; break;
275 case 'O':
276 set = GRAPHIC; break;
277 case 'I':
278 set = KATAKANA; break;
279 default:
280 unget1byte(c3); return -1;
281 }
282 input_G[gn] = set;
283 return 0;
284 }
285
286 static void
set_terms(type,term_type,GL,GR,G)287 set_terms(type, term_type, GL, GR, G)
288 int type;
289 int *term_type;
290 int *GL;
291 int *GR;
292 int *G;
293 {
294 *term_type = type;
295
296 switch(type) {
297 case OLDJIS:
298 *GL=SETG0, *GR=SETG1,
299 G[0]=JISROMAN, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=KATAKANA;
300 break;
301 case NEWJIS:
302 *GL=SETG0, *GR=SETG1,
303 G[0]=ASCII, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=KATAKANA;
304 break;
305 case DEC:
306 *GL=SETG0, *GR=SETG3,
307 G[0]=ASCII, G[1]=GRAPHIC, G[2]=KATAKANA, G[3]=JIS83;
308 break;
309 case EUC:
310 *GL=SETG0, *GR=SETG3,
311 G[0]=ASCII, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=JIS83;
312 break;
313 case MSKANJI:
314 *GL=SETG0, *GR=SJKANA,
315 G[0]=ASCII, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=KATAKANA;
316 break;
317 }
318 }
319
320 void
set_input_term(type)321 set_input_term(type)
322 int type;
323 {
324 set_terms(type, &input_term_type, &input_GL, &input_GR, input_G);
325 }
326
327 void
set_output_term(type)328 set_output_term(type)
329 int type;
330 {
331 set_terms(type, &output_term_type, &output_GL, &output_GR, output_G);
332 }
333
334 static int
getc0set2(gn)335 getc0set2(gn)
336 int gn;
337 {
338 int c4;
339 int set;
340
341 switch(c4 = get1byte()) {
342 case '@':
343 set = JIS78;
344 if (input_term_type == UNKNOWN)
345 set_input_term(OLDJIS);
346 if (output_term_type == UNKNOWN)
347 set_output_term(OLDJIS);
348 break;
349 case 'B':
350 set = JIS83;
351 if (input_term_type == UNKNOWN)
352 set_input_term(NEWJIS);
353 if (output_term_type == UNKNOWN)
354 set_output_term(NEWJIS);
355 break;
356 default:
357 unget1byte(c4); return -1;
358 }
359 input_G[gn] = set;
360 return 0;
361 }
362
363 static void
getc0(c,c1)364 getc0(c, c1)
365 Character *c;
366 int c1;
367 {
368 int c2, c3;
369 int GL_save, GR_save;
370
371 switch(c1) {
372 case '\033':
373 switch(c2 = get1byte()) {
374 case '(':
375 if (getc0set1(SETG0) != 0) {
376 unget1byte(c2); c->type = OTHER; c->c1 = c1; return;
377 }
378 break;
379 case ')':
380 if (getc0set1(SETG1) != 0) {
381 unget1byte(c2); c->type = OTHER; c->c1 = c1; return;
382 }
383 break;
384 case '*':
385 if (getc0set1(SETG2) != 0) {
386 unget1byte(c2); c->type = OTHER; c->c1 = c1; return;
387 }
388 break;
389 case '+':
390 if (getc0set1(SETG3) != 0) {
391 unget1byte(c2); c->type = OTHER; c->c1 = c1; return;
392 }
393 break;
394 case '$':
395 switch(c3 = get1byte()) {
396 case '@':
397 if (input_term_type == UNKNOWN)
398 set_input_term(OLDJIS);
399 if (output_term_type == UNKNOWN)
400 set_output_term(OLDJIS);
401 input_G[SETG0] = JIS78;
402 break;
403 case 'B':
404 if (input_term_type == UNKNOWN)
405 set_input_term(NEWJIS);
406 if (output_term_type == UNKNOWN)
407 set_output_term(NEWJIS);
408 input_G[SETG0] = JIS83;
409 break;
410 case '(':
411 if (getc0set2(SETG0) != 0) {
412 unget1byte(c3); unget1byte(c2);
413 c->type = OTHER; c->c1 = c1; return;
414 }
415 break;
416 case ')':
417 if (getc0set2(SETG1) != 0) {
418 unget1byte(c3); unget1byte(c2);
419 c->type = OTHER; c->c1 = c1; return;
420 }
421 break;
422 case '*':
423 if (getc0set2(SETG2) != 0) {
424 unget1byte(c3); unget1byte(c2);
425 c->type = OTHER; c->c1 = c1; return;
426 }
427 break;
428 case '+':
429 if (getc0set2(SETG3) != 0) {
430 unget1byte(c3); unget1byte(c2);
431 c->type = OTHER; c->c1 = c1; return;
432 }
433 break;
434 default:
435 unget1byte(c3);
436 unget1byte(c2);
437 c->type = OTHER; c->c1 = c1; return;
438 }
439 break;
440 case 'n':
441 input_GL = SETG2;
442 break;
443 case 'o':
444 input_GL = SETG3;
445 break;
446 case '~':
447 input_GR = SETG1;
448 break;
449 case '}':
450 input_GR = SETG2;
451 break;
452 case '|':
453 input_GR = SETG3;
454 break;
455 case 'N':
456 GL_save = input_GL;
457 GR_save = input_GR;
458 input_GL = SETG2;
459 input_GR = SETG2;
460 getkanji(c);
461 input_GL = GL_save;
462 input_GR = GR_save;
463 return;
464 case 'O':
465 GL_save = input_GL;
466 GR_save = input_GR;
467 input_GL = SETG3;
468 input_GR = SETG3;
469 getkanji(c);
470 input_GL = GL_save;
471 input_GR = GR_save;
472 return;
473 default:
474 unget1byte(c2);
475 c->type = OTHER; c->c1 = c1; return;
476 }
477 break;
478 case 0xe:
479 input_GL = SETG1;
480 break;
481 case 0xf:
482 input_GL = SETG0;
483 break;
484 case EOF:
485 c->type = OTHER; c->c1 = 0xff; return;
486 default:
487 c->type = OTHER; c->c1 = c1; return;
488 }
489 getkanji(c);
490 }
491
492 static void
getc1(c,c1)493 getc1(c, c1)
494 Character *c;
495 int c1;
496 {
497 int GL_save, GR_save;
498
499 switch(c1) {
500 case 0x8e:
501 GL_save = input_GL;
502 GR_save = input_GR;
503 input_GL = SETG2;
504 input_GR = SETG2;
505 getkanji(c);
506 input_GL = GL_save;
507 input_GR = GR_save;
508 return;
509 case 0x8f:
510 GL_save = input_GL;
511 GR_save = input_GR;
512 input_GL = SETG3;
513 input_GR = SETG3;
514 getkanji(c);
515 input_GL = GL_save;
516 input_GR = GR_save;
517 return;
518 default:
519 c->type = OTHER; c->c1 = c1; return;
520 }
521 }
522
523 void
getkanji(c)524 getkanji(c)
525 Character *c;
526 {
527 int c1;
528
529 if (kanji_buf_set) {
530 c->type = kanji_buf.type;
531 c->c1 = kanji_buf.c1;
532 c->c2 = kanji_buf.c2;
533 kanji_buf_set = 0;
534 return;
535 }
536
537 c1 = get1byte();
538 #ifdef KAKASI_SUPPORT_UTF8
539 if (!utf8converted && input_term_type == UTF8) {
540 char utf8[6], eucj[3];
541 char *from = utf8, *to = eucj;
542 size_t fromlen = 0, tolen = 3;
543 int i, len, mask;
544 utf8converted = 1;
545 UTF8_COMPUTE(c1, mask, len);
546 (void) mask;
547 if (len <= 1) {
548 unget1byte(c1);
549 getkanji(c);
550 return;
551 }
552 utf8[0] = c1;
553 for (i = 1; i < len; i ++) {
554 utf8[i] = getchar();
555 }
556 fromlen = len;
557 if (fromutf8 == (iconv_t) -1)
558 fromutf8 = iconv_open("EUC-JP", "UTF-8");
559 iconv(fromutf8, &from, &fromlen, &to, &tolen);
560 if (tolen == 1) {
561 unget1byte(eucj[1]);
562 unget1byte(eucj[0]);
563 } else {
564 unget1byte(eucj[2]);
565 unget1byte(eucj[1]);
566 unget1byte(eucj[0]);
567 }
568 getkanji(c);
569 return;
570 } else {
571 utf8converted = 0;
572 }
573 #endif /* KAKASI_SUPPORT_UTF8 */
574 if (c1 < 0x20) { /* C0 */
575 getc0(c, c1);
576 } else if (c1 < 0x7f) { /* GL */
577 c->type = input_G[input_GL];
578 switch(c->type) {
579 case JIS78:
580 c->c1 = c1|0x80; c->c2 = get1byte()|0x80;
581 exc78_83(c);
582 break;
583 case JIS83:
584 c->c1 = c1|0x80; c->c2 = get1byte()|0x80;
585 break;
586 default:
587 c->c1 = c1;
588 }
589 } else if (c1 == 0x7f) { /* C0 */
590 c->type = OTHER; c->c1 = c1;
591 } else { /* 0x80 - 0xff */
592 if (input_term_type == UNKNOWN) {
593 int c2, term_type;
594
595 c2 = get1byte(); unget1byte(c2);
596 if ((c1 <= 0x9f) && (c1 >= 0x81) &&
597 (c2 >= 0x40) && (c2 <= 0xfc) && (c2 != 0x7f))
598 term_type = MSKANJI;
599 else if ((c1 <= 0xe9) && (c1 >= 0xe0) &&
600 (c2 >= 0x40) && (c2 <= 0xfc) && (c2 != 0x7f))
601 term_type = MSKANJI;
602 else if ((c1 == 0xea) &&
603 (c2 >= 0x40) && (c2 <= 0x0a5) && (c2 != 0x7f))
604 term_type = MSKANJI;
605 else if ((c1 <= 0xf4) && (c1 >= 0xa1) &&
606 (c2 >= 0xa1) && (c2 <= 0xfe))
607 term_type = DEC;
608 else
609 term_type = NEWJIS;
610 set_input_term(term_type);
611 if (output_term_type == UNKNOWN) {
612 set_output_term(term_type);
613 }
614 }
615
616 if (input_term_type == MSKANJI) {
617 if ((0xa0 <= c1) && (c1 <= 0xdf)) {
618 c->type=KATAKANA; c->c1 = c1&0x7f;
619 } else if ((0x81 <= c1) && (c1 <= 0xea)) {
620 int o1, o2, c2;
621
622 c2 = get1byte();
623 if (c2 >= 0x9f) {
624 if (c1 >= 0xe0) o1 = c1*2 - 0xe0;
625 else o1 = c1*2 - 0x60;
626 o2 = c2 + 2;
627 } else {
628 if (c1 >= 0xe0) o1 = c1*2 - 0xe1;
629 else o1 = c1*2 - 0x61;
630 if (c2 >= 0x7f) o2 = c2 + 0x60;
631 else o2 = c2 + 0x61;
632 }
633 c->type=JIS83;
634 c->c1 = o1;
635 c->c2 = o2;
636 } else {
637 c->type=OTHER; c->c1 = c1;
638 }
639 } else {
640 if (c1 < 0xa0) { /* C1 */
641 getc1(c, c1);
642 } else if (c1 < 0xff) { /* GR */
643 c->type = input_G[input_GR];
644 switch(c->type) {
645 case JIS78:
646 c->c1 = c1; c->c2 = get1byte()|0x80;
647 exc78_83(c);
648 case JIS83:
649 c->c1 = c1; c->c2 = get1byte()|0x80;
650 break;
651 default:
652 c->c1 = c1 & 0x7f;
653 }
654 if (c->c2 == 0xff) /* get1byte() == EOF */
655 c->type = OTHER;
656 } else if (c1 == 0xff) { /* C1 */
657 c->type = OTHER; c->c1 = c1;
658 }
659 }
660 }
661 }
662
663 static void
separator_proc(c)664 separator_proc(c)
665 Character *c;
666 {
667 int i;
668 int flag = 1;
669
670 switch(c->type) {
671 case OTHER:
672 case ASCII:
673 case JISROMAN:
674 switch(c->c1) {
675 case ' ':
676 case '\011':
677 case '\015':
678 separator_out = 0;
679 return;
680 }
681
682 /* check whether Character c is equal to separator or not */
683 for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
684 if ((c+i)->c1 != separator[i].c1 ||
685 (c+i)->c2 != separator[i].c2) {
686 flag = 0;
687 break;
688 }
689 }
690 if (flag) { /* In case of c === separator */
691 separator_out = 0;
692 return;
693 }
694 }
695
696 if (separator_out != 2) {
697 separator_out = 1;
698 return;
699 }
700
701 /* output separator */
702 for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
703 putkanji(&separator[i]);
704 }
705
706 separator_out = 1;
707 }
708
709 /* One character output */
710
711 void
putkanji(c)712 putkanji(c)
713 Character *c;
714 {
715 if (bunkatu_mode) {
716 separator_proc(c);
717 }
718
719 switch(output_term_type) {
720 case UNKNOWN:
721 switch(c->type) {
722 case OTHER:
723 case ASCII:
724 case JISROMAN:
725 if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
726 putchar('\033');putchar('(');putchar('J');
727 output_G[0] = JISROMAN;}
728 if (output_GL != SETG0) {
729 putchar(0xf); output_GL = SETG0;}
730 putchar(c->c1);
731 break;
732 case KATAKANA:
733 if (output_G[0] != KATAKANA) {
734 putchar('\033');putchar('(');putchar('I');
735 output_G[0] = KATAKANA;}
736 if (output_GL != SETG0) {
737 putchar(0xf); output_GL = SETG0;}
738 putchar(c->c1);
739 break;
740 case JIS83:
741 case JIS78:
742 if ((output_G[0] != JIS78) && (output_G[0] != JIS83)) {
743 putchar('\033');putchar('$');putchar('@');
744 output_G[0] = JIS78;}
745 if (output_GL != SETG0) {
746 putchar(0xf); output_GL = SETG0;}
747 putchar((c->c1)&0x7f);
748 putchar((c->c2)&0x7f);
749 break;
750 }
751 break;
752 case OLDJIS:
753 switch(c->type) {
754 case OTHER:
755 if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
756 putchar('\033');putchar('(');putchar('J');
757 output_G[0] = JISROMAN;}
758 if (output_GL != SETG0) {
759 putchar(0xf); output_GL = SETG0;}
760 putchar(c->c1);
761 break;
762 case ASCII:
763 if (output_G[0] != ASCII) {
764 putchar('\033');putchar('(');putchar('B');
765 output_G[0] = ASCII;}
766 if (output_GL != SETG0) {
767 putchar(0xf); output_GL = SETG0;}
768 putchar((c->c1)&0x7f);
769 break;
770 case JISROMAN:
771 if (output_G[0] != JISROMAN) {
772 putchar('\033');putchar('(');putchar('J');
773 output_G[0] = JISROMAN;}
774 if (output_GL != SETG0) {
775 putchar(0xf); output_GL = SETG0;}
776 putchar((c->c1)&0x7f);
777 break;
778 case KATAKANA:
779 if (output_G[0] != KATAKANA) {
780 putchar('\033');putchar('(');putchar('I');
781 output_G[0] = KATAKANA;}
782 if (output_GL != SETG0) {
783 putchar(0xf); output_GL = SETG0;}
784 putchar(c->c1);
785 break;
786 case JIS83:
787 exc78_83(c);
788 case JIS78:
789 if (output_G[0] != JIS78) {
790 putchar('\033');putchar('$');putchar('@');
791 output_G[0] = JIS78;}
792 if (output_GL != SETG0) {
793 putchar(0xf); output_GL = SETG0;}
794 putchar((c->c1)&0x7f);
795 putchar((c->c2)&0x7f);
796 break;
797 }
798 break;
799 case NEWJIS:
800 switch(c->type) {
801 case OTHER:
802 if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
803 putchar('\033');putchar('(');putchar('B');
804 output_G[0] = ASCII;}
805 if (output_GL != SETG0) {
806 putchar(0xf); output_GL = SETG0;}
807 putchar(c->c1);
808 break;
809 case ASCII:
810 if (output_G[0] != ASCII) {
811 putchar('\033');putchar('(');putchar('B');
812 output_G[0] = ASCII;}
813 if (output_GL != SETG0) {
814 putchar(0xf); output_GL = SETG0;}
815 putchar((c->c1)&0x7f);
816 break;
817 case JISROMAN:
818 if (output_G[0] != JISROMAN) {
819 putchar('\033');putchar('(');putchar('J');
820 output_G[0] = JISROMAN;}
821 if (output_GL != SETG0) {
822 putchar(0xf); output_GL = SETG0;}
823 putchar((c->c1)&0x7f);
824 break;
825 case KATAKANA:
826 if (output_G[0] != KATAKANA) {
827 putchar('\033');putchar('(');putchar('I');
828 output_G[0] = KATAKANA;}
829 if (output_GL != SETG0) {
830 putchar(0xf); output_GL = SETG0;}
831 putchar(c->c1);
832 break;
833 case JIS78:
834 exc78_83(c);
835 case JIS83:
836 if (output_G[0] != JIS83) {
837 putchar('\033');putchar('$');putchar('B');
838 output_G[0] = JIS83;}
839 if (output_GL != SETG0) {
840 putchar(0xf); output_GL = SETG0;}
841 putchar((c->c1)&0x7f);
842 putchar((c->c2)&0x7f);
843 break;
844 }
845 break;
846 case DEC:
847 switch(c->type) {
848 case OTHER:
849 if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
850 putchar('\033');putchar('(');putchar('B');
851 output_G[0] = ASCII;}
852 if (output_GL != SETG0) {
853 putchar(0xf); output_GL = SETG0;}
854 putchar(c->c1);
855 break;
856 case ASCII:
857 if (output_G[0] != ASCII) {
858 putchar('\033');putchar('(');putchar('B');
859 output_G[0] = ASCII;}
860 if (output_GL != SETG0) {
861 putchar(0xf); output_GL = SETG0;}
862 putchar((c->c1)&0x7f);
863 break;
864 case JISROMAN:
865 if (output_G[0] != JISROMAN) {
866 putchar('\033');putchar('(');putchar('J');
867 output_G[0] = JISROMAN;}
868 if (output_GL != SETG0) {
869 putchar(0xf); output_GL = SETG0;}
870 putchar((c->c1)&0x7f);
871 break;
872 case KATAKANA:
873 if (output_G[2] != KATAKANA) {
874 putchar('\033');putchar('*');putchar('I');
875 output_G[2] = KATAKANA;}
876 if (output_GR != SETG2) {
877 putchar('\033');putchar('}');output_GR=SETG2;}
878 putchar((c->c1)|0x80);
879 break;
880 case GRAPHIC:
881 if (output_G[1] != GRAPHIC) {
882 putchar('\033');putchar(')');putchar('0');
883 output_G[2] = GRAPHIC;}
884 if (output_GR != SETG1) {
885 putchar('\033');putchar('~');output_GR=SETG1;}
886 putchar((c->c1)|0x80);
887 break;
888 case JIS78:
889 exc78_83(c);
890 case JIS83:
891 if (output_G[3] != JIS83) {
892 putchar('\033');putchar('$');putchar('+');putchar('B');
893 output_G[3] = JIS83;}
894 if (output_GR != SETG3) {
895 putchar('\033'); putchar('|'); output_GR = SETG3;}
896 putchar((c->c1)|0x80);
897 putchar((c->c2)|0x80);
898 break;
899 }
900 break;
901 case EUC:
902 switch(c->type) {
903 case OTHER:
904 if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
905 putchar('\033');putchar('(');putchar('B');
906 output_G[0] = ASCII;}
907 if (output_GL != SETG0) {
908 putchar(0xf); output_GL = SETG0;}
909 putchar(c->c1);
910 break;
911 case ASCII:
912 if (output_G[0] != ASCII) {
913 putchar('\033');putchar('(');putchar('B');
914 output_G[0] = ASCII;}
915 if (output_GL != SETG0) {
916 putchar(0xf); output_GL = SETG0;}
917 putchar((c->c1)&0x7f);
918 break;
919 case JISROMAN:
920 if (output_G[0] != JISROMAN) {
921 putchar('\033');putchar('(');putchar('J');
922 output_G[0] = JISROMAN;}
923 if (output_GL != SETG0) {
924 putchar(0xf); output_GL = SETG0;}
925 putchar((c->c1)&0x7f);
926 break;
927 case KATAKANA:
928 if (output_G[2] != KATAKANA) {
929 putchar('\033');putchar('*');putchar('I');
930 output_G[2] = KATAKANA;}
931 putchar(0x8e);
932 putchar((c->c1)|0x80);
933 break;
934 case JIS78:
935 exc78_83(c);
936 case JIS83:
937 if (output_G[3] != JIS83) {
938 putchar('\033');putchar('$');putchar('+');putchar('B');
939 output_G[3] = JIS83;}
940 if (output_GR != SETG3) {
941 putchar('\033'); putchar('|'); output_GR = SETG3;}
942 putchar((c->c1)|0x80);
943 putchar((c->c2)|0x80);
944 break;
945 }
946 break;
947 case MSKANJI:
948 switch(c->type) {
949 case OTHER:
950 if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
951 putchar('\033');putchar('(');putchar('B');
952 output_G[0] = ASCII;}
953 if (output_GL != SETG0) {
954 putchar(0xf); output_GL = SETG0;}
955 putchar(c->c1);
956 break;
957 case ASCII:
958 case JISROMAN:
959 putchar((c->c1)&0x7f);
960 break;
961 case KATAKANA:
962 putchar((c->c1)|0x80);
963 break;
964 case JIS78:
965 exc78_83(c);
966 case JIS83:
967 {
968 int o1, o2;
969
970 if ((c->c1) & 1) {
971 o1 = c->c1/2 + ((c->c1 < 0xdf) ? 0x31 : 0x71);
972 o2 = c->c2 - ((c->c2 >= 0xe0) ? 0x60 : 0x61);
973 } else {
974 o1 = c->c1/2 + ((c->c1 < 0xdf) ? 0x30 : 0x70);
975 o2 = c->c2 - 2;
976 }
977 putchar(o1);
978 putchar(o2);
979 break;
980 }
981 }
982 break;
983 #ifdef KAKASI_SUPPORT_UTF8
984 case UTF8:
985 switch(c->type) {
986 case OTHER:
987 if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
988 putchar('\033');putchar('(');putchar('B');
989 output_G[0] = ASCII;}
990 if (output_GL != SETG0) {
991 putchar(0xf); output_GL = SETG0;}
992 putchar(c->c1);
993 break;
994 case ASCII:
995 if (output_G[0] != ASCII) {
996 putchar('\033');putchar('(');putchar('B');
997 output_G[0] = ASCII;}
998 if (output_GL != SETG0) {
999 putchar(0xf); output_GL = SETG0;}
1000 putchar((c->c1)&0x7f);
1001 break;
1002 case JISROMAN:
1003 if (output_G[0] != JISROMAN) {
1004 putchar('\033');putchar('(');putchar('J');
1005 output_G[0] = JISROMAN;}
1006 if (output_GL != SETG0) {
1007 putchar(0xf); output_GL = SETG0;}
1008 putchar((c->c1)&0x7f);
1009 break;
1010 case KATAKANA:
1011 if (output_G[2] != KATAKANA) {
1012 putchar('\033');putchar('*');putchar('I');
1013 output_G[2] = KATAKANA;}
1014 pututf8(0x8e, (c->c1)|0x80);
1015 break;
1016 case JIS78:
1017 exc78_83(c);
1018 case JIS83:
1019 if (output_G[3] != JIS83) {
1020 putchar('\033');putchar('$');putchar('+');putchar('B');
1021 output_G[3] = JIS83;}
1022 if (output_GR != SETG3) {
1023 putchar('\033'); putchar('|'); output_GR = SETG3;}
1024 pututf8((c->c1)|0x80, (c->c2)|0x80);
1025 break;
1026 }
1027 break;
1028 #endif /* KAKASI_SUPPORT_UTF8 */
1029 }
1030 }
1031
1032 int
term_type_str(str)1033 term_type_str(str)
1034 char *str;
1035 {
1036 if ((strncmp(str, "oldjis", 6) == 0) ||
1037 (strncmp(str, "jisold", 6) == 0))
1038 return OLDJIS;
1039 if (strncmp(str, "dec", 6) == 0)
1040 return DEC;
1041 if ((strncmp(str, "euc", 6) == 0) ||
1042 (strncmp(str, "att", 6) == 0))
1043 return EUC;
1044 if ((strncmp(str, "sjis", 6) == 0) ||
1045 (strncmp(str, "msjis", 6) == 0) ||
1046 (strncmp(str, "shiftjis", 6) == 0))
1047 return MSKANJI;
1048 #ifdef KAKASI_SUPPORT_UTF8
1049 if ((strncmp(str, "utf8", 6) == 0) ||
1050 (strncmp(str, "utf-8", 6) == 0))
1051 return UTF8;
1052 #endif /* KAKASI_SUPPORT_UTF8 */
1053
1054 return NEWJIS;
1055 }
1056
1057 #ifdef KAKASI_SUPPORT_UTF8
1058 void
pututf8(f,s)1059 pututf8(f, s)
1060 int f, s;
1061 {
1062 char fromstr[1024], tostr[1024];
1063 char *from = fromstr, *to = tostr;
1064 size_t fromlen = 2, tolen = 6;
1065 int i;
1066 fromstr[0] = (char) f;
1067 fromstr[1] = (char) s;
1068 if (toutf8 == (iconv_t) -1)
1069 toutf8 = iconv_open("UTF-8", "EUC-JP");
1070 iconv(toutf8, &from, &fromlen, &to, &tolen);
1071 if (tolen >= 6 || tolen < 0)
1072 return;
1073 for (i = 0; i < (6 - tolen); i ++) {
1074 putchar(tostr[i]);
1075 }
1076 }
1077
1078 void
close_iconv()1079 close_iconv()
1080 {
1081 if (fromutf8 != -1) {
1082 iconv_close(fromutf8);
1083 fromutf8 = -1;
1084 }
1085 if (toutf8 != -1) {
1086 iconv_close(toutf8);
1087 toutf8 = -1;
1088 }
1089 }
1090 #endif /* KAKASI_SUPPORT_UTF8 */
1091