1 /*
2  * KAKASI (Kanji Kana Simple inversion program)
3  * $Id: kanjiio.c,v 1.17 2013-02-07 07:26:18 knok Exp $
4  * Copyright (C) 1992
5  * Hironobu Takahashi (takahasi@tiny.or.jp)
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either versions 2, or (at your option)
10  * any later version.
11  *
12  * This program is distributed in the hope that it will be useful
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with KAKASI, see the file COPYING.  If not, write to the Free
19  * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
20  * 02111-1307, USA.
21  */
22 /*
23   Modified by NOKUBI Takatsugu
24   1999/03/04
25        Rename PERLMOD macro to LIBRARY
26   1999/01/08
27        Add PERLMOD macro.
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 # include <config.h>
32 #endif
33 
34 #include <stdio.h>
35 #ifdef HAVE_STRING_H
36 # include <string.h>
37 #else
38 # include <strings.h>
39 #endif
40 #ifdef HAVE_MALLOC_H
41 # include <malloc.h>
42 #endif
43 #include <stdlib.h>
44 #include "kakasi.h"
45 #ifdef LIBRARY
46 # include "libkakasi.h"
47 #endif
48 
49 #ifdef KAKASI_SUPPORT_UTF8
50 #include <iconv.h>
/*
 * iconv descriptors defined elsewhere in the program.  In this file they
 * are opened lazily: getkanji() opens fromutf8 as UTF-8 -> EUC-JP and
 * pututf8() opens toutf8 as EUC-JP -> UTF-8 whenever the descriptor still
 * holds (iconv_t) -1.
 */
extern iconv_t fromutf8;
extern iconv_t toutf8;
/*
 * Set to 1 by getkanji() just before it re-enters itself to read bytes it
 * pushed back from the UTF-8 decoder, so that those bytes are not decoded
 * as UTF-8 a second time; cleared again on that nested pass.
 */
int utf8converted = 0;
/* Forward declaration: putkanji() calls pututf8() before its definition. */
void pututf8(int f, int s);
55 
/*
   The macro UTF8_COMPUTE was imported from gutf8.c in glib.
   You can get the original source code from ftp://ftp.gtk.org.

   Classify the lead byte Char of a UTF-8 sequence.
     Char - lead byte (evaluated several times: pass a side-effect free
            expression)
     Mask - lvalue receiving the mask of the payload bits in the lead byte
     Len  - lvalue receiving the sequence length in bytes (1..6), or -1
            when Char cannot start a sequence (Mask is then set to 0)

   Any value below 128 (including negative values such as EOF) is reported
   as a one-byte sequence.  The body is wrapped in do { } while (0) so the
   macro behaves as a single statement, e.g. inside an unbraced if/else.
 */
#define UTF8_COMPUTE(Char, Mask, Len)					      \
  do									      \
    {									      \
      if ((Char) < 128)							      \
	{								      \
	  (Len) = 1;							      \
	  (Mask) = 0x7f;						      \
	}								      \
      else if (((Char) & 0xe0) == 0xc0)					      \
	{								      \
	  (Len) = 2;							      \
	  (Mask) = 0x1f;						      \
	}								      \
      else if (((Char) & 0xf0) == 0xe0)					      \
	{								      \
	  (Len) = 3;							      \
	  (Mask) = 0x0f;						      \
	}								      \
      else if (((Char) & 0xf8) == 0xf0)					      \
	{								      \
	  (Len) = 4;							      \
	  (Mask) = 0x07;						      \
	}								      \
      else if (((Char) & 0xfc) == 0xf8)					      \
	{								      \
	  (Len) = 5;							      \
	  (Mask) = 0x03;						      \
	}								      \
      else if (((Char) & 0xfe) == 0xfc)					      \
	{								      \
	  (Len) = 6;							      \
	  (Mask) = 0x01;						      \
	}								      \
      else								      \
	{								      \
	  (Len) = -1;							      \
	  (Mask) = 0;							      \
	}								      \
    }									      \
  while (0)
93 
94 #endif /* KAKASI_SUPPORT_UTF8 */
95 
96 
/*
 * When the build reports no memmove() (HAVE_MEMMOVE unset and no macro of
 * that name), map it onto bcopy().  Note that bcopy() takes the source
 * first and the destination second, hence the swapped arguments.
 */
#if ! defined HAVE_MEMMOVE && ! defined memmove
# define memmove(d, s, n) bcopy ((s), (d), (n))
#endif
100 
/*
 * Decoder state for the input stream.
 *   input_term_type - encoding of the input; UNKNOWN until it is set
 *                     explicitly or guessed by getkanji()/getc0().
 *   input_GL/GR     - which G-set (SETG0..SETG3) is currently invoked into
 *                     the 0x20-0x7e (GL) and 0xa0-0xfe (GR) byte ranges.
 *   input_G[]       - character set currently designated to each G-set;
 *                     indexes 0..3 are rewritten by set_terms().
 *                     NOTE(review): the fifth element is only initialised
 *                     here (to SJKANA); nothing in this file writes it.
 */
int input_term_type = UNKNOWN;

int input_GL = SETG0;
int input_GR = SETG3;
int input_G[5] = {ASCII, KATAKANA, KATAKANA, JIS83, SJKANA};

/* Encoder state for the output stream; same layout as the input state. */
int output_term_type = UNKNOWN;

int output_GL = SETG0;
int output_GR = SETG3;
int output_G[5] = {ASCII, KATAKANA, KATAKANA, JIS83, SJKANA};

#ifdef LIBRARY
/* Read cursor into the NUL-terminated input string; see setcharbuffer(). */
unsigned char *getcharbuffer;
#endif /* LIBRARY */
116 
117 /* One character buffer */
118 
119 static Character kanji_buf={OTHER, 0, 0};
120 static int kanji_buf_set = 0;
121 
122 void
ungetkanji(c)123 ungetkanji(c)
124      Character *c;
125 {
126     kanji_buf.type = c->type;
127     kanji_buf.c1 = c->c1;
128     kanji_buf.c2 = c->c2;
129     kanji_buf_set = 1;
130 }
131 
/* One input of a character */

/*
 * Byte-level push-back stack: unget1byte() pushes, get1byte() pops from
 * here before touching the real input.  input_stack_depth is the number
 * of bytes currently held.
 */
static unsigned char input_stack[1024];
static int input_stack_depth = 0;
136 
137 #ifdef LIBRARY
138 
/* Set to 1 by putcharpbuf() when an allocation fails. */
int pbuf_error = 0;

/* Capacity in bytes of each output chunk. */
#define PBSTRSIZE (4096)
/* In library builds every putchar() in this file appends to the chunk
   list through putcharpbuf() instead of writing to stdout. */
#ifdef putchar
#undef putchar
#endif
#define putchar(x) putcharpbuf(x)

/*
 * One chunk of buffered output.
 *   str  - storage of PBSTRSIZE bytes, NULL while unallocated
 *   size - bytes used in str; -1 marks a chunk whose storage has not been
 *          allocated yet (the trailing sentinel of the list)
 *   next - following chunk, or NULL
 */
typedef struct pbuf_t {
    char *str;
    long size;
    struct pbuf_t *next;
} pbuf;

/* Statically allocated head of the chunk list, initially an empty sentinel. */
pbuf pcbuf = {
    NULL, -1, NULL
};

/* Chunk that putcharpbuf() is currently appending to. */
pbuf *pcbuf_tail = &pcbuf;
158 
159 void
setcharbuffer(s)160 setcharbuffer(s)
161      unsigned char *s;
162 {
163     getcharbuffer = s;
164 }
165 
166 void
putcharpbuf(c)167 putcharpbuf(c)
168      int c;
169 {
170     pbuf *pb = pcbuf_tail;
171 
172     if (pb->size == PBSTRSIZE) {
173 	pb = pb->next;
174 	pcbuf_tail = pb;
175     }
176     if (pb->size < 0) {
177 	pbuf *npb;
178 	if ((pb->str = (char *) malloc(PBSTRSIZE)) == NULL) {
179 	    pbuf_error = 1;
180 	    return;
181 	}
182 	pb->size = 0;
183 	if ((npb = (void *) malloc(sizeof(pbuf))) == NULL) {
184 	    pbuf_error = 1;
185 	    return;
186 	}
187 	npb->str = NULL;
188 	npb->size = -1;
189 	npb->next = NULL;
190 	pb->next = npb;
191     }
192     pb->str[pb->size] = (char) c;
193     pb->size ++;
194 }
195 
196 char *
getpbstr()197 getpbstr() {
198     char *ret, *tmp;
199     long tsize = 0;
200     pbuf *pb = &pcbuf;
201     while (pb->next != NULL) {
202 	tsize += pb->size;
203 	pb = pb->next;
204     }
205     if (tsize <= 0)
206 	return NULL;
207     pb = &pcbuf;
208     tmp = ret = (char *) malloc(tsize + 1);
209     if (ret == NULL)
210 	return NULL;
211     while (pb->next != NULL) {
212 	memmove(tmp, pb->str, pb->size);
213 	tmp += pb->size;
214 	pb = pb->next;
215     }
216     ret[tsize] = '\0';
217     pb = &pcbuf;
218     free(pb->str);
219     pb = pb->next;
220     if (pb->next != NULL) {
221 	pbuf *opb = pb;
222 	pb = pb->next;
223 	while (pb != NULL) {
224 	    free(opb->str);
225 	    free(opb);
226 	    opb = pb;
227 	    pb = pb->next;
228 	}
229     } else {
230 	free(pb);
231     }
232     pcbuf.str = NULL;
233     pcbuf.size = -1;
234     pcbuf.next = NULL;
235     pcbuf_tail = &pcbuf;
236     return ret;
237 }
238 #endif /* LIBRARY */
239 
240 static int
get1byte()241 get1byte()
242 {
243     if (input_stack_depth == 0)
244 #ifdef LIBRARY
245     {
246 	if (*getcharbuffer == '\0') return EOF;
247 	return (int) *getcharbuffer ++;
248     }
249 #else
250 	return getchar();
251 #endif /* LIBRARY */
252     else
253 	return input_stack[-- input_stack_depth];
254 }
255 
256 static void
unget1byte(c)257 unget1byte(c)
258      int c;
259 {
260     input_stack[input_stack_depth ++] = c;
261 }
262 
263 static int
getc0set1(gn)264 getc0set1(gn)
265      int gn;
266 {
267     int c3;
268     int set;
269 
270     switch(c3 = get1byte()) {
271       case 'B':
272 	set = ASCII; break;
273       case 'J':
274 	set = JISROMAN; break;
275       case 'O':
276 	set = GRAPHIC; break;
277       case 'I':
278 	set = KATAKANA; break;
279       default:
280 	unget1byte(c3); return -1;
281     }
282     input_G[gn] = set;
283     return 0;
284 }
285 
286 static void
set_terms(type,term_type,GL,GR,G)287 set_terms(type, term_type, GL, GR, G)
288      int type;
289      int *term_type;
290      int *GL;
291      int *GR;
292      int *G;
293 {
294     *term_type = type;
295 
296     switch(type) {
297       case OLDJIS:
298 	*GL=SETG0, *GR=SETG1,
299 	G[0]=JISROMAN, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=KATAKANA;
300 	break;
301       case NEWJIS:
302 	*GL=SETG0, *GR=SETG1,
303 	G[0]=ASCII, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=KATAKANA;
304 	break;
305       case DEC:
306 	*GL=SETG0, *GR=SETG3,
307 	G[0]=ASCII, G[1]=GRAPHIC, G[2]=KATAKANA, G[3]=JIS83;
308 	break;
309       case EUC:
310 	*GL=SETG0, *GR=SETG3,
311 	G[0]=ASCII, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=JIS83;
312 	break;
313       case MSKANJI:
314 	*GL=SETG0, *GR=SJKANA,
315 	G[0]=ASCII, G[1]=KATAKANA, G[2]=KATAKANA, G[3]=KATAKANA;
316 	break;
317     }
318 }
319 
320 void
set_input_term(type)321 set_input_term(type)
322      int type;
323 {
324     set_terms(type, &input_term_type, &input_GL, &input_GR, input_G);
325 }
326 
327 void
set_output_term(type)328 set_output_term(type)
329      int type;
330 {
331     set_terms(type, &output_term_type, &output_GL, &output_GR, output_G);
332 }
333 
334 static int
getc0set2(gn)335 getc0set2(gn)
336      int gn;
337 {
338     int c4;
339     int set;
340 
341     switch(c4 = get1byte()) {
342       case '@':
343 	set = JIS78;
344 	if (input_term_type == UNKNOWN)
345 	    set_input_term(OLDJIS);
346 	if (output_term_type == UNKNOWN)
347 	    set_output_term(OLDJIS);
348 	break;
349       case 'B':
350 	set = JIS83;
351 	if (input_term_type == UNKNOWN)
352 	    set_input_term(NEWJIS);
353 	if (output_term_type == UNKNOWN)
354 	    set_output_term(NEWJIS);
355 	break;
356       default:
357 	unget1byte(c4); return -1;
358     }
359     input_G[gn] = set;
360     return 0;
361 }
362 
363 static void
getc0(c,c1)364 getc0(c, c1)
365      Character *c;
366      int c1;
367 {
368     int c2, c3;
369     int GL_save, GR_save;
370 
371     switch(c1) {
372       case '\033':
373 	switch(c2 = get1byte()) {
374 	  case '(':
375 	    if (getc0set1(SETG0) != 0) {
376 		unget1byte(c2);	c->type = OTHER; c->c1 = c1; return;
377 	    }
378 	    break;
379 	  case ')':
380 	    if (getc0set1(SETG1) != 0) {
381 		unget1byte(c2);	c->type = OTHER; c->c1 = c1; return;
382 	    }
383 	    break;
384 	  case '*':
385 	    if (getc0set1(SETG2) != 0) {
386 		unget1byte(c2);	c->type = OTHER; c->c1 = c1; return;
387 	    }
388 	    break;
389 	  case '+':
390 	    if (getc0set1(SETG3) != 0) {
391 		unget1byte(c2);	c->type = OTHER; c->c1 = c1; return;
392 	    }
393 	    break;
394 	  case '$':
395 	    switch(c3 = get1byte()) {
396 	      case '@':
397 		if (input_term_type == UNKNOWN)
398 		    set_input_term(OLDJIS);
399 		if (output_term_type == UNKNOWN)
400 		    set_output_term(OLDJIS);
401 		input_G[SETG0] = JIS78;
402 		break;
403 	      case 'B':
404 		if (input_term_type == UNKNOWN)
405 		    set_input_term(NEWJIS);
406 		if (output_term_type == UNKNOWN)
407 		    set_output_term(NEWJIS);
408 		input_G[SETG0] = JIS83;
409 		break;
410 	      case '(':
411 		if (getc0set2(SETG0) != 0) {
412 		    unget1byte(c3); unget1byte(c2);
413 		    c->type = OTHER; c->c1 = c1; return;
414 		}
415 		break;
416 	      case ')':
417 		if (getc0set2(SETG1) != 0) {
418 		    unget1byte(c3); unget1byte(c2);
419 		    c->type = OTHER; c->c1 = c1; return;
420 		}
421 		break;
422 	      case '*':
423 		if (getc0set2(SETG2) != 0) {
424 		    unget1byte(c3); unget1byte(c2);
425 		    c->type = OTHER; c->c1 = c1; return;
426 		}
427 		break;
428 	      case '+':
429 		if (getc0set2(SETG3) != 0) {
430 		    unget1byte(c3); unget1byte(c2);
431 		    c->type = OTHER; c->c1 = c1; return;
432 		}
433 		break;
434 	      default:
435 		unget1byte(c3);
436 		unget1byte(c2);
437 		c->type = OTHER; c->c1 = c1; return;
438 	    }
439 	    break;
440 	  case 'n':
441 	    input_GL = SETG2;
442 	    break;
443 	  case 'o':
444 	    input_GL = SETG3;
445 	    break;
446 	  case '~':
447 	    input_GR = SETG1;
448 	    break;
449 	  case '}':
450 	    input_GR = SETG2;
451 	    break;
452 	  case '|':
453 	    input_GR = SETG3;
454 	    break;
455 	  case 'N':
456 	    GL_save = input_GL;
457 	    GR_save = input_GR;
458 	    input_GL = SETG2;
459 	    input_GR = SETG2;
460 	    getkanji(c);
461 	    input_GL = GL_save;
462 	    input_GR = GR_save;
463 	    return;
464 	  case 'O':
465 	    GL_save = input_GL;
466 	    GR_save = input_GR;
467 	    input_GL = SETG3;
468 	    input_GR = SETG3;
469 	    getkanji(c);
470 	    input_GL = GL_save;
471 	    input_GR = GR_save;
472 	    return;
473 	  default:
474 	    unget1byte(c2);
475 	    c->type = OTHER; c->c1 = c1; return;
476 	}
477 	break;
478       case 0xe:
479 	input_GL = SETG1;
480 	break;
481       case 0xf:
482 	input_GL = SETG0;
483 	break;
484       case EOF:
485 	c->type = OTHER; c->c1 = 0xff; return;
486       default:
487 	c->type = OTHER; c->c1 = c1; return;
488     }
489     getkanji(c);
490 }
491 
492 static void
getc1(c,c1)493 getc1(c, c1)
494      Character *c;
495      int c1;
496 {
497     int GL_save, GR_save;
498 
499     switch(c1) {
500       case 0x8e:
501 	GL_save = input_GL;
502 	GR_save = input_GR;
503 	input_GL = SETG2;
504 	input_GR = SETG2;
505 	getkanji(c);
506 	input_GL = GL_save;
507 	input_GR = GR_save;
508 	return;
509       case 0x8f:
510 	GL_save = input_GL;
511 	GR_save = input_GR;
512 	input_GL = SETG3;
513 	input_GR = SETG3;
514 	getkanji(c);
515 	input_GL = GL_save;
516 	input_GR = GR_save;
517 	return;
518       default:
519 	c->type = OTHER; c->c1 = c1; return;
520     }
521 }
522 
523 void
getkanji(c)524 getkanji(c)
525      Character *c;
526 {
527     int c1;
528 
529     if (kanji_buf_set) {
530 	c->type = kanji_buf.type;
531 	c->c1 = kanji_buf.c1;
532 	c->c2 = kanji_buf.c2;
533 	kanji_buf_set = 0;
534 	return;
535     }
536 
537     c1 = get1byte();
538 #ifdef KAKASI_SUPPORT_UTF8
539 	if (!utf8converted && input_term_type == UTF8) {
540 	    char utf8[6], eucj[3];
541 	    char *from = utf8, *to = eucj;
542 	    size_t fromlen = 0, tolen = 3;
543 	    int i, len, mask;
544 	    utf8converted = 1;
545 	    UTF8_COMPUTE(c1, mask, len);
546 	    (void) mask;
547 	    if (len <= 1) {
548 		unget1byte(c1);
549 		getkanji(c);
550 		return;
551 	    }
552 	    utf8[0] = c1;
553 	    for (i = 1; i < len; i ++) {
554 		utf8[i] = getchar();
555 	    }
556 	    fromlen = len;
557 	    if (fromutf8 == (iconv_t) -1)
558 		fromutf8 = iconv_open("EUC-JP", "UTF-8");
559 	    iconv(fromutf8, &from, &fromlen, &to, &tolen);
560 	    if (tolen == 1) {
561 		unget1byte(eucj[1]);
562 		unget1byte(eucj[0]);
563 	    } else {
564 		unget1byte(eucj[2]);
565 		unget1byte(eucj[1]);
566 		unget1byte(eucj[0]);
567 	    }
568 	    getkanji(c);
569 	    return;
570 	} else {
571 	    utf8converted = 0;
572 	}
573 #endif /* KAKASI_SUPPORT_UTF8 */
574     if (c1 < 0x20) { /* C0 */
575 	getc0(c, c1);
576     } else if (c1 < 0x7f) { /* GL */
577 	c->type = input_G[input_GL];
578 	switch(c->type) {
579 	  case JIS78:
580 	    c->c1 = c1|0x80; c->c2 = get1byte()|0x80;
581 	    exc78_83(c);
582 	    break;
583 	  case JIS83:
584 	    c->c1 = c1|0x80; c->c2 = get1byte()|0x80;
585 	    break;
586 	  default:
587 	    c->c1 = c1;
588 	}
589     } else if (c1 == 0x7f) { /* C0 */
590 	c->type = OTHER; c->c1 = c1;
591     } else { /* 0x80 - 0xff */
592 	if (input_term_type == UNKNOWN) {
593 	    int c2, term_type;
594 
595 	    c2 = get1byte(); unget1byte(c2);
596 	    if ((c1 <= 0x9f) && (c1 >= 0x81) &&
597 		(c2 >= 0x40) && (c2 <= 0xfc) && (c2 != 0x7f))
598 		term_type = MSKANJI;
599 	    else if ((c1 <= 0xe9) && (c1 >= 0xe0) &&
600 		     (c2 >= 0x40) && (c2 <= 0xfc) && (c2 != 0x7f))
601 		term_type = MSKANJI;
602 	    else if ((c1 == 0xea) &&
603 		     (c2 >= 0x40) && (c2 <= 0x0a5) && (c2 != 0x7f))
604 		term_type = MSKANJI;
605 	    else if ((c1 <= 0xf4) && (c1 >= 0xa1) &&
606 		     (c2 >= 0xa1) && (c2 <= 0xfe))
607 		term_type = DEC;
608 	    else
609 		term_type = NEWJIS;
610 	    set_input_term(term_type);
611 	    if (output_term_type == UNKNOWN) {
612 		set_output_term(term_type);
613 	    }
614 	}
615 
616 	if (input_term_type == MSKANJI) {
617 	    if ((0xa0 <= c1) && (c1 <= 0xdf)) {
618 		c->type=KATAKANA; c->c1 = c1&0x7f;
619 	    } else if ((0x81 <= c1) && (c1 <= 0xea)) {
620 		int o1, o2, c2;
621 
622 		c2 = get1byte();
623 		if (c2 >= 0x9f) {
624 		    if (c1 >= 0xe0) o1 = c1*2 - 0xe0;
625 		    else o1 = c1*2 - 0x60;
626 		    o2 = c2 + 2;
627 		} else {
628 		    if (c1 >= 0xe0) o1 = c1*2 - 0xe1;
629 		    else o1 = c1*2 - 0x61;
630 		    if (c2 >= 0x7f) o2 = c2 + 0x60;
631 		    else o2 = c2 +  0x61;
632 		}
633 		c->type=JIS83;
634 		c->c1 = o1;
635 		c->c2 = o2;
636 	    } else {
637 		c->type=OTHER; c->c1 = c1;
638 	    }
639 	} else {
640 	    if (c1 < 0xa0) { /* C1 */
641 		getc1(c, c1);
642 	    } else if (c1 < 0xff) { /* GR */
643 		c->type = input_G[input_GR];
644 		switch(c->type) {
645 		  case JIS78:
646 		    c->c1 = c1; c->c2 = get1byte()|0x80;
647 		    exc78_83(c);
648 		  case JIS83:
649 		    c->c1 = c1; c->c2 = get1byte()|0x80;
650 		    break;
651 		  default:
652 		    c->c1 = c1 & 0x7f;
653 		}
654 		if (c->c2 == 0xff) /* get1byte() == EOF */
655 		    c->type = OTHER;
656 	    } else if (c1 == 0xff) { /* C1 */
657 		c->type = OTHER; c->c1 = c1;
658 	    }
659 	}
660     }
661 }
662 
663 static void
separator_proc(c)664 separator_proc(c)
665      Character *c;
666 {
667     int i;
668     int flag = 1;
669 
670     switch(c->type) {
671       case OTHER:
672       case ASCII:
673       case JISROMAN:
674 	switch(c->c1) {
675 	  case ' ':
676 	  case '\011':
677 	  case '\015':
678 	    separator_out = 0;
679 	    return;
680 	}
681 
682 	/* check whether Character c is equal to separator or not */
683 	for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
684 	    if ((c+i)->c1 != separator[i].c1 ||
685 		(c+i)->c2 != separator[i].c2) {
686 		flag = 0;
687 		break;
688 	    }
689 	}
690 	if (flag) { /* In case of c === separator */
691 	    separator_out = 0;
692 	    return;
693 	}
694     }
695 
696     if (separator_out != 2) {
697 	separator_out = 1;
698 	return;
699     }
700 
701     /* output separator */
702     for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
703 	putkanji(&separator[i]);
704     }
705 
706     separator_out = 1;
707 }
708 
709 /* One character output */
710 
711 void
putkanji(c)712 putkanji(c)
713      Character *c;
714 {
715     if (bunkatu_mode) {
716 	separator_proc(c);
717     }
718 
719     switch(output_term_type) {
720       case UNKNOWN:
721 	switch(c->type) {
722 	  case OTHER:
723 	  case ASCII:
724 	  case JISROMAN:
725 	    if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
726 		putchar('\033');putchar('(');putchar('J');
727 		output_G[0] = JISROMAN;}
728 	    if (output_GL != SETG0) {
729 		putchar(0xf); output_GL = SETG0;}
730 	    putchar(c->c1);
731 	    break;
732 	  case KATAKANA:
733 	    if (output_G[0] != KATAKANA) {
734 		putchar('\033');putchar('(');putchar('I');
735 		output_G[0] = KATAKANA;}
736 	    if (output_GL != SETG0) {
737 		putchar(0xf); output_GL = SETG0;}
738 	    putchar(c->c1);
739 	    break;
740 	  case JIS83:
741 	  case JIS78:
742 	    if ((output_G[0] != JIS78) && (output_G[0] != JIS83)) {
743 		putchar('\033');putchar('$');putchar('@');
744 		output_G[0] = JIS78;}
745 	    if (output_GL != SETG0) {
746 		putchar(0xf); output_GL = SETG0;}
747 	    putchar((c->c1)&0x7f);
748 	    putchar((c->c2)&0x7f);
749 	    break;
750 	}
751 	break;
752       case OLDJIS:
753 	switch(c->type) {
754 	  case OTHER:
755 	    if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
756 		putchar('\033');putchar('(');putchar('J');
757 		output_G[0] = JISROMAN;}
758 	    if (output_GL != SETG0) {
759 		putchar(0xf); output_GL = SETG0;}
760 	    putchar(c->c1);
761 	    break;
762 	  case ASCII:
763 	    if (output_G[0] != ASCII) {
764 		putchar('\033');putchar('(');putchar('B');
765 		output_G[0] = ASCII;}
766 	    if (output_GL != SETG0) {
767 		putchar(0xf); output_GL = SETG0;}
768 	    putchar((c->c1)&0x7f);
769 	    break;
770 	  case JISROMAN:
771 	    if (output_G[0] != JISROMAN) {
772 		putchar('\033');putchar('(');putchar('J');
773 		output_G[0] = JISROMAN;}
774 	    if (output_GL != SETG0) {
775 		putchar(0xf); output_GL = SETG0;}
776 	    putchar((c->c1)&0x7f);
777 	    break;
778 	  case KATAKANA:
779 	    if (output_G[0] != KATAKANA) {
780 		putchar('\033');putchar('(');putchar('I');
781 		output_G[0] = KATAKANA;}
782 	    if (output_GL != SETG0) {
783 		putchar(0xf); output_GL = SETG0;}
784 	    putchar(c->c1);
785 	    break;
786 	  case JIS83:
787 	    exc78_83(c);
788 	  case JIS78:
789 	    if (output_G[0] != JIS78) {
790 		putchar('\033');putchar('$');putchar('@');
791 		output_G[0] = JIS78;}
792 	    if (output_GL != SETG0) {
793 		putchar(0xf); output_GL = SETG0;}
794 	    putchar((c->c1)&0x7f);
795 	    putchar((c->c2)&0x7f);
796 	    break;
797 	}
798 	break;
799       case NEWJIS:
800 	switch(c->type) {
801 	  case OTHER:
802 	    if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
803 		putchar('\033');putchar('(');putchar('B');
804 		output_G[0] = ASCII;}
805 	    if (output_GL != SETG0) {
806 		putchar(0xf); output_GL = SETG0;}
807 	    putchar(c->c1);
808 	    break;
809 	  case ASCII:
810 	    if (output_G[0] != ASCII) {
811 		putchar('\033');putchar('(');putchar('B');
812 		output_G[0] = ASCII;}
813 	    if (output_GL != SETG0) {
814 		putchar(0xf); output_GL = SETG0;}
815 	    putchar((c->c1)&0x7f);
816 	    break;
817 	  case JISROMAN:
818 	    if (output_G[0] != JISROMAN) {
819 		putchar('\033');putchar('(');putchar('J');
820 		output_G[0] = JISROMAN;}
821 	    if (output_GL != SETG0) {
822 		putchar(0xf); output_GL = SETG0;}
823 	    putchar((c->c1)&0x7f);
824 	    break;
825 	  case KATAKANA:
826 	    if (output_G[0] != KATAKANA) {
827 		putchar('\033');putchar('(');putchar('I');
828 		output_G[0] = KATAKANA;}
829 	    if (output_GL != SETG0) {
830 		putchar(0xf); output_GL = SETG0;}
831 	    putchar(c->c1);
832 	    break;
833 	  case JIS78:
834 	    exc78_83(c);
835 	  case JIS83:
836 	    if (output_G[0] != JIS83) {
837 		putchar('\033');putchar('$');putchar('B');
838 		output_G[0] = JIS83;}
839 	    if (output_GL != SETG0) {
840 		putchar(0xf); output_GL = SETG0;}
841 	    putchar((c->c1)&0x7f);
842 	    putchar((c->c2)&0x7f);
843 	    break;
844 	}
845 	break;
846       case DEC:
847 	switch(c->type) {
848 	  case OTHER:
849 	    if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
850 		putchar('\033');putchar('(');putchar('B');
851 		output_G[0] = ASCII;}
852 	    if (output_GL != SETG0) {
853 		putchar(0xf); output_GL = SETG0;}
854 	    putchar(c->c1);
855 	    break;
856 	  case ASCII:
857 	    if (output_G[0] != ASCII) {
858 		putchar('\033');putchar('(');putchar('B');
859 		output_G[0] = ASCII;}
860 	    if (output_GL != SETG0) {
861 		putchar(0xf); output_GL = SETG0;}
862 	    putchar((c->c1)&0x7f);
863 	    break;
864 	  case JISROMAN:
865 	    if (output_G[0] != JISROMAN) {
866 		putchar('\033');putchar('(');putchar('J');
867 		output_G[0] = JISROMAN;}
868 	    if (output_GL != SETG0) {
869 		putchar(0xf); output_GL = SETG0;}
870 	    putchar((c->c1)&0x7f);
871 	    break;
872 	  case KATAKANA:
873 	    if (output_G[2] != KATAKANA) {
874 		putchar('\033');putchar('*');putchar('I');
875 		output_G[2] = KATAKANA;}
876 	    if (output_GR != SETG2) {
877 		putchar('\033');putchar('}');output_GR=SETG2;}
878 	    putchar((c->c1)|0x80);
879 	    break;
880 	  case GRAPHIC:
881 	    if (output_G[1] != GRAPHIC) {
882 		putchar('\033');putchar(')');putchar('0');
883 		output_G[2] = GRAPHIC;}
884 	    if (output_GR != SETG1) {
885 		putchar('\033');putchar('~');output_GR=SETG1;}
886 	    putchar((c->c1)|0x80);
887 	    break;
888 	  case JIS78:
889 	    exc78_83(c);
890 	  case JIS83:
891 	    if (output_G[3] != JIS83) {
892 		putchar('\033');putchar('$');putchar('+');putchar('B');
893 		output_G[3] = JIS83;}
894 	    if (output_GR != SETG3) {
895 		putchar('\033'); putchar('|'); output_GR = SETG3;}
896 	    putchar((c->c1)|0x80);
897 	    putchar((c->c2)|0x80);
898 	    break;
899 	}
900 	break;
901       case EUC:
902 	switch(c->type) {
903 	  case OTHER:
904 	    if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
905 		putchar('\033');putchar('(');putchar('B');
906 		output_G[0] = ASCII;}
907 	    if (output_GL != SETG0) {
908 		putchar(0xf); output_GL = SETG0;}
909 	    putchar(c->c1);
910 	    break;
911 	  case ASCII:
912 	    if (output_G[0] != ASCII) {
913 		putchar('\033');putchar('(');putchar('B');
914 		output_G[0] = ASCII;}
915 	    if (output_GL != SETG0) {
916 		putchar(0xf); output_GL = SETG0;}
917 	    putchar((c->c1)&0x7f);
918 	    break;
919 	  case JISROMAN:
920 	    if (output_G[0] != JISROMAN) {
921 		putchar('\033');putchar('(');putchar('J');
922 		output_G[0] = JISROMAN;}
923 	    if (output_GL != SETG0) {
924 		putchar(0xf); output_GL = SETG0;}
925 	    putchar((c->c1)&0x7f);
926 	    break;
927 	  case KATAKANA:
928 	    if (output_G[2] != KATAKANA) {
929 		putchar('\033');putchar('*');putchar('I');
930 		output_G[2] = KATAKANA;}
931 	    putchar(0x8e);
932 	    putchar((c->c1)|0x80);
933 	    break;
934 	  case JIS78:
935 	    exc78_83(c);
936 	  case JIS83:
937 	    if (output_G[3] != JIS83) {
938 		putchar('\033');putchar('$');putchar('+');putchar('B');
939 		output_G[3] = JIS83;}
940 	    if (output_GR != SETG3) {
941 		putchar('\033'); putchar('|'); output_GR = SETG3;}
942 	    putchar((c->c1)|0x80);
943 	    putchar((c->c2)|0x80);
944 	    break;
945 	}
946 	break;
947       case MSKANJI:
948 	switch(c->type) {
949 	  case OTHER:
950 	    if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
951 		putchar('\033');putchar('(');putchar('B');
952 		output_G[0] = ASCII;}
953 	    if (output_GL != SETG0) {
954 		putchar(0xf); output_GL = SETG0;}
955 	    putchar(c->c1);
956 	    break;
957 	  case ASCII:
958 	  case JISROMAN:
959 	    putchar((c->c1)&0x7f);
960 	    break;
961 	  case KATAKANA:
962 	    putchar((c->c1)|0x80);
963 	    break;
964 	  case JIS78:
965 	    exc78_83(c);
966 	  case JIS83:
967 	    {
968 		int o1, o2;
969 
970 		if ((c->c1) & 1) {
971 		    o1 = c->c1/2 + ((c->c1 < 0xdf) ? 0x31 : 0x71);
972 		    o2 = c->c2 - ((c->c2 >= 0xe0) ? 0x60 : 0x61);
973 		} else {
974 		    o1 = c->c1/2 + ((c->c1 < 0xdf) ? 0x30 : 0x70);
975 		    o2 = c->c2 - 2;
976 		}
977 		putchar(o1);
978 		putchar(o2);
979 		break;
980 	    }
981 	}
982 	break;
983 #ifdef KAKASI_SUPPORT_UTF8
984       case UTF8:
985 	switch(c->type) {
986 	  case OTHER:
987 	    if ((output_G[0] != ASCII) && (output_G[0] != JISROMAN)) {
988 		putchar('\033');putchar('(');putchar('B');
989 		output_G[0] = ASCII;}
990 	    if (output_GL != SETG0) {
991 		putchar(0xf); output_GL = SETG0;}
992 	    putchar(c->c1);
993 	    break;
994 	  case ASCII:
995 	    if (output_G[0] != ASCII) {
996 		putchar('\033');putchar('(');putchar('B');
997 		output_G[0] = ASCII;}
998 	    if (output_GL != SETG0) {
999 		putchar(0xf); output_GL = SETG0;}
1000 	    putchar((c->c1)&0x7f);
1001 	    break;
1002 	  case JISROMAN:
1003 	    if (output_G[0] != JISROMAN) {
1004 		putchar('\033');putchar('(');putchar('J');
1005 		output_G[0] = JISROMAN;}
1006 	    if (output_GL != SETG0) {
1007 		putchar(0xf); output_GL = SETG0;}
1008 	    putchar((c->c1)&0x7f);
1009 	    break;
1010 	  case KATAKANA:
1011 	    if (output_G[2] != KATAKANA) {
1012 		putchar('\033');putchar('*');putchar('I');
1013 		output_G[2] = KATAKANA;}
1014 	    pututf8(0x8e, (c->c1)|0x80);
1015 	    break;
1016 	  case JIS78:
1017 	    exc78_83(c);
1018 	  case JIS83:
1019 	    if (output_G[3] != JIS83) {
1020 		putchar('\033');putchar('$');putchar('+');putchar('B');
1021 		output_G[3] = JIS83;}
1022 	    if (output_GR != SETG3) {
1023 		putchar('\033'); putchar('|'); output_GR = SETG3;}
1024 	    pututf8((c->c1)|0x80, (c->c2)|0x80);
1025 	    break;
1026 	}
1027 	break;
1028 #endif /* KAKASI_SUPPORT_UTF8 */
1029     }
1030 }
1031 
1032 int
term_type_str(str)1033 term_type_str(str)
1034      char *str;
1035 {
1036     if ((strncmp(str, "oldjis", 6) == 0) ||
1037 	(strncmp(str, "jisold", 6) == 0))
1038 	return OLDJIS;
1039     if (strncmp(str, "dec", 6) == 0)
1040 	return DEC;
1041     if ((strncmp(str, "euc", 6) == 0) ||
1042 	(strncmp(str, "att", 6) == 0))
1043 	return EUC;
1044     if ((strncmp(str, "sjis", 6) == 0) ||
1045 	(strncmp(str, "msjis", 6) == 0) ||
1046 	(strncmp(str, "shiftjis", 6) == 0))
1047 	return MSKANJI;
1048 #ifdef KAKASI_SUPPORT_UTF8
1049     if ((strncmp(str, "utf8", 6) == 0) ||
1050 	(strncmp(str, "utf-8", 6) == 0))
1051 	return UTF8;
1052 #endif /* KAKASI_SUPPORT_UTF8 */
1053 
1054     return NEWJIS;
1055 }
1056 
1057 #ifdef KAKASI_SUPPORT_UTF8
1058 void
pututf8(f,s)1059 pututf8(f, s)
1060     int f, s;
1061 {
1062     char fromstr[1024], tostr[1024];
1063     char *from = fromstr, *to = tostr;
1064     size_t fromlen = 2, tolen = 6;
1065     int i;
1066     fromstr[0] = (char) f;
1067     fromstr[1] = (char) s;
1068     if (toutf8 == (iconv_t) -1)
1069 	toutf8 = iconv_open("UTF-8", "EUC-JP");
1070     iconv(toutf8, &from, &fromlen, &to, &tolen);
1071     if (tolen >= 6 || tolen < 0)
1072 	return;
1073     for (i = 0; i < (6 - tolen); i ++) {
1074 	putchar(tostr[i]);
1075     }
1076 }
1077 
1078 void
close_iconv()1079 close_iconv()
1080 {
1081     if (fromutf8 != -1) {
1082 	iconv_close(fromutf8);
1083 	fromutf8 = -1;
1084     }
1085     if (toutf8 != -1) {
1086 	iconv_close(toutf8);
1087 	toutf8 = -1;
1088     }
1089 }
1090 #endif /* KAKASI_SUPPORT_UTF8 */
1091