1 /**** << kanji code converter >> ****
2  *
3  *  kcc.c
4  *                                                  Aug 13 1992
5  *                                      mod:        Jul  1 1994
6  ************************************************** tonooka ***********/
7 /*
8  *	Copyright (c) 1994 Yasuhiro Tonooka (tonooka@msi.co.jp)
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2, or (at your option)
13  * any later version.
14  *
15  * This program is distributed in the hope that it will be useful, but
16  * WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23  */
#if !defined lint
/*
 * Version and copyright stamps compiled into the object file.  They are
 * not referenced by any code visible in this part of the file; the
 * "@(#)" prefix is presumably there for what(1)-style tools.
 */
static char sccsid[] = "@(#)kcc.c 2.3 (Y.Tonooka) 7/1/94";
static char copyright[] = "@(#)Copyright (c) 1992 Yasuhiro Tonooka";
#endif
28 
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
33 
34 /*-
35  *  OPTIONS
36  *	-O or -IO
37  *		I specifies input code and O specifies output code.
38  *              When input code is not specified, kcc guesses kanji
39  *		code.
40  *		I is one of:
41  *			e	EUC (with 7-bit JIS)
42  *			d	DEC (with 7-bit JIS)
43  *			s	shift-JIS (with 7-bit JIS)
44  *			j, 7 or k
45  *				7-bit JIS
46  *			8	8-bit JIS
47  *		O is one of:
48  *			e	EUC
49  *			d	DEC
50  *			s	shift-JIS
51  *			jXY, 7XY
52  *				7-bit JIS
53  *			kXY	7-bit JIS using "ESC(I" for JIS katakana
54  *			8XY	8-bit JIS
55  *		where X is:
56  *			B	"ESC$B" as kanji shift in
57  *			@	"ESC$@" as kanji shift in
58  *			+	"ESC&@ESC$B" as kanji shift in
59  *		Y is:
60  *			B	"ESC(B" as kanji shift out
61  *			J	"ESC(J" as kanji shift out
62  *			H	"ESC(H" as kanji shift out
63  *
64  *	-c      Check:  Only tells file type to stdout.  Overrides any
65  *		other option except -x and -z.  In this mode, kcc
66  *		reads all thru the input file to guess the file type.
67  *
68  *	-v	Verbose:  Prints to stderr which kanji code system
69  *		kcc guessed the input code is in.
70  *
71  *	-x      Extended mode:  In this mode, recognized area of each
72  *		code system is extended in guessing.
73  *
74  *      -z      Reduced mode:  In this mode, only zenkaku characters
75  *		are recognized with EUC and shift-JIS in guessing.
76  *		(Reduces ambiguity).
77  *
 *      -n      Gaiji & undefined hankaku kana characters are replaced
79  *		with padding character.
80  *
81  *	-h	Hiragana is used instead of katakana when converting
82  *		hankaku kana to DEC zenkaku.
83  *
84  *	-b xxx	Specify hold buffer size to xxx (byte).
85  */
86 
#define LENLINE		(BUFSIZ * 4)	/* size of the line buffer used by
					 * filter() and check() */
#define HOLDBUFSIZ	8192	/* default size of hold buffer */

#define ESC		0x1b
#define SO		0x0e
#define SI		0x0f
#define SS2		0x8e	/* EUC single shift 2 */
#define SS3		0x8f	/* EUC single shift 3 */

#define ZENPAD		0x2222	/* padding char for zenkaku */
#define HANPAD		0x25	/* padding char for hankaku */

typedef int bool;

/*
 * Bit mask for the final character of an escape sequence:  '@' maps to
 * bit 0, 'A' to bit 1, and so on.  The shift count is parenthesized
 * explicitly and the shifted value is unsigned, matching the unsigned
 * long flag words (insi, inso, innj, ingj) the result is OR-ed into.
 */
#define bitflag(c)	(1UL << ((c) - '@'))

/*
 * Bits of the "code" word passed between guess(), out() and showcode().
 */
#define NONASCII	0x01	/* non-ASCII character */
#define JIS		0x02	/* JIS */
#define ESCI		0x04	/* "ESC(I" */
#define ASSUME		0x08	/* assumed EUC (or DEC) */
#define EUC		0x10
#define DEC		0x20
#define SJIS		0x40
#define JIS8		0x80	/* 8-bit JIS */
#define BIT8		(EUC | DEC | SJIS | JIS8)

/*
 * Shift state of a JIS stream (used for guessing, input and output).
 */
enum mode {
    M_ASCII,
    M_KANJI,
    M_GAIJI,
    M_SO,			/* hankaku kana with SO */
    M_ESCI,			/* hankaku kana with "ESC(I" */
};
120 
char *progname;			/* argv[0]; prefix of diagnostics */
char *filename = NULL;		/* current input operand; main() leaves
				 * it NULL when there are no operands */
char shiftin[7] = "\033$B";	/* kanji shift-in written on JIS output;
				 * 7 bytes hold the longest form that
				 * main() stores, "ESC&@ESC$B" + NUL */
char shiftout[4] = "\033(J";	/* kanji shift-out written on JIS output */
unsigned incode = 0;		/* input code from option; 0 = guess */
unsigned outcode = JIS;		/* output code from option */
bool verbose = 0;		/* -v */
bool docheck = 0;		/* -c */
bool extend = 0;		/* -x */
bool zenkaku = 0;		/* -z */
bool nogaiji = 0;		/* -n */

/* Kana tables defined elsewhere; -h switches kanatbl to hiragana. */
extern unsigned short katakana[];
extern unsigned short hiragana[];
unsigned short *kanatbl = katakana;

/* Prints "progname: message" on stderr and exits with status 1. */
void error(char* fmt, ...);
138 
139 /**********************************************************************
140  *                                                                    *
141  *  Main Routines                                                     *
142  *                                                                    *
143  **********************************************************************/
144 /*---------------------------------------------------------------------
145     NAME
146 	main
147  ---------------------------------------------------------------------*/
main(c,v)148 main(c, v)
149     register int c;
150     register char *v[];
151 {
152     register char *s;
153     bool codeopt = 0;		/* code option is read */
154     FILE *iop;
155     int status;
156     int dev, ino = -1;
157     struct stat sbuf;
158     unsigned size = HOLDBUFSIZ;
159     void filter();
160     void check();
161     void buffalloc();
162     void setfunc();
163 
164     progname = *v++;
165     /*
166      * Process options here.
167      */
168     for (; --c; v++) {
169 	/*
170 	 * With -, input is taken from stdin like cat(1).
171 	 */
172 	if ((*v)[0] != '-' || (*v)[1] == '\0')
173 	    break;
174 	/*
175 	 * Size of hold buf can be changed with "-b size".
176 	 */
177 	if (strcmp(*v, "-b") == 0) {
178 	    if (--c == 0)
179 		error("%s option must have an argument", *v);
180 	    v++;
181 	    if ((size = atoi(*v)) <= 0)
182 		error("bad buffer size");
183 	    continue;
184 	}
185 	/*
186 	 * Options:
187 	 */
188 	for (s = *v + 1; *s; s++) {
189 	    if (strchr("esdj7k8", *s)) {
190 		if (codeopt)
191 		    error("%s: duplicate code specification", *v);
192 		codeopt = 1;
193 		if (s[1] && strchr("esdj7k8", s[1]))
194 		    /*
195 		     * Input code:  e, s, j, 7, k or 8.
196 		     */
197 		    switch (*s++) {
198 		    case 'e':	/* EUC with JIS */
199 			incode = EUC | NONASCII;
200 			break;
201 		    case 's':	/* shift-JIS with JIS */
202 			incode = SJIS | NONASCII;
203 			break;
204 		    case 'd':	/* DEC with JIS */
205 			incode = DEC | NONASCII;
206 			break;
207 		    case 'j':	/* JIS */
208 		    case '7':	/* equivalent to 'j' */
209 		    case 'k':	/* JIS */
210 			incode = JIS;
211 			break;
212 		    case '8':	/* 8-bit JIS */
213 			incode = JIS | JIS8 | NONASCII;
214 			break;
215 		    }
216 		/*
217 		 * Output code:  e, s, d, jXY, 7XY, kXY or 8XY.
218 		 */
219 		switch (*s) {
220 		case 'e':	/* EUC */
221 		    outcode = EUC;
222 		    continue;
223 		case 's':	/* shift-JIS */
224 		    outcode = SJIS;
225 		    continue;
226 		case 'd':	/* EUC */
227 		    outcode = DEC;
228 		    continue;
229 		case 'j':	/* 7-bit JIS using SO & SI */
230 		case '7':	/* equivalent to 'j' */
231 		    outcode = JIS;
232 		    break;
233 		case 'k':	/* 7-bit JIS using "ESC(I" */
234 		    outcode = JIS | ESCI;
235 		    break;
236 		case '8':	/* 8-bit JIS */
237 		    outcode = JIS | JIS8;
238 		    break;
239 		}
240 		/*
241 		 * Process "XY" part of options j, 7, k & 8.
242 		 */
243 		if ((s[1] == 'B' || s[1] == '@' || s[1] == '+') &&
244 			(s[2] == 'B' || s[2] == 'J' || s[2] == 'H')) {
245 		    if (s[1] == '+')
246 			sprintf(shiftin, "\033&@\033$B");
247 		    else
248 			sprintf(shiftin, "\033$%c", s[1]);
249 		    sprintf(shiftout, "\033(%c", s[2]);
250 		    s += 2;
251 		}
252 		continue;
253 	    }
254 	    /*
255 	     * Other one letter options:
256 	     */
257 	    switch (*s) {
258 	    case 'c':		/* check */
259 		docheck = 1;
260 		break;
261 	    case 'h':		/* hiragana for hankaku->DEC */
262 		kanatbl = hiragana;
263 		break;
264 	    case 'n':		/* no gaiji */
265 		nogaiji = 1;
266 		break;
267 	    case 'v':		/* verbose */
268 		verbose = 1;
269 		break;
270 	    case 'x':		/* extended mode */
271 		extend = 1;
272 		break;
273 	    case 'z':		/* reduced mode */
274 		zenkaku = 1;
275 		break;
276 	    default:
277 		error("-%c: bad option", *s);
278 	    }
279 	}
280     }
281     if (extend && zenkaku)
282 	error("-x and -z can't go together");
283     if (!docheck) {
284 	buffalloc(size);	/* allocate hold buf */
285 	setfunc();
286     }
287     /*
288      * Get some info on output file.
289      */
290     if (fstat(fileno(stdout), &sbuf) == 0) {
291 	sbuf.st_mode &= S_IFMT;
292 	if (sbuf.st_mode != S_IFCHR && sbuf.st_mode != S_IFBLK) {
293 	    dev = sbuf.st_dev;
294 	    ino = sbuf.st_ino;
295 	}
296     }
297     /*
298      * Main loop.
299      */
300     status = 0;
301     do {
302 	if (c == 0 || strcmp(*v, "-") == 0) {
303 	    /*
304 	     * Stdin:  If tty and at EOF, clear EOF.
305 	     */
306 	    if (isatty(fileno(stdin)) && feof(stdin))
307 		clearerr(stdin);
308 	    iop = stdin;
309 	} else
310 	    /*
311 	     * Open a file.
312 	     */
313 	    if ((iop = fopen(*v, "r")) == NULL) {
314 		perror(*v);
315 		status |= 1;
316 		continue;
317 	    }
318 	if (c)
319 	    filename = *v;
320 	if (fstat(fileno(iop), &sbuf) == 0) {
321 	    /*
322 	     * Get some info on input file, and see if it is a
323 	     * directory.
324 	     */
325 	    if ((sbuf.st_mode & S_IFMT) == S_IFDIR) {
326 		fprintf(stderr,
327 			"%s: read error on %s: Is a directory\n",
328 			progname, c ? *v : "standard input");
329 		if (iop != stdin)
330 		    fclose(iop);
331 		status |= 1;
332 		continue;
333 	    }
334 	    /*
335 	     * Compare the info of input with that of output, and see
336 	     * if they are identical.
337 	     */
338 	    if ((sbuf.st_mode & S_IFMT) == S_IFREG &&
339 		    sbuf.st_dev == dev && sbuf.st_ino == ino) {
340 		fprintf(stderr, "%s: input %s is output\n", progname,
341 			c ? *v : "-");
342 		if (iop != stdin)
343 		    fclose(iop);
344 		status |= 1;
345 		continue;
346 	    }
347 	}
348 	/*
349 	 * Do the job here!
350 	 */
351 	if (docheck)
352 	    check(iop);
353 	else
354 	    filter(iop);
355 	if (iop != stdin)
356 	    fclose(iop);
357     } while (v++, --c > 0);
358     if (ferror(stdout))
359 	error("output write error");
360     return (status);
361 }
362 
363 /*---------------------------------------------------------------------
364     NAME
365 	error - print formatted error message on stderr and die
366  ---------------------------------------------------------------------*/
367 #include <stdarg.h>
368 
error(char * fmt,...)369 void error(char *fmt, ...)
370 {
371     va_list ap;
372 
373     va_start(ap, fmt);
374     fprintf(stderr, "%s: ", progname);
375     vfprintf(stderr, fmt, ap);
376     putc('\n', stderr);
377     va_end(ap);
378     exit(1);
379 }
380 
381 /**********************************************************************
382  *                                                                    *
383  *  Filter                                                            *
384  *                                                                    *
385  **********************************************************************/
/*
 * Shift states.  filter() resets all three to M_ASCII at the start of
 * each input; check() resets gsmode only.
 */
enum mode gsmode;		/* guess:  M_ASCII M_KANJI M_SO */
enum mode inmode;		/* input:  M_ASCII M_KANJI M_GAIJI
				 * M_SO M_ESCI */
enum mode outmode;		/* output: M_ASCII M_KANJI M_GAIJI
				 * M_SO M_ESCI */

/*
 * Escape sequences seen in the input, one bit per final character as
 * produced by bitflag() ('@' is bit 0).  showjis() prints them.
 */
unsigned long insi;		/* JIS shift-in sequence flag */
unsigned long inso;		/* JIS shift-out sequence flag
				 * including "ESC(I" */
unsigned long innj;		/* JIS 1990 sequence flag */
unsigned long ingj;		/* JIS 1990 aux flag */
397 
398 /*---------------------------------------------------------------------
399     NAME
400 	filter - filtering routine
401  ---------------------------------------------------------------------*/
filter(fp)402 void filter(fp)
403     FILE *fp;
404 {
405     register bool hold;
406     register unsigned code, c;
407     register int len;
408     char str[LENLINE];
409     unsigned guess();
410     bool append();
411     void flush();
412     unsigned out();
413     void showcode();
414 
415     code = incode ? incode : extend ? BIT8 : BIT8 & ~DEC;
416     gsmode = inmode = outmode = M_ASCII;
417     insi = inso = innj = ingj = 0;
418     hold = 0;
419     while (len = getstr(str, sizeof str, fp)) {
420 	if (!(code & NONASCII) && code & BIT8 ||
421 		code & (EUC | DEC) && code & SJIS && !(code & ASSUME)) {
422 	    /*
423 	     * So far, no kanji has been seen, or ambiguous.
424 	     */
425 	    c = guess(str, len);
426 	    code |= c & (JIS | NONASCII), code &= c | ~BIT8;
427 	    if (code & NONASCII && code & (EUC | DEC) && code & SJIS) {
428 		/*
429 		 * If ambiguous, store the line in hold buffer.
430 		 */
431 		if (append(str, len)) {
432 		    hold = 1;
433 		    continue;
434 		}
435 		/*
436 		 * When buffer is full, assume EUC/DEC.
437 		 */
438 		code |= ASSUME;
439 	    }
440 	}
441 	if (hold) {
442 	    /*
443 	     * Flush hold buffer.
444 	     */
445 	    flush(code);
446 	    hold = 0;
447 	}
448 	c = out(str, len, code);
449 	code |= c & JIS, code &= c | ~BIT8;
450     }
451     if (hold)
452 	/*
453 	 * Assume EUC.
454 	 */
455 	flush(code |= ASSUME);
456     if (verbose)
457 	showcode(code, stderr);
458 }
459 
460 /*---------------------------------------------------------------------
461     NAME
462 	check
463  ---------------------------------------------------------------------*/
check(fp)464 void check(fp)
465     FILE *fp;
466 {
467     register unsigned code, c;
468     register int len;
469     char str[LENLINE];
470     void showcode();
471     unsigned guess();
472 
473     code = extend ? BIT8 : BIT8 & ~DEC;
474     gsmode = M_ASCII;
475     insi = inso = innj = ingj = 0;
476     while (len = getstr(str, sizeof str, fp)) {
477 	c = guess(str, len);
478 	code |= c & (JIS | NONASCII), code &= c | ~BIT8;
479 	if (code & NONASCII && !(code & BIT8))
480 	    break;
481     }
482     showcode(code, stdout);
483 }
484 
485 /*---------------------------------------------------------------------
486     NAME
487 	showcode
488  ---------------------------------------------------------------------*/
showcode(code,fp)489 void showcode(code, fp)
490     register unsigned code;
491     register FILE *fp;
492 {
493     char *s;
494     void showjis();
495 
496     if (filename)
497 	if (fprintf(fp, "%s:\t", filename) < 9)
498 	    putc('\t', fp);
499     if (!(code & NONASCII)) {
500 	/*
501 	 * 7-bit JIS / ASCII.
502 	 */
503 	if (code & JIS) {
504 	    showjis('7', fp);
505 	    putc('\n', fp);
506 	} else
507 	    fputs("ASCII\n", fp);
508 	return;
509     } else if (code & (EUC | DEC)) {
510 	s = code & EUC ? code & DEC ? "EUC/DEC" : "EUC" : "DEC";
511 	if (code & SJIS) {
512 	    /*
513 	     * Ambiguous.
514 	     */
515 	    fprintf(fp, "ambiguous (%s", s);
516 	    if (code & JIS8) {
517 		fputs(code & JIS ?
518 			" with 7-bit JIS, or " : ", shift-JIS or ", fp);
519 		showjis('8', fp);
520 		if (code & ASSUME)
521 		    fprintf(fp, "; assumed %s",
522 			    code & JIS ? "the former" : s);
523 		fputs(")\n", fp);
524 		return;
525 	    }
526 	    fputs(" or shift-JIS", fp);
527 	    if (code & ASSUME)
528 		fprintf(fp, "; assumed %s", s);
529 	    fputs(")", fp);
530 	} else
531 	    /*
532 	     * EUC/DEC.
533 	     */
534 	    fputs(s, fp);
535     } else if (code & JIS8) {
536 	/*
537 	 * 8-bit JIS / shift-JIS or 8-bit JIS.
538 	 */
539 	if (!(code & JIS))
540 	    fputs("shift-JIS or ", fp);
541 	showjis('8', fp);
542 	putc('\n', fp);
543 	return;
544     } else if (code & SJIS)
545 	/*
546 	 * Shift-JIS.
547 	 */
548 	fputs("shift-JIS", fp);
549     else {
550 	/*
551 	 * Non-ASCII deteced but neither EUC/DEC nor SJIS.
552 	 */
553 	fputs("data\n", fp);
554 	return;
555     }
556     if (code & JIS) {
557 	fputs(" with ", fp);
558 	showjis('7', fp);
559     }
560     putc('\n', fp);
561 }
562 
563 /*---------------------------------------------------------------------
564     NAME
565 	showjis
566  ---------------------------------------------------------------------*/
showjis(bit,fp)567 void showjis(bit, fp)
568     int bit;			/* 8-bit or 7-bit */
569     FILE *fp;
570 {
571     bool comma;
572     bool showesc();
573 
574     fprintf(fp, "%c-bit JIS [", bit);
575     comma = showesc("ESC$", insi, 0, fp);
576     comma = showesc("ESC&@ESC$", innj, comma, fp);
577     comma = showesc("ESC(", inso, comma, fp);
578     showesc("ESC$(", ingj, comma, fp);
579     putc(']', fp);
580 }
581 
/*---------------------------------------------------------------------
    NAME
	showesc
    DESCRIPTION
	For every bit set in mask, lowest bit first, prints str
	followed by the character '@' + bit number.  Entries are
	separated by ", "; a separator is also written before the
	first entry when comma is non-zero on entry.
    RETURNS
	Non-zero if comma was non-zero or at least one entry was
	printed, i.e. the value to pass as comma to the next call.
 ---------------------------------------------------------------------*/
bool showesc(str, mask, comma, fp)
    char *str;
    unsigned long mask;
    bool comma;
    FILE *fp;
{
    int final;			/* final character for the lowest bit */

    /* Consume the mask from the low end until no bits are left. */
    for (final = '@'; mask != 0; mask >>= 1, final++) {
	if ((mask & 1UL) == 0)
	    continue;
	if (comma)
	    fputs(", ", fp);
	comma = 1;
	fputs(str, fp);
	putc(final, fp);
    }
    return (comma);
}
606 
/*---------------------------------------------------------------------
    NAME
	getstr
    DESCRIPTION
	Reads from fp into str until a newline has been stored, n - 1
	bytes have been stored, or end of file is reached.  The data
	is NOT NUL-terminated.
    RETURNS
	The number of bytes stored; 0 at end of file (or when n <= 1).
 ---------------------------------------------------------------------*/
int getstr(str, n, fp)
    char *str;
    int n;
    FILE *fp;
{
    int count = 0;
    int ch;

    while (count < n - 1) {
	ch = getc(fp);
	if (ch == EOF)
	    break;
	str[count++] = ch;
	if (ch == '\n')
	    break;		/* newline is kept in the buffer */
    }
    return (count);
}
624 
625 /**********************************************************************
626  *                                                                    *
627  *  Hold Buffer Operations                                            *
628  *                                                                    *
629  **********************************************************************/
/*
 * Hold buffer:  holdbuf is its start and bufend one past its last byte
 * (both set by buffalloc()); bufp is where append() stores next and is
 * moved back to holdbuf by flush().
 */
char *holdbuf, *bufend;
char *bufp;
632 
633 /*---------------------------------------------------------------------
634     NAME
635 	buffalloc
636  ---------------------------------------------------------------------*/
buffalloc(len)637 void buffalloc(len)
638     unsigned len;
639 {
640     if ((bufp = holdbuf = (char *) malloc(len)) == NULL)
641 	error("out of memory");
642     bufend = holdbuf + len;
643 }
644 
645 /*---------------------------------------------------------------------
646     NAME
647 	append
648  ---------------------------------------------------------------------*/
append(s,len)649 bool append(s, len)
650     register char *s;
651     register int len;
652 {
653     if (bufp + len > bufend)
654 	return (0);
655     for (; len; --len)
656 	*bufp++ = *(u_char *) s++;
657     return (1);
658 }
659 
660 /*---------------------------------------------------------------------
661     NAME
662 	flush
663  ---------------------------------------------------------------------*/
flush(code)664 void flush(code)
665     unsigned code;
666 {
667     unsigned out();
668 
669     out(holdbuf, bufp - holdbuf, code);
670     bufp = holdbuf;
671 }
672 
673 /**********************************************************************
674  *                                                                    *
675  *  General                                                           *
676  *                                                                    *
677  **********************************************************************/
/*---------------------------------------------------------------------
    NAME
	compare
    DESCRIPTION
	Tests whether the NUL-terminated string s is a prefix of str.
	str is examined only up to the first differing byte or the
	length of s, whichever comes first.
    RETURNS
	1 if str starts with s (always true for an empty s), else 0.
 ---------------------------------------------------------------------*/
bool compare(s, str)
    char *s, *str;
{
    return (strncmp(s, str, strlen(s)) == 0);
}
690 
691 /**********************************************************************
692  *                                                                    *
693  *  Guessing                                                          *
694  *                                                                    *
695  **********************************************************************/
/*---------------------------------------------------------------------
    NAME
	guess - distinguish code system
    DESCRIPTION
	Scans the len bytes at str and reports which code systems the
	chunk is consistent with.  JIS escape sequences, SO and SI are
	tracked in gsmode (kept across calls); the sequences seen are
	recorded in insi, inso, innj and ingj.  Bytes seen outside
	M_ASCII mode are skipped.  All other bytes are run through
	three independent state machines (EUC, DEC, shift-JIS) whose
	state is 1 = expecting a first byte, 2 = expecting a following
	byte, 0 = ruled out.  The machines restart at 1 (DEC at 0
	unless -x) on every call.
    RETURNS
	A code word with:  JIS if an escape sequence or SO was
	accepted; NONASCII if a byte with the high bit set was seen in
	ASCII mode; EUC, DEC, SJIS for each machine that ended in
	state 1; JIS8 together with SJIS when no shift-JIS zenkaku
	first byte was seen and -z is not in effect.
    NOTES
	NOTE(review): compare() and the [2] lookups after ESC can look
	at bytes beyond str + len when an escape sequence is cut off
	at the end of the chunk; str is not NUL-terminated by
	getstr().  Confirm that this is harmless for all callers.
 ---------------------------------------------------------------------*/
unsigned guess(str, len)
    char *str;
    int len;
{
    register char *s;
    register int euc, sjis, dec;	/* machine states: 0, 1 or 2 */
    bool jis8;				/* still may be 8-bit JIS */
    register unsigned code;
    register int i;			/* bytes consumed this round */
    enum mode old;

    euc = sjis = 1;
    dec = extend ? 1 : 0;
    jis8 = 1;
    code = 0;
    for (s = str; s < str + len; s += i) {
	i = 1;
	/*
	 * Shift handling.  "continue" skips the per-code checks
	 * below for this byte; "break" lets them run.
	 */
	switch (*(u_char *) s) {
	case ESC:
	    if (gsmode == M_SO)
		continue;
	    old = gsmode;
	    if (compare("$B", s + 1) || compare("$@", s + 1)) {
		gsmode = M_KANJI;	/* kanji */
		insi |= bitflag(((u_char *) s)[2]);
		i = 3;
	    } else if (compare("&@\033$B", s + 1)) {
		gsmode = M_KANJI;	/* kanji 1990 */
		innj |= bitflag('B');
		i = 6;
	    } else if (compare("(B", s + 1) ||
		    compare("(J", s + 1) || compare("(H", s + 1)) {
		gsmode = M_ASCII;	/* kanji end */
		inso |= bitflag(((u_char *) s)[2]);
		i = 3;
	    } else if (compare("(I", s + 1)) {
		gsmode = M_KANJI;	/* "ESC(I" */
		inso |= bitflag('I');
		i = 3;
	    } else if (compare("$(D", s + 1)) {
		gsmode = M_KANJI;	/* gaiji */
		ingj |= bitflag('D');
		i = 4;
	    } else
		break;			/* not a known sequence */
	    code |= JIS;
	    /*
	     * Only a sequence met in ASCII mode goes on to the
	     * per-code checks below (with *s still being ESC).
	     */
	    if (old != M_ASCII)
		continue;
	    break;
	case SO:
	    if (gsmode == M_ASCII) {
		code |= JIS;
		gsmode = M_SO;
		break;
	    }
	    continue;
	case SI:
	    if (gsmode == M_SO) {
		gsmode = M_ASCII;
		continue;
	    }
	    /* fall thru */
	default:
	    if (gsmode != M_ASCII)
		continue;
	    break;
	}
	if (*(u_char *) s & 0x80)
	    code |= NONASCII;
	switch (euc) {
	case 1:
	    /*
	     * EUC first byte.
	     */
	    if (*(u_char *) s & 0x80) {
		if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff ||
			!zenkaku && *(u_char *) s == SS2) {
		    euc = 2;
		    break;
		}
		/*
		 * The else below pairs with the inner if:  in
		 * extended mode SS3 starts a sequence and other
		 * bytes below 0xa0 are tolerated.
		 */
		if (extend)
		    if (*(u_char *) s == SS3) {
			euc = 2;
			break;
		    } else if (*(u_char *) s < 0xa0)
			break;
		euc = 0;	/* not EUC */
	    }
	    break;
	case 2:
	    /*
	     * EUC second byte or third byte of CS3.
	     */
	    if (((u_char *) s)[-1] == SS2) {
		if (0xa0 < *(u_char *) s &&
			*(u_char *) s < (extend ? 0xff : 0xe0)) {
		    euc = 1;	/* hankaku kana */
		    break;
		}
	    } else
		if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff) {
		    /*
		     * After SS3 stay in state 2 for one more byte.
		     */
		    if (((u_char *) s)[-1] != SS3)
			euc = 1;/* zenkaku */
		    break;
		}
	    euc = 0;		/* not EUC */
	    break;
	}
	if (extend)
	    switch (dec) {
	    case 1:
		/*
		 * DEC first byte.
		 */
		if (*(u_char *) s & 0x80) {
		    if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff) {
			dec = 2;
			break;
		    } else if (*(u_char *) s < 0xa0)
			break;
		    dec = 0;	/* not DEC */
		}
		break;
	    case 2:
		/*
		 * DEC second byte.
		 */
		if (0x20 < (*(u_char *) s & 0x7f) &&
			(*(u_char *) s & 0x7f) < 0x7f) {
		    dec = 1;
		} else
		    dec = 0;	/* not DEC */
		break;
	    }
	switch (sjis) {
	case 1:
	    /*
	     * shift-JIS first byte.
	     */
	    if (*(u_char *) s & 0x80) {
		if (0xa0 < *(u_char *) s && *(u_char *) s < 0xe0) {
		    if (!zenkaku)
			break;	/* hankaku */
		} else if (*(u_char *) s != 0x80 &&
			*(u_char *) s != 0xa0 &&
			*(u_char *) s <= (extend ? 0xfc : 0xef)) {
		    sjis = 2;	/* zenkaku */
		    jis8 = 0;
		    break;
		}
		sjis = 0;	/* not SJIS */
	    }
	    break;
	case 2:
	    /*
	     * shift-JIS second byte.
	     */
	    if (0x40 <= *(u_char *) s && *(u_char *) s != 0x7f &&
		    *(u_char *) s <= 0xfc)
		sjis = 1;
	    else
		sjis = 0;	/* not SJIS */
	    break;
	}
    }
    /*
     * A machine left in state 2 stopped in the middle of a
     * character, so only state 1 counts as a match.
     */
    if (euc == 1)
	code |= EUC;
    if (dec == 1)
	code |= DEC;
    if (sjis == 1)
	code |= zenkaku || !jis8 ? SJIS : SJIS | JIS8;
    return (code);
}
873 
874 /**********************************************************************
875  *                                                                    *
876  *  Output Routines                                                   *
877  *                                                                    *
878  **********************************************************************/
/*
 * Output hooks, set by setfunc() according to outcode and called from
 * out() and outsjis():  outascii(c), outkanji(c1, c2),
 * outgaiji(c1, c2) and outkana(c).
 */
void (*outascii)(), (*outkanji)(), (*outgaiji)(), (*outkana)();
880 
/*---------------------------------------------------------------------
    NAME
	out
    DESCRIPTION
	Converts the len bytes at str, taken to be in the code systems
	allowed by "code", and emits them through the output hooks
	outascii, outkanji, outgaiji and outkana.  JIS escape
	sequences, SO and SI switch inmode (kept across calls) and are
	recorded in insi, inso, innj and ingj; they are not copied to
	the output.  In ASCII mode, bytes with the high bit set are
	decoded as EUC/DEC when code allows either, otherwise as
	shift-JIS / 8-bit JIS.
    RETURNS
	The code word, updated by what was seen:  JIS is added when an
	escape sequence or SO was accepted, EUC is cleared on a DEC
	gaiji, DEC is cleared on an SS2 or SS3 sequence, and JIS8 is
	cleared on a shift-JIS kanji.
    NOTES
	NOTE(review): compare() and the [1] / [2] lookups read the
	bytes following s without checking them against str + len, so
	a sequence or multi-byte character cut off at the end of the
	chunk looks at data beyond it.  Confirm against callers.
 ---------------------------------------------------------------------*/
unsigned out(str, len, code)
    char *str;
    int len;
    register unsigned code;
{
    register char *s;
    register int i;		/* bytes consumed this round */
    void outsjis();

    for (s = str; s < str + len; s += i) {
	i = 1;
	/*
	 * Shift handling.  "continue" means the byte(s) were
	 * consumed as a shift; "break" falls to normal output.
	 */
	switch (*(u_char *) s) {
	case ESC:
	    if (inmode == M_SO)
		break;
	    if (compare("$B", s + 1) || compare("$@", s + 1)) {
		inmode = M_KANJI;	/* kanji */
		insi |= bitflag(((u_char *) s)[2]);
		i = 3;
	    } else if (compare("&@\033$B", s + 1)) {
		inmode = M_KANJI;	/* kanji 1990 */
		innj |= bitflag('B');
		i = 6;
	    } else if (compare("(B", s + 1) || compare("(J", s + 1) ||
		    compare("(H", s + 1)) {
		inmode = M_ASCII;	/* kanji end */
		inso |= bitflag(((u_char *) s)[2]);
		i = 3;
	    } else if (compare("(I", s + 1)) {
		inmode = M_ESCI;	/* "ESC(I" */
		inso |= bitflag('I');
		i = 3;
	    } else if (compare("$(D", s + 1)) {
		inmode = M_GAIJI;	/* gaiji */
		ingj |= bitflag('D');
		i = 4;
	    } else
		break;
	    code |= JIS;
	    continue;
	case SO:
	    if (inmode == M_ASCII) {
		code |= JIS;
		inmode = M_SO;
		continue;
	    }
	    break;
	case SI:
	    if (inmode == M_SO) {
		inmode = M_ASCII;
		continue;
	    }
	    break;
	}
	if (inmode != M_ASCII) {
	    /*
	     * Shifted JIS text.  Bytes outside 0x21-0x7e are not
	     * handled here and fall to outascii at the bottom.
	     */
	    if (0x20 < ((u_char *) s)[0] && ((u_char *) s)[0] < 0x7f)
		switch (inmode) {
		case M_KANJI:
		    (*outkanji)(((u_char *) s)[0],
			    ((u_char *) s)[1] & 0x7f);
		    i = 2;
		    continue;
		case M_GAIJI:
		    (*outgaiji)(((u_char *) s)[0],
			    ((u_char *) s)[1] & 0x7f);
		    i = 2;
		    continue;
		case M_SO:
		case M_ESCI:
		    (*outkana)(((u_char *) s)[0]);
		    continue;
		}
	} else if (((u_char *) s)[0] & 0x80)
	    if (code & (EUC | DEC)) {
		/*
		 * EUC or DEC:
		 */
		if (0xa0 < ((u_char *) s)[0] &&
			((u_char *) s)[0] < 0xff) {
		    if (!(((u_char *) s)[1] & 0x80) && code & DEC) {
			/*
			 * DEC gaiji:
			 */
			code &= ~EUC;	/* definitely DEC  */
			(*outgaiji)(((u_char *) s)[0] & 0x7f,
				((u_char *) s)[1]);
		    } else
			/*
			 * EUC code set 1 (kanji), DEC kanji:
			 */
			(*outkanji)(((u_char *) s)[0] & 0x7f,
				((u_char *) s)[1] & 0x7f);
		} else if (((u_char *) s)[0] == SS2 && code & EUC &&
			0xa0 < ((u_char *) s)[1] &&
			((u_char *) s)[1] < 0xff) {
		    /*
		     * EUC code set 2 (hankaku kana):
		     */
		    code &= ~DEC;	/* probably EUC */
		    (*outkana)(((u_char *) s)[1] & 0x7f);
		} else if (((u_char *) s)[0] == SS3 && code & EUC &&
			0xa0 < ((u_char *) s)[1] &&
			((u_char *) s)[1] < 0xff &&
			0xa0 < ((u_char *) s)[2] &&
			((u_char *) s)[2] < 0xff) {
		    /*
		     * EUC code set 3 (gaiji):
		     */
		    code &= ~DEC;	/* probably EUC */
		    (*outgaiji)(((u_char *) s)[1] & 0x7f,
			    ((u_char *) s)[2] & 0x7f);
		    i = 3;
		    continue;
		} else {
		    /*
		     * Control character (C1):  copied through,
		     * except that it is dropped for shift-JIS output
		     * and, for EUC output, when it is SS2 or SS3.
		     */
		    if (outcode != SJIS && (outcode != EUC ||
			    ((u_char *) s)[0] != SS2 &&
			    ((u_char *) s)[0] != SS3))
			putchar(((u_char *) s)[0]);
		    continue;
		}
		/* Two-byte cases above end up here. */
		i = 2;
		continue;
	    } else if (code & (SJIS | JIS8)) {
		/*
		 * Shift-JIS or JIS8:
		 */
		if (!(code & SJIS) || 0xa0 < ((u_char *) s)[0] &&
			((u_char *) s)[0] < 0xe0)
		    /*
		     * Hankaku kana:
		     */
		    (*outkana)(((u_char *) s)[0] & 0x7f);
		else {
		    /*
		     * Shift-JIS kanji:
		     */
		    code &= ~JIS8;	/* definitely shift-JIS */
		    outsjis(((u_char *) s)[0], ((u_char *) s)[1]);
		    i = 2;
		}
		continue;
	    }
	/* Everything not consumed above is passed on as is. */
	(*outascii)(((u_char *) s)[0]);
    }
    return (code);
}
1034 
1035 /*---------------------------------------------------------------------
1036     NAME
1037 	outsjis
1038  ---------------------------------------------------------------------*/
outsjis(c1,c2)1039 void outsjis(c1, c2)
1040     register int c1, c2;
1041 {
1042     register int c;
1043 
1044     c = c1 * 2 - (c1 <= 0x9f ? 0x00e1 : (c1 < 0xf0 ? 0x0161 : 0x01bf));
1045     if (c2 < 0x9f)
1046 	c2 = c2 - (c2 > 0x7f ? 0x20 : 0x1f);
1047     else {
1048 	c2 = c2 - 0x7e;
1049 	c++;
1050     }
1051     (*(c1 <= 0xef ? outkanji : outgaiji))(c, c2);
1052 }
1053 
1054 /**********************************************************************
1055  *                                                                    *
1056  *  Conversion Routines                                               *
1057  *                                                                    *
1058  **********************************************************************/
/*
 * Conversion routines that setfunc() installs in the output hooks,
 * grouped by output code (plain, JIS, EUC, shift-JIS, DEC).
 */
void outchar();
void jisascii(), jiskanji(), jisgaiji();
void jiskana(), jiskanak(), jiskana8();
void euckanji(), eucgaiji(), euckana();
void sjiskanji(), sjisgaiji(), sjiskana();
void decascii(), deckanji(), decgaiji(), deckana();

int lastkana = 0;		/* last hankaku kana for DEC */
1067 
1068 /*---------------------------------------------------------------------
1069     NAME
1070 	setfunc
1071  ---------------------------------------------------------------------*/
setfunc()1072 void setfunc()
1073 {
1074     switch (outcode) {
1075     case EUC:
1076 	outascii = outchar;
1077 	outkanji = euckanji;
1078 	outgaiji = eucgaiji;
1079 	outkana = euckana;
1080 	break;
1081     case DEC:
1082 	outascii = decascii;
1083 	outkanji = deckanji;
1084 	outgaiji = decgaiji;
1085 	outkana = deckana;
1086 	break;
1087     case SJIS:
1088 	outascii = outchar;
1089 	outkanji = sjiskanji;
1090 	outgaiji = sjisgaiji;
1091 	outkana = sjiskana;
1092 	break;
1093     default:
1094 	outascii = jisascii;
1095 	outkanji = jiskanji;
1096 	outgaiji = jisgaiji;
1097 	switch (outcode) {
1098 	case JIS:		/* mode:  M_ASCII M_KANJI M_GAIJI
1099 				 * M_SO */
1100 	    outkana = jiskana;
1101 	    break;
1102 	case JIS | ESCI:	/* mode:  M_ASCII M_KANJI M_GAIJI
1103 				 * M_ESCI */
1104 	    outkana = jiskanak;
1105 	    break;
1106 	case JIS | JIS8:	/* mode:  M_ASCII M_KANJI M_GAIJI */
1107 	    outkana = jiskana8;
1108 	    break;
1109 	}
1110 	break;
1111     }
1112 }
1113 
/*---------------------------------------------------------------------
    NAME
	outchar
    DESCRIPTION
	Write the character c to standard output unchanged.
 ---------------------------------------------------------------------*/
void outchar(c)
    register int c;
{
    putc(c, stdout);
}
1123 
1124 /*---------------------------------------------------------------------
1125     NAME
1126 	jisascii
1127  ---------------------------------------------------------------------*/
jisascii(c)1128 void jisascii(c)
1129     register int c;
1130 {
1131     switch (outmode) {
1132     case M_ASCII:
1133 	break;
1134     case M_SO:
1135 	putchar(SI);
1136 	outmode = M_ASCII;
1137 	break;
1138     default:
1139 	fputs(shiftout, stdout);
1140 	outmode = M_ASCII;
1141 	break;
1142     }
1143     putchar(c);
1144 }
1145 
1146 /*---------------------------------------------------------------------
1147     NAME
1148 	jiskanji
1149  ---------------------------------------------------------------------*/
jiskanji(c1,c2)1150 void jiskanji(c1, c2)
1151     register int c1, c2;
1152 {
1153     if (outmode != M_KANJI) {
1154 	if (outmode == M_SO)
1155 	    putchar(SI);
1156 	fputs(shiftin, stdout);
1157 	outmode = M_KANJI;
1158     }
1159     putchar(c1);
1160     putchar(c2);
1161 }
1162 
1163 /*---------------------------------------------------------------------
1164     NAME
1165 	jisgaiji
1166  ---------------------------------------------------------------------*/
jisgaiji(c1,c2)1167 void jisgaiji(c1, c2)
1168     register int c1, c2;
1169 {
1170     if (nogaiji)
1171 	jiskanji(ZENPAD >> 8, ZENPAD & 0xff);
1172     else {
1173 	if (outmode != M_GAIJI) {
1174 	    if (outmode == M_SO)
1175 		putchar(SI);
1176 	    fputs("\033$(D", stdout);
1177 	    outmode = M_GAIJI;
1178 	}
1179 	putchar(c1);
1180 	putchar(c2);
1181     }
1182 }
1183 
1184 /*---------------------------------------------------------------------
1185     NAME
1186 	jiskana
1187  ---------------------------------------------------------------------*/
jiskana(c)1188 void jiskana(c)
1189     register int c;
1190 {
1191     if (outmode != M_SO) {
1192 	if (outmode != M_ASCII)
1193 	    fputs(shiftout, stdout);
1194 	putchar(SO);
1195 	outmode = M_SO;
1196     }
1197     putchar(!nogaiji || 0x20 < c && c < 0x60 ? c : HANPAD);
1198 }
1199 
1200 /*---------------------------------------------------------------------
1201     NAME
1202 	jiskanak
1203  ---------------------------------------------------------------------*/
jiskanak(c)1204 void jiskanak(c)
1205     register int c;
1206 {
1207     if (outmode != M_ESCI) {
1208 	fputs("\033(I", stdout);
1209 	outmode = M_ESCI;
1210     }
1211     putchar(!nogaiji || 0x20 < c && c < 0x60 ? c : HANPAD);
1212 }
1213 
1214 /*---------------------------------------------------------------------
1215     NAME
1216 	jiskana8
1217  ---------------------------------------------------------------------*/
jiskana8(c)1218 void jiskana8(c)
1219     register int c;
1220 {
1221     if (outmode != M_ASCII) {
1222 	fputs(shiftout, stdout);
1223 	outmode = M_ASCII;
1224     }
1225     putchar((!nogaiji || 0x20 < c && c < 0x60 ? c : HANPAD) | 0x80);
1226 }
1227 
/*---------------------------------------------------------------------
    NAME
	euckanji
    DESCRIPTION
	Write the two-byte character c1 c2 in EUC output: each byte
	is written with its high bit set.
 ---------------------------------------------------------------------*/
void euckanji(c1, c2)
    register int c1, c2;
{
    int hi = c1 | 0x80;
    int lo = c2 | 0x80;

    putchar(hi);
    putchar(lo);
}
1238 
1239 /*---------------------------------------------------------------------
1240     NAME
1241 	eucgaiji
1242  ---------------------------------------------------------------------*/
eucgaiji(c1,c2)1243 void eucgaiji(c1, c2)
1244     register int c1, c2;
1245 {
1246     if (nogaiji) {
1247 	putchar(ZENPAD >> 8 | 0x80);
1248 	putchar(ZENPAD & 0xff | 0x80);
1249     } else {
1250 	putchar(SS3);
1251 	putchar(c1 | 0x80);
1252 	putchar(c2 | 0x80);
1253     }
1254 }
1255 
1256 /*---------------------------------------------------------------------
1257     NAME
1258 	euckana
1259  ---------------------------------------------------------------------*/
euckana(c)1260 void euckana(c)
1261     register int c;
1262 {
1263     putchar(SS2);
1264     putchar((!nogaiji || 0x20 < c && c < 0x60 ? c : HANPAD) | 0x80);
1265 }
1266 
/*---------------------------------------------------------------------
    NAME
	sjiskanji
    DESCRIPTION
	Write the two-byte character c1 c2 in shift-JIS output.  Two
	consecutive values of c1 share one lead byte; the parity of
	c1 selects which half of the trail-byte range is used.
 ---------------------------------------------------------------------*/
void sjiskanji(c1, c2)
    register int c1, c2;
{
    int lead, trail;

    lead = ((c1 - 1) >> 1) + (c1 <= 0x5e ? 0x71 : 0xb1);

    if (c1 & 1)
	trail = c2 + (c2 < 0x60 ? 0x1f : 0x20);
    else
	trail = c2 + 0x7e;

    putchar(lead);
    putchar(trail);
}
1277 
1278 /*---------------------------------------------------------------------
1279     NAME
1280 	sjisgaiji
1281     DESCRIPTION
1282 	Characters are mapped as follows:
1283 	    0x2121 to 0x3a7e --> 0xf040 to 0xfcfc
1284 	    0x3b21 to 0x7e7e --> 0xfcfc
1285  ---------------------------------------------------------------------*/
sjisgaiji(c1,c2)1286 void sjisgaiji(c1, c2)
1287     register int c1, c2;
1288 {
1289     if (nogaiji)
1290 	sjiskanji(ZENPAD >> 8, ZENPAD & 0xff);
1291     else {
1292 	putchar(c1 < 0x3b ? (c1 - 1 >> 1) + 0xe0 : 0xfc);
1293 	putchar(c1 < 0x3b ? c2 +
1294 		(c1 & 1 ? (c2 < 0x60 ? 0x1f : 0x20) : 0x7e) : 0xfc);
1295     }
1296 }
1297 
1298 /*---------------------------------------------------------------------
1299     NAME
1300 	sjiskana
1301  ---------------------------------------------------------------------*/
sjiskana(c)1302 void sjiskana(c)
1303     register int c;
1304 {
1305     putchar(0x20 < c && c < 0x60 ? c | 0x80 : HANPAD | 0x80);
1306 }
1307 
1308 /*---------------------------------------------------------------------
1309     NAME
1310 	decascii
1311  ---------------------------------------------------------------------*/
decascii(c)1312 void decascii(c)
1313     register int c;
1314 {
1315     if (lastkana) {
1316 	putchar(kanatbl[lastkana] >> 8);
1317 	putchar(kanatbl[lastkana] & 0xff);
1318 	lastkana = 0;
1319     }
1320     putchar(c);
1321 }
1322 
1323 /*---------------------------------------------------------------------
1324     NAME
1325 	deckanji
1326  ---------------------------------------------------------------------*/
deckanji(c1,c2)1327 void deckanji(c1, c2)
1328     register int c1, c2;
1329 {
1330     if (lastkana) {
1331 	putchar(kanatbl[lastkana] >> 8);
1332 	putchar(kanatbl[lastkana] & 0xff);
1333 	lastkana = 0;
1334     }
1335     putchar(c1 | 0x80);
1336     putchar(c2 | 0x80);
1337 }
1338 
1339 /*---------------------------------------------------------------------
1340     NAME
1341 	decgaiji
1342  ---------------------------------------------------------------------*/
decgaiji(c1,c2)1343 void decgaiji(c1, c2)
1344     register int c1, c2;
1345 {
1346     if (lastkana) {
1347 	putchar(kanatbl[lastkana] >> 8);
1348 	putchar(kanatbl[lastkana] & 0xff);
1349 	lastkana = 0;
1350     }
1351     if (nogaiji) {
1352 	putchar(ZENPAD >> 8 | 0x80);
1353 	putchar(ZENPAD & 0xff | 0x80);
1354     } else {
1355 	putchar(c1 | 0x80);
1356 	putchar(c2);
1357     }
1358 }
1359 
1360 /*---------------------------------------------------------------------
1361     NAME
1362 	deckana
1363  ---------------------------------------------------------------------*/
deckana(c)1364 void deckana(c)
1365     register int c;
1366 {
1367     register int cc;
1368     int i;
1369     extern unsigned char dakuon[];
1370 
1371     if (lastkana) {
1372 	cc = kanatbl[lastkana];
1373 	if ((c == 0x5e || c == 0x5f) &&
1374 		(i = dakuon[lastkana] & (c == 0x5e ? 1 : 2))) {
1375 	    cc += i;
1376 	    c = -1;
1377 	}
1378 	putchar(cc >> 8);
1379 	putchar(cc & 0xff);
1380     }
1381     if (c < 0x21 || 0x5f < c) {
1382 	if (c > 0) {
1383 	    putchar(ZENPAD >> 8);
1384 	    putchar(ZENPAD & 0xff);
1385 	}
1386 	lastkana = 0;
1387     } else
1388 	lastkana = c - 0x20;
1389 }
1390 
1391 /*---------------------------------------------------------------------
1392     TYPE
1393 	table
1394     NAME
1395 	katakana, hiragana, dakuon - JIS X0201 kana to JIS kanji in DEC
1396  ---------------------------------------------------------------------*/
/*
 * Two-byte katakana codes, indexed by (JIS X0201 kana code - 0x20).
 * Entries are grouped by kana row; index 0 is unused.
 */
unsigned short katakana[] = {
    /* 0x00: unused */
    0,
    /* 0x01-0x05: punctuation */
    0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6,
    /* 0x06: wo */
    0xa5f2,
    /* 0x07-0x0b: small a i u e o */
    0xa5a1, 0xa5a3, 0xa5a5, 0xa5a7, 0xa5a9,
    /* 0x0c-0x0f: small ya yu yo, small tsu */
    0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3,
    /* 0x10: prolonged sound mark */
    0xa1bc,
    /* 0x11-0x15: a row */
    0xa5a2, 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa,
    /* 0x16-0x1a: ka row */
    0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
    /* 0x1b-0x1f: sa row */
    0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd,
    /* 0x20-0x24: ta row */
    0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6, 0xa5c8,
    /* 0x25-0x29: na row */
    0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce,
    /* 0x2a-0x2e: ha row */
    0xa5cf, 0xa5d2, 0xa5d5, 0xa5d8, 0xa5db,
    /* 0x2f-0x33: ma row */
    0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2,
    /* 0x34-0x36: ya yu yo */
    0xa5e4, 0xa5e6, 0xa5e8,
    /* 0x37-0x3b: ra row */
    0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed,
    /* 0x3c-0x3d: wa, n */
    0xa5ef, 0xa5f3,
    /* 0x3e-0x3f: voiced and semi-voiced sound marks */
    0xa1ab, 0xa1ac,
};
1407 
/*
 * Two-byte hiragana codes, indexed by (JIS X0201 kana code - 0x20).
 * Entries are grouped by kana row; index 0 is unused.
 */
unsigned short hiragana[] = {
    /* 0x00: unused */
    0,
    /* 0x01-0x05: punctuation */
    0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6,
    /* 0x06: wo */
    0xa4f2,
    /* 0x07-0x0b: small a i u e o */
    0xa4a1, 0xa4a3, 0xa4a5, 0xa4a7, 0xa4a9,
    /* 0x0c-0x0f: small ya yu yo, small tsu */
    0xa4e3, 0xa4e5, 0xa4e7, 0xa4c3,
    /* 0x10: prolonged sound mark */
    0xa1bc,
    /* 0x11-0x15: a row */
    0xa4a2, 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa,
    /* 0x16-0x1a: ka row */
    0xa4ab, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
    /* 0x1b-0x1f: sa row */
    0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd,
    /* 0x20-0x24: ta row */
    0xa4bf, 0xa4c1, 0xa4c4, 0xa4c6, 0xa4c8,
    /* 0x25-0x29: na row */
    0xa4ca, 0xa4cb, 0xa4cc, 0xa4cd, 0xa4ce,
    /* 0x2a-0x2e: ha row */
    0xa4cf, 0xa4d2, 0xa4d5, 0xa4d8, 0xa4db,
    /* 0x2f-0x33: ma row */
    0xa4de, 0xa4df, 0xa4e0, 0xa4e1, 0xa4e2,
    /* 0x34-0x36: ya yu yo */
    0xa4e4, 0xa4e6, 0xa4e8,
    /* 0x37-0x3b: ra row */
    0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ed,
    /* 0x3c-0x3d: wa, n */
    0xa4ef, 0xa4f3,
    /* 0x3e-0x3f: voiced and semi-voiced sound marks */
    0xa1ab, 0xa1ac,
};
1418 
/*
 * Sound-mark flags, indexed like katakana[] and hiragana[].
 * Bit 1 (value 1): the kana can merge with a following 0x5e.
 * Bit 2 (value 2): the kana can merge with a following 0x5f.
 * deckana adds the selected bit value to the two-byte kana code.
 */
unsigned char dakuon[] = {
    /* 0x00-0x15: unused, punctuation, wo, small kana, a row */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x16-0x24: ka, sa and ta rows */
    1, 1, 1, 1, 1,
    1, 1, 1, 1, 1,
    1, 1, 1, 1, 1,
    /* 0x25-0x29: na row */
    0, 0, 0, 0, 0,
    /* 0x2a-0x2e: ha row */
    3, 3, 3, 3, 3,
    /* 0x2f-0x3f: ma, ya, ra rows, wa, n, sound marks */
    0, 0, 0, 0, 0,
    0, 0, 0,
    0, 0, 0, 0, 0,
    0, 0,
    0, 0,
};
1429