1 /*
2  * $Id: kakasi.c,v 1.41 2013-02-06 06:05:02 knok Exp $
3  * Copyright (C) 1992
4  * Hironobu Takahashi (takahasi@tiny.or.jp)
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either versions 2, or (at your option)
9  * any later version.
10  *
11  * This program is distributed in the hope that it will be useful
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with KAKASI, see the file COPYING.  If not, write to the Free
18  * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
19  * 02111-1307, USA.
20  */
21 /*
22   Modified by NOKUBI Takatsugu <knok@daionet.gr.jp>
23   1999/05/09
24      Fix kakasi_do returning no values.
25   1999/04/14
     Add more variable initialization routines.
27   1999/04/12
     Add initialization routine for some variables to function kakasi_getopt_argv.
29   1999/03/04
30      Rename PERLMOD macro to LIBRARY
31   1999/01/08
32       Add PERLMOD macro.
33 */
34 
35 #ifdef HAVE_CONFIG_H
36 # include <config.h>
37 #endif
38 
39 #include <stdio.h>
40 #ifdef HAVE_STRING_H
41 # include <string.h>
42 #else
43 # include <strings.h>
44 #endif
45 #ifdef HAVE_MALLOC_H
46 # include <malloc.h>
47 #endif
48 #include <stdlib.h>
49 #include "kakasi.h"
50 #ifdef LIBRARY
51 # include "libkakasi.h"
52 #endif
53 #ifdef KAKASI_SUPPORT_UTF8
54 #include <iconv.h>
55 extern void close_iconv();
56 
57 iconv_t fromutf8 = (iconv_t) -1;
58 iconv_t toutf8 = (iconv_t) -1;
59 #endif /* KAKASI_SUPPORT_UTF8 */
60 
/* FIXME: this macro should be removed in the future. */
62 #ifdef LIBRARY
63 #define KAKASI_ATTR
64 #else /* !LIBRARY */
65 #define KAKASI_ATTR static
66 #endif /* !LIBRARY */
67 
/* Romanization system; HEPBURN by default, switched to KUNREI by -rk. */
int romaji_type = HEPBURN;
/* Set by -t. */
int use_old_romaji_table = 0;
/* Set by -C: digest_out() upper-cases the first character of the result. */
int romaji_capitalize = 0;
/* Set by -U: digest_out() upper-cases every ASCII/JISROMAN letter of the result. */
int romaji_upcase = 0;
/* Set by -p. */
int heiki_mode = 0;
/* Set by -s (and by -w): enables put_separator(). */
int bunkatu_mode = 0;
/* Set by -f: digest_out() prints the source text followed by the parenthesized result. */
int furigana_mode = 0;
/* Set by -c, -w, -l and -L: digest() may skip characters listed in cr_eat_string. */
int cr_eat_mode = 0;
/* Set by -u: fflush(stdout) after each output step. */
int flush_mode = 0;
#ifdef WAKATIGAKI
/* Set by -w. */
int wakatigaki_mode = 0;
/* Set to 1 after a character of any other type has been passed through,
   cleared after ASCII/JISROMAN/GRAPHIC/KATAKANA/JIS83 input;
   put_separator() does nothing while it is set. */
int terminate_done = 0;
/* Only reset in this file; not otherwise referenced here. */
int wo_mode = 0;
#endif /* WAKATIGAKI */
/* Level character given with -l ('0'-'9', 'j' or 'n'); 0 when unused. */
int level_hiragana_mode = 0;
/* Level character given with -L ('0'-'9', 'j' or 'n'); 0 when unused. */
int level_furigana_mode = 0;
#ifdef EACH_YOMI
/* Set by -y: digest() additionally calls output_yomi_eachkanji(). */
int eachyomi_mode = 0;
void output_yomi_eachkanji(Character *, int);
void putkanjis(Character *);
#endif /* EACH_YOMI */

/* Set to 1 by the conversion loop while the current character is handled
   as kanji (proc index 7), 0 otherwise. */
int kanji_digest;
/* Reset to 0 before conversion starts; put_separator() advances it from
   1 to 2.  The code that sets it to 1 and reacts to 2 is not in this file. */
int separator_out;
/* Separator string; " " by default, replaced by -S. */
Character separator[KAKASIBUF];
/* Characters that may be skipped while cr_eat_mode is set. */
char cr_eat_string[KAKASIBUF];
/* Buffer passed as the second argument to the conversion routines and
   printed by digest_out(). */
Character n[KAKASIBUF];
/* Opening parenthesis for furigana; "[" by default, replaced by -Fl. */
Character left_paren[KAKASIBUF];
/* Closing parenthesis for furigana; "]" by default, replaced by -Fr. */
Character right_paren[KAKASIBUF];
98 #ifdef LIBRARY
99 extern FILE *kanwadict;
100 static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
101 /* ASCII, JISROMAN, KATAKANA, GRAPHIC, ZENKAKU-KIGOU, ZENKAKU-KATAKANA, ZENKAKU-HIRAGANA, KANJI, */
102 #endif
103 
104 extern int input_term_type;
105 extern int output_term_type;
106 
107 /* variables for memory management */
108 extern void ** ary_charalloc;
109 extern void ** ary_cellalloc;
110 extern size_t ary_size_charalloc;
111 extern size_t ary_cur_charalloc;
112 extern size_t ary_size_cellalloc;
113 extern size_t ary_cur_cellalloc;
114 extern int point_charalloc;
115 extern unsigned char *ptr_charalloc;
116 extern int point_cellalloc;
117 extern struct kanji_yomi *ptr_cellalloc;
118 
119 /* forward decls */
120 KAKASI_ATTR void digest_start_copy PARAMS((Character *c, Character *r));
121 KAKASI_ATTR void put_separator PARAMS((void));
122 KAKASI_ATTR void putchars PARAMS((Character *results));
123 KAKASI_ATTR void digest_out PARAMS((Character *c, int ret));
124 KAKASI_ATTR int digest PARAMS((Character *c, int clen, Character *r, int rlen, int type, int (*proc)(void)));
125 KAKASI_ATTR void digest_shift PARAMS((Character *c, int s));
126 #ifdef LIBRARY
127 static void free_jisyo PARAMS((void));
128 #endif /* LIBRARY */
129 
130 KAKASI_ATTR void
digest_start_copy(c,r)131 digest_start_copy(c, r)
132      Character *c;
133      Character *r;
134 {
135     for(;;) {
136 	r->type = c->type;
137 	r->c1 = c->c1;
138 	r->c2 = c->c2;
139 	if ((r->type == OTHER) && (r->c1 == 0)) return;
140 	++r, ++c;
141     }
142 }
143 
144 KAKASI_ATTR void
put_separator()145 put_separator()
146 {
147 #ifdef WAKATIGAKI
148     if (bunkatu_mode) {
149         if(! terminate_done) {
150 	    if (separator_out == 1)
151 	        separator_out = 2;
152         }
153     }
154 #else
155     if (bunkatu_mode)
156 	if (separator_out == 1)
157 	    separator_out = 2;
158 #endif /* WAKATIGAKI */
159 }
160 
161 KAKASI_ATTR void
putchars(results)162 putchars(results)
163      Character *results;
164 {
165     while(results->c1 != '\0') {
166 	putkanji(results);
167 	++ results;
168     }
169 }
170 
171 KAKASI_ATTR void
digest_out(c,ret)172 digest_out(c, ret)
173      Character *c;
174      int ret;
175 {
176     Character *ptr;
177     int i;
178 
179     if (kanji_digest) {
180 	put_separator();
181 	if (romaji_capitalize) {
182 	    if ((n[0].type == ASCII) || (n[0].type == JISROMAN))
183 		if (('a' <= n[0].c1) && (n[0].c1 <= 'z'))
184 		    n[0].c1 = n[0].c1 - 0x20;
185 	} else if (romaji_upcase) {
186 	    for (ptr = n; ptr->c1 != '\0'; ++ptr) {
187 		if ((ptr->type == ASCII) || (ptr->type == JISROMAN))
188 		    if (('a' <= ptr->c1) && (ptr->c1 <= 'z'))
189 			ptr->c1 = ptr->c1 - 0x20;
190 	    }
191 	}
192     }
193 
194     if ((kanji_digest) && (furigana_mode)) {
195 	for (i = 0; i < ret; ++ i)
196 	    putkanji(c+i);
197 	/* put parentheses around furigana (a.k.a. ruby) */
198 	for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
199 	    putchars(&left_paren[i]);
200 	}
201 	putchars(n);
202 	for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
203 	    putchars(&right_paren[i]);
204 	}
205 #ifdef WAKATIGAKI
206     } else if ((kanji_digest) && (wakatigaki_mode)) {
207 	for (i = 0; i < ret; ++ i)
208 	    putkanji(c+i);
209 #endif /* WAKATIGAKI */
210     } else if ((kanji_digest) && (level_hiragana_mode)) {
211 	if (check_kanji_level(c, ret, level_hiragana_mode)) {
212             for (i = 0; i < ret; i++)
213 		putkanji(c+i);
214 	} else {
215 	    putchars(n);
216 	}
217     } else if ((kanji_digest) && (level_furigana_mode)) {
218 	for (i = 0; i < ret; ++ i)
219 	    putkanji(c+i);
220 	if (! check_kanji_level(c, ret, level_furigana_mode)) {
221 	    /* put parentheses around furigana (a.k.a. ruby) */
222 	    for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
223 		putchars(&left_paren[i]);
224 	    }
225 	    putchars(n);
226 	    for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
227 		putchars(&right_paren[i]);
228 	    }
229 	}
230     } else {
231 	putchars(n);
232     }
233     if (flush_mode) fflush(stdout);
234 }
235 
236 KAKASI_ATTR int
digest(c,clen,r,rlen,type,proc)237 digest(c, clen, r, rlen, type, proc)
238      Character *c;
239      int clen;
240      Character *r;
241      int rlen;
242      int type;
243      int (*proc)();
244 {
245     int ret, i, j, k;
246     Character new;
247     char *p;
248 
249     ret = (* proc)(c, n);
250     if (ret == 0) ret = 1;
251 
252     if ((ret < 0) && (rlen < KAKASIBUF)) {
253 	getkanji(&new);
254 	if(new.type == type) {
255 	    r[rlen].type = c[clen].type = type;
256 	    r[rlen].c1 = c[clen].c1 = new.c1;
257 	    r[rlen].c2 = c[clen].c2 = new.c2;
258 	    r[rlen+1].type = c[clen+1].type = OTHER;
259 	    r[rlen+1].c1 = c[clen+1].c1 = '\0';
260 	    return digest(c, clen+1, r, rlen+1, type, proc);
261 	} else if (cr_eat_mode) {
262 	    if ((rlen < KAKASIBUF -1) && /* keep in check a buffer overflow */
263 		((new.type == ASCII) || (new.type == JISROMAN) || (new.type == OTHER))) {
264 		for (p = cr_eat_string; *p != '\0'; ++ p) {
265 		    if ((unsigned)(*p) == new.c1) {
266 			r[rlen].type = new.type;
267 			r[rlen].c1 = new.c1;
268 			r[rlen].c2 = new.c2;
269 			r[rlen+1].type = OTHER;
270 			r[rlen+1].c1 = '\0';
271 			return digest(c, clen, r, rlen+1, type, proc);
272 		    }
273 		}
274 	    }
275 	}
276 	ungetkanji(&new);
277 	ret = -ret;
278     }
279 
280     digest_out(c, ret);
281 
282 #ifdef EACH_YOMI
283     if (eachyomi_mode) {
284 	output_yomi_eachkanji(c, ret);
285     }
286 #endif /* EACH_YOMI */
287 
288     k = ret;
289     j = 0;
290     for (i = 0;; ++ i) {
291 	if ((r[i].type == type) && (k > 0)) {
292 	    -- k;
293 	} else {
294 	    c[j].type = r[i].type;
295 	    c[j].c1 = r[i].c1;
296 	    c[j].c2 = r[i].c2;
297 	    if (c[j].c1 == '\0')
298 		break;
299 	    ++ j;
300 	}
301     }
302     return rlen - ret;
303 }
304 
305 KAKASI_ATTR void
digest_shift(c,s)306 digest_shift(c, s)
307      Character *c;
308      int s;
309 {
310     int i;
311 
312     for (i = 0;; ++ i) { /* Yes, I know following lines can be written in
313 			    1 line, but I have doubts of compatibilities.. */
314 	c[i].type = c[i+s].type;
315 	c[i].c1 = c[i+s].c1;
316 	c[i].c2 = c[i+s].c2;
317 	if (c[i+s].c1 == '\0')
318 	    break;
319     }
320 }
321 
322 #ifndef LIBRARY
323 int
main(argc,argv)324 main(argc, argv)
325      int argc;
326      char **argv;
327 #else
328 int
329 kakasi_getopt_argv(argc, argv)
330      int argc;
331      char **argv;
332 #endif
333 {
334 #ifdef LIBRARY
335   int retval = 0;
336 #endif
337 #ifndef LIBRARY
338     Character c[KAKASIBUF], r[KAKASIBUF];
339     int clen, ptype, pctype;
340     static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
341     /* ASCII, JISROMAN, KATAKANA, GRAPHIC, ZENKAKU-KIGOU, ZENKAKU-KATAKANA, ZENKAKU-HIRAGANA, KANJI, */
342 #endif
343 
344     int i;
345 #ifdef LIBRARY
346     /* Initialize some valuables. */
347     for (i = 0; i < 8; i ++) {
348       proc[i] = NULL;
349     }
350     romaji_type = HEPBURN;
351     heiki_mode = 0;
352     bunkatu_mode = 0;
353     furigana_mode = 0;
354     cr_eat_mode = 0;
355     romaji_capitalize = 0;
356     romaji_upcase = 0;
357     flush_mode = 0;
358 #ifdef WAKATIGAKI
359     wakatigaki_mode = 0;
360     terminate_done = 0;
361     wo_mode = 0;
362 #endif /* WAKATIGAKI*/
363     input_term_type = UNKNOWN;
364     output_term_type = UNKNOWN;
365 #endif
366     /* Initialize separator */
367     separator[0].type = OTHER;
368     separator[0].c1 = ' ';
369     separator[0].c2 = 0;
370     for (i=1; i<KAKASIBUF; i++)
371 	separator[i].c1 = separator[i].c2 = 0;
372     /* Initialize parentheses around furigana (a.k.a. ruby) */
373     left_paren[0].type = OTHER;
374     left_paren[0].c1 = '[';
375     left_paren[0].c2 = 0;
376     for (i=1; i<KAKASIBUF; i++)
377 	left_paren[i].c1 = left_paren[i].c2 = 0;
378     right_paren[0].type = OTHER;
379     right_paren[0].c1 = ']';
380     right_paren[0].c2 = 0;
381     for (i=1; i<KAKASIBUF; i++)
382 	right_paren[i].c1 = right_paren[i].c2 = 0;
383 
384     while(--argc > 0) {
385 	++ argv;
386 	if ((*argv)[0] != '-') break;
387 	switch((*argv)[1]) {
388 	  case 'a':
389 	    switch((*argv)[2]) {
390 	      case 'j':	proc[0] = a2j; break;
391 	      case 'E':	proc[0] = a2E; break;
392 	      default:  proc[0] = NULL;
393 	    }
394 	    break;
395 	  case 'j':
396 	    switch((*argv)[2]) {
397 	      case 'a':	proc[1] = j2a; break;
398 	      case 'E':	proc[1] = j2E; break;
399 	      default:  proc[1] = NULL;
400 	    }
401 	    break;
402 	  case 'g':
403 	    switch((*argv)[2]) {
404 	      case 'a':	proc[2] = g2a; break;
405 	      case 'j':	proc[2] = g2j; break;
406 	      case 'E':	proc[2] = g2E; break;
407 	      default:  proc[2] = NULL;
408 	    }
409 	    break;
410 	  case 'k':
411 	    switch((*argv)[2]) {
412 	      case 'a':	proc[3] = k2a; break;
413 	      case 'j':	proc[3] = k2j; break;
414 	      case 'K':	proc[3] = k2K; break;
415 	      case 'H':	proc[3] = k2H; break;
416 	      default:  proc[3] = NULL;
417 	    }
418 	    break;
419 	  case 'E':
420 	    switch((*argv)[2]) {
421 	      case 'a':	proc[4] = E2a; break;
422 	      case 'j':	proc[4] = E2j; break;
423 	      default:  proc[4] = NULL;
424 	    }
425 	    break;
426 	  case 'K':
427 	    switch((*argv)[2]) {
428 	      case 'a':	proc[5] = K2a; break;
429 	      case 'j':	proc[5] = K2j; break;
430 	      case 'k':	proc[5] = K2k; break;
431 	      case 'H':	proc[5] = K2H; break;
432 	      default:  proc[5] = NULL;
433 	    }
434 	    break;
435 	  case 'H':
436 	    switch((*argv)[2]) {
437 	      case 'a':	proc[6] = H2a; break;
438 	      case 'j':	proc[6] = H2j; break;
439 	      case 'k':	proc[6] = H2k; break;
440 	      case 'K':	proc[6] = H2K; break;
441 	      case 'H':	proc[6] = H2H; break;
442 	      default:  proc[6] = NULL;
443 	    }
444 	    break;
445 	  case 'J':
446 	    switch((*argv)[2]) {
447 	      case 'a':	proc[7] = J2a; break;
448 	      case 'j':	proc[7] = J2j; break;
449 	      case 'k':	proc[7] = J2k; break;
450 	      case 'K':	proc[7] = J2K; break;
451 	      case 'H':	proc[7] = J2H; break;
452 	      default:  proc[7] = NULL;
453 	    }
454 	    break;
455 	  case 'i':
456 	    if ((*argv)[2] != '\0')
457 		set_input_term(term_type_str((*argv)+2));
458 	    else
459 		if (argc > 1) {
460 		    -- argc;
461 		    set_input_term(term_type_str(*(++ argv)));
462 		}
463 	    break;
464 	  case 'o':
465 	    if ((*argv)[2] != '\0')
466 		set_output_term(term_type_str((*argv)+2));
467 	    else
468 		if (argc > 1) {
469 		    -- argc;
470 		    set_output_term(term_type_str(*(++ argv)));
471 		}
472 	    break;
473 	  case 'r':
474 	    if ((*argv)[2] == 'k')
475 		romaji_type = KUNREI;
476 	    break;
477 	  case 'p':
478 	    heiki_mode = 1;
479 	    break;
480 	  case 's':
481 	    bunkatu_mode = 1;
482 	    break;
483 	  case 'S':
484 	    separator[0].type = OTHER;
485 	    for (i=0; i<KAKASIBUF && *(*(argv)+2+i) != 0; i++) {
486 		separator[i].c1 = *((*argv)+2+i);
487 		separator[i].c2 = 0;
488 	    }
489 	    break;
490 	  case 'f':
491 	    furigana_mode = 1;
492 	    break;
493 	  case 'F':
494 	    switch((*argv)[2]) {
495 	      case 'l':
496 		left_paren[0].type = OTHER;
497 		for (i=0; i<KAKASIBUF && *(*(argv)+3+i) != 0; i++) {
498 		    left_paren[i].c1 = *((*argv)+3+i);
499 		    left_paren[i].c2 = 0;
500 		}
501 		break;
502 	      case 'r':
503 		right_paren[0].type = OTHER;
504 		for (i=0; i<KAKASIBUF && *(*(argv)+3+i) != 0; i++) {
505 		    right_paren[i].c1 = *((*argv)+3+i);
506 		    right_paren[i].c2 = 0;
507 		}
508 		break;
509 	      default:
510 		break;
511 	    }
512 	    break;
513 	  case 'c':
514 	    cr_eat_mode = 1;
515 	    sprintf(cr_eat_string, "\011\012\015 %s", (*argv)+2);
516 	    break;
517 	  case 'C':
518 	    romaji_capitalize = 1;
519 	    break;
520 	  case 'U':
521 	    romaji_upcase = 1;
522 	    break;
523 	  case 'u':
524 	    flush_mode = 1;
525 	    break;
526 	  case 't':
527 	    use_old_romaji_table = 1;
528 #ifdef WAKATIGAKI
529 	  case 'w':
530 	    wakatigaki_mode = 1;
531 	    bunkatu_mode = 1;
532 	    cr_eat_mode = 1;
533 	    sprintf(cr_eat_string, "\011\012\015 %s", (*argv)+2);
534 	    proc[5] = K2K;
535 	    proc[6] = H2H;
536 	    proc[7] = J2H;
537 	    break;
538 #endif /* WAKATIGAKI */
539 	  case 'l':
540 	    switch((*argv)[2]) {
541 	      case '0': case '1': case '2': case '3': case '4': case '5':
542 	      case '6': case '7': case '8': case '9': case 'j': case 'n':
543 		level_hiragana_mode = (*argv)[2]; break;
544 	    }
545 	    cr_eat_mode = 1;
546 	    sprintf(cr_eat_string, "\011\012\015");
547 	    proc[5] = K2K;
548 	    proc[6] = H2H;
549 	    proc[7] = J2H;
550 	    break;
551 	  case 'L':
552 	    switch((*argv)[2]) {
553 	      case '0': case '1': case '2': case '3': case '4': case '5':
554 	      case '6': case '7': case '8': case '9': case 'j': case 'n':
555 		level_furigana_mode = (*argv)[2]; break;
556 	    }
557 	    cr_eat_mode = 1;
558 	    sprintf(cr_eat_string, "\011\012\015");
559 	    proc[5] = K2K;
560 	    proc[6] = H2H;
561 	    proc[7] = J2H;
562 	    break;
563 #ifdef EACH_YOMI
564 	  case 'y':
565 	    eachyomi_mode = 1;
566 	    break;
567 #endif /* EACH_YOMI */
568 	  case '?':
569 	  default:
570 #ifndef LIBRARY
571 	    fprintf(stderr, "KAKASI - Kanji Kana Simple Inverter  Version %s\n", VERSION);
572 	    fprintf(stderr, "Copyright (C) 1992-1999 Hironobu Takahashi. All rights reserved.\n");
573 	    fprintf(stderr, "\n");
574 	    fprintf(stderr, "Usage: kakasi -a[jE] -j[aE] -g[ajE] -k[ajKH] -E[aj] -K[ajkH] -H[ajkKH] -J[ajkKH]\n");
575 #ifdef KAKASI_SUPPORT_UTF8
576 	    fprintf(stderr, "              -i{oldjis,newjis,dec,euc,sjis,utf8} -o{oldjis,newjis,dec,euc,sjis,utf8}\n");
577 #else
578 	    fprintf(stderr, "              -i{oldjis,newjis,dec,euc,sjis} -o{oldjis,newjis,dec,euc,sjis}\n");
579 #endif /* KAKASI_SUPPORT_UTF8 */
580 	    fprintf(stderr, "              -r{hepburn,kunrei} -p -s -f -c\"chars\"  [jisyo1, jisyo2,,,]\n");
581 	    fprintf(stderr, "\n");
582 	    fprintf(stderr, "      Character Sets:\n");
583 	    fprintf(stderr, "       a: ascii  j: jisroman  g: graphic  k: kana (j,k     defined in jisx0201)\n");
584 	    fprintf(stderr, "       E: kigou  K: katakana  H: hiragana J: kanji(E,K,H,J defined in jisx0208)\n");
585 	    fprintf(stderr, "\n");
586 	    fprintf(stderr, "      Options:\n");
587 	    fprintf(stderr, "      -i: input coding system    -o: output coding system\n");
588 	    fprintf(stderr, "      -r: romaji conversion system\n");
589 	    fprintf(stderr, "      -p: list all readings (with -J option)\n");
590 	    fprintf(stderr, "      -s: insert separate characters (with -J option)  -S\"chars\": set separator\n");
591 	    fprintf(stderr, "      -f: furigana mode (with -J option)\n");
592 	    fprintf(stderr, "      -F[rl]\"chars\": set parentheses around furigana\n");
593 	    fprintf(stderr, "      -c: skip chars within jukugo (with -J option: default TAB CR LF BLANK)\n");
594 	    fprintf(stderr, "      -C: romaji Capitalize (with -Ja or -Jj option)\n");
595 	    fprintf(stderr, "      -U: romaji Upcase     (with -Ja or -Jj option)\n");
596 	    fprintf(stderr, "      -u: call fflush() after 1 character output\n");
597 	    fprintf(stderr, "      -t: use old romaji table\n");
598 #ifdef WAKATIGAKI
599 	    fprintf(stderr, "      -w: wakatigaki mode\n");
600 #endif /* WAKATIGAKI */
601 	    fprintf(stderr, "      -{l,L}: level {hiragana,furigana} mode (-{l,L}[123456jn])\n");
602 #ifdef EACH_YOMI
603 	    fprintf(stderr, "      -y: display yomi of each kanji characters\n");
604 #endif /* EACH_YOMI */
605 	    fprintf(stderr, "\n");
606 	    fprintf(stderr, "Report bugs to <bug-kakasi@namazu.org>.\n");
607 	    exit(1);
608 #else /* LIBRARY */
609 	    retval = 1;
610 #endif
611 	}
612     }
613 
614     if ((input_term_type != UNKNOWN) && (output_term_type == UNKNOWN))
615 	set_output_term(input_term_type);
616 
617 #ifdef LIBRARY
618     free_jisyo();
619     kakasi_close_kanwadict();
620 #ifdef KAKASI_SUPPORT_UTF8
621     close_iconv();
622 #endif /* KAKASI_SUPPORT_UTF8 */
623 
624 #endif /* LIBRARY */
625     init_jisyo();
626     init_kanwa();
627     if (proc[7] != NULL) {
628 	for (; argc > 0; -- argc)
629 	    add_jisyo(*(argv ++));
630     }
631 
632 #ifdef LIBRARY
633     return retval;
634 }
635 
636 char *
kakasi_do(str)637 kakasi_do(str)
638      char *str;
639 {
640     Character c[KAKASIBUF], r[KAKASIBUF];
641     int clen, ptype, pctype;
642 
643     setcharbuffer((unsigned char *)str);
644 #endif
645 
646     ptype = pctype = OTHER;
647     separator_out = 0;
648     for(;;) {
649 	getkanji(c);
650 	if ((c[0].type == OTHER) && (c[0].c1 == 0xff)) break;
651 	c[1].type = OTHER;
652 	c[1].c1 = '\0';
653 	clen = 1;
654 	while (clen > 0) {
655 	    kanji_digest = 0;
656 	    switch (c[0].type) {
657 	      case ASCII:
658 	      case JISROMAN:
659 	      case GRAPHIC:
660 	      case KATAKANA:
661 		if ((c[0].type != OTHER) && (c[0].type != pctype)) {
662 		    put_separator();
663 		    pctype = c[0].type;
664 		}
665 		if ((*proc[(int)(c[0].type)]) == NULL) {
666 		    putkanji(c); digest_shift(c, 1); -- clen;
667 		    if (flush_mode) fflush(stdout);
668 		} else {
669 		    digest_start_copy(c, r);
670 		    clen = digest(c, clen, r, clen, (int)(c[0].type), *proc[(int)(c[0].type)]);
671 		}
672 #ifdef WAKATIGAKI
673 		terminate_done = 0;
674 #endif /* WAKATIGAKI */
675 		break;
676 	      case JIS83:
677 		if (c[0].c1 >= 0xb0) {
678 		    ptype = 7;
679 		    kanji_digest = 1;
680 #ifdef WAKATIGAKI
681 		} else if ((c[0].c1 == 0xa1) && /* charcter code(\241\270),charcter code(\241\271),charcter code(\241\272) */
682 			   (c[0].c2 >= 0xb8 && c[0].c2 <= 0xba)) {
683 		    ptype = 7;
684 		    kanji_digest = 1;
685 		} else if ((c[0].c1 == 0xa5) && /* charcter code(\245\365),charcter code(\245\366) */
686 			   (c[0].c2 >= 0xf5 && c[0].c2 <= 0xf6)) {
687 		    ptype = 7;
688 		    kanji_digest = 1;
689 #endif /* WAKATIGAKI */
690     		} else if (c[0].c1 == 0xa4) {
691 		    ptype = 6;
692 #ifdef WAKATIGAKI
693 		} else if ((c[0].c1 == 0xa1)  && /* charcter code(\241\263),charcter code(\241\264),charcter code(\241\265),charcter code(\241\266) */
694 			   (c[0].c2 >= 0xb3 && c[0].c2 <= 0xb6)) {
695 		    if (c[0].c2 == 0xb3 || c[0].c2 == 0xb4) {
696 			ptype = 5;
697 		    } else if (c[0].c2 == 0xb5 || c[0].c2 <= 0xb6) {
698 			ptype = 6;
699 		    }
700 #endif /* WAKATIGAKI */
701 		} else if (c[0].c1 == 0xa5) {
702 		    ptype = 5;
703 		} else if ((c[0].c1 == 0xa1) && (c[0].c2 == 0xbc)) {
704 		    if (pctype == 5) {
705 			ptype = 5;
706 		    } else if (pctype == 6) {
707 			ptype = 6;
708 		    } else {
709 			ptype = 5;
710 		    }
711 		} else {
712 		    ptype = 4;
713 		}
714 		if (ptype != pctype) {
715 		    put_separator();
716 		    pctype = ptype;
717 		}
718 		if ((*proc[ptype]) == NULL) {
719 		    putkanji(c); digest_shift(c, 1); -- clen;
720 		    if (flush_mode) fflush(stdout);
721 		} else {
722 		    digest_start_copy(c, r);
723 		    clen = digest(c, clen, r, clen, JIS83, *proc[ptype]);
724 		}
725 #ifdef WAKATIGAKI
726 		terminate_done = 0;
727 #endif /* WAKATIGAKI */
728 		break;
729 	      default:
730 #ifdef WAKATIGAKI
731 		terminate_done = 1;
732 #endif /* WAKATIGAKI */
733 		putkanji(c); digest_shift(c, 1); -- clen;
734 #ifndef LIBRARY
735 		if (flush_mode) fflush(stdout);
736 #endif
737 	    }
738 	}
739     }
740 #ifndef LIBRARY
741     return 0;
742 #else /* LIBRARY */
743     {
744 	char *ret = getpbstr();
745 	if (ret == NULL)
746 	    return strdup("");
747 	return ret;
748     }
749 #endif
750 }
751 
752 #ifdef LIBRARY
753 int
kakasi_close_kanwadict()754 kakasi_close_kanwadict()
755 {
756     if (kanwadict != NULL) {
757 	fclose(kanwadict);
758 	kanwadict = NULL;
759 	return 0;
760     }
761     return 1;
762 }
763 
764 static void
free_jisyo()765 free_jisyo()
766 {
767     size_t x;
768 
769     if (ary_charalloc) {
770 	for (x = 0; x <= ary_cur_charalloc; x ++) {
771 	    free(ary_charalloc[x]);
772 	}
773     }
774 
775     if (ary_cellalloc) {
776 	for (x = 0; x <= ary_cur_cellalloc; x ++) {
777 	    free(ary_cellalloc[x]);
778 	}
779     }
780 
781     free(ary_charalloc);
782     free(ary_cellalloc);
783 
784     ary_charalloc = NULL;
785     ary_cellalloc = NULL;
786     ary_size_charalloc = -1;
787     ary_cur_charalloc = -1;
788     ary_size_cellalloc = -1;
789     ary_cur_cellalloc = -1;
790     point_charalloc = 0;
791     ptr_charalloc = NULL;
792     point_cellalloc = 0;
793     ptr_cellalloc = NULL;
794 }
795 
/*
 * Release a pointer with free().
 * Returns 1 when p was non-NULL and has been freed, 0 when p was NULL.
 */
int
kakasi_free(char *p)
{
    if (p == NULL)
        return 0;

    free(p);
    return 1;
}
805 #endif /* LIBRARY */
806 
807 #ifdef EACH_YOMI
808 
809 /*
810   each_yomi output
811  */
812 
/* Delimiters used by output_yomi_eachkanji(); only the first element of
   each table is printed, the second is a terminator. */
/* Printed before the list. */
Character ek_bc[] = {{ASCII, '[', 0}, {OTHER, 0, 0}};
/* Printed after the list. */
Character ek_ec[] = {{ASCII, ']', 0}, {OTHER, 0, 0}};
/* Printed between a kanji and its reading. */
Character ek_kysep[] = {{ASCII, ':', 0}, {OTHER, 0, 0}};
/* Printed between two list entries. */
Character ek_kksep[] = {{ASCII, ',', 0}, {OTHER, 0, 0}};
817 
818 void
putkanjis(c)819 putkanjis(c)
820     Character *c;
821 {
822     while (c->type != OTHER && c->c1 != 0) {
823         putkanji(c);
824         c ++;
825     }
826 }
827 
828 void
output_yomi_eachkanji(Character * c,int len)829 output_yomi_eachkanji(Character *c, int len)
830 {
831     int old_hy, i;
832     Character cbuf[KAKASIBUF], rbuf[KAKASIBUF];
833     old_hy = heiki_mode;
834     heiki_mode = 1;
835 
836     if (! (c[0].c1 >= 0xb0 ||
837 	((c[0].c1 == 0xa1) && (c[0].c2 >= 0xb8 && c[0].c2 <= 0xba)) ||
838 	((c[0].c1 == 0xa5) && (c[0].c2 >= 0xf5 && c[0].c2 <= 0xf6))))
839 	return;
840 
841     putkanji(ek_bc);
842     for (i = 0; i < len; i ++) {
843 	if (c[i].c1 >= 0xb0 ||
844 	    ((c[i].c1 == 0xa1) && (c[i].c2 >= 0xb8 && c[i].c2 <= 0xba)) ||
845 	    ((c[i].c1 == 0xa5) && (c[i].c2 >= 0xf5 && c[i].c2 <= 0xf6))) {
846 	    if (i > 0) putkanji(ek_kksep);
847 	    memcpy(cbuf, &c[i], sizeof(Character));
848 	    cbuf[1].type = OTHER;
849 	    cbuf[1].c1 = 0;
850 	    cbuf[1].c2 = 0;
851 	    putkanji(cbuf);
852 	    putkanji(ek_kysep);
853 	    J2H(cbuf, rbuf);
854 	    putkanjis(rbuf);
855 	}
856     }
857     putkanji(ek_ec);
858 
859     heiki_mode = old_hy;
860 }
861 
862 #endif /* EACH_YOMI */
863