1 /*
2 * $Id: kakasi.c,v 1.41 2013-02-06 06:05:02 knok Exp $
3 * Copyright (C) 1992
4 * Hironobu Takahashi (takahasi@tiny.or.jp)
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either versions 2, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with KAKASI, see the file COPYING. If not, write to the Free
18 * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21 /*
22 Modified by NOKUBI Takatsugu <knok@daionet.gr.jp>
23 1999/05/09
24 Fix kakasi_do returning no values.
25 1999/04/14
      Add more variable initialization routines.
27 1999/04/12
      Add an initialization routine for some variables to function kakasi_getopt_argv.
29 1999/03/04
30 Rename PERLMOD macro to LIBRARY
31 1999/01/08
32 Add PERLMOD macro.
33 */
34
35 #ifdef HAVE_CONFIG_H
36 # include <config.h>
37 #endif
38
39 #include <stdio.h>
40 #ifdef HAVE_STRING_H
41 # include <string.h>
42 #else
43 # include <strings.h>
44 #endif
45 #ifdef HAVE_MALLOC_H
46 # include <malloc.h>
47 #endif
48 #include <stdlib.h>
49 #include "kakasi.h"
50 #ifdef LIBRARY
51 # include "libkakasi.h"
52 #endif
53 #ifdef KAKASI_SUPPORT_UTF8
54 #include <iconv.h>
55 extern void close_iconv();
56
57 iconv_t fromutf8 = (iconv_t) -1;
58 iconv_t toutf8 = (iconv_t) -1;
59 #endif /* KAKASI_SUPPORT_UTF8 */
60
/* FIXME: this macro should be removed in the future. */
62 #ifdef LIBRARY
63 #define KAKASI_ATTR
64 #else /* !LIBRARY */
65 #define KAKASI_ATTR static
66 #endif /* !LIBRARY */
67
/* Romaji system: HEPBURN by default, KUNREI when -rk is given. */
int romaji_type = HEPBURN;
/* Set to 1 by -t ("use old romaji table"). */
int use_old_romaji_table = 0;
/* Set to 1 by -C: digest_out() upper-cases the first letter of a reading. */
int romaji_capitalize = 0;
/* Set to 1 by -U: digest_out() upper-cases every letter of a reading. */
int romaji_upcase = 0;
/* Set to 1 by -p ("list all readings"). */
int heiki_mode = 0;
/* Set to 1 by -s (and -w): enables put_separator(). */
int bunkatu_mode = 0;
/* Set to 1 by -f: print the source text followed by its reading in
   parentheses. */
int furigana_mode = 0;
/* Set to 1 by -c, -w, -l and -L: digest() may swallow the characters listed
   in cr_eat_string while collecting a word. */
int cr_eat_mode = 0;
/* Set to 1 by -u: fflush(stdout) after each output step. */
int flush_mode = 0;
#ifdef WAKATIGAKI
/* Set to 1 by -w: digest_out() echoes the source text instead of the
   reading. */
int wakatigaki_mode = 0;
/* Set to 1 by the conversion loop after a character it passes through
   unconverted, 0 after a converted one; while set, put_separator() does
   nothing. */
int terminate_done = 0;
/* Reset together with the other wakatigaki flags; not otherwise used in
   this file. */
int wo_mode = 0;
#endif /* WAKATIGAKI */
/* Level character given to -l ('0'..'9', 'j' or 'n'); 0 when unset. */
int level_hiragana_mode = 0;
/* Level character given to -L ('0'..'9', 'j' or 'n'); 0 when unset. */
int level_furigana_mode = 0;
#ifdef EACH_YOMI
/* Set to 1 by -y: digest() also calls output_yomi_eachkanji(). */
int eachyomi_mode = 0;
void output_yomi_eachkanji(Character *, int);
void putkanjis(Character *);
#endif /* EACH_YOMI */

/* Set to 1 by the conversion loop while the current character is being
   handled as kanji, 0 otherwise. */
int kanji_digest;
/* Separator state: cleared at the start of a conversion; put_separator()
   promotes 1 to 2.  The code that sets 1 and consumes 2 is not in this
   file. */
int separator_out;
/* Separator string (default " ", replaced by -S). */
Character separator[KAKASIBUF];
/* Characters digest() may skip when cr_eat_mode is on. */
char cr_eat_string[KAKASIBUF];
/* Result buffer filled by the conversion routines and printed by
   digest_out(). */
Character n[KAKASIBUF];
/* Opening parenthesis string around furigana (default "[", replaced by -Fl). */
Character left_paren[KAKASIBUF];
/* Closing parenthesis string around furigana (default "]", replaced by -Fr). */
Character right_paren[KAKASIBUF];
97
98 #ifdef LIBRARY
99 extern FILE *kanwadict;
100 static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
101 /* ASCII, JISROMAN, KATAKANA, GRAPHIC, ZENKAKU-KIGOU, ZENKAKU-KATAKANA, ZENKAKU-HIRAGANA, KANJI, */
102 #endif
103
104 extern int input_term_type;
105 extern int output_term_type;
106
107 /* variables for memory management */
108 extern void ** ary_charalloc;
109 extern void ** ary_cellalloc;
110 extern size_t ary_size_charalloc;
111 extern size_t ary_cur_charalloc;
112 extern size_t ary_size_cellalloc;
113 extern size_t ary_cur_cellalloc;
114 extern int point_charalloc;
115 extern unsigned char *ptr_charalloc;
116 extern int point_cellalloc;
117 extern struct kanji_yomi *ptr_cellalloc;
118
119 /* forward decls */
120 KAKASI_ATTR void digest_start_copy PARAMS((Character *c, Character *r));
121 KAKASI_ATTR void put_separator PARAMS((void));
122 KAKASI_ATTR void putchars PARAMS((Character *results));
123 KAKASI_ATTR void digest_out PARAMS((Character *c, int ret));
124 KAKASI_ATTR int digest PARAMS((Character *c, int clen, Character *r, int rlen, int type, int (*proc)(void)));
125 KAKASI_ATTR void digest_shift PARAMS((Character *c, int s));
126 #ifdef LIBRARY
127 static void free_jisyo PARAMS((void));
128 #endif /* LIBRARY */
129
130 KAKASI_ATTR void
digest_start_copy(c,r)131 digest_start_copy(c, r)
132 Character *c;
133 Character *r;
134 {
135 for(;;) {
136 r->type = c->type;
137 r->c1 = c->c1;
138 r->c2 = c->c2;
139 if ((r->type == OTHER) && (r->c1 == 0)) return;
140 ++r, ++c;
141 }
142 }
143
144 KAKASI_ATTR void
put_separator()145 put_separator()
146 {
147 #ifdef WAKATIGAKI
148 if (bunkatu_mode) {
149 if(! terminate_done) {
150 if (separator_out == 1)
151 separator_out = 2;
152 }
153 }
154 #else
155 if (bunkatu_mode)
156 if (separator_out == 1)
157 separator_out = 2;
158 #endif /* WAKATIGAKI */
159 }
160
161 KAKASI_ATTR void
putchars(results)162 putchars(results)
163 Character *results;
164 {
165 while(results->c1 != '\0') {
166 putkanji(results);
167 ++ results;
168 }
169 }
170
171 KAKASI_ATTR void
digest_out(c,ret)172 digest_out(c, ret)
173 Character *c;
174 int ret;
175 {
176 Character *ptr;
177 int i;
178
179 if (kanji_digest) {
180 put_separator();
181 if (romaji_capitalize) {
182 if ((n[0].type == ASCII) || (n[0].type == JISROMAN))
183 if (('a' <= n[0].c1) && (n[0].c1 <= 'z'))
184 n[0].c1 = n[0].c1 - 0x20;
185 } else if (romaji_upcase) {
186 for (ptr = n; ptr->c1 != '\0'; ++ptr) {
187 if ((ptr->type == ASCII) || (ptr->type == JISROMAN))
188 if (('a' <= ptr->c1) && (ptr->c1 <= 'z'))
189 ptr->c1 = ptr->c1 - 0x20;
190 }
191 }
192 }
193
194 if ((kanji_digest) && (furigana_mode)) {
195 for (i = 0; i < ret; ++ i)
196 putkanji(c+i);
197 /* put parentheses around furigana (a.k.a. ruby) */
198 for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
199 putchars(&left_paren[i]);
200 }
201 putchars(n);
202 for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
203 putchars(&right_paren[i]);
204 }
205 #ifdef WAKATIGAKI
206 } else if ((kanji_digest) && (wakatigaki_mode)) {
207 for (i = 0; i < ret; ++ i)
208 putkanji(c+i);
209 #endif /* WAKATIGAKI */
210 } else if ((kanji_digest) && (level_hiragana_mode)) {
211 if (check_kanji_level(c, ret, level_hiragana_mode)) {
212 for (i = 0; i < ret; i++)
213 putkanji(c+i);
214 } else {
215 putchars(n);
216 }
217 } else if ((kanji_digest) && (level_furigana_mode)) {
218 for (i = 0; i < ret; ++ i)
219 putkanji(c+i);
220 if (! check_kanji_level(c, ret, level_furigana_mode)) {
221 /* put parentheses around furigana (a.k.a. ruby) */
222 for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
223 putchars(&left_paren[i]);
224 }
225 putchars(n);
226 for (i=0; i<KAKASIBUF && separator[i].c1 != 0; i++) {
227 putchars(&right_paren[i]);
228 }
229 }
230 } else {
231 putchars(n);
232 }
233 if (flush_mode) fflush(stdout);
234 }
235
236 KAKASI_ATTR int
digest(c,clen,r,rlen,type,proc)237 digest(c, clen, r, rlen, type, proc)
238 Character *c;
239 int clen;
240 Character *r;
241 int rlen;
242 int type;
243 int (*proc)();
244 {
245 int ret, i, j, k;
246 Character new;
247 char *p;
248
249 ret = (* proc)(c, n);
250 if (ret == 0) ret = 1;
251
252 if ((ret < 0) && (rlen < KAKASIBUF)) {
253 getkanji(&new);
254 if(new.type == type) {
255 r[rlen].type = c[clen].type = type;
256 r[rlen].c1 = c[clen].c1 = new.c1;
257 r[rlen].c2 = c[clen].c2 = new.c2;
258 r[rlen+1].type = c[clen+1].type = OTHER;
259 r[rlen+1].c1 = c[clen+1].c1 = '\0';
260 return digest(c, clen+1, r, rlen+1, type, proc);
261 } else if (cr_eat_mode) {
262 if ((rlen < KAKASIBUF -1) && /* keep in check a buffer overflow */
263 ((new.type == ASCII) || (new.type == JISROMAN) || (new.type == OTHER))) {
264 for (p = cr_eat_string; *p != '\0'; ++ p) {
265 if ((unsigned)(*p) == new.c1) {
266 r[rlen].type = new.type;
267 r[rlen].c1 = new.c1;
268 r[rlen].c2 = new.c2;
269 r[rlen+1].type = OTHER;
270 r[rlen+1].c1 = '\0';
271 return digest(c, clen, r, rlen+1, type, proc);
272 }
273 }
274 }
275 }
276 ungetkanji(&new);
277 ret = -ret;
278 }
279
280 digest_out(c, ret);
281
282 #ifdef EACH_YOMI
283 if (eachyomi_mode) {
284 output_yomi_eachkanji(c, ret);
285 }
286 #endif /* EACH_YOMI */
287
288 k = ret;
289 j = 0;
290 for (i = 0;; ++ i) {
291 if ((r[i].type == type) && (k > 0)) {
292 -- k;
293 } else {
294 c[j].type = r[i].type;
295 c[j].c1 = r[i].c1;
296 c[j].c2 = r[i].c2;
297 if (c[j].c1 == '\0')
298 break;
299 ++ j;
300 }
301 }
302 return rlen - ret;
303 }
304
305 KAKASI_ATTR void
digest_shift(c,s)306 digest_shift(c, s)
307 Character *c;
308 int s;
309 {
310 int i;
311
312 for (i = 0;; ++ i) { /* Yes, I know following lines can be written in
313 1 line, but I have doubts of compatibilities.. */
314 c[i].type = c[i+s].type;
315 c[i].c1 = c[i+s].c1;
316 c[i].c2 = c[i+s].c2;
317 if (c[i+s].c1 == '\0')
318 break;
319 }
320 }
321
322 #ifndef LIBRARY
323 int
main(argc,argv)324 main(argc, argv)
325 int argc;
326 char **argv;
327 #else
328 int
329 kakasi_getopt_argv(argc, argv)
330 int argc;
331 char **argv;
332 #endif
333 {
334 #ifdef LIBRARY
335 int retval = 0;
336 #endif
337 #ifndef LIBRARY
338 Character c[KAKASIBUF], r[KAKASIBUF];
339 int clen, ptype, pctype;
340 static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
341 /* ASCII, JISROMAN, KATAKANA, GRAPHIC, ZENKAKU-KIGOU, ZENKAKU-KATAKANA, ZENKAKU-HIRAGANA, KANJI, */
342 #endif
343
344 int i;
345 #ifdef LIBRARY
346 /* Initialize some valuables. */
347 for (i = 0; i < 8; i ++) {
348 proc[i] = NULL;
349 }
350 romaji_type = HEPBURN;
351 heiki_mode = 0;
352 bunkatu_mode = 0;
353 furigana_mode = 0;
354 cr_eat_mode = 0;
355 romaji_capitalize = 0;
356 romaji_upcase = 0;
357 flush_mode = 0;
358 #ifdef WAKATIGAKI
359 wakatigaki_mode = 0;
360 terminate_done = 0;
361 wo_mode = 0;
362 #endif /* WAKATIGAKI*/
363 input_term_type = UNKNOWN;
364 output_term_type = UNKNOWN;
365 #endif
366 /* Initialize separator */
367 separator[0].type = OTHER;
368 separator[0].c1 = ' ';
369 separator[0].c2 = 0;
370 for (i=1; i<KAKASIBUF; i++)
371 separator[i].c1 = separator[i].c2 = 0;
372 /* Initialize parentheses around furigana (a.k.a. ruby) */
373 left_paren[0].type = OTHER;
374 left_paren[0].c1 = '[';
375 left_paren[0].c2 = 0;
376 for (i=1; i<KAKASIBUF; i++)
377 left_paren[i].c1 = left_paren[i].c2 = 0;
378 right_paren[0].type = OTHER;
379 right_paren[0].c1 = ']';
380 right_paren[0].c2 = 0;
381 for (i=1; i<KAKASIBUF; i++)
382 right_paren[i].c1 = right_paren[i].c2 = 0;
383
384 while(--argc > 0) {
385 ++ argv;
386 if ((*argv)[0] != '-') break;
387 switch((*argv)[1]) {
388 case 'a':
389 switch((*argv)[2]) {
390 case 'j': proc[0] = a2j; break;
391 case 'E': proc[0] = a2E; break;
392 default: proc[0] = NULL;
393 }
394 break;
395 case 'j':
396 switch((*argv)[2]) {
397 case 'a': proc[1] = j2a; break;
398 case 'E': proc[1] = j2E; break;
399 default: proc[1] = NULL;
400 }
401 break;
402 case 'g':
403 switch((*argv)[2]) {
404 case 'a': proc[2] = g2a; break;
405 case 'j': proc[2] = g2j; break;
406 case 'E': proc[2] = g2E; break;
407 default: proc[2] = NULL;
408 }
409 break;
410 case 'k':
411 switch((*argv)[2]) {
412 case 'a': proc[3] = k2a; break;
413 case 'j': proc[3] = k2j; break;
414 case 'K': proc[3] = k2K; break;
415 case 'H': proc[3] = k2H; break;
416 default: proc[3] = NULL;
417 }
418 break;
419 case 'E':
420 switch((*argv)[2]) {
421 case 'a': proc[4] = E2a; break;
422 case 'j': proc[4] = E2j; break;
423 default: proc[4] = NULL;
424 }
425 break;
426 case 'K':
427 switch((*argv)[2]) {
428 case 'a': proc[5] = K2a; break;
429 case 'j': proc[5] = K2j; break;
430 case 'k': proc[5] = K2k; break;
431 case 'H': proc[5] = K2H; break;
432 default: proc[5] = NULL;
433 }
434 break;
435 case 'H':
436 switch((*argv)[2]) {
437 case 'a': proc[6] = H2a; break;
438 case 'j': proc[6] = H2j; break;
439 case 'k': proc[6] = H2k; break;
440 case 'K': proc[6] = H2K; break;
441 case 'H': proc[6] = H2H; break;
442 default: proc[6] = NULL;
443 }
444 break;
445 case 'J':
446 switch((*argv)[2]) {
447 case 'a': proc[7] = J2a; break;
448 case 'j': proc[7] = J2j; break;
449 case 'k': proc[7] = J2k; break;
450 case 'K': proc[7] = J2K; break;
451 case 'H': proc[7] = J2H; break;
452 default: proc[7] = NULL;
453 }
454 break;
455 case 'i':
456 if ((*argv)[2] != '\0')
457 set_input_term(term_type_str((*argv)+2));
458 else
459 if (argc > 1) {
460 -- argc;
461 set_input_term(term_type_str(*(++ argv)));
462 }
463 break;
464 case 'o':
465 if ((*argv)[2] != '\0')
466 set_output_term(term_type_str((*argv)+2));
467 else
468 if (argc > 1) {
469 -- argc;
470 set_output_term(term_type_str(*(++ argv)));
471 }
472 break;
473 case 'r':
474 if ((*argv)[2] == 'k')
475 romaji_type = KUNREI;
476 break;
477 case 'p':
478 heiki_mode = 1;
479 break;
480 case 's':
481 bunkatu_mode = 1;
482 break;
483 case 'S':
484 separator[0].type = OTHER;
485 for (i=0; i<KAKASIBUF && *(*(argv)+2+i) != 0; i++) {
486 separator[i].c1 = *((*argv)+2+i);
487 separator[i].c2 = 0;
488 }
489 break;
490 case 'f':
491 furigana_mode = 1;
492 break;
493 case 'F':
494 switch((*argv)[2]) {
495 case 'l':
496 left_paren[0].type = OTHER;
497 for (i=0; i<KAKASIBUF && *(*(argv)+3+i) != 0; i++) {
498 left_paren[i].c1 = *((*argv)+3+i);
499 left_paren[i].c2 = 0;
500 }
501 break;
502 case 'r':
503 right_paren[0].type = OTHER;
504 for (i=0; i<KAKASIBUF && *(*(argv)+3+i) != 0; i++) {
505 right_paren[i].c1 = *((*argv)+3+i);
506 right_paren[i].c2 = 0;
507 }
508 break;
509 default:
510 break;
511 }
512 break;
513 case 'c':
514 cr_eat_mode = 1;
515 sprintf(cr_eat_string, "\011\012\015 %s", (*argv)+2);
516 break;
517 case 'C':
518 romaji_capitalize = 1;
519 break;
520 case 'U':
521 romaji_upcase = 1;
522 break;
523 case 'u':
524 flush_mode = 1;
525 break;
526 case 't':
527 use_old_romaji_table = 1;
528 #ifdef WAKATIGAKI
529 case 'w':
530 wakatigaki_mode = 1;
531 bunkatu_mode = 1;
532 cr_eat_mode = 1;
533 sprintf(cr_eat_string, "\011\012\015 %s", (*argv)+2);
534 proc[5] = K2K;
535 proc[6] = H2H;
536 proc[7] = J2H;
537 break;
538 #endif /* WAKATIGAKI */
539 case 'l':
540 switch((*argv)[2]) {
541 case '0': case '1': case '2': case '3': case '4': case '5':
542 case '6': case '7': case '8': case '9': case 'j': case 'n':
543 level_hiragana_mode = (*argv)[2]; break;
544 }
545 cr_eat_mode = 1;
546 sprintf(cr_eat_string, "\011\012\015");
547 proc[5] = K2K;
548 proc[6] = H2H;
549 proc[7] = J2H;
550 break;
551 case 'L':
552 switch((*argv)[2]) {
553 case '0': case '1': case '2': case '3': case '4': case '5':
554 case '6': case '7': case '8': case '9': case 'j': case 'n':
555 level_furigana_mode = (*argv)[2]; break;
556 }
557 cr_eat_mode = 1;
558 sprintf(cr_eat_string, "\011\012\015");
559 proc[5] = K2K;
560 proc[6] = H2H;
561 proc[7] = J2H;
562 break;
563 #ifdef EACH_YOMI
564 case 'y':
565 eachyomi_mode = 1;
566 break;
567 #endif /* EACH_YOMI */
568 case '?':
569 default:
570 #ifndef LIBRARY
571 fprintf(stderr, "KAKASI - Kanji Kana Simple Inverter Version %s\n", VERSION);
572 fprintf(stderr, "Copyright (C) 1992-1999 Hironobu Takahashi. All rights reserved.\n");
573 fprintf(stderr, "\n");
574 fprintf(stderr, "Usage: kakasi -a[jE] -j[aE] -g[ajE] -k[ajKH] -E[aj] -K[ajkH] -H[ajkKH] -J[ajkKH]\n");
575 #ifdef KAKASI_SUPPORT_UTF8
576 fprintf(stderr, " -i{oldjis,newjis,dec,euc,sjis,utf8} -o{oldjis,newjis,dec,euc,sjis,utf8}\n");
577 #else
578 fprintf(stderr, " -i{oldjis,newjis,dec,euc,sjis} -o{oldjis,newjis,dec,euc,sjis}\n");
579 #endif /* KAKASI_SUPPORT_UTF8 */
580 fprintf(stderr, " -r{hepburn,kunrei} -p -s -f -c\"chars\" [jisyo1, jisyo2,,,]\n");
581 fprintf(stderr, "\n");
582 fprintf(stderr, " Character Sets:\n");
583 fprintf(stderr, " a: ascii j: jisroman g: graphic k: kana (j,k defined in jisx0201)\n");
584 fprintf(stderr, " E: kigou K: katakana H: hiragana J: kanji(E,K,H,J defined in jisx0208)\n");
585 fprintf(stderr, "\n");
586 fprintf(stderr, " Options:\n");
587 fprintf(stderr, " -i: input coding system -o: output coding system\n");
588 fprintf(stderr, " -r: romaji conversion system\n");
589 fprintf(stderr, " -p: list all readings (with -J option)\n");
590 fprintf(stderr, " -s: insert separate characters (with -J option) -S\"chars\": set separator\n");
591 fprintf(stderr, " -f: furigana mode (with -J option)\n");
592 fprintf(stderr, " -F[rl]\"chars\": set parentheses around furigana\n");
593 fprintf(stderr, " -c: skip chars within jukugo (with -J option: default TAB CR LF BLANK)\n");
594 fprintf(stderr, " -C: romaji Capitalize (with -Ja or -Jj option)\n");
595 fprintf(stderr, " -U: romaji Upcase (with -Ja or -Jj option)\n");
596 fprintf(stderr, " -u: call fflush() after 1 character output\n");
597 fprintf(stderr, " -t: use old romaji table\n");
598 #ifdef WAKATIGAKI
599 fprintf(stderr, " -w: wakatigaki mode\n");
600 #endif /* WAKATIGAKI */
601 fprintf(stderr, " -{l,L}: level {hiragana,furigana} mode (-{l,L}[123456jn])\n");
602 #ifdef EACH_YOMI
603 fprintf(stderr, " -y: display yomi of each kanji characters\n");
604 #endif /* EACH_YOMI */
605 fprintf(stderr, "\n");
606 fprintf(stderr, "Report bugs to <bug-kakasi@namazu.org>.\n");
607 exit(1);
608 #else /* LIBRARY */
609 retval = 1;
610 #endif
611 }
612 }
613
614 if ((input_term_type != UNKNOWN) && (output_term_type == UNKNOWN))
615 set_output_term(input_term_type);
616
617 #ifdef LIBRARY
618 free_jisyo();
619 kakasi_close_kanwadict();
620 #ifdef KAKASI_SUPPORT_UTF8
621 close_iconv();
622 #endif /* KAKASI_SUPPORT_UTF8 */
623
624 #endif /* LIBRARY */
625 init_jisyo();
626 init_kanwa();
627 if (proc[7] != NULL) {
628 for (; argc > 0; -- argc)
629 add_jisyo(*(argv ++));
630 }
631
632 #ifdef LIBRARY
633 return retval;
634 }
635
636 char *
kakasi_do(str)637 kakasi_do(str)
638 char *str;
639 {
640 Character c[KAKASIBUF], r[KAKASIBUF];
641 int clen, ptype, pctype;
642
643 setcharbuffer((unsigned char *)str);
644 #endif
645
646 ptype = pctype = OTHER;
647 separator_out = 0;
648 for(;;) {
649 getkanji(c);
650 if ((c[0].type == OTHER) && (c[0].c1 == 0xff)) break;
651 c[1].type = OTHER;
652 c[1].c1 = '\0';
653 clen = 1;
654 while (clen > 0) {
655 kanji_digest = 0;
656 switch (c[0].type) {
657 case ASCII:
658 case JISROMAN:
659 case GRAPHIC:
660 case KATAKANA:
661 if ((c[0].type != OTHER) && (c[0].type != pctype)) {
662 put_separator();
663 pctype = c[0].type;
664 }
665 if ((*proc[(int)(c[0].type)]) == NULL) {
666 putkanji(c); digest_shift(c, 1); -- clen;
667 if (flush_mode) fflush(stdout);
668 } else {
669 digest_start_copy(c, r);
670 clen = digest(c, clen, r, clen, (int)(c[0].type), *proc[(int)(c[0].type)]);
671 }
672 #ifdef WAKATIGAKI
673 terminate_done = 0;
674 #endif /* WAKATIGAKI */
675 break;
676 case JIS83:
677 if (c[0].c1 >= 0xb0) {
678 ptype = 7;
679 kanji_digest = 1;
680 #ifdef WAKATIGAKI
681 } else if ((c[0].c1 == 0xa1) && /* charcter code(\241\270),charcter code(\241\271),charcter code(\241\272) */
682 (c[0].c2 >= 0xb8 && c[0].c2 <= 0xba)) {
683 ptype = 7;
684 kanji_digest = 1;
685 } else if ((c[0].c1 == 0xa5) && /* charcter code(\245\365),charcter code(\245\366) */
686 (c[0].c2 >= 0xf5 && c[0].c2 <= 0xf6)) {
687 ptype = 7;
688 kanji_digest = 1;
689 #endif /* WAKATIGAKI */
690 } else if (c[0].c1 == 0xa4) {
691 ptype = 6;
692 #ifdef WAKATIGAKI
693 } else if ((c[0].c1 == 0xa1) && /* charcter code(\241\263),charcter code(\241\264),charcter code(\241\265),charcter code(\241\266) */
694 (c[0].c2 >= 0xb3 && c[0].c2 <= 0xb6)) {
695 if (c[0].c2 == 0xb3 || c[0].c2 == 0xb4) {
696 ptype = 5;
697 } else if (c[0].c2 == 0xb5 || c[0].c2 <= 0xb6) {
698 ptype = 6;
699 }
700 #endif /* WAKATIGAKI */
701 } else if (c[0].c1 == 0xa5) {
702 ptype = 5;
703 } else if ((c[0].c1 == 0xa1) && (c[0].c2 == 0xbc)) {
704 if (pctype == 5) {
705 ptype = 5;
706 } else if (pctype == 6) {
707 ptype = 6;
708 } else {
709 ptype = 5;
710 }
711 } else {
712 ptype = 4;
713 }
714 if (ptype != pctype) {
715 put_separator();
716 pctype = ptype;
717 }
718 if ((*proc[ptype]) == NULL) {
719 putkanji(c); digest_shift(c, 1); -- clen;
720 if (flush_mode) fflush(stdout);
721 } else {
722 digest_start_copy(c, r);
723 clen = digest(c, clen, r, clen, JIS83, *proc[ptype]);
724 }
725 #ifdef WAKATIGAKI
726 terminate_done = 0;
727 #endif /* WAKATIGAKI */
728 break;
729 default:
730 #ifdef WAKATIGAKI
731 terminate_done = 1;
732 #endif /* WAKATIGAKI */
733 putkanji(c); digest_shift(c, 1); -- clen;
734 #ifndef LIBRARY
735 if (flush_mode) fflush(stdout);
736 #endif
737 }
738 }
739 }
740 #ifndef LIBRARY
741 return 0;
742 #else /* LIBRARY */
743 {
744 char *ret = getpbstr();
745 if (ret == NULL)
746 return strdup("");
747 return ret;
748 }
749 #endif
750 }
751
752 #ifdef LIBRARY
753 int
kakasi_close_kanwadict()754 kakasi_close_kanwadict()
755 {
756 if (kanwadict != NULL) {
757 fclose(kanwadict);
758 kanwadict = NULL;
759 return 0;
760 }
761 return 1;
762 }
763
764 static void
free_jisyo()765 free_jisyo()
766 {
767 size_t x;
768
769 if (ary_charalloc) {
770 for (x = 0; x <= ary_cur_charalloc; x ++) {
771 free(ary_charalloc[x]);
772 }
773 }
774
775 if (ary_cellalloc) {
776 for (x = 0; x <= ary_cur_cellalloc; x ++) {
777 free(ary_cellalloc[x]);
778 }
779 }
780
781 free(ary_charalloc);
782 free(ary_cellalloc);
783
784 ary_charalloc = NULL;
785 ary_cellalloc = NULL;
786 ary_size_charalloc = -1;
787 ary_cur_charalloc = -1;
788 ary_size_cellalloc = -1;
789 ary_cur_cellalloc = -1;
790 point_charalloc = 0;
791 ptr_charalloc = NULL;
792 point_cellalloc = 0;
793 ptr_cellalloc = NULL;
794 }
795
/*
 * Free a string previously handed out by the library.
 * Returns 1 when p was non-NULL and has been freed, 0 when p was NULL.
 */
int
kakasi_free(char *p)
{
    if (p == NULL)
        return 0;

    free(p);
    return 1;
}
805 #endif /* LIBRARY */
806
807 #ifdef EACH_YOMI
808
809 /*
810 each_yomi output
811 */
812
813 Character ek_bc[] = {{ASCII, '[', 0}, {OTHER, 0, 0}};
814 Character ek_ec[] = {{ASCII, ']', 0}, {OTHER, 0, 0}};
815 Character ek_kysep[] = {{ASCII, ':', 0}, {OTHER, 0, 0}};
816 Character ek_kksep[] = {{ASCII, ',', 0}, {OTHER, 0, 0}};
817
818 void
putkanjis(c)819 putkanjis(c)
820 Character *c;
821 {
822 while (c->type != OTHER && c->c1 != 0) {
823 putkanji(c);
824 c ++;
825 }
826 }
827
828 void
output_yomi_eachkanji(Character * c,int len)829 output_yomi_eachkanji(Character *c, int len)
830 {
831 int old_hy, i;
832 Character cbuf[KAKASIBUF], rbuf[KAKASIBUF];
833 old_hy = heiki_mode;
834 heiki_mode = 1;
835
836 if (! (c[0].c1 >= 0xb0 ||
837 ((c[0].c1 == 0xa1) && (c[0].c2 >= 0xb8 && c[0].c2 <= 0xba)) ||
838 ((c[0].c1 == 0xa5) && (c[0].c2 >= 0xf5 && c[0].c2 <= 0xf6))))
839 return;
840
841 putkanji(ek_bc);
842 for (i = 0; i < len; i ++) {
843 if (c[i].c1 >= 0xb0 ||
844 ((c[i].c1 == 0xa1) && (c[i].c2 >= 0xb8 && c[i].c2 <= 0xba)) ||
845 ((c[i].c1 == 0xa5) && (c[i].c2 >= 0xf5 && c[i].c2 <= 0xf6))) {
846 if (i > 0) putkanji(ek_kksep);
847 memcpy(cbuf, &c[i], sizeof(Character));
848 cbuf[1].type = OTHER;
849 cbuf[1].c1 = 0;
850 cbuf[1].c2 = 0;
851 putkanji(cbuf);
852 putkanji(ek_kysep);
853 J2H(cbuf, rbuf);
854 putkanjis(rbuf);
855 }
856 }
857 putkanji(ek_ec);
858
859 heiki_mode = old_hy;
860 }
861
862 #endif /* EACH_YOMI */
863