1 /*
2  * wx2-conv.c  --  WX2 -> KAKASI dictionary converter
3  * $Id: wx2-conv.c,v 1.4 2007-10-23 05:25:56 knok Exp $
4  * Copyright(c) 1996, Hajime BABA,
5  *          Department of Astronomy, Kyoto University,
6  *          KYOTO, Japan, 606-01.
7  * E-mail: < baba@kusastro.kyoto-u.ac.jp >
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
12  * any later version.
13  *
14  * This program is distributed in the hope that it will be useful
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with KAKASI, see the file COPYING.  If not, write to the Free
21  * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
22  * 02111-1307, USA.
23  */
24 
25 /*
26  *  [IKKATUTOUROKU:TOUROKU file] format of
27  *                                  WX2 series.
28  *  <YOMI><tab>"<KANJI>":<HINSHI><FYKUKAI>
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 # include <config.h>
33 #endif
34 
35 #include <stdio.h>
36 #ifdef HAVE_STRING_H
37 # include <string.h>
38 #else
39 # include <strings.h>
40 #endif
41 #include "conv-util.h"
42 
/*
 * sjis2ujis -- convert a NUL-terminated Shift_JIS string to EUC-JP (UJIS).
 *
 *   sjis : input string.
 *   ujis : output buffer, receives the NUL-terminated result.  No size is
 *          passed in, so the caller must provide at least
 *          2 * strlen(sjis) + 1 bytes: every byte in 0xa0..0xdf
 *          (half-width katakana) expands into a two-byte code.
 *
 * Conversion rules:
 *   - ',' becomes ' ';
 *   - any other byte below 0x80 is copied unchanged;
 *   - a byte in 0xa0..0xdf is replaced by its two-byte entry in k2H_table;
 *     when it is followed by 0xde (voiced mark) or 0xdf (semi-voiced mark)
 *     and the matching table has a combined entry, both bytes are consumed
 *     and the combined entry is emitted instead;
 *   - every remaining byte is treated as the lead byte of a two-byte
 *     Shift_JIS character and converted arithmetically.
 *
 * A lead byte that ends the string (no trail byte follows) is dropped and
 * the conversion stops there; the terminator is never consumed as a trail
 * byte.
 */
static void
sjis2ujis(sjis, ujis)
     unsigned char *sjis;
     unsigned char *ujis;
{
    unsigned char *p, *q;
    const unsigned char *kana;
    int c1, c2, o1, o2;
    /* Plain replacements, indexed by (byte - 0xa0). */
    static const unsigned char k2H_table[64][3] = {
	"\241\241", "\241\243", "\241\326", "\241\327", "\241\242", "\241\245", "\244\362", "\244\241",
	"\244\243", "\244\245", "\244\247", "\244\251", "\244\343", "\244\345", "\244\347", "\244\303",
	"\241\274", "\244\242", "\244\244", "\244\246", "\244\250", "\244\252", "\244\253", "\244\255",
	"\244\257", "\244\261", "\244\263", "\244\265", "\244\267", "\244\271", "\244\273", "\244\275",
	"\244\277", "\244\301", "\244\304", "\244\306", "\244\310", "\244\312", "\244\313", "\244\314",
	"\244\315", "\244\316", "\244\317", "\244\322", "\244\325", "\244\330", "\244\333", "\244\336",
	"\244\337", "\244\340", "\244\341", "\244\342", "\244\344", "\244\346", "\244\350", "\244\351",
	"\244\352", "\244\353", "\244\354", "\244\355", "\244\357", "\244\363", "\241\253", "\241\254" };
    /* Replacements used when the byte is followed by 0xde; "" = none. */
    static const unsigned char k2H_dtable[64][3] = {
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "\244\254", "\244\256",
	"\244\260", "\244\262", "\244\264", "\244\266", "\244\270", "\244\272", "\244\274", "\244\276",
	"\244\300", "\244\302", "\244\305", "\244\307", "\244\311", "",   "",   "",
	"",   "",   "\244\320", "\244\323", "\244\326", "\244\331", "\244\334", "",
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "",   "" };
    /* Replacements used when the byte is followed by 0xdf; "" = none. */
    static const unsigned char k2H_htable[64][3] = {
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "\244\321", "\244\324", "\244\327", "\244\332", "\244\335", "",
	"",   "",   "",   "",   "",   "",   "",   "",
	"",   "",   "",   "",   "",   "",   "",   "" };

    p = sjis;
    q = ujis;
    while ((c1 = *p) != '\0') {
	if (c1 == ',') {
	    *(q++) = ' ';
	} else if (c1 < 0x80) {
	    *(q++) = (unsigned char)c1;
	} else if ((0xa0 <= c1) && (c1 < 0xe0)) {
	    /* Half-width katakana, possibly followed by a sound mark. */
	    c2 = p[1];
	    kana = k2H_table[c1 - 0xa0];
	    if ((c2 == 0xde) && (k2H_dtable[c1 - 0xa0][0] != '\0')) {
		kana = k2H_dtable[c1 - 0xa0];
		++p;		/* the mark has been merged in */
	    } else if ((c2 == 0xdf) && (k2H_htable[c1 - 0xa0][0] != '\0')) {
		kana = k2H_htable[c1 - 0xa0];
		++p;		/* the mark has been merged in */
	    }
	    *(q++) = kana[0];
	    *(q++) = kana[1];
	} else {
	    /* Two-byte Shift_JIS character. */
	    c2 = p[1];
	    if (c2 == '\0') {
		/*
		 * Truncated character: the lead byte is the last byte of
		 * the string.  Stop here instead of swallowing the
		 * terminator and scanning past the end of the input.
		 */
		break;
	    }
	    if (c2 >= 0x9f) {
		/* Trail bytes 0x9f.. select the second row of the pair. */
		if (c1 >= 0xe0) o1 = c1*2 - 0xe0;
		else o1 = c1*2 - 0x60;
		o2 = c2 + 2;
	    } else {
		if (c1 >= 0xe0) o1 = c1*2 - 0xe1;
		else o1 = c1*2 - 0x61;
		/* Shift_JIS trail bytes skip 0x7f, hence the two offsets. */
		if (c2 >= 0x7f) o2 = c2 + 0x60;
		else o2 = c2 + 0x61;
	    }
	    *(q++) = (unsigned char)o1;
	    *(q++) = (unsigned char)o2;
	    ++p;
	}
	++p;
    }
    *q = '\0';
}
119 
/*
 * getkanji -- pull the KANJI text out of a `"<KANJI>":...' field.
 *
 *   kanji : output buffer, receives the NUL-terminated text.  A buffer as
 *           large as s (terminator included) is always sufficient.
 *   s     : field text.  Its first character (expected to be the opening
 *           '"') is skipped without being checked.
 *
 * Characters are copied up to, but not including, the next '"' or the end
 * of s, whichever comes first.  The result is always NUL-terminated, even
 * when the closing quote is missing or s is empty.
 */
static void
getkanji(kanji, s)
     char *kanji;
     char *s;
{
    if (*s != '\0')
	s++;			/* skip first '"' */
    while ((*s != '\0') && (*s != '"')) {
	*kanji++ = *s++;
    }
    /* Terminate unconditionally, not only when a second '"' was seen. */
    *kanji = '\0';
}
136 
/*
 * extract -- read one WX2 dictionary file and print its usable entries.
 *
 *   file_name : path of the file to read.  If it cannot be opened, the
 *               error is reported with perror() and the file is skipped.
 *
 * Each input line is converted with sjis2ujis() and split into two
 * whitespace-separated fields; the text between the double quotes of the
 * second field is taken with getkanji().  An entry is printed to stdout as
 * "<field1> <kanji>" only when isallkana(), isallzenkaku() and
 * includekanji() (declared in conv-util.h) all return non-zero.  Empty
 * lines and lines starting with '#' are ignored.
 *
 * NOTE: a line longer than LINE_SIZE - 1 bytes is delivered by fgets() in
 * several pieces, and each piece is processed as if it were a line.
 */
static void
extract(file_name)
     char *file_name;
{
    /*
     * sjis2ujis() may emit two bytes for every input byte (half-width
     * katakana), so the converted line and everything derived from it
     * need twice the room of the raw line.
     */
    enum { LINE_SIZE = 1024, CONV_SIZE = 2 * LINE_SIZE };
    FILE *fp;
    unsigned char sjis[LINE_SIZE];
    unsigned char ujis[CONV_SIZE];
    unsigned char f1[CONV_SIZE], f2[CONV_SIZE];
    unsigned char tmp[CONV_SIZE];

    if ((fp = fopen(file_name, "r")) == NULL) {
	perror(file_name);
	return;
    }

    while (fgets((char *)sjis, (int)sizeof sjis, fp) != NULL) {
	if ((sjis[0] == '\0') || (sjis[0] == '#')) continue;
	sjis2ujis(sjis, ujis);
	/* f1 and tmp are as large as ujis, so "%s" cannot overflow them. */
	if (sscanf((const char *)ujis, "%s%s", (char *)f1, (char *)tmp) != 2)
	    continue;
	getkanji((char *)f2, (char *)tmp);
	if (isallkana(f1) == 0) continue;
	if (isallzenkaku(f2) == 0) continue;
	if (includekanji(f2) == 0) continue;
	printf("%s %s\n", (char *)f1, (char *)f2);
    }

    fclose(fp);
}
164 
/*
 * main -- run extract() on every file named on the command line, in the
 * order given.  argv[0] is not processed.  Always returns 0.
 */
int
main(argc, argv)
     int argc;
     char **argv;
{
    /* Count the remaining arguments down while stepping through argv. */
    while (--argc > 0) {
	++argv;
	extract(*argv);
    }
    return 0;
}
176