1 /*
2 * wx2-conv.c -- WX2 -> KAKASI dictionary converter
3 * $Id: wx2-conv.c,v 1.4 2007-10-23 05:25:56 knok Exp $
4 * Copyright(c) 1996, Hajime BABA,
5 * Department of Astronomy, Kyoto University,
6 * KYOTO, Japan, 606-01.
7 * E-mail: < baba@kusastro.kyoto-u.ac.jp >
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either versions 2, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with KAKASI, see the file COPYING. If not, write to the Free
21 * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
22 * 02111-1307, USA.
23 */
24
25 /*
26 * [IKKATUTOUROKU:TOUROKU file] format of
27 * WX2 series.
28 * <YOMI><tab>"<KANJI>":<HINSHI><FYKUKAI>
29 */
30
31 #ifdef HAVE_CONFIG_H
32 # include <config.h>
33 #endif
34
35 #include <stdio.h>
36 #ifdef HAVE_STRING_H
37 # include <string.h>
38 #else
39 # include <strings.h>
40 #endif
41 #include "conv-util.h"
42
/*
 * sjis2ujis -- convert a NUL-terminated Shift_JIS string to EUC-JP (UJIS).
 *
 *   sjis   input string, NUL-terminated
 *   ujis   output buffer, receives the NUL-terminated result
 *
 * Per input byte:
 *   - ','                   is written as ' '
 *   - other bytes < 0x80    are copied unchanged
 *   - bytes 0xa0..0xdf      (single-byte kana) are replaced by the two-byte
 *                           code from k2H_table; a following 0xde or 0xdf
 *                           mark is folded into the base character when
 *                           k2H_dtable / k2H_htable has a combined form
 *   - any other byte        is treated as the lead byte of a two-byte
 *                           character and remapped with the byte after it
 *
 * No output size is passed in.  Every single-byte kana grows to two bytes,
 * so the caller must provide room for 2 * strlen(sjis) + 1 bytes.
 *
 * A lead byte that is immediately followed by the terminating NUL (a
 * truncated character) is dropped and conversion stops there; consuming it
 * as a pair would step over the terminator and read past the input.
 */
static void
sjis2ujis(sjis, ujis)
     unsigned char *sjis;
     unsigned char *ujis;
{
    /* Plain replacement for each single-byte kana, indexed by byte - 0xa0. */
    static const unsigned char k2H_table[64][3] = {
	"\241\241", "\241\243", "\241\326", "\241\327", "\241\242", "\241\245", "\244\362", "\244\241",
	"\244\243", "\244\245", "\244\247", "\244\251", "\244\343", "\244\345", "\244\347", "\244\303",
	"\241\274", "\244\242", "\244\244", "\244\246", "\244\250", "\244\252", "\244\253", "\244\255",
	"\244\257", "\244\261", "\244\263", "\244\265", "\244\267", "\244\271", "\244\273", "\244\275",
	"\244\277", "\244\301", "\244\304", "\244\306", "\244\310", "\244\312", "\244\313", "\244\314",
	"\244\315", "\244\316", "\244\317", "\244\322", "\244\325", "\244\330", "\244\333", "\244\336",
	"\244\337", "\244\340", "\244\341", "\244\342", "\244\344", "\244\346", "\244\350", "\244\351",
	"\244\352", "\244\353", "\244\354", "\244\355", "\244\357", "\244\363", "\241\253", "\241\254" };
    /* Combined form used when the kana is followed by 0xde; "" = none. */
    static const unsigned char k2H_dtable[64][3] = {
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "\244\254", "\244\256",
	"\244\260", "\244\262", "\244\264", "\244\266", "\244\270", "\244\272", "\244\274", "\244\276",
	"\244\300", "\244\302", "\244\305", "\244\307", "\244\311", "", "", "",
	"", "", "\244\320", "\244\323", "\244\326", "\244\331", "\244\334", "",
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "", "" };
    /* Combined form used when the kana is followed by 0xdf; "" = none. */
    static const unsigned char k2H_htable[64][3] = {
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "", "",
	"", "", "\244\321", "\244\324", "\244\327", "\244\332", "\244\335", "",
	"", "", "", "", "", "", "", "",
	"", "", "", "", "", "", "", "" };

    const unsigned char *src = sjis;
    unsigned char *dst = ujis;
    int c1, c2, o1, o2;

    while ((c1 = *src) != '\0') {
	if (c1 == ',') {
	    *dst++ = ' ';
	} else if (c1 < 0x80) {
	    *dst++ = c1;
	} else if (c1 >= 0xa0 && c1 < 0xe0) {
	    /* Single-byte kana; look ahead for a combining mark. */
	    const unsigned char *kana = k2H_table[c1 - 0xa0];

	    c2 = src[1];
	    if (c2 == 0xde && k2H_dtable[c1 - 0xa0][0] != '\0') {
		kana = k2H_dtable[c1 - 0xa0];
		++src;		/* the mark has been consumed */
	    } else if (c2 == 0xdf && k2H_htable[c1 - 0xa0][0] != '\0') {
		kana = k2H_htable[c1 - 0xa0];
		++src;		/* the mark has been consumed */
	    }
	    *dst++ = kana[0];
	    *dst++ = kana[1];
	} else {
	    /* Two-byte character: c1 is the lead byte, c2 the trail byte. */
	    c2 = src[1];
	    if (c2 == '\0') {
		/* Truncated character: never step over the terminator. */
		break;
	    }
	    if (c2 >= 0x9f) {
		o1 = (c1 >= 0xe0) ? c1 * 2 - 0xe0 : c1 * 2 - 0x60;
		o2 = c2 + 2;
	    } else {
		o1 = (c1 >= 0xe0) ? c1 * 2 - 0xe1 : c1 * 2 - 0x61;
		o2 = (c2 >= 0x7f) ? c2 + 0x60 : c2 + 0x61;
	    }
	    *dst++ = o1;
	    *dst++ = o2;
	    ++src;		/* skip the trail byte */
	}
	++src;
    }
    *dst = '\0';
}
119
/*
 * getkanji -- copy the text between the first character of s and the next
 * '"' into kanji.
 *
 *   kanji  output buffer; always NUL-terminated on return.  It must be able
 *          to hold strlen(s) bytes (no size is passed in).
 *   s      field of the form "<KANJI>":... ; its first character (the
 *          opening '"') is skipped without being checked.
 *
 * If no closing '"' is found, everything after the first character is
 * copied and the result is still terminated.  An empty s yields an empty
 * result.
 *
 * Kept as an old-style definition on purpose: the caller in this file
 * passes unsigned char buffers, which a prototype would turn into a
 * pointer-signedness diagnostic.
 */
static void
getkanji(kanji, s)
     char *kanji;
     char *s;
{
    if (*s != '\0') {
	s++;			/* skip first '"' */
    }
    while (*s != '\0' && *s != '"') {
	*kanji++ = *s++;
    }
    *kanji = '\0';		/* terminate whether or not a second '"' was seen */
}
136
/*
 * extract -- read one WX2 registration file and print the usable entries.
 *
 *   file_name  path of the file to read; if it cannot be opened, the error
 *              is reported with perror() and the file is skipped.
 *
 * Each line is converted with sjis2ujis() and split into two
 * whitespace-separated fields.  The second field is reduced to the text
 * inside its double quotes by getkanji().  A line is printed to stdout as
 * "<field1> <kanji>" only if isallkana(), isallzenkaku() and includekanji()
 * all return non-zero for it.  Empty lines, lines starting with '#' and
 * lines with fewer than two fields are skipped.
 */
static void
extract(file_name)
     char *file_name;
{
    FILE *fp;
    unsigned char sjis[1024];
    /*
     * sjis2ujis() writes two bytes for every single-byte kana, so its
     * output can be up to twice as long as the input line.
     */
    unsigned char ujis[2 * sizeof sjis];
    /*
     * The unbounded %s conversions below are safe because no token can be
     * longer than the converted line itself.
     */
    unsigned char f1[sizeof ujis], f2[sizeof ujis];
    unsigned char tmp[sizeof ujis];

    if ((fp = fopen(file_name, "r")) == NULL) {
	perror(file_name);
	return;
    }

    while (fgets((char *)sjis, (int)sizeof sjis, fp) != NULL) {
	if ((sjis[0] == '\0') || (sjis[0] == '#')) {
	    continue;
	}
	sjis2ujis(sjis, ujis);
	if (sscanf((const char *)ujis, "%s%s", f1, tmp) != 2) {
	    continue;
	}
	getkanji(f2, tmp);
	if (isallkana(f1) == 0) {
	    continue;
	}
	if (isallzenkaku(f2) == 0) {
	    continue;
	}
	if (includekanji(f2) == 0) {
	    continue;
	}
	printf("%s %s\n", f1, f2);
    }

    fclose(fp);
}
164
/*
 * main -- run extract() on every command-line argument, in the order
 * given.  Always returns 0.
 */
int
main(argc, argv)
     int argc;
     char **argv;
{
    int idx = 1;		/* argv[0] is not processed */

    while (idx < argc) {
	extract(argv[idx]);
	idx++;
    }
    return 0;
}
176