1 /*
2 * $Id: dic_atojis.c,v 1.4 2002/07/14 04:26:57 hiroo Exp $
3 */
4
5 /*
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7 * This file is part of FreeWnn.
8 *
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
10 * 1987, 1988, 1989, 1990, 1991, 1992
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13 * Copyright FreeWnn Project 1999, 2000, 2002
14 *
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 */
31
32 #ifdef HAVE_CONFIG_H
33 # include <config.h>
34 #endif
35
36 #if STDC_HEADERS
37 # include <string.h>
38 #else
39 # if HAVE_STRINGS_H
40 # include <strings.h>
41 # endif
42 #endif /* STDC_HEADERS */
43
44 #include "commonhd.h"
45 #include "jslib.h"
46 #include "jdata.h"
47 #include "wnn_os.h"
48 #include "wnn_string.h"
49
/*
 * Flag shared between kanji_giji_str() and getketa()/getketa_o().
 *
 * kanji_giji_str() clears it before converting a number and sets it to 1
 * when it meets a '0' digit at a non-zero digit position that is a
 * multiple of 4.  getketa()/getketa_o() test it when asked for a position
 * that is NOT a multiple of 4: if it is set they first emit the unit of
 * the enclosing 4-digit group.  Both functions clear it before returning.
 */
static int keta_4;
51
/*
 * Lookup table used by atojis(): entry i is the replacement for the
 * character code ' ' + i.  There are 96 entries, i.e. codes 0x20..0x7f.
 * The last entry (for 0x7f) is 0x007f itself, so that code is unchanged.
 * NOTE(review): the other values look like EUC-JP codes of the
 * double-width forms of the corresponding ASCII characters -- confirm
 * against the character set the rest of the project assumes.
 */
static w_char _tatojis[] = {
  0xa1a1, 0xa1aa, 0xa1c9, 0xa1f4, 0xa1f0, 0xa1f3, 0xa1f5, 0xa1c7,
  0xa1ca, 0xa1cb, 0xa1f6, 0xa1dc, 0xa1a4, 0xa1dd, 0xa1a5, 0xa1bf,
  0xa3b0, 0xa3b1, 0xa3b2, 0xa3b3, 0xa3b4, 0xa3b5, 0xa3b6, 0xa3b7,
  0xa3b8, 0xa3b9, 0xa1a7, 0xa1a8, 0xa1e3, 0xa1e1, 0xa1e4, 0xa1a9,
  0xa1f7, 0xa3c1, 0xa3c2, 0xa3c3, 0xa3c4, 0xa3c5, 0xa3c6, 0xa3c7,
  0xa3c8, 0xa3c9, 0xa3ca, 0xa3cb, 0xa3cc, 0xa3cd, 0xa3ce, 0xa3cf,
  0xa3d0, 0xa3d1, 0xa3d2, 0xa3d3, 0xa3d4, 0xa3d5, 0xa3d6, 0xa3d7,
  0xa3d8, 0xa3d9, 0xa3da, 0xa1ce, 0xa1ef, 0xa1cf, 0xa1b0, 0xa1b2,
  0xa1ae, 0xa3e1, 0xa3e2, 0xa3e3, 0xa3e4, 0xa3e5, 0xa3e6, 0xa3e7,
  0xa3e8, 0xa3e9, 0xa3ea, 0xa3eb, 0xa3ec, 0xa3ed, 0xa3ee, 0xa3ef,
  0xa3f0, 0xa3f1, 0xa3f2, 0xa3f3, 0xa3f4, 0xa3f5, 0xa3f6, 0xa3f7,
  0xa3f8, 0xa3f9, 0xa3fa, 0xa1d0, 0xa1c3, 0xa1d1, 0xa1b1, 0x007f
};
66
67
68 static int
atojis(c)69 atojis (c)
70 register w_char c;
71 {
72 if (c >= ' ' && c <= (unsigned char) '\376')
73 c = _tatojis[c - ' '];
74 return (c);
75 }
76
/*
 * Digit table used by atokan(): entry d replaces the ASCII digit '0' + d.
 * Entry 0 is also what kanji_giji_str() emits when a number produced no
 * output at all.
 * NOTE(review): values look like EUC-JP codes of the ordinary kanji
 * numerals -- confirm.
 */
static w_char _tatokan[] = {
  0xa1bb, 0xb0ec, 0xc6f3, 0xbbb0, 0xbbcd,	/* 0, 1, 2, 3, 4 */
  0xb8de, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5	/* 5, 6, 7, 8, 9 */
};
81
/*
 * Digit table used by atokan_o(): entry d replaces the ASCII digit
 * '0' + d.  It differs from _tatokan[] in entries 0, 1, 2, 3 and 5; the
 * remaining entries are identical.
 * NOTE(review): presumably the old-style (formal) kanji numerals --
 * confirm.
 */
static w_char _tatokan_o[] = {
  0xceed, 0xb0ed, 0xc6f5, 0xbbb2, 0xbbcd,	/* 0, 1, 2, 3, 4 */
  0xb8e0, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5	/* 5, 6, 7, 8, 9 */
};
86
87 static w_char
atokan(c)88 atokan (c)
89 register w_char c;
90 {
91 if (c >= '0' && c <= '9')
92 c = _tatokan[c - '0'];
93 return (c);
94 }
95
96 static w_char
atokan_o(c)97 atokan_o (c)
98 register w_char c;
99 {
100 if (c >= '0' && c <= '9')
101 c = _tatokan_o[c - '0'];
102 return (c);
103 }
104
/*
 * Unit characters used by getketa(), where k is a decimal digit position
 * (0 = ones):
 *   entries 0..2   are selected by k % 4 == 1, 2, 3 (10, 100, 1000 within
 *                  a 4-digit group);
 *   entries 3..14  are selected by k / 4 == 1 .. 12 (10^4, 10^8, ...
 *                  10^48).
 * Units for k / 4 == 13 .. 17 consist of several characters and are
 * written out directly in getketa() instead of being stored here.
 * NOTE(review): 0xa4b7 lies in the 0xa4xx row, unlike its neighbours --
 * presumably a stand-in for a character missing from the character set;
 * confirm.
 */
static w_char _tatoket[] = {
  0xbdbd, 0xc9b4, 0xc0e9,	/* 10, 10^2, 10^3 */
  0xcbfc, 0xb2af, 0xc3fb, 0xb5fe,	/* 10^4, 10^8, 10^12, 10^16 */
  0xd4b6, 0xa4b7, 0xbef7, 0xb9c2,	/* 10^20, 10^24, 10^28, 10^32 */
  0xb4c2, 0xc0b5, 0xbadc, 0xb6cb	/* 10^36, 10^40, 10^44, 10^48 */
};
112
/*
 * Unit characters used by getketa_o():
 *   entries 0..2  are selected by k % 4 == 1, 2, 3 (10, 100, 1000 within
 *                 a 4-digit group);
 *   entry 3       is used when k / 4 == 1 (10^4).
 * All larger units are delegated to getketa() and _tatoket[].
 * NOTE(review): presumably the old-style (formal) forms of these units
 * -- confirm.
 */
static w_char _tatoket_o[] = {
  0xbda6, 0xeff9, 0xeff4,	/* 10, 10^2, 10^3 */
  0xe8df			/* 10^4 */
};
117
118 static void
getketa(k,kouho,col)119 getketa (k, kouho, col)
120 int k;
121 w_char kouho[];
122 int *col;
123 {
124 if (k % 4)
125 {
126 if (keta_4 != 0)
127 {
128 keta_4 = 0;
129 getketa (k / 4 * 4, kouho, col);
130 }
131 kouho[(*col)++] = _tatoket[k % 4 - 1];
132 }
133 else if ((k / 4 > 0) && (k / 4 < 13))
134 {
135 kouho[(*col)++] = _tatoket[k / 4 + 2];
136 }
137 else if (k / 4 == 13)
138 {
139 kouho[(*col)++] = 0xbabb;
140 kouho[(*col)++] = 0xb2cf;
141 kouho[(*col)++] = 0xb9b1; /* ���Ϻ� */
142 }
143 else if (k / 4 == 14)
144 {
145 kouho[(*col)++] = 0xb5c0;
146 kouho[(*col)++] = 0xc1ce;
147 kouho[(*col)++] = 0xb0a4; /* ���ε� */
148 }
149 else if (k / 4 == 15)
150 {
151 kouho[(*col)++] = 0xc2be;
152 kouho[(*col)++] = 0xcdb3;
153 kouho[(*col)++] = 0xc6e1; /* ��ͳ¾ */
154 }
155 else if (k / 4 == 16)
156 {
157 kouho[(*col)++] = 0xb5c4;
158 kouho[(*col)++] = 0xbbd7;
159 kouho[(*col)++] = 0xb2c4;
160 kouho[(*col)++] = 0xc9d4; /* �ԲĻ� */
161 }
162 else if (k / 4 == 17)
163 {
164 kouho[(*col)++] = 0xbff4;
165 kouho[(*col)++] = 0xc2e7;
166 kouho[(*col)++] = 0xcecc;
167 kouho[(*col)++] = 0xccb5; /* ̵����� */
168 }
169 else
170 {
171 kouho[(*col)++] = 0xa1a9; /* �� */
172 }
173 keta_4 = 0;
174 }
175
176 static void
getketa_o(k,kouho,col)177 getketa_o (k, kouho, col)
178 int k;
179 w_char kouho[];
180 int *col;
181 {
182 if (k % 4)
183 {
184 if (keta_4 != 0)
185 {
186 keta_4 = 0;
187 if (k / 4 == 1)
188 kouho[(*col)++] = _tatoket_o[3]; /* �� */
189 else
190 getketa (k / 4 * 4, kouho, col);
191 }
192 kouho[(*col)++] = _tatoket_o[k % 4 - 1];
193 }
194 else if (k / 4 == 1)
195 {
196 kouho[(*col)++] = _tatoket_o[3]; /* �� */
197 }
198 else
199 {
200 getketa (k, kouho, col);
201 }
202 keta_4 = 0;
203 }
204
205 int
substr(c1,c2)206 substr (c1, c2)
207 char *c1;
208 w_char *c2;
209 {
210 for (; *c1; c1++, c2++)
211 {
212 if (*c1 != *c2)
213 break;
214 }
215 if (*c1)
216 return (0);
217 return (1);
218 }
219
220 w_char *
kanji_giji_str(bun,bunl,c,kouho)221 kanji_giji_str (bun, bunl, c, kouho)
222 w_char *bun;
223 int bunl;
224 w_char *c;
225 w_char *kouho;
226 {
227 register int k;
228 w_char revkouho[LENGTHYOMI];
229 char tmp[LENGTHYOMI];
230 int col = 0, keta = 0;
231
232 if (substr (DIC_HIRAGANA, c))
233 {
234 for (k = bunl - 1; k >= 0; k--)
235 {
236 *kouho++ = bun[k];
237 }
238 }
239 else if (substr (DIC_KATAKANA, c))
240 { /* �������� */
241 for (k = bunl - 1; k >= 0; k--)
242 {
243 if ((bun[k] & 0xff00) == 0xa400)
244 *kouho++ = bun[k] | 0x100;
245 else
246 *kouho++ = bun[k];
247 }
248 }
249 else if (substr (DIC_ZENKAKU, c))
250 { /* ���ѿ��� *//* ������ */
251 for (k = bunl - 1; k >= 0; k--)
252 {
253 *kouho++ = atojis (bun[k]);
254 }
255 }
256 else if (substr (DIC_NUM_KAN, c))
257 { /* ������ *//* ���� */
258 for (k = bunl - 1; k >= 0; k--)
259 {
260 *kouho++ = atokan (bun[k]);
261 }
262 }
263 else if (substr (DIC_NUM_HANCAN, c))
264 { /* Ⱦ�ѿ��� *//* 1,234 */
265 for (k = 0; k < bunl; k++)
266 {
267 if ((keta != 0) && (keta % 3 == 0))
268 revkouho[col++] = ','; /* , */
269 revkouho[col++] = bun[k];
270 keta++;
271 }
272 while (--col >= 0)
273 {
274 *kouho++ = revkouho[col];
275 }
276 }
277 else if (substr (DIC_NUM_ZENCAN, c))
278 { /* ���ѿ��� *//* ���������� */
279 for (k = 0; k < bunl; k++)
280 {
281 if ((keta != 0) && (keta % 3 == 0))
282 revkouho[col++] = 0xa1a4; /* �� */
283 revkouho[col++] = atojis (bun[k]);
284 keta++;
285 }
286 while (--col >= 0)
287 {
288 *kouho++ = revkouho[col];
289 }
290 }
291 else if (substr (DIC_NUM_KANSUUJI, c))
292 { /* ������ *//* ɴ�� */
293 keta_4 = 0;
294 for (k = 0; k < bunl; k++)
295 {
296 if (bun[k] != '0')
297 {
298 if (keta != 0)
299 getketa (keta, revkouho, &col);
300 if (bun[k] != '1' || keta % 4 == 0 || keta % 4 == 3)
301 revkouho[col++] = atokan (bun[k]);
302 }
303 else if ((keta != 0) && (keta % 4 == 0))
304 {
305 keta_4 = 1;
306 }
307 keta++;
308 }
309 if (col == 0)
310 *kouho++ = _tatokan[0];
311 while (--col >= 0)
312 {
313 *kouho++ = revkouho[col];
314 }
315 }
316 else if (substr (DIC_NUM_KANOLD, c))
317 { /* ������ *//* ��ɴ������ chao */
318 keta_4 = 0;
319 for (k = 0; k < bunl; k++)
320 {
321 if (bun[k] != '0')
322 {
323 if (keta != 0)
324 getketa_o (keta, revkouho, &col);
325 if (bun[k] != '1' || keta % 4 == 0 || keta % 4 == 3)
326 revkouho[col++] = atokan_o (bun[k]);
327 }
328 else if ((keta != 0) && (keta % 4 == 0))
329 {
330 keta_4 = 1;
331 }
332 keta++;
333 }
334 if (col == 0)
335 *kouho++ = _tatokan_o[0];
336 while (--col >= 0)
337 {
338 *kouho++ = revkouho[col];
339 }
340 }
341 else if (substr (DIC_ESC, c))
342 {
343 *kouho++ = '\\';
344 }
345 else if (substr (DIC_HEX, c) || substr (DIC_HEXc, c))
346 {
347 int num;
348 wnn_sStrcpy (tmp, c + strlen (DIC_HEX));
349 sscanf (tmp, "%x", &num);
350 *kouho++ = num;
351 }
352 else if (substr (DIC_OCT, c))
353 {
354 int num;
355 wnn_sStrcpy (tmp, c + strlen (DIC_OCT));
356 sscanf (tmp, "%o", &num);
357 *kouho++ = num;
358 }
359 else
360 {
361 *kouho++ = *c++;
362 for (; *c && *c != '\\'; c++)
363 {
364 *kouho++ = *c;
365 }
366 if (*c == '\\')
367 *kouho++ = *c;
368 }
369 *kouho = 0;
370 return (kouho);
371 }
372
373
374 void
kanji_esc_str(w,oy,oyl)375 kanji_esc_str (w, oy, oyl)
376 w_char *w, *oy;
377 int oyl;
378 {
379 register w_char *ret;
380 register w_char *c = w;
381 w_char tmp[LENGTHKANJI];
382
383 for (; *c; c++)
384 {
385 if (*c == '\\')
386 break;
387 }
388 if (!*c)
389 return;
390 wnn_Strcpy (tmp, w);
391 c = tmp;
392 while (*c)
393 {
394 if (*c == '\\' && (ret = kanji_giji_str (oy, oyl, c, w)) != NULL)
395 {
396 w = ret;
397 for (++c; *c; c++)
398 {
399 if (*c == '\\')
400 {
401 c++;
402 break;
403 }
404 }
405 }
406 else
407 {
408 *w++ = *c++;
409 }
410 }
411 *w = 0;
412 }
413
414 void
Get_kanji(kptr,oy,oyl,kanji,yomi,comment)415 Get_kanji (kptr, oy, oyl, kanji, yomi, comment)
416 UCHAR *kptr;
417 w_char *kanji, *comment, *yomi, *oy;
418 int oyl;
419 {
420 extern void get_kanji_str ();
421
422 get_kanji_str (kptr, kanji, yomi, comment);
423 if (kanji)
424 {
425 kanji_esc_str (kanji, oy, oyl);
426 }
427 if (yomi)
428 {
429 kanji_esc_str (yomi, oy, oyl);
430 }
431 }
432
433
434 void
Get_knj1(kptr,oy,oyl,kanji2,kouho,yomi,comment)435 Get_knj1 (kptr, oy, oyl, kanji2, kouho, yomi, comment)
436 UCHAR *kptr;
437 int kanji2;
438 w_char *kouho, *comment, *yomi, *oy;
439 int oyl;
440 {
441 int tcnt;
442 for (tcnt = 0; tcnt < kanji2; tcnt++)
443 {
444 kptr += *kptr;
445 }
446 Get_kanji (kptr, oy, oyl, kouho, yomi, comment);
447 return;
448 }
449