1 /*
2  *  $Id: dic_atojis.c,v 1.4 2002/07/14 04:26:57 hiroo Exp $
3  */
4 
5 /*
6  * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7  * This file is part of FreeWnn.
8  *
9  * Copyright Kyoto University Research Institute for Mathematical Sciences
10  *                 1987, 1988, 1989, 1990, 1991, 1992
11  * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12  * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13  * Copyright FreeWnn Project 1999, 2000, 2002
14  *
15  * Maintainer:  FreeWnn Project   <freewnn@tomo.gr.jp>
16  *
17  * This program is free software; you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation; either version 2 of the License, or
20  * (at your option) any later version.
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30  */
31 
32 #ifdef HAVE_CONFIG_H
33 #  include <config.h>
34 #endif
35 
36 #if STDC_HEADERS
37 #  include <string.h>
38 #else
39 #  if HAVE_STRINGS_H
40 #    include <strings.h>
41 #  endif
42 #endif /* STDC_HEADERS */
43 
44 #include "commonhd.h"
45 #include "jslib.h"
46 #include "jdata.h"
47 #include "wnn_os.h"
48 #include "wnn_string.h"
49 
50 static int keta_4;
51 
52 static w_char _tatojis[] = {
53   0xa1a1, 0xa1aa, 0xa1c9, 0xa1f4, 0xa1f0, 0xa1f3, 0xa1f5, 0xa1c7,
54   0xa1ca, 0xa1cb, 0xa1f6, 0xa1dc, 0xa1a4, 0xa1dd, 0xa1a5, 0xa1bf,
55   0xa3b0, 0xa3b1, 0xa3b2, 0xa3b3, 0xa3b4, 0xa3b5, 0xa3b6, 0xa3b7,
56   0xa3b8, 0xa3b9, 0xa1a7, 0xa1a8, 0xa1e3, 0xa1e1, 0xa1e4, 0xa1a9,
57   0xa1f7, 0xa3c1, 0xa3c2, 0xa3c3, 0xa3c4, 0xa3c5, 0xa3c6, 0xa3c7,
58   0xa3c8, 0xa3c9, 0xa3ca, 0xa3cb, 0xa3cc, 0xa3cd, 0xa3ce, 0xa3cf,
59   0xa3d0, 0xa3d1, 0xa3d2, 0xa3d3, 0xa3d4, 0xa3d5, 0xa3d6, 0xa3d7,
60   0xa3d8, 0xa3d9, 0xa3da, 0xa1ce, 0xa1ef, 0xa1cf, 0xa1b0, 0xa1b2,
61   0xa1ae, 0xa3e1, 0xa3e2, 0xa3e3, 0xa3e4, 0xa3e5, 0xa3e6, 0xa3e7,
62   0xa3e8, 0xa3e9, 0xa3ea, 0xa3eb, 0xa3ec, 0xa3ed, 0xa3ee, 0xa3ef,
63   0xa3f0, 0xa3f1, 0xa3f2, 0xa3f3, 0xa3f4, 0xa3f5, 0xa3f6, 0xa3f7,
64   0xa3f8, 0xa3f9, 0xa3fa, 0xa1d0, 0xa1c3, 0xa1d1, 0xa1b1, 0x007f
65 };
66 
67 
68 static int
atojis(c)69 atojis (c)
70      register w_char c;
71 {
72   if (c >= ' ' && c <= (unsigned char) '\376')
73     c = _tatojis[c - ' '];
74   return (c);
75 }
76 
77 static w_char _tatokan[] = {
78   0xa1bb, 0xb0ec, 0xc6f3, 0xbbb0, 0xbbcd,       /* �������� */
79   0xb8de, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5        /* ��ϻ��Ȭ�� */
80 };
81 
82 static w_char _tatokan_o[] = {
83   0xceed, 0xb0ed, 0xc6f5, 0xbbb2, 0xbbcd,       /* ��������� */
84   0xb8e0, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5        /* ��ϻ��Ȭ�� */
85 };
86 
87 static w_char
atokan(c)88 atokan (c)
89      register w_char c;
90 {
91   if (c >= '0' && c <= '9')
92     c = _tatokan[c - '0'];
93   return (c);
94 }
95 
96 static w_char
atokan_o(c)97 atokan_o (c)
98      register w_char c;
99 {
100   if (c >= '0' && c <= '9')
101     c = _tatokan_o[c - '0'];
102   return (c);
103 }
104 
105 static w_char _tatoket[] = {
106   0xbdbd, 0xc9b4, 0xc0e9,       /* ��ɴ�� */
107   0xcbfc, 0xb2af, 0xc3fb, 0xb5fe,       /* �������� */
108   0xd4b6, 0xa4b7, 0xbef7, 0xb9c2,       /* Զ������ */
109   0xb4c2, 0xc0b5, 0xbadc, 0xb6cb        /* �����ܶ� */
110 /* ���Ϻ� *//* ���ε� *//* ��ͳ¾ *//* �ԲĻ׵� *//* ̵����� */
111 };
112 
113 static w_char _tatoket_o[] = {
114   0xbda6, 0xeff9, 0xeff4,       /* ������ */
115   0xe8df                        /* �� */
116 };
117 
118 static void
getketa(k,kouho,col)119 getketa (k, kouho, col)
120      int k;
121      w_char kouho[];
122      int *col;
123 {
124   if (k % 4)
125     {
126       if (keta_4 != 0)
127         {
128           keta_4 = 0;
129           getketa (k / 4 * 4, kouho, col);
130         }
131       kouho[(*col)++] = _tatoket[k % 4 - 1];
132     }
133   else if ((k / 4 > 0) && (k / 4 < 13))
134     {
135       kouho[(*col)++] = _tatoket[k / 4 + 2];
136     }
137   else if (k / 4 == 13)
138     {
139       kouho[(*col)++] = 0xbabb;
140       kouho[(*col)++] = 0xb2cf;
141       kouho[(*col)++] = 0xb9b1; /* ���Ϻ� */
142     }
143   else if (k / 4 == 14)
144     {
145       kouho[(*col)++] = 0xb5c0;
146       kouho[(*col)++] = 0xc1ce;
147       kouho[(*col)++] = 0xb0a4; /* ���ε� */
148     }
149   else if (k / 4 == 15)
150     {
151       kouho[(*col)++] = 0xc2be;
152       kouho[(*col)++] = 0xcdb3;
153       kouho[(*col)++] = 0xc6e1; /* ��ͳ¾ */
154     }
155   else if (k / 4 == 16)
156     {
157       kouho[(*col)++] = 0xb5c4;
158       kouho[(*col)++] = 0xbbd7;
159       kouho[(*col)++] = 0xb2c4;
160       kouho[(*col)++] = 0xc9d4; /* �ԲĻ׵� */
161     }
162   else if (k / 4 == 17)
163     {
164       kouho[(*col)++] = 0xbff4;
165       kouho[(*col)++] = 0xc2e7;
166       kouho[(*col)++] = 0xcecc;
167       kouho[(*col)++] = 0xccb5; /* ̵����� */
168     }
169   else
170     {
171       kouho[(*col)++] = 0xa1a9; /* �� */
172     }
173   keta_4 = 0;
174 }
175 
176 static void
getketa_o(k,kouho,col)177 getketa_o (k, kouho, col)
178      int k;
179      w_char kouho[];
180      int *col;
181 {
182   if (k % 4)
183     {
184       if (keta_4 != 0)
185         {
186           keta_4 = 0;
187           if (k / 4 == 1)
188             kouho[(*col)++] = _tatoket_o[3];    /* �� */
189           else
190             getketa (k / 4 * 4, kouho, col);
191         }
192       kouho[(*col)++] = _tatoket_o[k % 4 - 1];
193     }
194   else if (k / 4 == 1)
195     {
196       kouho[(*col)++] = _tatoket_o[3];  /* �� */
197     }
198   else
199     {
200       getketa (k, kouho, col);
201     }
202   keta_4 = 0;
203 }
204 
205 int
substr(c1,c2)206 substr (c1, c2)
207      char *c1;
208      w_char *c2;
209 {
210   for (; *c1; c1++, c2++)
211     {
212       if (*c1 != *c2)
213         break;
214     }
215   if (*c1)
216     return (0);
217   return (1);
218 }
219 
220 w_char *
kanji_giji_str(bun,bunl,c,kouho)221 kanji_giji_str (bun, bunl, c, kouho)
222      w_char *bun;
223      int bunl;
224      w_char *c;
225      w_char *kouho;
226 {
227   register int k;
228   w_char revkouho[LENGTHYOMI];
229   char tmp[LENGTHYOMI];
230   int col = 0, keta = 0;
231 
232   if (substr (DIC_HIRAGANA, c))
233     {
234       for (k = bunl - 1; k >= 0; k--)
235         {
236           *kouho++ = bun[k];
237         }
238     }
239   else if (substr (DIC_KATAKANA, c))
240     {                           /* �������� */
241       for (k = bunl - 1; k >= 0; k--)
242         {
243           if ((bun[k] & 0xff00) == 0xa400)
244             *kouho++ = bun[k] | 0x100;
245           else
246             *kouho++ = bun[k];
247         }
248     }
249   else if (substr (DIC_ZENKAKU, c))
250     {                           /* ���ѿ��� *//* ������ */
251       for (k = bunl - 1; k >= 0; k--)
252         {
253           *kouho++ = atojis (bun[k]);
254         }
255     }
256   else if (substr (DIC_NUM_KAN, c))
257     {                           /* ������ *//* ���� */
258       for (k = bunl - 1; k >= 0; k--)
259         {
260           *kouho++ = atokan (bun[k]);
261         }
262     }
263   else if (substr (DIC_NUM_HANCAN, c))
264     {                           /* Ⱦ�ѿ��� *//* 1,234 */
265       for (k = 0; k < bunl; k++)
266         {
267           if ((keta != 0) && (keta % 3 == 0))
268             revkouho[col++] = ',';      /* , */
269           revkouho[col++] = bun[k];
270           keta++;
271         }
272       while (--col >= 0)
273         {
274           *kouho++ = revkouho[col];
275         }
276     }
277   else if (substr (DIC_NUM_ZENCAN, c))
278     {                           /* ���ѿ��� *//* ���������� */
279       for (k = 0; k < bunl; k++)
280         {
281           if ((keta != 0) && (keta % 3 == 0))
282             revkouho[col++] = 0xa1a4;   /* �� */
283           revkouho[col++] = atojis (bun[k]);
284           keta++;
285         }
286       while (--col >= 0)
287         {
288           *kouho++ = revkouho[col];
289         }
290     }
291   else if (substr (DIC_NUM_KANSUUJI, c))
292     {                           /* ������ *//* ɴ���� */
293       keta_4 = 0;
294       for (k = 0; k < bunl; k++)
295         {
296           if (bun[k] != '0')
297             {
298               if (keta != 0)
299                 getketa (keta, revkouho, &col);
300               if (bun[k] != '1' || keta % 4 == 0 || keta % 4 == 3)
301                 revkouho[col++] = atokan (bun[k]);
302             }
303           else if ((keta != 0) && (keta % 4 == 0))
304             {
305               keta_4 = 1;
306             }
307           keta++;
308         }
309       if (col == 0)
310         *kouho++ = _tatokan[0];
311       while (--col >= 0)
312         {
313           *kouho++ = revkouho[col];
314         }
315     }
316   else if (substr (DIC_NUM_KANOLD, c))
317     {                           /* ������ *//* ��ɴ������ chao */
318       keta_4 = 0;
319       for (k = 0; k < bunl; k++)
320         {
321           if (bun[k] != '0')
322             {
323               if (keta != 0)
324                 getketa_o (keta, revkouho, &col);
325               if (bun[k] != '1' || keta % 4 == 0 || keta % 4 == 3)
326                 revkouho[col++] = atokan_o (bun[k]);
327             }
328           else if ((keta != 0) && (keta % 4 == 0))
329             {
330               keta_4 = 1;
331             }
332           keta++;
333         }
334       if (col == 0)
335         *kouho++ = _tatokan_o[0];
336       while (--col >= 0)
337         {
338           *kouho++ = revkouho[col];
339         }
340     }
341   else if (substr (DIC_ESC, c))
342     {
343       *kouho++ = '\\';
344     }
345   else if (substr (DIC_HEX, c) || substr (DIC_HEXc, c))
346     {
347       int num;
348       wnn_sStrcpy (tmp, c + strlen (DIC_HEX));
349       sscanf (tmp, "%x", &num);
350       *kouho++ = num;
351     }
352   else if (substr (DIC_OCT, c))
353     {
354       int num;
355       wnn_sStrcpy (tmp, c + strlen (DIC_OCT));
356       sscanf (tmp, "%o", &num);
357       *kouho++ = num;
358     }
359   else
360     {
361       *kouho++ = *c++;
362       for (; *c && *c != '\\'; c++)
363         {
364           *kouho++ = *c;
365         }
366       if (*c == '\\')
367         *kouho++ = *c;
368     }
369   *kouho = 0;
370   return (kouho);
371 }
372 
373 
374 void
kanji_esc_str(w,oy,oyl)375 kanji_esc_str (w, oy, oyl)
376      w_char *w, *oy;
377      int oyl;
378 {
379   register w_char *ret;
380   register w_char *c = w;
381   w_char tmp[LENGTHKANJI];
382 
383   for (; *c; c++)
384     {
385       if (*c == '\\')
386         break;
387     }
388   if (!*c)
389     return;
390   wnn_Strcpy (tmp, w);
391   c = tmp;
392   while (*c)
393     {
394       if (*c == '\\' && (ret = kanji_giji_str (oy, oyl, c, w)) != NULL)
395         {
396           w = ret;
397           for (++c; *c; c++)
398             {
399               if (*c == '\\')
400                 {
401                   c++;
402                   break;
403                 }
404             }
405         }
406       else
407         {
408           *w++ = *c++;
409         }
410     }
411   *w = 0;
412 }
413 
414 void
Get_kanji(kptr,oy,oyl,kanji,yomi,comment)415 Get_kanji (kptr, oy, oyl, kanji, yomi, comment)
416      UCHAR *kptr;
417      w_char *kanji, *comment, *yomi, *oy;
418      int oyl;
419 {
420   extern void get_kanji_str ();
421 
422   get_kanji_str (kptr, kanji, yomi, comment);
423   if (kanji)
424     {
425       kanji_esc_str (kanji, oy, oyl);
426     }
427   if (yomi)
428     {
429       kanji_esc_str (yomi, oy, oyl);
430     }
431 }
432 
433 
434 void
Get_knj1(kptr,oy,oyl,kanji2,kouho,yomi,comment)435 Get_knj1 (kptr, oy, oyl, kanji2, kouho, yomi, comment)
436      UCHAR *kptr;
437      int kanji2;
438      w_char *kouho, *comment, *yomi, *oy;
439      int oyl;
440 {
441   int tcnt;
442   for (tcnt = 0; tcnt < kanji2; tcnt++)
443     {
444       kptr += *kptr;
445     }
446   Get_kanji (kptr, oy, oyl, kouho, yomi, comment);
447   return;
448 }
449