1 /*
2  *  $Id: gethinsi.c,v 1.6 2002/03/24 01:25:13 hiroo Exp $
3  */
4 
5 /*
6  * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7  * This file is part of FreeWnn.
8  *
9  * Copyright Kyoto University Research Institute for Mathematical Sciences
10  *                 1987, 1988, 1989, 1990, 1991, 1992
11  * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12  * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13  * Copyright FreeWnn Project 1999, 2000, 2002
14  *
15  * Maintainer:  FreeWnn Project   <freewnn@tomo.gr.jp>
16  *
17  * This program is free software; you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation; either version 2 of the License, or
20  * (at your option) any later version.
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30  */
31 
/*  Definitions concerning the structure of the hinsi (part-of-speech) file  */
33 
34 #ifdef HAVE_CONFIG_H
35 #  include <config.h>
36 #endif
37 
38 #include <stdio.h>
39 #if STDC_HEADERS
40 #  include <string.h>
41 #elif HAVE_STRINGS_H
42 #  include <strings.h>
43 #endif /* STDC_HEADERS */
44 #include "commonhd.h"
45 #include "wnn_config.h"
46 #include "wnnerror.h"
47 #include "jslib.h"
48 #include "hinsi_file.h"
49 #include "wnn_os.h"
50 #include "wnn_string.h"
51 
    /*
       wnn_loadhinsi(NULL)                       Read in the hinsi (part-of-speech) information.

       wnn_find_hinsi_by_name(c)                 Given a name, get the hinsi number.
       char *c;

       char *wnn_get_hinsi_name(k)       Get the name from a hinsi number.
       int k;

       int
       wnn_get_fukugou_component(k,str, )  Get the components of a compound (fukugou) hinsi.
       int k;                         The number of components is the return value,
       unsigned short **str;          and the components are returned from str onward.

       #ifdef nodef
       int wnn_get_hinsi_list(area)  Get the list of hinsi.
       The hinsi are managed as an array of hinsi names.
       The start address of the array is stored in area and the size of the array is returned.
       w_char ***area;

       int wnn_get_fukugou_list(area, start) Get the list of compound hinsi.
       The compound hinsi are managed as an array of compound-hinsi structures.
       The start address of the array is stored in area and the size of the array is returned.
       The hinsi number of the n-th compound hinsi is FUKUGOU_START - n.
       The value of FUKUGOU_START is stored in start.
       struct wnn_fukugou **area;
       int *start;
       int wnn_hinsi_node_component(name, area)
       w_char **area;
       w_char *name;
       From a hinsi node name, get the sequence of names of its child nodes.
       The number of children is returned.
       If the name is not a node but a real hinsi name (a leaf), 0 is returned.
       If it is neither a node nor a hinsi name, -1 is returned.
       #endif
     */
88 
89 extern int wnn_errorno;
90 
91 #ifdef JSERVER
92 /* must be #include "de_header.h" ? */
93 extern void log_debug ();
94 #define error1 log_debug
95 #endif
96 
97 /*
98  *here start the real program
99  */
100 
101 
102 int wnnerror_hinsi;
103 
104 static int hinsi_loaded = 0;
105 
106 static int line_no = 0;
107 
108 static w_char heap[HEAP_LEN];
109 static w_char *hp = heap;
110 
111 static unsigned short wheap[WHEAP_LEN];
112 static unsigned short *whp = wheap;
113 
114 #define SIZE 1024
115 
116 static w_char *hinsi[MAXHINSI];
117 static struct wnn_fukugou fukugou[MAXFUKUGOU];
118 static struct wnn_hinsi_node node[MAXNODE];
119 
120 int mhinsi = 0;
121 int mfukugou = 0;
122 int mnode = 0;
123 
124 static void
error_long()125 error_long ()
126 {
127   wnnerror_hinsi = WNN_TOO_LONG_HINSI_FILE_LINE;
128 }
129 
130 static void
error_no_heap()131 error_no_heap ()
132 {
133   wnnerror_hinsi = WNN_TOO_BIG_HINSI_FILE;
134 }
135 
136 static int
get_char0(fp)137 get_char0 (fp)
138      FILE *fp;
139 {
140   int c, d;
141 
142   for (; (c = getc (fp)) == COMMENT_CHAR || c == CONTINUE_CHAR || c == IGNORE_CHAR1 || c == IGNORE_CHAR2;)
143     {
144       if (c == CONTINUE_CHAR)
145         {
146           if ((d = getc (fp)) == EOF)
147             {
148               break;
149             }
150           if (d == '\n')
151             {
152               line_no += 1;
153               continue;
154             }
155           else
156             {
157               ungetc (d, fp);
158               break;
159             }
160         }
161       else if (c == COMMENT_CHAR)
162         {
163           for (;;)
164             {
165               if ((c = getc (fp)) == EOF)
166                 {
167                   return (EOF);
168                 }
169               if (c == '\n')
170                 {
171                   ungetc (c, fp);
172                   line_no += 1;
173                   break;
174                 }
175             }
176         }
177     }
178   if (c == '\n')
179     line_no += 1;
180   return (c);
181 }
182 
/*
 * Read one character through get_char0(), removing empty lines:
 * newlines at the very beginning of the input are dropped, and every run
 * of consecutive newlines is delivered as a single newline.
 *
 * To collapse a run, the first character after it is read ahead and kept
 * in a static variable until the next call.  Both the look-ahead and the
 * "leading newlines already skipped" flag are static, so they persist
 * across calls for the lifetime of the program.
 */
static int
get_char (fp)                   /* remove null lines */
     FILE *fp;
{
  static int pending = -1;      /* look-ahead character, -1 when there is none */
  static int started = 0;       /* nonzero once the leading newlines were skipped */
  int ch;

  if (pending != -1)
    {
      ch = pending;
      pending = -1;
      return (ch);
    }

  ch = get_char0 (fp);
  if (!started)
    {
      /* remove all new lines in the head of the file */
      while (ch == '\n')
        {
          ch = get_char0 (fp);
        }
      started = 1;
    }
  if (ch == '\n')
    {
      /* swallow the rest of the run and remember what follows it */
      do
        {
          pending = get_char0 (fp);
        }
      while (pending == '\n');
    }
  return (ch);
}
215 
216 /* get one phrase and return the separater */
217 static int
get_phrase(s0,size,fp)218 get_phrase (s0, size, fp)
219      UCHAR *s0;
220      int size;
221      FILE *fp;
222 {
223   UCHAR *s = s0;
224   int c;
225   static int eof = 0;
226 
227   if (eof)
228     {
229       *s0 = 0;
230       return (EOF);
231     }
232   while ((c = get_char (fp)) != '\n' && c != DEVIDE_CHAR && c != NODE_CHAR && c != HINSI_SEPARATE_CHAR && c != EOF)
233     {
234       if (s - s0 >= size)
235         {
236           error_long ();
237           return (HINSI_ERR);
238         }
239       *s++ = c;
240     }
241   if (c == EOF)
242     eof = 1;
243   if (s - s0 >= size - 1)
244     {
245       error_long ();
246       return (HINSI_ERR);
247     }
248   *s++ = '\0';
249   return (c);
250 }
251 
252 static int
stradd(cp,str)253 stradd (cp, str)
254      w_char **cp;
255      char *str;
256 {
257   int len = strlen (str);
258 
259   if (hp + len + 1 >= heap + HEAP_LEN)
260     {
261       error_no_heap ();
262       return (-1);
263     }
264   *cp = hp;
265   wnn_Sstrcpy (hp, str);
266   hp += wnn_Strlen (hp) + 1;
267   return (0);
268 }
269 
270 static int
w_stradd(cp,str)271 w_stradd (cp, str)
272      unsigned short **cp;
273      unsigned short *str;
274 {
275 
276   *cp = whp;
277   for (; *str != TERMINATE; str++, whp++)
278     {
279       if (whp >= wheap + WHEAP_LEN)
280         {
281           error_no_heap ();
282           return (-1);
283         }
284       *whp = *str;
285     }
286   *whp++ = TERMINATE;
287   return (0);
288 }
289 
290 int
wnn_loadhinsi(fname)291 wnn_loadhinsi (fname)
292      unsigned char *fname;
293 {
294   FILE *fp;
295   UCHAR buf[SIZE];
296   unsigned short fukugou_str[MAXHINSI];
297   int sep;
298   int h;
299   unsigned short *c;
300   char tmp[256];
301   extern int wnn_find_hinsi_by_name ();
302 
303   if (fname == NULL)
304     {
305 #ifdef  JSERVER
306       if (hinsi_loaded)
307         return (0);
308 #endif /* JSERVER */
309       strcpy (tmp, LIBDIR);
310       strcat (tmp, HINSIDATA_FILE);
311       fname = (unsigned char *) tmp;
312     }
313 
314 #ifdef  JSERVER
315   error1 ("Read HINSI DATA FILE %s\n", fname);
316 #endif /* JSERVER */
317 
318   if ((fp = fopen ((char *) fname, "r")) == NULL)
319     {
320       wnnerror_hinsi = WNN_NO_HINSI_DATA_FILE;
321       goto err_1;
322     }
323   hinsi_loaded = 1;
324 
325   while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
326     {
327       if (sep == HINSI_ERR)
328         {
329           goto err;             /* wnnerror_hinsi set in get_phrase */
330         }
331       if (buf[0] == YOYAKU_CHAR)
332         {                       /* yoyaku */
333           if (sep != '\n')
334             {
335               wnnerror_hinsi = WNN_BAD_HINSI_FILE;
336               goto err;
337             }
338           hinsi[mhinsi++] = NULL;
339         }
340       else if (sep == '\n')
341         {                       /* hinsi */
342           if (stradd (&hinsi[mhinsi++], buf))
343             goto err;
344         }
345       else if (sep == DEVIDE_CHAR)
346         {                       /* fukugou */
347           if (stradd (&fukugou[mfukugou].name, buf))
348             goto err;
349           c = fukugou_str;
350           while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
351             {
352               if (sep == -1)
353                 {
354                   goto err;     /* wnnerror_hinsi set in get_phrase */
355                 }
356               if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n')
357                 {
358                   wnnerror_hinsi = WNN_BAD_HINSI_FILE;
359                   goto err;
360                 }
361               if ((h = wnn_find_hinsi_by_name (buf)) == -1 || h >= mhinsi)
362                 {
363                   wnnerror_hinsi = WNN_BAD_HINSI_FILE;
364                   goto err;
365                 }
366               *c++ = h;
367               if (sep == '\n' || sep == EOF)
368                 break;
369             }
370           *c = TERMINATE;
371           if (w_stradd (&fukugou[mfukugou++].component, fukugou_str))
372             goto err;
373         }
374       else if (sep == NODE_CHAR)
375         {
376           int first = 1;
377           w_char *dummy;
378 
379           node[mnode].kosuu = 0;
380           if (stradd (&node[mnode].name, buf))
381             goto err;
382           while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
383             {
384               if (sep == -1)
385                 {
386                   goto err;     /* wnnerror_hinsi set in get_phrase */
387                 }
388               if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n')
389                 {
390                   wnnerror_hinsi = WNN_BAD_HINSI_FILE;
391                   goto err;
392                 }
393               node[mnode].kosuu++;
394               if (first)
395                 {
396                   if (stradd (&node[mnode].son, buf))
397                     goto err;
398                   first = 0;
399                 }
400               else
401                 {
402                   if (stradd (&dummy, buf))
403                     goto err;
404                 }
405               if (sep == '\n' || sep == EOF)
406                 break;
407             }
408           mnode++;
409         }
410     }
411   fclose (fp);
412   return (0);
413 err:
414   fclose (fp);
415 err_1:
416 #ifdef  JSERVER
417   error1 ("Error reading HINSI DATA FILE %s\n", fname);
418 #endif /* JSERVER */
419   return (HINSI_ERR);
420 }
421 
422 static int
find_hinsi_by_name(c)423 find_hinsi_by_name (c)
424      register w_char *c;
425 {
426   register int k;
427   if (!hinsi_loaded)
428     {
429       if (wnn_loadhinsi (NULL) != 0)
430         {
431           return (-1);
432         }
433     }
434   for (k = 0; k < mhinsi; k++)
435     {
436       if (hinsi[k] && wnn_Strcmp (hinsi[k], c) == 0)
437         {
438           return (k);
439         }
440     }
441   for (k = 0; k < mfukugou; k++)
442     {
443       if (fukugou[k].name && wnn_Strcmp (fukugou[k].name, c) == 0)
444         {
445           return (FUKUGOU_START - k);
446         }
447     }
448   return (-1);
449 }
450 
451 
452 int
wnn_find_hinsi_by_name(c)453 wnn_find_hinsi_by_name (c)
454      register char *c;
455 {
456   w_char hin[WNN_HINSI_NAME_LEN];
457 
458   wnn_Sstrcpy (hin, c);
459   return (find_hinsi_by_name (hin));
460 }
461 
462 
463 static w_char *
get_hinsi_name(k)464 get_hinsi_name (k)
465      int k;
466 {
467   if (!hinsi_loaded)
468     {
469       if (wnn_loadhinsi (NULL) != 0)
470         {
471           return (NULL);
472         }
473     }
474   if (k < mhinsi && k >= 0)
475     {
476       return (hinsi[k]);
477     }
478   else if (k > FUKUGOU_START - mfukugou)
479     {
480       return (fukugou[FUKUGOU_START - k].name);
481     }
482   return (NULL);
483 }
484 
485 char *
wnn_get_hinsi_name(k)486 wnn_get_hinsi_name (k)
487      int k;
488 {
489   w_char *s;
490   static char hin[WNN_HINSI_NAME_LEN * 2];
491 
492   if ((s = get_hinsi_name (k)) == NULL)
493     return (NULL);
494   wnn_sStrcpy (hin, s);
495   return (hin);
496 }
497 
498 #ifndef JSERVER
499 static
500 #endif                          /* JSERVER */
501   int
wnn_get_fukugou_component_body(k,shp)502 wnn_get_fukugou_component_body (k, shp)
503      register int k;
504      register unsigned short **shp;
505 {
506   static unsigned short tmp;
507   register unsigned short *s;
508   int index;                    /* need for NEWS-OS 6.0 */
509   if (k < mhinsi && k >= 0)
510     {
511       tmp = k;
512       *shp = &tmp;
513       return (1);
514     }
515   if (k > FUKUGOU_START - mfukugou && k <= FUKUGOU_START)
516     {
517       index = FUKUGOU_START - k;
518       for (*shp = s = fukugou[index].component; *s != TERMINATE; s++);
519 /*
520         If next line in NEWS-OS 6.0, jserver down when kanji henkan.
521         for(*shp = s = fukugou[FUKUGOU_START - k].component;*s != TERMINATE;s++);
522 */
523       return (s - *shp);
524     }
525   return (-1);
526 }
527 
528 int
wnn_get_fukugou_component(k,shp)529 wnn_get_fukugou_component (k, shp)
530      register int k;
531      register unsigned short **shp;
532 {
533   if (!hinsi_loaded)
534     {
535       if (wnn_loadhinsi (NULL) != 0)
536         {
537           return (-1);
538         }
539     }
540   return (wnn_get_fukugou_component_body (k, shp));
541 }
542 
543 
544 #ifdef JSERVER
545 
546 w_char *
wnn_hinsi_name(no)547 wnn_hinsi_name (no)
548      int no;
549 {
550   w_char *c;
551   if ((c = get_hinsi_name (no)) == NULL)
552     {
553       wnn_errorno = WNN_BAD_HINSI_NO;
554     }
555   return (c);
556 }
557 
558 int
wnn_hinsi_number(name)559 wnn_hinsi_number (name)
560      w_char *name;
561 {
562   int n;
563   if ((n = find_hinsi_by_name (name)) == -1)
564     {
565       wnn_errorno = WNN_BAD_HINSI_NAME;
566     }
567   return (n);
568 }
569 
570 int
wnn_hinsi_list(name,c,mynode,mmynode)571 wnn_hinsi_list (name, c, mynode, mmynode)
572      w_char *name;
573      w_char **c;
574      struct wnn_hinsi_node *mynode;
575      int mmynode;
576 {
577   int k;
578 
579   if (mynode == NULL)
580     {
581       mynode = node;
582       mmynode = mnode;
583     }
584   if (!hinsi_loaded)
585     wnn_loadhinsi (NULL);
586   for (k = 0; k < mmynode; k++)
587     {
588       if (wnn_Strcmp (name, mynode[k].name) == 0)
589         {
590           *c = mynode[k].son;
591           return (mynode[k].kosuu);
592         }
593     }
594   if (find_hinsi_by_name (name) == -1)
595     {
596       wnn_errorno = WNN_BAD_HINSI_NAME;
597       return (-1);
598     }
599   return (0);
600 }
601 
602 int
wnn_has_hinsi(mynode,mmynode,name)603 wnn_has_hinsi (mynode, mmynode, name)
604      struct wnn_hinsi_node *mynode;
605      int mmynode;
606      w_char *name;
607 {
608   w_char *c;
609   int k, j;
610   if (mynode == NULL)
611     {
612       mynode = node;
613       mmynode = mnode;
614     }
615   for (k = 0; k < mmynode; k++)
616     {
617       if (wnn_Strcmp (name, mynode[k].name) == 0)
618         {
619           return (1);
620         }
621       else
622         {
623           c = mynode[k].son;
624           for (j = 0; j < mynode[k].kosuu; j++)
625             {
626               if (wnn_Strcmp (name, c) == 0)
627                 {
628                   return (1);
629                 }
630               else
631                 {
632                   c += wnn_Strlen (c) + 1;
633                 }
634             }
635         }
636     }
637   return (0);
638 }
639 
640 #endif
641