1 /*
2 * $Id: gethinsi.c,v 1.6 2002/03/24 01:25:13 hiroo Exp $
3 */
4
5 /*
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7 * This file is part of FreeWnn.
8 *
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
10 * 1987, 1988, 1989, 1990, 1991, 1992
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13 * Copyright FreeWnn Project 1999, 2000, 2002
14 *
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 */
31
/* Definitions concerning the structure of the hinsi (part-of-speech) file */
33
#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#if STDC_HEADERS
#  include <string.h>
#elif HAVE_STRINGS_H
#  include <strings.h>
#endif /* STDC_HEADERS */
#include "commonhd.h"
#include "wnn_config.h"
#include "wnnerror.h"
#include "jslib.h"
#include "hinsi_file.h"
#include "wnn_os.h"
#include "wnn_string.h"
51
/*
    wnn_loadhinsi(NULL)                 Load the hinsi (part-of-speech) information.

    wnn_find_hinsi_by_name(c)           Given a name, return the hinsi number.
    char *c;

    char *wnn_get_hinsi_name(k)         Given a hinsi number, return its name.
    int k;

    int
    wnn_get_fukugou_component(k,str, )  Get the components of a compound hinsi.
    int k;                              The number of components is the return value,
    unsigned short **str;               and the components are returned through str.

#ifdef nodef
    int wnn_get_hinsi_list(area)        Get the list of hinsi.
                                        Hinsi are managed as an array of hinsi names.
                                        The address of the first element is stored in
                                        area and the size of the array is returned.
    w_char ***area;

    int wnn_get_fukugou_list(area, start)
                                        Get the list of compound hinsi.
                                        Compound hinsi are managed as an array of
                                        compound-hinsi structures.
                                        The address of the first element is stored in
                                        area and the size of the array is returned.
                                        The hinsi number of the n-th compound hinsi is
                                        FUKUGOU_START - n.
                                        The value of FUKUGOU_START is stored in start.
    struct wnn_fukugou **area;
    int *start;
    int wnn_hinsi_node_component(name, area)
    w_char **area;
    w_char *name;
                                        Given a hinsi node name, get the list of the
                                        names of its child nodes.
                                        The number of children is returned.
                                        If the name is not a node but a real hinsi
                                        name (a leaf), 0 is returned.
                                        If it is neither a node nor a hinsi name,
                                        -1 is returned.
#endif
*/
88
89 extern int wnn_errorno;
90
91 #ifdef JSERVER
92 /* must be #include "de_header.h" ? */
93 extern void log_debug ();
94 #define error1 log_debug
95 #endif
96
97 /*
98 *here start the real program
99 */
100
101
/* Error code of the most recent hinsi-file failure; the reader in this file
   stores WNN_* values here. */
int wnnerror_hinsi;

/* Becomes 1 once wnn_loadhinsi() has successfully opened a hinsi file. */
static int hinsi_loaded = 0;

/* Incremented by get_char0() as it consumes newlines. */
static int line_no = 0;

/* Storage for w_char names; stradd() appends at hp. */
static w_char heap[HEAP_LEN];
static w_char *hp = heap;

/* Storage for TERMINATE-ended component lists; w_stradd() appends at whp. */
static unsigned short wheap[WHEAP_LEN];
static unsigned short *whp = wheap;

/* Length of the phrase buffer wnn_loadhinsi() hands to get_phrase(). */
#define SIZE 1024

/* Tables filled by wnn_loadhinsi(): simple hinsi names (NULL for a reserved
   slot), compound hinsi, and hinsi nodes. */
static w_char *hinsi[MAXHINSI];
static struct wnn_fukugou fukugou[MAXFUKUGOU];
static struct wnn_hinsi_node node[MAXNODE];

/* Number of entries currently used in hinsi[], fukugou[] and node[]. */
int mhinsi = 0;
int mfukugou = 0;
int mnode = 0;
123
124 static void
error_long()125 error_long ()
126 {
127 wnnerror_hinsi = WNN_TOO_LONG_HINSI_FILE_LINE;
128 }
129
130 static void
error_no_heap()131 error_no_heap ()
132 {
133 wnnerror_hinsi = WNN_TOO_BIG_HINSI_FILE;
134 }
135
136 static int
get_char0(fp)137 get_char0 (fp)
138 FILE *fp;
139 {
140 int c, d;
141
142 for (; (c = getc (fp)) == COMMENT_CHAR || c == CONTINUE_CHAR || c == IGNORE_CHAR1 || c == IGNORE_CHAR2;)
143 {
144 if (c == CONTINUE_CHAR)
145 {
146 if ((d = getc (fp)) == EOF)
147 {
148 break;
149 }
150 if (d == '\n')
151 {
152 line_no += 1;
153 continue;
154 }
155 else
156 {
157 ungetc (d, fp);
158 break;
159 }
160 }
161 else if (c == COMMENT_CHAR)
162 {
163 for (;;)
164 {
165 if ((c = getc (fp)) == EOF)
166 {
167 return (EOF);
168 }
169 if (c == '\n')
170 {
171 ungetc (c, fp);
172 line_no += 1;
173 break;
174 }
175 }
176 }
177 }
178 if (c == '\n')
179 line_no += 1;
180 return (c);
181 }
182
/*
 * Layer over get_char0() that removes empty lines: newlines at the very
 * start of the input are dropped, and any run of consecutive newlines is
 * reported as a single '\n'.
 *
 * After returning a '\n' the function has already read the first character
 * that follows the run; it is kept in a static one-character lookahead and
 * handed out by the next call.  The lookahead and the "start of input" flag
 * are static, so they persist across calls and across files.
 */
static int
get_char (fp)
     FILE *fp;
{
  static int pending = -1;      /* lookahead character, -1 when empty */
  static int started = 0;       /* set once leading newlines were dropped */
  int ch;

  if (pending != -1)
    {
      ch = pending;
      pending = -1;
      return (ch);
    }

  ch = get_char0 (fp);
  if (!started)
    {
      /* discard every newline at the head of the file */
      while (ch == '\n')
        ch = get_char0 (fp);
      started = 1;
    }
  if (ch == '\n')
    {
      /* collapse the run; remember the first non-newline that ends it */
      do
        pending = get_char0 (fp);
      while (pending == '\n');
    }
  return (ch);
}
215
216 /* get one phrase and return the separater */
217 static int
get_phrase(s0,size,fp)218 get_phrase (s0, size, fp)
219 UCHAR *s0;
220 int size;
221 FILE *fp;
222 {
223 UCHAR *s = s0;
224 int c;
225 static int eof = 0;
226
227 if (eof)
228 {
229 *s0 = 0;
230 return (EOF);
231 }
232 while ((c = get_char (fp)) != '\n' && c != DEVIDE_CHAR && c != NODE_CHAR && c != HINSI_SEPARATE_CHAR && c != EOF)
233 {
234 if (s - s0 >= size)
235 {
236 error_long ();
237 return (HINSI_ERR);
238 }
239 *s++ = c;
240 }
241 if (c == EOF)
242 eof = 1;
243 if (s - s0 >= size - 1)
244 {
245 error_long ();
246 return (HINSI_ERR);
247 }
248 *s++ = '\0';
249 return (c);
250 }
251
252 static int
stradd(cp,str)253 stradd (cp, str)
254 w_char **cp;
255 char *str;
256 {
257 int len = strlen (str);
258
259 if (hp + len + 1 >= heap + HEAP_LEN)
260 {
261 error_no_heap ();
262 return (-1);
263 }
264 *cp = hp;
265 wnn_Sstrcpy (hp, str);
266 hp += wnn_Strlen (hp) + 1;
267 return (0);
268 }
269
270 static int
w_stradd(cp,str)271 w_stradd (cp, str)
272 unsigned short **cp;
273 unsigned short *str;
274 {
275
276 *cp = whp;
277 for (; *str != TERMINATE; str++, whp++)
278 {
279 if (whp >= wheap + WHEAP_LEN)
280 {
281 error_no_heap ();
282 return (-1);
283 }
284 *whp = *str;
285 }
286 *whp++ = TERMINATE;
287 return (0);
288 }
289
290 int
wnn_loadhinsi(fname)291 wnn_loadhinsi (fname)
292 unsigned char *fname;
293 {
294 FILE *fp;
295 UCHAR buf[SIZE];
296 unsigned short fukugou_str[MAXHINSI];
297 int sep;
298 int h;
299 unsigned short *c;
300 char tmp[256];
301 extern int wnn_find_hinsi_by_name ();
302
303 if (fname == NULL)
304 {
305 #ifdef JSERVER
306 if (hinsi_loaded)
307 return (0);
308 #endif /* JSERVER */
309 strcpy (tmp, LIBDIR);
310 strcat (tmp, HINSIDATA_FILE);
311 fname = (unsigned char *) tmp;
312 }
313
314 #ifdef JSERVER
315 error1 ("Read HINSI DATA FILE %s\n", fname);
316 #endif /* JSERVER */
317
318 if ((fp = fopen ((char *) fname, "r")) == NULL)
319 {
320 wnnerror_hinsi = WNN_NO_HINSI_DATA_FILE;
321 goto err_1;
322 }
323 hinsi_loaded = 1;
324
325 while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
326 {
327 if (sep == HINSI_ERR)
328 {
329 goto err; /* wnnerror_hinsi set in get_phrase */
330 }
331 if (buf[0] == YOYAKU_CHAR)
332 { /* yoyaku */
333 if (sep != '\n')
334 {
335 wnnerror_hinsi = WNN_BAD_HINSI_FILE;
336 goto err;
337 }
338 hinsi[mhinsi++] = NULL;
339 }
340 else if (sep == '\n')
341 { /* hinsi */
342 if (stradd (&hinsi[mhinsi++], buf))
343 goto err;
344 }
345 else if (sep == DEVIDE_CHAR)
346 { /* fukugou */
347 if (stradd (&fukugou[mfukugou].name, buf))
348 goto err;
349 c = fukugou_str;
350 while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
351 {
352 if (sep == -1)
353 {
354 goto err; /* wnnerror_hinsi set in get_phrase */
355 }
356 if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n')
357 {
358 wnnerror_hinsi = WNN_BAD_HINSI_FILE;
359 goto err;
360 }
361 if ((h = wnn_find_hinsi_by_name (buf)) == -1 || h >= mhinsi)
362 {
363 wnnerror_hinsi = WNN_BAD_HINSI_FILE;
364 goto err;
365 }
366 *c++ = h;
367 if (sep == '\n' || sep == EOF)
368 break;
369 }
370 *c = TERMINATE;
371 if (w_stradd (&fukugou[mfukugou++].component, fukugou_str))
372 goto err;
373 }
374 else if (sep == NODE_CHAR)
375 {
376 int first = 1;
377 w_char *dummy;
378
379 node[mnode].kosuu = 0;
380 if (stradd (&node[mnode].name, buf))
381 goto err;
382 while ((sep = get_phrase (buf, SIZE, fp)) != EOF)
383 {
384 if (sep == -1)
385 {
386 goto err; /* wnnerror_hinsi set in get_phrase */
387 }
388 if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n')
389 {
390 wnnerror_hinsi = WNN_BAD_HINSI_FILE;
391 goto err;
392 }
393 node[mnode].kosuu++;
394 if (first)
395 {
396 if (stradd (&node[mnode].son, buf))
397 goto err;
398 first = 0;
399 }
400 else
401 {
402 if (stradd (&dummy, buf))
403 goto err;
404 }
405 if (sep == '\n' || sep == EOF)
406 break;
407 }
408 mnode++;
409 }
410 }
411 fclose (fp);
412 return (0);
413 err:
414 fclose (fp);
415 err_1:
416 #ifdef JSERVER
417 error1 ("Error reading HINSI DATA FILE %s\n", fname);
418 #endif /* JSERVER */
419 return (HINSI_ERR);
420 }
421
422 static int
find_hinsi_by_name(c)423 find_hinsi_by_name (c)
424 register w_char *c;
425 {
426 register int k;
427 if (!hinsi_loaded)
428 {
429 if (wnn_loadhinsi (NULL) != 0)
430 {
431 return (-1);
432 }
433 }
434 for (k = 0; k < mhinsi; k++)
435 {
436 if (hinsi[k] && wnn_Strcmp (hinsi[k], c) == 0)
437 {
438 return (k);
439 }
440 }
441 for (k = 0; k < mfukugou; k++)
442 {
443 if (fukugou[k].name && wnn_Strcmp (fukugou[k].name, c) == 0)
444 {
445 return (FUKUGOU_START - k);
446 }
447 }
448 return (-1);
449 }
450
451
452 int
wnn_find_hinsi_by_name(c)453 wnn_find_hinsi_by_name (c)
454 register char *c;
455 {
456 w_char hin[WNN_HINSI_NAME_LEN];
457
458 wnn_Sstrcpy (hin, c);
459 return (find_hinsi_by_name (hin));
460 }
461
462
463 static w_char *
get_hinsi_name(k)464 get_hinsi_name (k)
465 int k;
466 {
467 if (!hinsi_loaded)
468 {
469 if (wnn_loadhinsi (NULL) != 0)
470 {
471 return (NULL);
472 }
473 }
474 if (k < mhinsi && k >= 0)
475 {
476 return (hinsi[k]);
477 }
478 else if (k > FUKUGOU_START - mfukugou)
479 {
480 return (fukugou[FUKUGOU_START - k].name);
481 }
482 return (NULL);
483 }
484
485 char *
wnn_get_hinsi_name(k)486 wnn_get_hinsi_name (k)
487 int k;
488 {
489 w_char *s;
490 static char hin[WNN_HINSI_NAME_LEN * 2];
491
492 if ((s = get_hinsi_name (k)) == NULL)
493 return (NULL);
494 wnn_sStrcpy (hin, s);
495 return (hin);
496 }
497
498 #ifndef JSERVER
499 static
500 #endif /* JSERVER */
501 int
wnn_get_fukugou_component_body(k,shp)502 wnn_get_fukugou_component_body (k, shp)
503 register int k;
504 register unsigned short **shp;
505 {
506 static unsigned short tmp;
507 register unsigned short *s;
508 int index; /* need for NEWS-OS 6.0 */
509 if (k < mhinsi && k >= 0)
510 {
511 tmp = k;
512 *shp = &tmp;
513 return (1);
514 }
515 if (k > FUKUGOU_START - mfukugou && k <= FUKUGOU_START)
516 {
517 index = FUKUGOU_START - k;
518 for (*shp = s = fukugou[index].component; *s != TERMINATE; s++);
519 /*
520 If next line in NEWS-OS 6.0, jserver down when kanji henkan.
521 for(*shp = s = fukugou[FUKUGOU_START - k].component;*s != TERMINATE;s++);
522 */
523 return (s - *shp);
524 }
525 return (-1);
526 }
527
528 int
wnn_get_fukugou_component(k,shp)529 wnn_get_fukugou_component (k, shp)
530 register int k;
531 register unsigned short **shp;
532 {
533 if (!hinsi_loaded)
534 {
535 if (wnn_loadhinsi (NULL) != 0)
536 {
537 return (-1);
538 }
539 }
540 return (wnn_get_fukugou_component_body (k, shp));
541 }
542
543
544 #ifdef JSERVER
545
546 w_char *
wnn_hinsi_name(no)547 wnn_hinsi_name (no)
548 int no;
549 {
550 w_char *c;
551 if ((c = get_hinsi_name (no)) == NULL)
552 {
553 wnn_errorno = WNN_BAD_HINSI_NO;
554 }
555 return (c);
556 }
557
558 int
wnn_hinsi_number(name)559 wnn_hinsi_number (name)
560 w_char *name;
561 {
562 int n;
563 if ((n = find_hinsi_by_name (name)) == -1)
564 {
565 wnn_errorno = WNN_BAD_HINSI_NAME;
566 }
567 return (n);
568 }
569
570 int
wnn_hinsi_list(name,c,mynode,mmynode)571 wnn_hinsi_list (name, c, mynode, mmynode)
572 w_char *name;
573 w_char **c;
574 struct wnn_hinsi_node *mynode;
575 int mmynode;
576 {
577 int k;
578
579 if (mynode == NULL)
580 {
581 mynode = node;
582 mmynode = mnode;
583 }
584 if (!hinsi_loaded)
585 wnn_loadhinsi (NULL);
586 for (k = 0; k < mmynode; k++)
587 {
588 if (wnn_Strcmp (name, mynode[k].name) == 0)
589 {
590 *c = mynode[k].son;
591 return (mynode[k].kosuu);
592 }
593 }
594 if (find_hinsi_by_name (name) == -1)
595 {
596 wnn_errorno = WNN_BAD_HINSI_NAME;
597 return (-1);
598 }
599 return (0);
600 }
601
602 int
wnn_has_hinsi(mynode,mmynode,name)603 wnn_has_hinsi (mynode, mmynode, name)
604 struct wnn_hinsi_node *mynode;
605 int mmynode;
606 w_char *name;
607 {
608 w_char *c;
609 int k, j;
610 if (mynode == NULL)
611 {
612 mynode = node;
613 mmynode = mnode;
614 }
615 for (k = 0; k < mmynode; k++)
616 {
617 if (wnn_Strcmp (name, mynode[k].name) == 0)
618 {
619 return (1);
620 }
621 else
622 {
623 c = mynode[k].son;
624 for (j = 0; j < mynode[k].kosuu; j++)
625 {
626 if (wnn_Strcmp (name, c) == 0)
627 {
628 return (1);
629 }
630 else
631 {
632 c += wnn_Strlen (c) + 1;
633 }
634 }
635 }
636 }
637 return (0);
638 }
639
640 #endif
641