1 /*
2  *  $Id: ujisf.c,v 1.7 2002/07/14 04:26:57 hiroo Exp $
3  */
4 
5 /*
6  * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7  * This file is part of FreeWnn.
8  *
9  * Copyright Kyoto University Research Institute for Mathematical Sciences
10  *                 1987, 1988, 1989, 1990, 1991, 1992
11  * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12  * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13  * Copyright FreeWnn Project 1999, 2000, 2002
14  *
15  * Maintainer:  FreeWnn Project   <freewnn@tomo.gr.jp>
16  *
17  * This program is free software; you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation; either version 2 of the License, or
20  * (at your option) any later version.
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30  */
31 
32 /*
33  * Ujis format <--> internal data.
34  */
35 
36 #ifdef HAVE_CONFIG_H
37 #  include <config.h>
38 #endif
39 
40 #include <stdio.h>
41 #if STDC_HEADERS
42 #  include <stdlib.h>
43 #  include <string.h>
44 #else
45 #  if HAVE_MALLOC_H
46 #    include <malloc.h>
47 #  endif
48 #  if HAVE_STRINGS_H
49 #    include <strings.h>
50 #  endif
51 #endif /* STDC_HEADERS */
52 
53 #include "commonhd.h"
54 #include "jslib.h"
55 #include "jh.h"
56 #include "jdata.h"
57 #include "wnn_os.h"
58 #include "wnn_string.h"
59 
60 #ifdef CHINESE
61 #include "cplib.h"
62 int pzy_flag = CWNN_PINYIN;     /* Pinyin or Zhuyin */
63 static void sisheng_num (), read_kanji_str_w ();
64 #endif
65 
#ifndef min
/* Smaller / larger of two values.  Every argument is parenthesized so
   that operands containing low-precedence operators (?:, &, |, ...)
   expand correctly.  Each argument may still be evaluated twice, so do
   not pass expressions with side effects. */
#define min(a, b) (((a) > (b)) ? (b) : (a))
#define max(a, b) (((a) < (b)) ? (b) : (a))
#endif
70 
71 extern unsigned char kanjiaddr ();
72 extern void Print_entry ();
73 extern int wnn_find_hinsi_by_name ();
74 #ifdef CHINESE
75 extern void cwnn_zy_str_analysis (), cwnn_py_str_analysis ();
76 #endif
77 int sort_func (), Sorted (), w_stradd ();
78 static void Kanjistradd (), bunpou_num (), read_kanji_str (), toesc ();
79 void exit1 ();
80 
81 extern struct JT jt;
82 
83 /* extern variables */
84 
85 struct je **jeary;
86 int wnnerror;
87 
88 #define WNN_HINSI_LEN 4096
89 
90 w_char file_comment[WNN_COMMENT_LEN];
91 w_char hinsi_list[WNN_HINSI_LEN];
92 
93 
94 
95 /* static variables */
96 static UCHAR *heap, *hp, *heapend;
97 static w_char *yomi_heap, *yhp, *yheapend;
98 static struct je *je_heap, *jehp, *jeheapend;
99 static FILE *ifpter;
100 static int maxline;
101 
102 /* extern functions of this file are
103    ujis_header();
104    read_ujis(reversep, to_esc, which_dict);
105    reverse_yomi();
106    sort();
107    uniq_je(func);
108    output_ujis(opter, serial_out, esc_exp);
109    */
110 
111 int lc;
112 
113 static char stack[LINE_SIZE] = { 0 };
114 
115 int
get_line(c)116 get_line (c)
117      register char *c;
118 {
119   if (stack[0])
120     {
121       strcpy (c, stack);
122       stack[0] = 0;
123     }
124   else
125     {
126       if (fgets (c, LINE_SIZE, ifpter) == NULL)
127         {
128           return (EOF);
129         }
130     }
131   return (0);
132 }
133 
134 void
unget_line(c)135 unget_line (c)
136      char *c;
137 {
138   strcpy (stack, c);
139 }
140 
141 
/*
 * Copy the first blank-delimited token of buf into str.
 * Leading spaces and tabs are skipped; the token ends at the next
 * space, tab, newline or NUL.  str is always NUL-terminated.
 * Returns a pointer to the character just after the token inside buf,
 * or NULL (with str set to the empty string) when nothing but blanks
 * precedes the end of the line.
 */
char *
get_string (str, buf)
     register char *str;
     char *buf;
{
  register char *p = buf;

  while (*p == ' ' || *p == '\t')
    p++;

  if (*p == '\n' || *p == '\0')
    {
      *str = '\0';
      return (NULL);
    }

  while (!(*p == '\t' || *p == ' ' || *p == '\n' || *p == '\0'))
    *str++ = *p++;

  *str = '\0';
  return (p);
}
161 
162 void
bad_line(bf)163 bad_line (bf)
164      char *bf;
165 {
166   static int badl = 0;
167 
168   fprintf (stderr, "Bad line \"%s\"\n", bf);
169   fprintf (stderr, "Bad line omitted\n");
170   if (++badl > BADLMAX)
171     {
172       fprintf (stderr, "Too many bad lines.\n");
173       exit1 ();
174     }
175 }
176 
/*
 * Report that the entry table is full and terminate through exit1().
 */
void
error_no_heap ()
{
  fputs ("Heap area is exhausted.\n", stderr);
  exit1 ();
}
183 
184 static int
get_one_line(buffer,jep,rev,to_esc,which_dict)185 get_one_line (buffer, jep, rev, to_esc, which_dict)
186      char *buffer;
187      register struct je **jep;
188      int rev;
189      int to_esc;
190      int which_dict;
191 {
192   register char *c = buffer;
193   static char tmp[LINE_SIZE];
194   static char ckanji[LINE_SIZE];
195   static char cyomi[LINE_SIZE];
196   static w_char yomi[LINE_SIZE];
197   static w_char kanji[LINE_SIZE];
198   static w_char comm[LINE_SIZE];
199 #ifdef CHINESE
200   static w_char un_sisheng_yincod_str[LINE_SIZE];
201   static w_char yincod_str[LINE_SIZE];
202   static char csisheng[LINE_SIZE];
203   static w_char wtmp[LINE_SIZE];
204 #endif
205   char *c1;
206 
207   if (jehp == jeheapend)
208     {
209       if ((jehp = je_heap = (struct je *) malloc ((HEAPINC * sizeof (struct je)))) == NULL)
210         {
211           fprintf (stderr, "Malloc Failed\n");
212           return (-1);
213         }
214       jeheapend = je_heap + HEAPINC;
215     }
216   *jep = jehp;
217   jehp++;
218 
219   if (rev == REVERSE)
220     {
221       if ((c = get_string (ckanji, c)) == NULL)
222         return (1);
223     }
224   else
225     {
226       if ((c = get_string (cyomi, c)) == NULL)
227         return (1);
228     }
229   if (rev == REVERSE)
230     {
231       if ((c = get_string (cyomi, c)) == NULL)
232         return (-1);
233     }
234   else
235     {
236       if ((c = get_string (ckanji, c)) == NULL)
237         return (-1);
238     }
239 #ifdef CHINESE
240 /* here ,should seperate pinyin to two part    */
241 /* one is usually pinyin string like Zhong.Guo.  */
242 /* the is sisheng string like 23                */
243 
244   if (which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT)
245     {
246       if (pzy_flag == CWNN_ZHUYIN)
247         cwnn_zy_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str);
248       else
249         cwnn_py_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str);
250 
251       sisheng_num (csisheng, &((*jep)->ss));
252       read_kanji_str_w (wtmp, un_sisheng_yincod_str);
253       wnn_Strcpy (yomi, wtmp);
254     }
255   else
256     {
257       read_kanji_str (tmp, cyomi);
258       wnn_Sstrcpy (yomi, tmp);
259     }
260 #else
261   read_kanji_str (tmp, cyomi);
262   wnn_Sstrcpy (yomi, tmp);
263 #endif
264   if (wnn_Strlen (yomi) >= LENGTHYOMI)
265     {
266       fprintf (stderr, "YOMI is longer in line %d.\n", lc);
267       return (-1);
268     }
269   w_stradd (yomi, &((*jep)->yomi));
270 
271   read_kanji_str (tmp, ckanji);
272   wnn_Sstrcpy (kanji, tmp);
273   if (wnn_Strlen (kanji) >= LENGTHYOMI)
274     {
275       fprintf (stderr, "KANJI is longer in line %d.\n", lc);
276       return (-1);
277     }
278   w_stradd (kanji, &((*jep)->kan));
279 
280   if ((c = get_string (tmp, c)) == NULL)
281     return (-1);
282   bunpou_num (tmp, &((*jep)->hinsi));
283 
284   if ((c = get_string (tmp, c)) == NULL)
285     return (-1);
286   if (tmp[0] == '-')
287     {
288       (*jep)->hindo = -1;       /*  Real hindo == -1 means Not to use it */
289     }
290   else
291     {
292       sscanf (tmp, "%d", &((*jep)->hindo));
293     }
294 
295   if ((get_string (tmp, c)) == NULL)
296     {
297       c1 = NULL;
298       (*jep)->comm = NULL;
299       comm[0] = 0;
300     }
301   else
302     {
303 /*    left entries are all considered as comment */
304       for (; *c == '\t' || *c == ' '; c++);
305       if (c[strlen (c) - 1] == '\n')
306         c[strlen (c) - 1] = '\0';
307       c1 = c;
308       wnn_Sstrcpy (comm, c1);
309       if (wnn_Strlen (comm) >= LENGTHYOMI)
310         {
311           fprintf (stderr, "COMMENT is longer in line %d.\n", lc);
312           return (-1);
313         }
314       w_stradd (comm, &((*jep)->comm));
315     }
316 
317   if (to_esc)
318     {
319       toesc (ckanji, cyomi);
320     }
321 /*
322     if(strchr(ckanji, DIC_COMMENT_CHAR) ||
323        strchr(ckanji, DIC_YOMI_CHAR)){
324         fprintf(stderr, "Bad character in kanji\n");
325         return(-1);
326     }
327     if(which_dict){
328         if(strchr(cyomi, DIC_COMMENT_CHAR) ||
329            strchr(cyomi, DIC_YOMI_CHAR)){
330             fprintf(stderr, "Bad character in yomi\n");
331             return(-1);
332         }
333     }
334 */
335   Kanjistradd (kanji,
336 #ifdef CHINESE
337                ((which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT) ? yincod_str : ((which_dict == WNN_REV_DICT) ? yomi : NULL)),
338 #else
339                (which_dict) ? yomi : NULL,
340 #endif
341                comm, &(*jep)->kanji);
342   return (0);
343 }
344 
345 static void
Kanjistradd(k,y,c,cp)346 Kanjistradd (k, y, c, cp)
347      register UCHAR **cp;
348      w_char *k, *y, *c;
349 {
350   int len;
351   if (hp + LENGTHKANJI >= heapend)
352     {
353       if ((hp = heap = (UCHAR *) malloc ((HEAPINC * HEAP_PER_LINE))) == NULL)
354         {
355           fprintf (stderr, "Malloc Failed\n");
356           exit (1);
357         }
358       heapend = heap + (HEAPINC * HEAP_PER_LINE);
359     }
360   *cp = hp;
361   if ((len = kanjiaddr (hp, k, y, c)) >= LENGTHKANJI)
362     {
363       fprintf (stderr, "KANJI is longer in line %d.\n", lc);
364       exit (1);
365     }
366   hp += len;
367 }
368 
369 int
w_stradd(str,cp)370 w_stradd (str, cp)
371      register w_char **cp;
372      register w_char *str;
373 {
374   register int len = wnn_Strlen (str);
375 
376   if (yhp + len + 1 >= yheapend)
377     {
378       if ((yhp = yomi_heap = (w_char *) malloc ((HEAPINC * sizeof (w_char)))) == NULL)
379         {
380           fprintf (stderr, "Malloc Failed\n");
381           return (-1);
382         }
383       yheapend = yomi_heap + HEAPINC;
384     }
385   *cp = yhp;
386   wnn_Strcpy (yhp, str);
387   yhp += len + 1;
388   return (0);
389 }
390 
391 void
392 #ifdef CHINESE
ujis_header(which_dict)393 ujis_header (which_dict)
394      int *which_dict;
395 #else
396 ujis_header ()
397 #endif
398 {
399   char buffer[LINE_SIZE];
400   char *c = buffer;
401   char str[LINE_SIZE];
402 
403   jt.total = 0;
404   file_comment[0] = 0;
405   hinsi_list[0] = 0;
406 
407   for (;;)
408     {
409       if (get_line (buffer) == EOF)
410         {
411           goto EOF_HEAD;
412         }
413       c = buffer;
414       if ((c = get_string (str, c)) == NULL)
415         continue;
416       if (strcmp (str, COMMENT) == 0)
417         {
418 /*          for(;;){
419                 if(get_line(buffer) == EOF){
420                 goto EOF_EHAD;
421                 }
422                 if(buffer[0] == '\\'){
423                     unget_line(buffer);
424                     break;
425                 }
426                 if(wnn_Strlen(file_comment) + strlen(buffer)
427                     >= WNN_COMMENT_LEN){
428                     fprintf(stderr, "Too Long Comment.\n");
429                     exit1();
430                 }
431                 wnn_Sstrcpy(file_comment + wnn_Strlen(file_comment), buffer);
432             }
433 */
434           get_string (str, c);
435 /*
436             if(str[strlen(str) - 1] == '\n'){
437                 c[strlen(str) - 1] = '\0';
438             }
439 */
440           wnn_Sstrcpy (file_comment, str);
441 #ifdef CHINESE
442         }
443       else if (strcmp (str, PINYIN) == 0)
444         {
445           *which_dict = CWNN_REV_DICT;
446           pzy_flag = CWNN_PINYIN;
447         }
448       else if (strcmp (str, ZHUYIN) == 0)
449         {
450           *which_dict = CWNN_REV_DICT;
451           pzy_flag = CWNN_ZHUYIN;
452         }
453       else if (strcmp (str, BIXING) == 0)
454         {
455           *which_dict = BWNN_REV_DICT;
456 #endif
457         }
458       else if (strcmp (str, HINSI) == 0
459 #ifdef CHINESE
460                || strcmp (str, CHINSI) == 0
461 #endif
462         )
463         {
464           for (;;)
465             {
466               if (get_line (buffer) == EOF)
467                 {
468                   goto EOF_HEAD;
469                 }
470               if (buffer[0] == '\\' || buffer[0] == '\n')
471                 {
472                   unget_line (buffer);
473                   break;
474                 }
475               wnn_Sstrcpy (hinsi_list + wnn_Strlen (hinsi_list), buffer);
476             }
477         }
478       else if (strcmp (str, TOTAL) == 0)
479         {
480           get_string (str, c);
481           jt.total = atoi (str);
482         }
483       else if (strcmp (str, DIC_NO) == 0)
484         {                       /* for word_reg.c */
485           get_string (str, c);
486           jt.total = atoi (str);
487         }
488       else
489         {
490           unget_line (buffer);
491           break;
492         }
493     }
494 EOF_HEAD:
495   jt.maxcomment = wnn_Strlen (file_comment);
496   jt.maxhinsi_list = wnn_Strlen (hinsi_list) + 1;
497 }
498 
499 void
read_ujis(rev,to_esc,which_dict)500 read_ujis (rev, to_esc, which_dict)
501      int rev;
502      int to_esc;
503      int which_dict;
504 {
505   char buffer[LINE_SIZE];
506   register int tmp;
507 
508   for (lc = 0; get_line (buffer) != EOF;)
509     {
510       if ((tmp = get_one_line (buffer, jeary + lc, rev, to_esc, which_dict)) == -1)
511         {
512           bad_line (buffer);
513         }
514       else if (tmp == 0)
515         {                       /* succeed */
516           lc++;
517           if (lc > maxline)
518             {
519               error_no_heap ();
520             }
521         }
522     }
523   jt.maxserial = lc;            /* i starts with 1 in order to leave 0 unused */
524 #ifdef CHINESE
525   jt.syurui = which_dict;
526 #endif
527 }
528 
529 void
reverse_yomi()530 reverse_yomi ()
531 {
532   register int i;
533   w_char ytmp[LINE_SIZE];
534 
535   for (i = 0; i < jt.maxserial; i++)
536     {
537       if (jeary[i]->yomi != 0)
538         {                       /* �������Ƥʤ���Τ��� */
539           wnn_Sreverse (ytmp, jeary[i]->yomi);
540           wnn_Strcpy (jeary[i]->yomi, ytmp);
541           wnn_Sreverse (ytmp, jeary[i]->kan);
542           wnn_Strcpy (jeary[i]->kan, ytmp);
543         }
544 
545     }
546 }
547 
548 extern char *wnn_get_hinsi_name ();
549 
550 void
print_je(jep,opter,serial_out,esc_exp)551 print_je (jep, opter, serial_out, esc_exp)
552      register FILE *opter;
553      register struct je *jep;
554      int serial_out;
555      int esc_exp;
556 {
557 /*    if (jep->yomi != 0) { */
558   if (jep->hinsi != SAKUJO_HINSI)
559     {
560       Print_entry (jep->yomi, jep->kan, jep->comm, jep->hindo, 0, jep->hinsi, serial_out ? jep->serial : -1, opter, esc_exp);
561     }
562 }
563 
#ifdef nodef
/*
 * Disabled code (compiled only when nodef is defined).
 * Writes kpter, escaped by make_kanji_str(), to fp and pads it with
 * tabs: three when the escaped text is shorter than 8 bytes, two when
 * it is shorter than 16, one otherwise.
 */
kprint (fp, kpter)
     register FILE *fp;
     register w_char *kpter;
{
  char tmp[LENGTHKANJI];
  char out_str[LENGTHKANJI];
  register int out_len;
  int tabs;

  wnn_sStrcpy (tmp, kpter);
  out_len = make_kanji_str (out_str, tmp);
  fprintf (fp, "%s", out_str);

  tabs = 1;
  if (out_len < 8)
    tabs++;
  if (out_len < 16)
    tabs++;
  while (tabs-- > 0)
    putc ('\t', fp);
}
#endif
583 
584 void
output_ujis(opter,serial_out,esc_exp)585 output_ujis (opter, serial_out, esc_exp)
586      register FILE *opter;
587      int serial_out;
588      int esc_exp;
589 {
590   register struct je **jep;
591   char buffer[WNN_COMMENT_LEN + WNN_HINSI_LEN];
592   register int i;
593 
594   wnn_sStrcpy (buffer, file_comment);
595   fprintf (opter, "%s\t%s\n", COMMENT, buffer);
596   fprintf (opter, "%s\t%d\n", TOTAL, jt.total);
597   wnn_sStrcpy (buffer, hinsi_list);
598 #ifdef CHINESE
599   fprintf (opter, "%s\n", CHINSI);
600   if (jt.syurui == CWNN_REV_DICT)
601     {
602       if (pzy_flag == CWNN_PINYIN)
603         {
604           fprintf (opter, "%s\n", PINYIN);
605         }
606       else
607         {
608           fprintf (opter, "%s\n", ZHUYIN);
609         }
610     }
611   else if (jt.syurui == BWNN_REV_DICT)
612     {
613       fprintf (opter, "%s\n", BIXING);
614     }
615 #else
616   fprintf (opter, "%s\n", HINSI);
617 #endif
618   fprintf (opter, "%s", buffer);
619   fprintf (opter, "\n");
620   for (jep = jeary, i = 0; i < jt.maxserial; i++, jep++)
621     {
622       print_je (*jep, opter, serial_out, esc_exp);
623     }
624 }
625 
626 int
init_heap(hpb,yhpb,l,rl,ipf)627 init_heap (hpb, yhpb, l, rl, ipf)
628      int hpb, yhpb, l, rl;
629      FILE *ipf;
630 {
631   jehp = je_heap = (struct je *) malloc ((rl * sizeof (struct je)));
632   hp = heap = (UCHAR *) malloc (hpb);
633   yhp = yomi_heap = (w_char *) malloc ((yhpb * sizeof (w_char)));
634   if ((jeary = (struct je **) calloc (l, sizeof (struct je *))) == NULL)
635     {
636       fprintf (stderr, "Malloc Failed\n");
637       return (-1);
638     }
639   ifpter = ipf;
640   maxline = l;
641   heapend = heap + hpb;
642   yheapend = yomi_heap + yhpb;
643   jeheapend = je_heap + rl;
644   return (0);
645 }
646 
647 void
init_jeary()648 init_jeary ()
649 {
650   int k;
651   for (k = 0; k < jt.maxserial; k++)
652     {
653       jeary[k] = je_heap + k;
654     }
655 }
656 
657 /* test  program
658    main()
659    {
660    yhp = yomi_heap = (w_char *)malloc(100000);
661    jeary = (struct je *)malloc(100000);
662 
663    ifpter = stdin;
664    ujis_header();
665    read_ujis();
666 
667    sort();
668    uniq_je(func);
669    output_ujis(stdout, 0, 1);
670    }
671    */
672 
/*
 * Common fatal-error exit of this file: terminate with status 1.
 */
void
exit1 ()
{
  exit (1);
}
678 
679 /* must be updated later */
680 
/*
 * Convert the hinsi field a into its number and store it in *p.
 * The field is first looked up as a name with
 * wnn_find_hinsi_by_name(); when that fails (-1) it is read as a
 * decimal number instead.  When neither works, a message is printed
 * and the program is terminated through exit1().
 */
static void
bunpou_num (a, p)
     register char *a;
     register int *p;
{
  int tmp = wnn_find_hinsi_by_name (a);

  if (tmp != -1)
    {
      *p = tmp;
      return;
    }

  /* sscanf() yields 1 only when a number was actually stored.  Both 0
     (no match) and EOF (input ended first) leave *p untouched, so both
     must be treated as an error. */
  if (sscanf (a, "%d", p) != 1)
    {
      fprintf (stderr, "Bad hinsi name \"%s\".\n", a);
      exit1 ();
    }
}
703 
#ifdef CHINESE
/*
 * Read the sisheng string a as a decimal number into *p.
 * When a does not start with a number, *p is left unchanged.
 */
static void
sisheng_num (a, p)
     register char *a;
     register int *p;
{
  (void) sscanf (a, "%d", p);
}
#endif
713 
714 int
sort_func_je(a,b)715 sort_func_je (a, b)
716      char *a, *b;
717 {
718   return (sort_func (a, b, D_YOMI));
719 }
720 
721 int
sort_func_je_kanji(a,b)722 sort_func_je_kanji (a, b)
723      char *a, *b;
724 {
725   return (sort_func (a, b, D_KANJI));
726 }
727 
728 int
sort_func(a,b,which)729 sort_func (a, b, which)
730      register char *a, *b;
731      int which;
732 {
733   register int tmp;
734   register struct je *pa, *pb;
735   pa = *((struct je **) a);
736   pb = *((struct je **) b);
737   if (pa->hinsi == SAKUJO_HINSI)
738     {
739       if (pb->hinsi == SAKUJO_HINSI)
740         return (0);
741       return (-1);
742     }
743   if (pb->hinsi == SAKUJO_HINSI)
744     return (1);
745 
746   if (which == D_YOMI)
747     {
748 
749       if (!(pa->yomi) || !(pb->yomi))
750         return (0);
751       tmp = wnn_Strcmp (pa->yomi, pb->yomi);
752       if (tmp)
753         return (tmp);
754 
755       if (pa->hinsi != pb->hinsi)
756         {
757           return ((int) (pa->hinsi) - (int) (pb->hinsi));
758         }
759 
760 #ifdef CHINESE
761       if (jt.syurui == CWNN_REV_DICT)
762         {
763           if (pa->ss != pb->ss)
764             {
765               return ((int) (pa->ss) - (int) (pb->ss));
766             }
767         }
768 #endif
769       if (!(pa->kan) || !(pb->kan))
770         return (0);
771       tmp = wnn_Strcmp (pa->kan, pb->kan);
772       if (tmp)
773         return (tmp);
774     }
775   else
776     {
777       if (!(pa->kan) || !(pb->kan))
778         return (0);
779       tmp = wnn_Strcmp (pa->kan, pb->kan);
780       if (tmp)
781         return (tmp);
782 
783       if (pa->hinsi != pb->hinsi)
784         {
785           return ((int) (pa->hinsi) - (int) (pb->hinsi));
786         }
787 #ifdef CHINESE
788       if (jt.syurui == CWNN_REV_DICT)
789         {
790           if (pa->ss != pb->ss)
791             {
792               return ((int) (pa->ss) - (int) (pb->ss));
793             }
794         }
795 #endif
796       if (!(pa->yomi) || !(pb->yomi))
797         return (0);
798       tmp = wnn_Strcmp (pa->yomi, pb->yomi);
799       if (tmp)
800         return (tmp);
801     }
802   return (0);
803 }
804 
805 void
sort()806 sort ()
807 {
808   qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je);
809 }
810 
811 void
sort_if_not_sorted()812 sort_if_not_sorted ()
813 {
814   if (!Sorted ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je))
815     {
816       sort ();
817     }
818 }
819 
820 void
sort_kanji()821 sort_kanji ()
822 {
823   qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je_kanji);
824 }
825 
826 void
827 uniq_je (func)
828      int (*func) ();
829 {
830   int k;
831   struct je **prev, **jep;
832 
833   if (jt.maxserial == 0)
834     return;
835   prev = &jeary[0];
836   for (k = 1; k < jt.maxserial; k++)
837     {
838       jep = &jeary[k];
839       if (func ((char *) jep, (char *) prev) == 0)
840         {
841           w_char tmp[LENGTHYOMI];
842           char tmp1[LENGTHYOMI];
843           char tmp2[LENGTHKANJI];
844 #ifdef CHINESE
845           char tmp3[LENGTHKANJI];
846           if (jt.syurui == BWNN_REV_DICT || jt.syurui == CWNN_REV_DICT)
847             {
848               wnn_Strcpy (tmp, (*jep)->yomi);
849               wnn_sStrcpy (tmp1, tmp);
850               wnn_Strcpy (tmp, (*jep)->kan);
851               wnn_sStrcpy (tmp2, tmp);
852               sprintf (tmp3, "%d", (*jep)->ss);
853             }
854           else
855             {
856 #endif
857               wnn_Sreverse (tmp, (*jep)->yomi);
858               wnn_sStrcpy (tmp1, tmp);
859               wnn_Sreverse (tmp, (*jep)->kan);
860               wnn_sStrcpy (tmp2, tmp);
861 #ifdef CHINESE
862             }
863           if (jt.syurui == CWNN_REV_DICT)
864             fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s),sisheng(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi), tmp3);
865           else
866 #endif
867             fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi));
868         }
869       else
870         {
871           prev++;
872           if (prev != jep)
873             {
874               *prev = *jep;
875             }
876         }
877     }
878   prev++;
879   jt.maxserial = prev - &jeary[0];
880 }
881 
#ifdef nodef
/*
 * Disabled code (compiled only when nodef is defined).
 * Copy c to o in escaped form: a backslash is doubled, bytes above
 * 0x20 are copied unchanged, and every other byte is written as
 * "\0" followed by its octal value.  o is NUL-terminated.
 * Returns the number of bytes written, not counting the terminator.
 */
make_kanji_str (o, c)
     register UCHAR *o, *c;
{
  register UCHAR *start = o;

  while (*c)
    {
      if (*c == '\\')
        {
          *o++ = '\\';
          *o++ = '\\';
        }
      else if (*c > 0x20)
        {
          *o++ = *c;
        }
      else
        {
          sprintf (o, "\\0%o", *c);
          /* Advance to the terminator sprintf just wrote. */
          while (*o)
            o++;
        }
      c++;
    }
  *o = 0;
  return (o - start);
}
#endif
909 
/*
 * Decode the escaped string o into c (c must have room for the result,
 * which is never longer than o).
 *
 *   "\0" followed by one or two octal digits  ->  the byte with that value
 *   "\"  followed by any other character x    ->  x itself
 *   any other character                       ->  copied unchanged
 *
 * Malformed input is handled without touching memory outside the
 * strings and without leaving holes in the result:
 *   - a lone backslash at the very end of o is dropped, and
 *   - "\0" that is not followed by an octal digit produces nothing.
 */
static void
read_kanji_str (c, o)
     register char *c, *o;
{
  while (*o != '\0')
    {
      if (*o != '\\')
        {
          *c++ = *o++;
          continue;
        }

      o++;                      /* skip the backslash */
      if (*o == '\0')
        break;                  /* trailing backslash: stop at the terminator */

      if (*o != '0')
        {
          *c++ = *o++;          /* "\x" stands for x */
          continue;
        }

      o++;                      /* skip the '0' that introduces an octal value */
      if (*o < '0' || *o > '7')
        continue;               /* no digit: nothing to emit */

      *c = (*o++ - '0');
      if (*o >= '0' && *o <= '7')
        *c = *c * 8 + (*o++ - '0');
      c++;
    }
  *c = 0;
}
947 
#ifdef CHINESE
/*
 * w_char version of read_kanji_str(): decode the escaped string o into
 * c (c must have room for the result, which is never longer than o).
 *
 *   "\0" followed by one or two octal digits  ->  the character with
 *                                                 that value
 *   "\"  followed by any other character x    ->  x itself
 *   any other character                       ->  copied unchanged
 *
 * A lone backslash at the very end of o is dropped, and "\0" that is
 * not followed by an octal digit produces nothing; neither case reads
 * past the terminator or leaves an unwritten element in the result.
 */
static void
read_kanji_str_w (c, o)
     register w_char *c, *o;
{
  while (*o != 0)
    {
      if (*o != (w_char) '\\')
        {
          *c++ = *o++;
          continue;
        }

      o++;                      /* skip the backslash */
      if (*o == 0)
        break;                  /* trailing backslash: stop at the terminator */

      if (*o != (w_char) '0')
        {
          *c++ = *o++;          /* "\x" stands for x */
          continue;
        }

      o++;                      /* skip the '0' that introduces an octal value */
      if (*o < (w_char) '0' || *o > (w_char) '7')
        continue;               /* no digit: nothing to emit */

      *c = (*o++ - (w_char) '0');
      if (*o >= (w_char) '0' && *o <= (w_char) '7')
        *c = *c * 8 + (*o++ - (w_char) '0');
      c++;
    }
  *c = 0;
}
#endif
987 
/*
 * Check whether an array is already in order.
 *
 *   st        first element
 *   lc        number of elements
 *   size      size of one element in bytes
 *   sort_fun  comparison function, called with pointers to two
 *             neighbouring elements
 *
 * Returns 0 as soon as sort_fun() gives a positive result for a pair
 * of neighbours, and 1 otherwise (including for arrays with fewer than
 * two elements).
 */
int
Sorted (st, lc, size, sort_fun)
     register char *st;
     register int lc;
     int size;
     int (*sort_fun) ();
{
  register char *cur = st;
  register int pairs = lc - 1;  /* neighbouring pairs still to check */

  while (pairs > 0)
    {
      char *next = cur + size;

      if ((*sort_fun) (cur, next) > 0)
        return (0);
      cur = next;
      pairs--;
    }
  return (1);
}
1005 
/*
 * Tell whether k is the katakana spelling of y.
 *
 * Both strings are scanned two bytes at a time.  A pair matches when
 *   - both are the two bytes 0xa1 0xbc (the prolonged sound mark in
 *     EUC-JP), or
 *   - y has lead byte 0xa4 and k has lead byte 0xa5, and their second
 *     bytes are equal.
 * Returns 1 when every pair matches and both strings end together
 * (two empty strings therefore match), 0 otherwise.  A character that
 * is cut off after its lead byte makes the result 0.
 */
int
is_katakana (k, y)
     register char *k, *y;
{
  while (*k && *y)
    {
      /* The sound mark must be present in BOTH strings; the second
         byte of k has to be checked, not the one of y twice. */
      if (*y == (char) 0xa1 && *k == (char) 0xa1
          && *(y + 1) == (char) 0xbc && *(k + 1) == (char) 0xbc)
        {
          y += 2;
          k += 2;
          continue;
        }
      if (*y++ != (char) 0xa4 || *k++ != (char) 0xa5)
        return (0);
      /* be careful, char comparison. */
      /* A missing second byte must not be skipped over, otherwise the
         scan would continue behind the terminator. */
      if (*y == '\0' || *y != *k)
        return (0);
      y++;
      k++;
    }
  return (!(*k | *y));
}
1028 
1029 static void
toesc(ckanji,cyomi)1030 toesc (ckanji, cyomi)
1031      char *ckanji, *cyomi;
1032 {
1033   if (strcmp (ckanji, cyomi) == 0)
1034     {
1035       strcpy (ckanji, DIC_HIRAGANA);
1036     }
1037   else if (is_katakana (ckanji, cyomi))
1038     {
1039       strcpy (ckanji, DIC_KATAKANA);
1040     }
1041 }
1042