1 /* Copyright 1992 NEC Corporation, Tokyo, Japan.
2  *
3  * Permission to use, copy, modify, distribute and sell this software
4  * and its documentation for any purpose is hereby granted without
5  * fee, provided that the above copyright notice appear in all copies
6  * and that both that copyright notice and this permission notice
7  * appear in supporting documentation, and that the name of NEC
8  * Corporation not be used in advertising or publicity pertaining to
9  * distribution of the software without specific, written prior
10  * permission.  NEC Corporation makes no representations about the
11  * suitability of this software for any purpose.  It is provided "as
12  * is" without express or implied warranty.
13  *
14  * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
15  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
16  * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
17  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
18  * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
19  * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20  * PERFORMANCE OF THIS SOFTWARE.
21  */
22 
#ifndef lint
/* RCS identification string for this source revision; omitted under lint. */
static char rcsid[]="@(#) 102.1 $Id: dpxdic.c,v 1.7.2.2 2003/12/27 17:15:22 aida_s Exp $";
#endif
26 
#include "RKintern.h"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <fcntl.h>
#include <time.h>
#ifdef SVR4
#include	<unistd.h>
#endif

#include "ccompat.h"
#include "RKindep/file.h"
38 
/*
 * Path of the grammar file opened by main() when no -D option is given.
 * A definition supplied before this point (e.g. on the compiler command
 * line) takes precedence; USE_OBSOLETE_STYLE_FILENAME selects the ".d"
 * file name instead of ".cbd".
 */
#ifndef HYOUJUN_GRAM
#ifdef USE_OBSOLETE_STYLE_FILENAME
#define HYOUJUN_GRAM "/usr/lib/canna/dic/canna/fuzokugo.d"
#else
#define HYOUJUN_GRAM "/usr/lib/canna/dic/canna/fuzokugo.cbd"
#endif
#endif
46 
/* Set to 1 by the -i option: show_a_wrec() then prints one line per
 * candidate, with the candidate ahead of its reading. */
int	inv = 0;
/* Result of RkiBasename(argv[0]); prefixes usage and error messages. */
static	char	*program;
/* Scratch buffer shared by the show_* routines; receives the output of
 * ustoeuc() right before it is printed. */
static	unsigned char ebuf[8048];
50 
51 unsigned char *
show_a_cand(gram,wrec,or)52 show_a_cand(gram, wrec, or)
53      struct RkKxGram	*gram;
54      unsigned char	*wrec;
55      unsigned		*or;
56 {
57   unsigned	j, clen, row;
58   char		*ptr, rowname[128];
59   Wchar		*dst, wbuf[1024];
60 
61   clen = (*wrec >> 1) & 0x7f;
62   row = _RkRowNumber(wrec);
63   wrec += NW_PREFIX;
64   if (row == 0) {
65     fprintf(stderr, "bad hinshi\n");
66     return(wrec);
67   }
68   for (j = 0, dst = wbuf; j < clen; j++) {
69     Wchar wch;
70 
71     wch = bst2_to_s(wrec);
72     if (wch == (Wchar)'\\' || wch == (Wchar)' ' || wch == (Wchar)'\t') {
73       *dst++ = (Wchar)'\\';
74     }
75     *dst++ = wch;
76     wrec += sizeof(Wchar);
77   }
78   *dst = (Wchar)0;
79   if (gram) {
80     if ((ptr = (char *)RkGetGramName(gram, row)) != NULL) {
81       (void)strcpy(rowname, ptr);
82       if (*or != row) {
83 	printf(" #%s", rowname);
84 	*or = row;
85       }
86     } else {
87       fprintf(stderr, "bad hinshi\n");
88       return(wrec);
89     }
90   } else {
91     if (*or != row) {
92       printf(" #%d", row);
93       *or = row;
94     }
95   }
96   ustoeuc(wbuf, dst - wbuf, ebuf, RkNumber(ebuf));
97   printf(" %s", ebuf);
98   return wrec;
99 }
100 
101 unsigned char *
show_a_icand(gram,wrec)102 show_a_icand(gram, wrec)
103      struct RkKxGram	*gram;
104      unsigned char	*wrec;
105 {
106   unsigned	j, clen, row;
107   char		*ptr, rowname[128];
108   Wchar		*dst, wbuf[1024];
109 
110   clen = (*wrec >> 1) & 0x7f;
111   row = _RkRowNumber(wrec);
112   wrec += NW_PREFIX;
113   if (row == 0) {
114     fprintf(stderr, "bad hinshi\n");
115     exit(1);
116   }
117   for (j = 0, dst = wbuf; j < clen; j++) {
118     *dst++ = bst2_to_s(wrec);
119     wrec += sizeof(Wchar);
120   }
121   *dst = (Wchar)0;
122   ustoeuc(wbuf, dst - wbuf, ebuf, RkNumber(ebuf));
123   printf("%s", ebuf);
124   if (gram && (ptr = (char *)RkGetGramName(gram, row))) {
125     (void)strcpy(rowname, ptr);
126     printf(" #%s", rowname);
127   } else {
128     printf(" #%d", row);
129   }
130   return wrec;
131 }
132 
133 void
show_a_wrec(gram,wrec,yomi,n)134 show_a_wrec(gram, wrec, yomi, n)
135      struct RkKxGram	*gram;
136      unsigned char	*wrec;
137      Wchar		*yomi;
138      unsigned		n;
139 {
140   unsigned	i, left, nc, or;
141   Wchar		*src, *dst, syomi[1024], wch;
142 
143   left = (*wrec >> 1) & 0x3f;
144   nc = _RkCandNumber(wrec);
145   if (*wrec & 0x80)
146     wrec += 2;
147   wrec += 2;
148   for (i = 0, src = yomi, dst = syomi ; i < n ; i++) {
149     wch = *src++;
150     if (wch == (Wchar)'\\' || wch == (Wchar)' ' || wch == (Wchar)'\t') {
151       *dst++ = (Wchar)'\\';
152     }
153     *dst++ = wch;
154   }
155   for (i = 0 ; i < left ; i++) {
156     wch = bst2_to_s(wrec);
157     if (wch == (Wchar)'\\' || wch == (Wchar)' ' || wch == (Wchar)'\t') {
158       *dst++ = (Wchar)'\\';
159     }
160     *dst++ = wch;
161     wrec += sizeof(Wchar);
162   }
163   *dst = 0;
164   if (inv) {
165     for (i = 0; i < nc; i++) {
166       wrec = show_a_icand(gram, wrec);
167       ustoeuc(syomi, dst - syomi, ebuf, RkNumber(ebuf));
168       printf(" %s", ebuf);
169       printf("\n");
170     }
171   } else {
172     ustoeuc(syomi, dst - syomi, ebuf, RkNumber(ebuf));
173     printf("%s", ebuf);
174     or = 0;
175     for (i = 0; i < nc; i++) {
176       wrec = show_a_cand(gram, wrec, &or);
177     }
178     printf("\n");
179   }
180 }
181 
182 static int
loadDic(dic)183 loadDic(dic)
184      struct ND	*dic;
185 {
186   unsigned char	*buf;
187   off_t		off = dic->doff;
188   unsigned	size = dic->drsz;
189   int		fd = dic->fd;
190   int		i;
191 
192   if (!dic->pgs) {
193     unsigned   tblsz = dic->ttlpg * sizeof(struct NP);
194 
195     if (!(dic->pgs = (struct NP *)malloc(tblsz)))
196       return(-1);
197     for (i = 0; i < dic->ttlpg; i++) {
198       dic->pgs[i].lnksz = (unsigned) 0;
199       dic->pgs[i].ndsz = (unsigned) 0;
200       dic->pgs[i].lvo = (unsigned) 0;
201       dic->pgs[i].csn = (unsigned) 0;
202       dic->pgs[i].flags = (unsigned) 0;
203       dic->pgs[i].buf = (unsigned char *) 0;
204     }
205   }
206   if (!(buf = (unsigned char *)malloc(size)))
207     return(-1);
208   (void)lseek(fd, off, 0);
209   if (read(fd, (char *)buf, size) != size)
210     return(-1);
211   dic->buf = buf;
212 
213   return(0);
214 }
215 
216 static int
loadPage(dic,id)217 loadPage(dic, id)
218      struct ND	*dic;
219      int	id;
220 {
221   unsigned	off = dic->doff + dic->drsz + dic->pgsz * id;
222   unsigned	size = dic->pgsz;
223   unsigned char	*buf;
224   int		fd = dic->fd;
225 
226   if (!dic->pgs) {
227     fprintf(stderr, "no page table\n");
228     return(-1);
229   }
230 
231   if (id >= dic->ttlpg) {
232     fprintf(stderr, "ERROR: %dth page is greater than max page %lu\n",
233 	    id, dic->ttlpg);
234     return(-1);
235   }
236   if (!isLoadedPage(dic->pgs + id)) {
237     if (!(buf = (unsigned char *)malloc(size))) {
238       fprintf(stderr, "malloc failed.\n");
239       return(-1);
240     }
241 
242     (void)lseek(fd, off, 0);
243     if (read(fd, (char *)buf, size) != size) {
244       (void)fprintf(stderr, "cannot read page %d (%d)\n", id, size);
245       return(-1);
246     }
247     dic->pgs[id].buf = buf;
248     dic->pgs[id].count = 0;
249     dic->pgs[id].flags = RK_PG_LOADED;
250     dic->pgs[id].ndsz = bst2_to_s(buf + 2);
251     dic->pgs[id].lnksz = bst2_to_s(buf + 4);
252     dic->pgs[id].lvo = bst3_to_l(buf + 7);
253     dic->pgs[id].csn = bst3_to_l(buf + 10);
254   }
255   return(0);
256 }
257 
258 unsigned char *
offset2ptr(dic,off,which)259 offset2ptr(dic, off, which)
260      struct ND	*dic;
261      unsigned	off;
262      int	*which;
263 {
264   unsigned char *p;
265   int		pg;
266 
267   if (off < dic->drsz) {
268     p = dic->buf + off;
269     pg = -1;
270   } else {
271     pg = (off - dic->drsz) / dic->pgsz;
272     if (loadPage(dic, pg) < 0)
273       return((unsigned char *)0);
274     p = dic->pgs[pg].buf + off - dic->drsz - pg * dic->pgsz;
275   }
276   if (which)
277     *which = pg;
278   return(p);
279 }
280 
281 unsigned char *
off2ptr(dic,off,id)282 off2ptr(dic, off, id)
283      struct ND	*dic;
284      unsigned	off;
285      int	id;
286 {
287   unsigned char *p;
288 
289   p = dic->pgs[id].buf + off;
290   return(p);
291 }
292 
293 int
show_nip(gram,dic,yomi,n,p,pg)294 show_nip(gram, dic, yomi, n, p, pg)
295      struct RkKxGram	*gram;
296      struct ND		*dic;
297      Wchar		*yomi;
298      unsigned		n;
299      unsigned char	*p;
300      int		pg;
301 {
302   Wchar		w;
303   unsigned char	*pp;
304   unsigned	nn;
305   unsigned	val;
306   int		iw;
307   int		il = 0;
308 
309   while (!il) {
310     w = bst2_to_s(p); p += 2;
311     iw = *p & WORD_NODE;
312     il = *p & LAST_NODE;
313     val = ((p[0] & 0x3f) << BIT_UNIT) | p[1];
314     p += 2;
315     if (w != (Wchar) 0) {
316       yomi[n] = w;
317       nn = n + 1;
318     } else {
319       nn = n;
320     }
321     yomi[nn] = (Wchar)0;
322     if (iw) {
323       show_a_wrec(gram, off2ptr(dic, val, pg), yomi, nn);
324     } else {
325       if (!(pp = off2ptr(dic, val, pg))) {
326 	fprintf(stderr, "bad offset in nip\n");
327 	exit(1);
328       }
329       show_nip(gram, dic, yomi, nn, pp, pg);
330     }
331   }
332   return(0);
333 }
334 
/*
 * compit -- qsort() comparison for directory entries.
 *
 * Entries are ordered by their first two bytes, taken as a big-endian
 * 16-bit key.  Returns a negative, zero or positive value when the key
 * of a is below, equal to or above the key of b.  Equal keys must yield
 * 0: reporting "greater" in both directions breaks the ordering contract
 * qsort() relies on.
 */
int
compit(a, b)
     unsigned char *a;
     unsigned char *b;
{
  unsigned	ka = ((unsigned)a[0] << 8) | a[1];
  unsigned	kb = ((unsigned)b[0] << 8) | b[1];

  if (ka > kb) {
    return(1);
  }
  if (ka < kb) {
    return(-1);
  }
  return(0);
}
345 
346 static
347 show_nid(gram, dic, yomi, n, ptr)
348      struct RkKxGram	*gram;
349      struct ND		*dic;
350      Wchar		*yomi;
351      unsigned		n;
352      unsigned char	*ptr;
353 {
354   unsigned char	*p;
355   Wchar		wc, i;
356   unsigned	val;
357   int		wn;
358   unsigned	nn;
359   int		which;
360 
361   p = ptr;
362   wc = bst2_to_s(p); p += 5;
363   qsort((char *)p, (unsigned)wc, 5,
364         (int (*) pro((const void *, const void *)))compit);
365   for (i = 0; i < wc; i++) {
366     Wchar	w;
367 
368     w = bst2_to_s(p); p += 2;
369     val = bst3_to_l(p); p += 3;
370     if (w == (Wchar) 0xffff)
371       continue;
372     wn = val & 0x800000;
373     val &= 0x7fffff;
374     if (w != (Wchar)0) {
375       yomi[n] = w;
376       nn = n + 1;
377     } else {
378       nn = n;
379     }
380     yomi[nn] = (Wchar)0;
381     if (wn)
382       show_a_wrec(gram, offset2ptr(dic, val, &which), yomi, nn);
383     else {
384       unsigned char	*pp;
385 
386       if (!(pp = offset2ptr(dic, val, &which))) {
387 	fprintf(stderr, "bad offset\n");
388 	exit(1);
389       }
390       if (which < 0) {
391 	show_nid(gram, dic, yomi, nn, pp);
392       } else {
393 	show_nip(gram, dic, yomi, nn, pp, which);
394       }
395     }
396   }
397   return(0);
398 }
399 
400 int
getdic(dic,filenm,dmnm)401 getdic(dic, filenm, dmnm)
402      struct ND	*dic;
403      char	*filenm;
404      char	*dmnm;
405 {
406   struct HD	hd;
407   int		fd, lk;
408   off_t	off, doff;
409   unsigned err;
410   unsigned char	ll[4];
411 
412   if (!filenm)
413     return(-1);
414   if ((fd = open(filenm, O_RDONLY)) < 0)
415     return(-1);
416 #ifdef __CYGWIN32__
417   setmode(fd, O_BINARY);
418 #endif
419   for (off = 0, lk = 1, doff = 0, err = 0;
420        !err && lk && _RkReadHeader(fd, &hd, off) >= 0;
421        lk = dmnm ? strcmp(dmnm, (char *)hd.data[HD_DMNM].ptr) : 1) {
422     if (!dmnm) {
423       time_t		tloc;
424       char date[26];
425 
426       tloc = hd.data[HD_TIME].var;
427       strcpy(date, ctime(&tloc));
428       date[24] = 0;
429       (void)fprintf(stderr, "%s [ %s ] = %ld + %ld\n",
430 		    (char *)hd.data[HD_DMNM].ptr,
431 		    date,
432 		    hd.data[HD_CAN].var,
433 		    hd.data[HD_REC].var);
434     }
435     doff = off;
436     off += hd.data[HD_SIZ].var;
437     if (HD_VERSION(&hd) < 300702L &&
438 	!strncmp(".swd", (char *)(hd.data[HD_DMNM].ptr + strlen((char *)hd.data[HD_DMNM].ptr) - 4), 4)) {
439       if (lseek(fd, off, 0) < 0 || read(fd, (char *)ll, 4) != 4)
440 	err = 1;
441       off += bst4_to_l(ll) + 4;
442     }
443   }
444   if (!dmnm)
445     return(0);
446   if (lk)
447     return(-1);
448   dic->doff = doff + hd.data[HD_HSZ].var;
449   dic->sz = hd.data[HD_SIZ].var;
450   dic->drsz = hd.data[HD_PGOF].var - hd.data[HD_DROF].var;
451   dic->pgsz = _RkCalcUnlog2(hd.data[HD_L2P].var) + 1;
452   dic->ttlpg = hd.data[HD_PAG].var;
453   dic->fd = fd;
454   dic->buf = (unsigned char *)0;
455   dic->pgs = (struct NP *)0;
456 
457   return(loadDic(dic));
458 }
459 
main(argc,argv)460 main (argc, argv)
461   int argc;
462   char *argv [];
463 {
464   char			*dmnm = 0;
465   int			i;
466   struct RkKxGram	*gram;
467   Wchar			yomi[1024];
468   struct ND		Dic;
469   struct ND		*dic = &Dic;
470   int			which;
471   unsigned char		*p;
472   char			*cnj = (char *)0;
473   char			bn[256];
474   int			fd;
475 
476   program = RkiBasename(argv[0]);
477   for (i = 1; i < argc && argv[i][0] == '-'; i++) {
478     if (!strcmp(argv[i], "-i") ) {
479       inv = 1;
480       continue;
481     } else if (!strcmp(argv[i], "-D")) {
482       if ( ++i < argc && !cnj) {
483 	cnj = argv[i];
484 	continue;
485       }
486     } else {
487       (void)fprintf(stderr,
488 		    "usage: %s [-i] [-D bunpou] <filename> [dictionary-name]\n",
489 		    program);
490       exit(1);
491     }
492   }
493   if (i > argc - 1) {
494     (void) fprintf(stderr,
495 		   "usage: %s [-i] [-D bunpou] <filename> [dictionary-name]\n",
496 		   program);
497     exit(1);
498   }
499   if (!cnj) {
500     if(!(gram = RkOpenGram(HYOUJUN_GRAM))) {
501       (void)fprintf(stderr, "Warning: cannot open grammar file %s.\n", HYOUJUN_GRAM);
502     }
503   } else {
504     if ((fd = open(cnj, 0)) < 0) {
505       (void)fprintf(stderr, "%s: cannot open grammar file %s.\n", program, cnj);
506       exit(1);
507     }
508 #ifdef __CYGWIN32__
509     setmode(fd, O_BINARY);
510 #endif
511     gram = RkReadGram(fd, (size_t)-1);
512     close(fd);
513   }
514   (void)strcpy(bn, argv[i]);
515   if (!(dmnm = argv[i+1])) {
516     if (getdic(dic, bn, dmnm) < 0) {
517       (void)fprintf(stderr, "%s: cannot read file %s\n", program, bn);
518     }
519     exit(1);
520   }
521 
522   if (getdic(dic, bn, dmnm) < 0) {
523     (void)fprintf(stderr, "%s: cannot read file %s or dictionary %s\n", program, bn, dmnm);
524     exit(1);
525   }
526   p = offset2ptr(dic, (unsigned)0, &which);
527   if (which != -1) {
528     (void)fprintf(stderr, "incollect dictionary\n");
529     exit(1);
530   }
531   show_nid(gram, dic, yomi, (unsigned)0, p);
532   (void)close(dic->fd);
533   fflush(stdout);
534   return 0;
535 }
536