1 /* Copyright 1992 NEC Corporation, Tokyo, Japan.
2 *
3 * Permission to use, copy, modify, distribute and sell this software
4 * and its documentation for any purpose is hereby granted without
5 * fee, provided that the above copyright notice appear in all copies
6 * and that both that copyright notice and this permission notice
7 * appear in supporting documentation, and that the name of NEC
8 * Corporation not be used in advertising or publicity pertaining to
9 * distribution of the software without specific, written prior
10 * permission. NEC Corporation makes no representations about the
11 * suitability of this software for any purpose. It is provided "as
12 * is" without express or implied warranty.
13 *
14 * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
16 * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20 * PERFORMANCE OF THIS SOFTWARE.
21 */
22
23 #ifndef lint
24 static char rcsid[]="@(#) 102.1 $Id: dpxdic.c,v 1.7.2.2 2003/12/27 17:15:22 aida_s Exp $";
25 #endif
26
27 #include "RKintern.h"
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <fcntl.h>
31 #include <time.h>
32 #ifdef SVR4
33 #include <unistd.h>
34 #endif
35
36 #include "ccompat.h"
37 #include "RKindep/file.h"
38
39 #ifndef HYOUJUN_GRAM
40 #ifdef USE_OBSOLETE_STYLE_FILENAME
41 #define HYOUJUN_GRAM "/usr/lib/canna/dic/canna/fuzokugo.d"
42 #else
43 #define HYOUJUN_GRAM "/usr/lib/canna/dic/canna/fuzokugo.cbd"
44 #endif
45 #endif
46
/* Non-zero selects the inverted output format; main() sets it for "-i". */
int inv = 0;
/* Base name of argv[0], used as a prefix in diagnostics and usage text. */
static char *program;
/* Shared scratch buffer that receives ustoeuc() output before printing. */
static unsigned char ebuf[8048];
50
51 unsigned char *
show_a_cand(gram,wrec,or)52 show_a_cand(gram, wrec, or)
53 struct RkKxGram *gram;
54 unsigned char *wrec;
55 unsigned *or;
56 {
57 unsigned j, clen, row;
58 char *ptr, rowname[128];
59 Wchar *dst, wbuf[1024];
60
61 clen = (*wrec >> 1) & 0x7f;
62 row = _RkRowNumber(wrec);
63 wrec += NW_PREFIX;
64 if (row == 0) {
65 fprintf(stderr, "bad hinshi\n");
66 return(wrec);
67 }
68 for (j = 0, dst = wbuf; j < clen; j++) {
69 Wchar wch;
70
71 wch = bst2_to_s(wrec);
72 if (wch == (Wchar)'\\' || wch == (Wchar)' ' || wch == (Wchar)'\t') {
73 *dst++ = (Wchar)'\\';
74 }
75 *dst++ = wch;
76 wrec += sizeof(Wchar);
77 }
78 *dst = (Wchar)0;
79 if (gram) {
80 if ((ptr = (char *)RkGetGramName(gram, row)) != NULL) {
81 (void)strcpy(rowname, ptr);
82 if (*or != row) {
83 printf(" #%s", rowname);
84 *or = row;
85 }
86 } else {
87 fprintf(stderr, "bad hinshi\n");
88 return(wrec);
89 }
90 } else {
91 if (*or != row) {
92 printf(" #%d", row);
93 *or = row;
94 }
95 }
96 ustoeuc(wbuf, dst - wbuf, ebuf, RkNumber(ebuf));
97 printf(" %s", ebuf);
98 return wrec;
99 }
100
101 unsigned char *
show_a_icand(gram,wrec)102 show_a_icand(gram, wrec)
103 struct RkKxGram *gram;
104 unsigned char *wrec;
105 {
106 unsigned j, clen, row;
107 char *ptr, rowname[128];
108 Wchar *dst, wbuf[1024];
109
110 clen = (*wrec >> 1) & 0x7f;
111 row = _RkRowNumber(wrec);
112 wrec += NW_PREFIX;
113 if (row == 0) {
114 fprintf(stderr, "bad hinshi\n");
115 exit(1);
116 }
117 for (j = 0, dst = wbuf; j < clen; j++) {
118 *dst++ = bst2_to_s(wrec);
119 wrec += sizeof(Wchar);
120 }
121 *dst = (Wchar)0;
122 ustoeuc(wbuf, dst - wbuf, ebuf, RkNumber(ebuf));
123 printf("%s", ebuf);
124 if (gram && (ptr = (char *)RkGetGramName(gram, row))) {
125 (void)strcpy(rowname, ptr);
126 printf(" #%s", rowname);
127 } else {
128 printf(" #%d", row);
129 }
130 return wrec;
131 }
132
133 void
show_a_wrec(gram,wrec,yomi,n)134 show_a_wrec(gram, wrec, yomi, n)
135 struct RkKxGram *gram;
136 unsigned char *wrec;
137 Wchar *yomi;
138 unsigned n;
139 {
140 unsigned i, left, nc, or;
141 Wchar *src, *dst, syomi[1024], wch;
142
143 left = (*wrec >> 1) & 0x3f;
144 nc = _RkCandNumber(wrec);
145 if (*wrec & 0x80)
146 wrec += 2;
147 wrec += 2;
148 for (i = 0, src = yomi, dst = syomi ; i < n ; i++) {
149 wch = *src++;
150 if (wch == (Wchar)'\\' || wch == (Wchar)' ' || wch == (Wchar)'\t') {
151 *dst++ = (Wchar)'\\';
152 }
153 *dst++ = wch;
154 }
155 for (i = 0 ; i < left ; i++) {
156 wch = bst2_to_s(wrec);
157 if (wch == (Wchar)'\\' || wch == (Wchar)' ' || wch == (Wchar)'\t') {
158 *dst++ = (Wchar)'\\';
159 }
160 *dst++ = wch;
161 wrec += sizeof(Wchar);
162 }
163 *dst = 0;
164 if (inv) {
165 for (i = 0; i < nc; i++) {
166 wrec = show_a_icand(gram, wrec);
167 ustoeuc(syomi, dst - syomi, ebuf, RkNumber(ebuf));
168 printf(" %s", ebuf);
169 printf("\n");
170 }
171 } else {
172 ustoeuc(syomi, dst - syomi, ebuf, RkNumber(ebuf));
173 printf("%s", ebuf);
174 or = 0;
175 for (i = 0; i < nc; i++) {
176 wrec = show_a_cand(gram, wrec, &or);
177 }
178 printf("\n");
179 }
180 }
181
182 static int
loadDic(dic)183 loadDic(dic)
184 struct ND *dic;
185 {
186 unsigned char *buf;
187 off_t off = dic->doff;
188 unsigned size = dic->drsz;
189 int fd = dic->fd;
190 int i;
191
192 if (!dic->pgs) {
193 unsigned tblsz = dic->ttlpg * sizeof(struct NP);
194
195 if (!(dic->pgs = (struct NP *)malloc(tblsz)))
196 return(-1);
197 for (i = 0; i < dic->ttlpg; i++) {
198 dic->pgs[i].lnksz = (unsigned) 0;
199 dic->pgs[i].ndsz = (unsigned) 0;
200 dic->pgs[i].lvo = (unsigned) 0;
201 dic->pgs[i].csn = (unsigned) 0;
202 dic->pgs[i].flags = (unsigned) 0;
203 dic->pgs[i].buf = (unsigned char *) 0;
204 }
205 }
206 if (!(buf = (unsigned char *)malloc(size)))
207 return(-1);
208 (void)lseek(fd, off, 0);
209 if (read(fd, (char *)buf, size) != size)
210 return(-1);
211 dic->buf = buf;
212
213 return(0);
214 }
215
216 static int
loadPage(dic,id)217 loadPage(dic, id)
218 struct ND *dic;
219 int id;
220 {
221 unsigned off = dic->doff + dic->drsz + dic->pgsz * id;
222 unsigned size = dic->pgsz;
223 unsigned char *buf;
224 int fd = dic->fd;
225
226 if (!dic->pgs) {
227 fprintf(stderr, "no page table\n");
228 return(-1);
229 }
230
231 if (id >= dic->ttlpg) {
232 fprintf(stderr, "ERROR: %dth page is greater than max page %lu\n",
233 id, dic->ttlpg);
234 return(-1);
235 }
236 if (!isLoadedPage(dic->pgs + id)) {
237 if (!(buf = (unsigned char *)malloc(size))) {
238 fprintf(stderr, "malloc failed.\n");
239 return(-1);
240 }
241
242 (void)lseek(fd, off, 0);
243 if (read(fd, (char *)buf, size) != size) {
244 (void)fprintf(stderr, "cannot read page %d (%d)\n", id, size);
245 return(-1);
246 }
247 dic->pgs[id].buf = buf;
248 dic->pgs[id].count = 0;
249 dic->pgs[id].flags = RK_PG_LOADED;
250 dic->pgs[id].ndsz = bst2_to_s(buf + 2);
251 dic->pgs[id].lnksz = bst2_to_s(buf + 4);
252 dic->pgs[id].lvo = bst3_to_l(buf + 7);
253 dic->pgs[id].csn = bst3_to_l(buf + 10);
254 }
255 return(0);
256 }
257
258 unsigned char *
offset2ptr(dic,off,which)259 offset2ptr(dic, off, which)
260 struct ND *dic;
261 unsigned off;
262 int *which;
263 {
264 unsigned char *p;
265 int pg;
266
267 if (off < dic->drsz) {
268 p = dic->buf + off;
269 pg = -1;
270 } else {
271 pg = (off - dic->drsz) / dic->pgsz;
272 if (loadPage(dic, pg) < 0)
273 return((unsigned char *)0);
274 p = dic->pgs[pg].buf + off - dic->drsz - pg * dic->pgsz;
275 }
276 if (which)
277 *which = pg;
278 return(p);
279 }
280
281 unsigned char *
off2ptr(dic,off,id)282 off2ptr(dic, off, id)
283 struct ND *dic;
284 unsigned off;
285 int id;
286 {
287 unsigned char *p;
288
289 p = dic->pgs[id].buf + off;
290 return(p);
291 }
292
293 int
show_nip(gram,dic,yomi,n,p,pg)294 show_nip(gram, dic, yomi, n, p, pg)
295 struct RkKxGram *gram;
296 struct ND *dic;
297 Wchar *yomi;
298 unsigned n;
299 unsigned char *p;
300 int pg;
301 {
302 Wchar w;
303 unsigned char *pp;
304 unsigned nn;
305 unsigned val;
306 int iw;
307 int il = 0;
308
309 while (!il) {
310 w = bst2_to_s(p); p += 2;
311 iw = *p & WORD_NODE;
312 il = *p & LAST_NODE;
313 val = ((p[0] & 0x3f) << BIT_UNIT) | p[1];
314 p += 2;
315 if (w != (Wchar) 0) {
316 yomi[n] = w;
317 nn = n + 1;
318 } else {
319 nn = n;
320 }
321 yomi[nn] = (Wchar)0;
322 if (iw) {
323 show_a_wrec(gram, off2ptr(dic, val, pg), yomi, nn);
324 } else {
325 if (!(pp = off2ptr(dic, val, pg))) {
326 fprintf(stderr, "bad offset in nip\n");
327 exit(1);
328 }
329 show_nip(gram, dic, yomi, nn, pp, pg);
330 }
331 }
332 return(0);
333 }
334
/*
 * compit -- qsort() comparison function for directory entries.
 *
 * Entries are ordered by their first two bytes, compared as unsigned
 * values, first byte most significant.  Returns -1, 0 or 1.  Equal
 * keys must yield 0: qsort() requires a consistent ordering, and a
 * comparator that reports "greater" in both directions for equal
 * elements has undefined results.
 */
int
compit(a, b)
unsigned char *a;
unsigned char *b;
{
  if (a[0] != b[0])
    return(a[0] > b[0] ? 1 : -1);
  if (a[1] != b[1])
    return(a[1] > b[1] ? 1 : -1);
  return(0);
}
345
346 static
347 show_nid(gram, dic, yomi, n, ptr)
348 struct RkKxGram *gram;
349 struct ND *dic;
350 Wchar *yomi;
351 unsigned n;
352 unsigned char *ptr;
353 {
354 unsigned char *p;
355 Wchar wc, i;
356 unsigned val;
357 int wn;
358 unsigned nn;
359 int which;
360
361 p = ptr;
362 wc = bst2_to_s(p); p += 5;
363 qsort((char *)p, (unsigned)wc, 5,
364 (int (*) pro((const void *, const void *)))compit);
365 for (i = 0; i < wc; i++) {
366 Wchar w;
367
368 w = bst2_to_s(p); p += 2;
369 val = bst3_to_l(p); p += 3;
370 if (w == (Wchar) 0xffff)
371 continue;
372 wn = val & 0x800000;
373 val &= 0x7fffff;
374 if (w != (Wchar)0) {
375 yomi[n] = w;
376 nn = n + 1;
377 } else {
378 nn = n;
379 }
380 yomi[nn] = (Wchar)0;
381 if (wn)
382 show_a_wrec(gram, offset2ptr(dic, val, &which), yomi, nn);
383 else {
384 unsigned char *pp;
385
386 if (!(pp = offset2ptr(dic, val, &which))) {
387 fprintf(stderr, "bad offset\n");
388 exit(1);
389 }
390 if (which < 0) {
391 show_nid(gram, dic, yomi, nn, pp);
392 } else {
393 show_nip(gram, dic, yomi, nn, pp, which);
394 }
395 }
396 }
397 return(0);
398 }
399
400 int
getdic(dic,filenm,dmnm)401 getdic(dic, filenm, dmnm)
402 struct ND *dic;
403 char *filenm;
404 char *dmnm;
405 {
406 struct HD hd;
407 int fd, lk;
408 off_t off, doff;
409 unsigned err;
410 unsigned char ll[4];
411
412 if (!filenm)
413 return(-1);
414 if ((fd = open(filenm, O_RDONLY)) < 0)
415 return(-1);
416 #ifdef __CYGWIN32__
417 setmode(fd, O_BINARY);
418 #endif
419 for (off = 0, lk = 1, doff = 0, err = 0;
420 !err && lk && _RkReadHeader(fd, &hd, off) >= 0;
421 lk = dmnm ? strcmp(dmnm, (char *)hd.data[HD_DMNM].ptr) : 1) {
422 if (!dmnm) {
423 time_t tloc;
424 char date[26];
425
426 tloc = hd.data[HD_TIME].var;
427 strcpy(date, ctime(&tloc));
428 date[24] = 0;
429 (void)fprintf(stderr, "%s [ %s ] = %ld + %ld\n",
430 (char *)hd.data[HD_DMNM].ptr,
431 date,
432 hd.data[HD_CAN].var,
433 hd.data[HD_REC].var);
434 }
435 doff = off;
436 off += hd.data[HD_SIZ].var;
437 if (HD_VERSION(&hd) < 300702L &&
438 !strncmp(".swd", (char *)(hd.data[HD_DMNM].ptr + strlen((char *)hd.data[HD_DMNM].ptr) - 4), 4)) {
439 if (lseek(fd, off, 0) < 0 || read(fd, (char *)ll, 4) != 4)
440 err = 1;
441 off += bst4_to_l(ll) + 4;
442 }
443 }
444 if (!dmnm)
445 return(0);
446 if (lk)
447 return(-1);
448 dic->doff = doff + hd.data[HD_HSZ].var;
449 dic->sz = hd.data[HD_SIZ].var;
450 dic->drsz = hd.data[HD_PGOF].var - hd.data[HD_DROF].var;
451 dic->pgsz = _RkCalcUnlog2(hd.data[HD_L2P].var) + 1;
452 dic->ttlpg = hd.data[HD_PAG].var;
453 dic->fd = fd;
454 dic->buf = (unsigned char *)0;
455 dic->pgs = (struct NP *)0;
456
457 return(loadDic(dic));
458 }
459
main(argc,argv)460 main (argc, argv)
461 int argc;
462 char *argv [];
463 {
464 char *dmnm = 0;
465 int i;
466 struct RkKxGram *gram;
467 Wchar yomi[1024];
468 struct ND Dic;
469 struct ND *dic = &Dic;
470 int which;
471 unsigned char *p;
472 char *cnj = (char *)0;
473 char bn[256];
474 int fd;
475
476 program = RkiBasename(argv[0]);
477 for (i = 1; i < argc && argv[i][0] == '-'; i++) {
478 if (!strcmp(argv[i], "-i") ) {
479 inv = 1;
480 continue;
481 } else if (!strcmp(argv[i], "-D")) {
482 if ( ++i < argc && !cnj) {
483 cnj = argv[i];
484 continue;
485 }
486 } else {
487 (void)fprintf(stderr,
488 "usage: %s [-i] [-D bunpou] <filename> [dictionary-name]\n",
489 program);
490 exit(1);
491 }
492 }
493 if (i > argc - 1) {
494 (void) fprintf(stderr,
495 "usage: %s [-i] [-D bunpou] <filename> [dictionary-name]\n",
496 program);
497 exit(1);
498 }
499 if (!cnj) {
500 if(!(gram = RkOpenGram(HYOUJUN_GRAM))) {
501 (void)fprintf(stderr, "Warning: cannot open grammar file %s.\n", HYOUJUN_GRAM);
502 }
503 } else {
504 if ((fd = open(cnj, 0)) < 0) {
505 (void)fprintf(stderr, "%s: cannot open grammar file %s.\n", program, cnj);
506 exit(1);
507 }
508 #ifdef __CYGWIN32__
509 setmode(fd, O_BINARY);
510 #endif
511 gram = RkReadGram(fd, (size_t)-1);
512 close(fd);
513 }
514 (void)strcpy(bn, argv[i]);
515 if (!(dmnm = argv[i+1])) {
516 if (getdic(dic, bn, dmnm) < 0) {
517 (void)fprintf(stderr, "%s: cannot read file %s\n", program, bn);
518 }
519 exit(1);
520 }
521
522 if (getdic(dic, bn, dmnm) < 0) {
523 (void)fprintf(stderr, "%s: cannot read file %s or dictionary %s\n", program, bn, dmnm);
524 exit(1);
525 }
526 p = offset2ptr(dic, (unsigned)0, &which);
527 if (which != -1) {
528 (void)fprintf(stderr, "incollect dictionary\n");
529 exit(1);
530 }
531 show_nid(gram, dic, yomi, (unsigned)0, p);
532 (void)close(dic->fd);
533 fflush(stdout);
534 return 0;
535 }
536