1 /*
2 * SKK-like Kana-Kanji translation library
3 *
4 * by A.ITO November, 1991
5 */
6
7 /*
8 (C) Copyright 1992,1993 by Akinori ITO, Yutaka KANEKO and
9 Masatoshi WATANABE
10 You may freely use, copy, modify and distribute this soft-
11 ware. This software is provided "as is" without warranty
12 of any kind. The entire risk as to the quality and per-
13 formance of the program is with you.
14 */
15
16
17 #include "skkconfig.h"
18 #include <stdio.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <string.h>
22 #include "skklib.h"
23
24 CandList getCandList();
25 /*static char *allocStr();*/
26 int hashVal();
27
28 /*
29 * Open SKK
30 */
31 Dictionary
openSKK(dicname)32 openSKK(dicname)
33 char *dicname;
34 {
35 FILE *f;
36 DicList ditem,ditem2;
37 DicList globaldic;
38 char buf[512];
39 char *p,c;
40 Hash *dhash;
41 Dictionary dic;
42 int i,nitem = 0;
43 int okuriAri = 1;
44 struct stat st;
45
46 dhash = (Hash*)calloc(HASHSIZE,sizeof(Hash));
47 dic = _NEW(Dictionary);
48 dic->dhash = dhash;
49 dic->okuriAriFirst = NULL;
50 dic->okuriNasiFirst = NULL;
51 dic->dlist = NULL;
52 ditem2 = NULL;
53 globaldic = NULL;
54 if ((f = fopen(dicname,"r")) == NULL) {
55 return dic;
56 }
57 fstat(fileno(f),&st);
58 while (!feof(f)) {
59 while ((c = fgetc(f)) == ' ' || c == '\t' || c == '\n');
60 if (feof(f)) break;
61 if (c == ';') { /* comment */
62 i = 0;
63 while (c != '\n' && !feof(f)) {
64 c = fgetc(f);
65 buf[i++] = c;
66 }
67 buf[i] = '\0';
68 if (!strncmp(buf,"; okuri-ari entries.",20)) {
69 okuriAri = 1;
70 }
71 else if (!strncmp(buf,"; okuri-nasi entries.",21)) {
72 okuriAri = 0;
73 }
74 continue;
75 }
76 nitem++;
77 for (buf[0] = c, p = buf+1;
78 !feof(f) && (*p = fgetc(f)) != ' ';
79 p++) {
80 }
81 *p = '\0';
82 ditem = _NEW2(DicList,strlen(buf));
83 ditem->nextitem = NULL;
84 if (ditem2)
85 ditem2->nextitem = ditem;
86 if (globaldic == NULL)
87 globaldic = ditem;
88 strcpy(ditem->kanaword,buf);
89 ditem->cand = getCandList(f,ditem,okuriAri);
90 addHash(dhash,ditem);
91 ditem2 = ditem;
92 if (okuriAri) {
93 if (!dic->okuriAriFirst)
94 dic->okuriAriFirst = ditem2;
95 }
96 else {
97 if (!dic->okuriNasiFirst)
98 dic->okuriNasiFirst = ditem2;
99 }
100 }
101 fclose(f);
102 dic->dlist = globaldic;
103 dic->mtime = st.st_mtime;
104 return dic;
105 }
106
/*
 * Check if word is an OKURI-ARI entry or not
 *
 * `l' is the length of `word' in bytes.  The result is 1 only when the
 * first byte either has its high bit set or is '#', and the last byte
 * is a 7-bit character other than '#'.  Otherwise the result is 0.
 */
int
isConjugate(word,l)
char word[];
int l;
{
    char head = word[0];
    char tail;

    /* keys starting with a plain 7-bit character never qualify */
    if (!(head & 0x80) && head != '#')
        return 0;

    tail = word[l-1];
    if (tail & 0x80)
        return 0;
    return tail != '#';
}
126
127 /*
128 * Add new word entry to the dictionary
129 */
130 DicList
addNewItem(dic,word,clist)131 addNewItem(dic,word,clist)
132 Dictionary dic;
133 char *word;
134 CandList clist;
135 {
136 DicList ditem;
137 int l = strlen(word);
138
139 ditem = _NEW2(DicList,l);
140 strcpy(ditem->kanaword,word);
141 ditem->cand = clist;
142 addHash(dic->dhash,ditem);
143 if (isConjugate(word,l)) {
144 if (dic->okuriAriFirst) {
145 ditem->nextitem = dic->okuriAriFirst->nextitem;
146 dic->okuriAriFirst->nextitem = ditem;
147 }
148 else {
149 if (dic->dlist) {
150 dic->okuriAriFirst = ditem;
151 ditem->nextitem = dic->okuriNasiFirst;
152 dic->dlist = ditem;
153 }
154 else {
155 dic->dlist = ditem;
156 dic->okuriAriFirst = ditem;
157 }
158 }
159 }
160 else {
161 if (dic->okuriNasiFirst) {
162 ditem->nextitem = dic->okuriNasiFirst->nextitem;
163 dic->okuriNasiFirst->nextitem = ditem;
164 }
165 else {
166 if (dic->dlist) {
167 ditem->nextitem = dic->dlist->nextitem;
168 dic->dlist->nextitem = ditem;
169 dic->okuriNasiFirst = ditem;
170 }
171 else {
172 dic->dlist = ditem;
173 dic->okuriNasiFirst = ditem;
174 }
175 }
176 }
177 return ditem;
178 }
179
180
181 CandList
getCandList(f,ditem,okuri)182 getCandList(f,ditem,okuri)
183 FILE *f;
184 DicList ditem;
185 int okuri;
186 {
187 char buf[256];
188 CandList citem,citem2,citem0 = NULL;
189 CandList ccitem,ccitem2;
190 char c,*p;
191
192 citem2 = NULL;
193 while ((c = fgetc(f)) != '\n' && !feof(f)) {
194 if (c == '/') continue;
195 if (okuri && c == '[') {
196 for (p = buf; (*p = fgetc(f)) != '/'; p++);
197 *p = '\0';
198 citem = _NEW2(CandList,strlen(buf));
199 citem->okuri = NULL;
200 citem->nextcand = NULL;
201 citem->prevcand = citem2;
202 citem->dicitem = ditem;
203 strcpy(citem->candword,buf);
204 ccitem2 = citem;
205 for (;;) {
206 if ((c = fgetc(f)) == ']')
207 break;
208 for (buf[0] = c, p = buf+1;
209 (*p = fgetc(f)) != '/';
210 p++);
211 *p = '\0';
212 ccitem = _NEW2(CandList,strlen(buf));
213 ccitem->nextcand = NULL;
214 ccitem->okuri = NULL;
215 ccitem->dicitem = ditem;
216 strcpy(ccitem->candword,buf);
217 if (ccitem2 == citem) {
218 ccitem2->okuri = ccitem;
219 ccitem->prevcand = NULL;
220 }
221 else {
222 ccitem2->nextcand = ccitem;
223 ccitem->prevcand = ccitem2;
224 }
225 ccitem2 = ccitem;
226 }
227 }
228 else {
229 for (buf[0] = c, p = buf+1;
230 (*p = fgetc(f)) != '/';
231 p++);
232 *p = '\0';
233 citem = _NEW2(CandList,strlen(buf));
234 citem->okuri = NULL;
235 citem->nextcand = NULL;
236 citem->prevcand = citem2;
237 citem->dicitem = ditem;
238 strcpy(citem->candword,buf);
239 }
240 if (citem2)
241 citem2->nextcand = citem;
242 else
243 citem0 = citem;
244 citem2 = citem;
245 }
246 return citem0;
247 }
248
249 void
closeSKK(dic,dicname)250 closeSKK(dic,dicname)
251 Dictionary dic;
252 char *dicname;
253 {
254 char *buf;
255 FILE *f;
256 DicList dlist,dlist2;
257 DicList globaldic = dic->dlist;
258 int okuri = 1;
259 int l;
260 char *wd;
261 struct stat sbuf;
262 int old = 0;
263
264 #ifdef USE_SERVER
265 closeSKKserv();
266 #endif
267
268 buf = malloc(256);
269 /* backup skk-jisyo if jisyo is not empty. */
270 sprintf(buf,"%s.BAK",dicname);
271 if ((stat(dicname, &sbuf) == 0) && (sbuf.st_size != 0)) {
272 if (dic->mtime < sbuf.st_mtime) {
273 printf("The dictionary is changed. merging...\n");
274 mergeDictionary(dic,dicname);
275 }
276 rename(dicname,buf);
277 old = 1;
278 }
279 if ((f = fopen(dicname,"w")) == NULL) {
280 free(buf);
281 return;
282 }
283
284 fprintf(f,";; okuri-ari entries.\n");
285 for (dlist = globaldic;
286 dlist != NULL;
287 dlist2 = dlist, dlist = dlist->nextitem, free(dlist2)) {
288 wd = dlist->kanaword;
289 l = strlen(wd);
290 if (okuri && (!isConjugate(wd,l))) {
291 fprintf(f,";; okuri-nasi entries.\n");
292 okuri = 0;
293 }
294 fprintf(f,"%s ",dlist->kanaword);
295 printCand(dlist->cand,f,FREE_CAND);
296 }
297 fclose(f);
298 if (old)
299 chmod(dicname,sbuf.st_mode);
300
301 for (l = 0; l < HASHSIZE; l++) {
302 Hash h1, h2;
303
304 for (h1 = dic->dhash[l]; h1; h1 = h2) {
305 h2 = h1->next;
306 free(h1);
307 }
308 }
309 free(dic->dhash);
310 free(dic);
311 free(buf);
312 }
313
314 /* #define DEBUG_MERGE debug dictionary merge */
315
316 void
mergeDictionary(dic,dicname)317 mergeDictionary(dic,dicname)
318 Dictionary dic;
319 char *dicname;
320 {
321 FILE *f;
322 CandList cand,dcand;
323 DicList ditem;
324 char *buf;
325 char *p,c;
326 int i;
327 #ifdef DEBUG_MERGE
328 DicList change[10]; int n = 0;
329 #endif
330
331 buf = malloc(512);
332 if ((f = fopen(dicname,"r")) == NULL) {
333 free(buf);
334 return;
335 }
336 while (!feof(f)) {
337 while ((c = fgetc(f)) == ' ' || c == '\t' || c == '\n');
338 if (feof(f)) break;
339 if (c == ';') { /* comment */
340 while (c != '\n' && !feof(f)) {
341 c = fgetc(f);
342 }
343 continue;
344 }
345 for (buf[0] = c, p = buf+1;
346 !feof(f) && (*p = fgetc(f)) != ' ';
347 p++) {
348 }
349 *p = '\0';
350 i = strlen(buf);
351 dcand = getCand(dic,buf);
352 if (dcand == NULL) {
353 cand = getCandList(f,NULL,isConjugate(buf,i));
354 ditem = addNewItem(dic,buf,cand);
355 for (; cand; cand = cand->nextcand)
356 cand->dicitem = ditem;
357 #ifdef DEBUG_MERGE
358 change[n++] = ditem;
359 #endif
360 }
361 else {
362 cand = getCandList(f,dcand->dicitem,isConjugate(buf,i));
363 cand = deleteCand(cand,dcand);
364 if (cand) {
365 dcand->dicitem->cand = cand;
366 while (cand->nextcand != NULL)
367 cand = cand->nextcand;
368 cand->nextcand = dcand;
369 dcand->prevcand = cand;
370 #ifdef DEBUG_MERGE
371 change[n++] = dcand->dicitem;
372 #endif
373 }
374 }
375 }
376 fclose(f);
377 #ifdef DEBUG_MERGE
378 for (i=0;i<n;i++) {
379 printf("i=%d; ",i); fflush(stdout);
380 printf("register(%d): %s (%x)",i,change[i]->kanaword,change[i]->cand); fflush(stdout);
381 printCand(change[i]->cand,stdout,NOFREE_CAND); putchar('\n');
382 }
383 #endif
384 free(buf);
385 }
386
387
388 void
printCand(cl,f,fre)389 printCand(cl,f,fre)
390 CandList cl;
391 FILE *f;
392 int fre;
393 {
394 CandList clist,clist2,cclist,cclist2;
395
396 fputc('/',f);
397 for (clist = cl;
398 clist != NULL;
399 clist2 = clist, clist = clist->nextcand,
400 (fre ? (free(clist2),0) : 0)) {
401 if (clist->okuri) {
402 fprintf(f,"[%s/",clist->candword);
403 for (cclist = clist->okuri;
404 cclist != NULL;
405 cclist2 = cclist,
406 cclist = cclist->nextcand,
407 (fre ? (free(cclist2),0) : 0)) {
408 fprintf(f,"%s/",cclist->candword);
409 }
410 fputs("]/",f);
411 }
412 else
413 fprintf(f,"%s/",clist->candword);
414 }
415 fputc('\n',f);
416 }
417
418 /*
419 static char*
420 allocStr(s)
421 char *s;
422 {
423 int l = strlen(s);
424 char *p = malloc(l+1);
425
426 strcpy(p,s);
427 return p;
428 }
429 */
430
431 int
hashVal(s)432 hashVal(s)
433 char *s;
434 {
435 int n = 0;
436
437 while (*s) {
438 n += (*s)*(*s);
439 s++;
440 }
441 return n%HASHSIZE;
442 }
443
addHash(hash,ditem)444 void addHash(hash,ditem)
445 Hash *hash;
446 DicList ditem;
447 {
448 Hash h;
449 int v;
450
451 v = hashVal(ditem->kanaword);
452 h = _NEW(Hash);
453 h->h_index = ditem;
454 h->length = strlen(ditem->kanaword);
455 h->next = hash[v];
456 hash[v] = h;
457 }
458
459 CandList
getCand(dic,s)460 getCand(dic,s)
461 Dictionary dic;
462 char *s;
463 {
464 int l,v;
465 Hash h;
466
467 l = strlen(s);
468 v = hashVal(s);
469 for (h = dic->dhash[v]; h != NULL; h = h->next) {
470 if (h->length != l ||
471 strcmp(h->h_index->kanaword,s)) continue;
472 return h->h_index->cand;
473 }
474 return NULL;
475 }
476
477 void
selectCand(first,cand)478 selectCand(first,cand)
479 CandList *first;
480 CandList cand;
481 {
482 if (cand->prevcand) {
483 cand->prevcand->nextcand = cand->nextcand;
484 if (cand->nextcand)
485 cand->nextcand->prevcand = cand->prevcand;
486 cand->prevcand = NULL;
487 }
488 if (*first != cand) {
489 (*first)->prevcand = cand;
490 cand->nextcand = *first;
491 *first = cand;
492 }
493 }
494
495 void
freeCand(cl)496 freeCand(cl)
497 CandList cl;
498 {
499 CandList clist,clist2,cclist,cclist2;
500
501 for (clist = cl;
502 clist != NULL;
503 clist2 = clist, clist = clist->nextcand, free(clist2)) {
504 if (clist->okuri) {
505 for (cclist = clist->okuri;
506 cclist != NULL;
507 cclist2 = cclist,
508 cclist = cclist->nextcand,
509 free(cclist2)) ;
510 }
511 }
512 }
513
514 CandList
deleteCand(frlist,itlist)515 deleteCand(frlist,itlist)
516 CandList frlist,itlist;
517 {
518 CandList l;
519 while (itlist != NULL) {
520 for (l = frlist; l != NULL; l = l->nextcand) {
521 if (!strcmp(itlist->candword, l->candword)) {
522 if (l->prevcand == NULL) {
523 frlist = l->nextcand;
524 if (l->nextcand)
525 l->nextcand->prevcand = NULL;
526 }
527 else {
528 l->prevcand->nextcand = l->nextcand;
529 if (l->nextcand)
530 l->nextcand->prevcand = l->prevcand;
531 }
532 l->nextcand = NULL;
533 freeCand(l);
534 break;
535 }
536 }
537 itlist = itlist->nextcand;
538 }
539 return frlist;
540 }
541
542 CandList
firstCand(l)543 firstCand(l)
544 CandList l;
545 {
546 while (l->prevcand)
547 l = l->prevcand;
548 return l;
549 }
550
551 CandList
searchOkuri(cl,okuri,newfirst)552 searchOkuri(cl,okuri,newfirst)
553 CandList cl;
554 char *okuri;
555 CandList **newfirst;
556 {
557 CandList ll;
558
559 for (ll = cl; ll != NULL; ll = ll->nextcand) {
560 if (ll->okuri && !strcmp(ll->candword,okuri)) {
561 if (newfirst)
562 *newfirst = &(ll->okuri);
563 return ll->okuri;
564 }
565 }
566 if (newfirst && cl->dicitem) {
567 if (cl->dicitem->cand->okuri) {
568 return NULL;
569 }
570 *newfirst = &(cl->dicitem->cand);
571 }
572 return cl;
573 }
574
575