1 /*
2  *  SKK-like Kana-Kanji translation library
3  *
4  * by A.ITO November, 1991
5  */
6 
7 /*
8        (C) Copyright 1992,1993 by Akinori ITO, Yutaka KANEKO  and
9        Masatoshi WATANABE
10        You may freely use, copy, modify and distribute this soft-
11        ware.  This software is provided "as is" without  warranty
12        of  any  kind.  The entire risk as to the quality and per-
13        formance of the program is with you.
14 */
15 
16 
#include "skkconfig.h"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include "skklib.h"
23 
/* Forward declarations for functions that are used before they are defined. */
CandList getCandList();	/* reads the candidates of one dictionary line */
/*static char *allocStr();*/
int hashVal();		/* maps a key string to a hash bucket index */
27 
28 /*
29  * Open SKK
30  */
31 Dictionary
openSKK(dicname)32 openSKK(dicname)
33 char *dicname;
34 {
35 	FILE *f;
36 	DicList ditem,ditem2;
37 	DicList globaldic;
38 	char buf[512];
39 	char *p,c;
40 	Hash *dhash;
41 	Dictionary dic;
42 	int i,nitem = 0;
43 	int okuriAri = 1;
44 	struct stat st;
45 
46 	dhash = (Hash*)calloc(HASHSIZE,sizeof(Hash));
47 	dic = _NEW(Dictionary);
48 	dic->dhash = dhash;
49 	dic->okuriAriFirst = NULL;
50 	dic->okuriNasiFirst = NULL;
51 	dic->dlist = NULL;
52 	ditem2 = NULL;
53 	globaldic = NULL;
54 	if ((f = fopen(dicname,"r")) == NULL) {
55 		return dic;
56 	}
57 	fstat(fileno(f),&st);
58 	while (!feof(f)) {
59 		while ((c = fgetc(f)) == ' ' || c == '\t' || c == '\n');
60 		if (feof(f)) break;
61 		if (c == ';') {	/* comment */
62 			i = 0;
63 			while (c != '\n' && !feof(f)) {
64 				c = fgetc(f);
65 				buf[i++] = c;
66 			}
67 			buf[i] = '\0';
68 			if (!strncmp(buf,"; okuri-ari entries.",20)) {
69 				okuriAri = 1;
70 			}
71 			else if (!strncmp(buf,"; okuri-nasi entries.",21)) {
72 				okuriAri = 0;
73 			}
74 			continue;
75 		}
76 		nitem++;
77 		for (buf[0] = c, p = buf+1;
78 		     !feof(f) && (*p = fgetc(f)) != ' ';
79 		     p++) {
80 		}
81 		*p = '\0';
82 		ditem = _NEW2(DicList,strlen(buf));
83 		ditem->nextitem = NULL;
84 		if (ditem2)
85 			ditem2->nextitem = ditem;
86 		if (globaldic == NULL)
87 			globaldic = ditem;
88 		strcpy(ditem->kanaword,buf);
89 		ditem->cand = getCandList(f,ditem,okuriAri);
90 		addHash(dhash,ditem);
91 		ditem2 = ditem;
92 		if (okuriAri) {
93 			if (!dic->okuriAriFirst)
94 				dic->okuriAriFirst = ditem2;
95 		}
96 		else {
97 			if (!dic->okuriNasiFirst)
98 				dic->okuriNasiFirst = ditem2;
99 		}
100 	}
101 	fclose(f);
102 	dic->dlist = globaldic;
103 	dic->mtime = st.st_mtime;
104 	return dic;
105 }
106 
/*
 * Check if word is an OKURI-ARI entry or not
 *
 * `word' is the key and `l' its length in bytes.  The result is 1 when
 * the first byte has its high bit set or is '#', and the last byte is a
 * plain 7-bit character other than '#'.  In every other case it is 0.
 */
int
isConjugate(word,l)
char word[];
int l;
{
	char head = word[0];
	char tail;

	/* keys that start with a plain 7-bit character never qualify */
	if (!(head & 0x80) && head != '#')
		return 0;

	tail = word[l-1];
	if (tail & 0x80)
		return 0;
	return tail != '#';
}
126 
127 /*
128  * Add new word entry to the dictionary
129  */
130 DicList
addNewItem(dic,word,clist)131 addNewItem(dic,word,clist)
132 Dictionary dic;
133 char *word;
134 CandList clist;
135 {
136 	DicList ditem;
137 	int l = strlen(word);
138 
139 	ditem = _NEW2(DicList,l);
140 	strcpy(ditem->kanaword,word);
141 	ditem->cand = clist;
142 	addHash(dic->dhash,ditem);
143 	if (isConjugate(word,l)) {
144 		if (dic->okuriAriFirst) {
145 			ditem->nextitem = dic->okuriAriFirst->nextitem;
146 			dic->okuriAriFirst->nextitem = ditem;
147 		}
148 		else {
149 			if (dic->dlist) {
150 				dic->okuriAriFirst = ditem;
151 				ditem->nextitem = dic->okuriNasiFirst;
152 				dic->dlist = ditem;
153 			}
154 			else {
155 				dic->dlist = ditem;
156 				dic->okuriAriFirst = ditem;
157 			}
158 		}
159 	}
160 	else {
161 		if (dic->okuriNasiFirst) {
162 			ditem->nextitem = dic->okuriNasiFirst->nextitem;
163 			dic->okuriNasiFirst->nextitem = ditem;
164 		}
165 		else {
166 			if (dic->dlist) {
167 				ditem->nextitem = dic->dlist->nextitem;
168 				dic->dlist->nextitem = ditem;
169 				dic->okuriNasiFirst = ditem;
170 			}
171 			else {
172 				dic->dlist = ditem;
173 				dic->okuriNasiFirst = ditem;
174 			}
175 		}
176 	}
177 	return ditem;
178 }
179 
180 
181 CandList
getCandList(f,ditem,okuri)182 getCandList(f,ditem,okuri)
183 FILE *f;
184 DicList ditem;
185 int okuri;
186 {
187 	char buf[256];
188 	CandList citem,citem2,citem0 = NULL;
189 	CandList ccitem,ccitem2;
190 	char c,*p;
191 
192 	citem2 = NULL;
193 	while ((c = fgetc(f)) != '\n' && !feof(f)) {
194 		if (c == '/') continue;
195 		if (okuri && c == '[') {
196 			for (p = buf; (*p = fgetc(f)) != '/'; p++);
197 			*p = '\0';
198 			citem = _NEW2(CandList,strlen(buf));
199 			citem->okuri = NULL;
200 			citem->nextcand = NULL;
201 			citem->prevcand = citem2;
202 			citem->dicitem = ditem;
203 			strcpy(citem->candword,buf);
204 			ccitem2 = citem;
205 			for (;;) {
206 				if ((c = fgetc(f)) == ']')
207 					break;
208 				for (buf[0] = c, p = buf+1;
209 				     (*p = fgetc(f)) != '/';
210 				     p++);
211 				*p = '\0';
212 				ccitem = _NEW2(CandList,strlen(buf));
213 				ccitem->nextcand = NULL;
214 				ccitem->okuri = NULL;
215 				ccitem->dicitem = ditem;
216 				strcpy(ccitem->candword,buf);
217 				if (ccitem2 == citem) {
218 					ccitem2->okuri = ccitem;
219 					ccitem->prevcand = NULL;
220 				}
221 				else {
222 					ccitem2->nextcand = ccitem;
223 					ccitem->prevcand = ccitem2;
224 				}
225 				ccitem2 = ccitem;
226 			}
227 		}
228 		else {
229 			for (buf[0] = c, p = buf+1;
230 			     (*p = fgetc(f)) != '/';
231 			     p++);
232 			*p = '\0';
233 			citem = _NEW2(CandList,strlen(buf));
234 			citem->okuri = NULL;
235 			citem->nextcand = NULL;
236 			citem->prevcand = citem2;
237 			citem->dicitem = ditem;
238 			strcpy(citem->candword,buf);
239 		}
240 		if (citem2)
241 			citem2->nextcand = citem;
242 		else
243 			citem0 = citem;
244 		citem2 = citem;
245 	}
246 	return citem0;
247 }
248 
249 void
closeSKK(dic,dicname)250 closeSKK(dic,dicname)
251 Dictionary dic;
252 char *dicname;
253 {
254 	char *buf;
255 	FILE *f;
256 	DicList dlist,dlist2;
257 	DicList globaldic = dic->dlist;
258 	int okuri = 1;
259 	int l;
260 	char *wd;
261 	struct stat sbuf;
262 	int old = 0;
263 
264 #ifdef USE_SERVER
265 	closeSKKserv();
266 #endif
267 
268 	buf = malloc(256);
269 	/* backup skk-jisyo if jisyo is not empty. */
270 	sprintf(buf,"%s.BAK",dicname);
271 	if ((stat(dicname, &sbuf) == 0) && (sbuf.st_size != 0)) {
272 		if (dic->mtime < sbuf.st_mtime) {
273 			printf("The dictionary is changed. merging...\n");
274 			mergeDictionary(dic,dicname);
275 		}
276 		rename(dicname,buf);
277 		old = 1;
278 	}
279 	if ((f = fopen(dicname,"w")) == NULL) {
280 		free(buf);
281 		return;
282 	}
283 
284 	fprintf(f,";; okuri-ari entries.\n");
285 	for (dlist = globaldic;
286 	     dlist != NULL;
287 	     dlist2 = dlist, dlist = dlist->nextitem, free(dlist2)) {
288 		wd = dlist->kanaword;
289 		l = strlen(wd);
290 		if (okuri && (!isConjugate(wd,l))) {
291 			fprintf(f,";; okuri-nasi entries.\n");
292 			okuri = 0;
293 		}
294 		fprintf(f,"%s ",dlist->kanaword);
295 		printCand(dlist->cand,f,FREE_CAND);
296 	}
297 	fclose(f);
298 	if (old)
299 		chmod(dicname,sbuf.st_mode);
300 
301 	for (l = 0; l < HASHSIZE; l++) {
302 		Hash h1, h2;
303 
304 		for (h1 = dic->dhash[l]; h1; h1 = h2) {
305 			h2 = h1->next;
306 			free(h1);
307 		}
308 	}
309 	free(dic->dhash);
310 	free(dic);
311 	free(buf);
312 }
313 
314 /* #define DEBUG_MERGE debug dictionary merge */
315 
316 void
mergeDictionary(dic,dicname)317 mergeDictionary(dic,dicname)
318 Dictionary dic;
319 char *dicname;
320 {
321 	FILE *f;
322 	CandList cand,dcand;
323 	DicList ditem;
324 	char *buf;
325 	char *p,c;
326 	int i;
327 #ifdef DEBUG_MERGE
328 	DicList change[10]; int n = 0;
329 #endif
330 
331 	buf = malloc(512);
332 	if ((f = fopen(dicname,"r")) == NULL) {
333 		free(buf);
334 		return;
335 	}
336 	while (!feof(f)) {
337 		while ((c = fgetc(f)) == ' ' || c == '\t' || c == '\n');
338 		if (feof(f)) break;
339 		if (c == ';') {	/* comment */
340 			while (c != '\n' && !feof(f)) {
341 				c = fgetc(f);
342 			}
343 			continue;
344 		}
345 		for (buf[0] = c, p = buf+1;
346 		     !feof(f) && (*p = fgetc(f)) != ' ';
347 		     p++) {
348 		}
349 		*p = '\0';
350 		i = strlen(buf);
351 		dcand = getCand(dic,buf);
352 		if (dcand == NULL) {
353 			cand = getCandList(f,NULL,isConjugate(buf,i));
354 			ditem = addNewItem(dic,buf,cand);
355 			for (; cand; cand = cand->nextcand)
356 				cand->dicitem = ditem;
357 #ifdef DEBUG_MERGE
358 			change[n++] = ditem;
359 #endif
360 		}
361 		else {
362 			cand = getCandList(f,dcand->dicitem,isConjugate(buf,i));
363 			cand = deleteCand(cand,dcand);
364 			if (cand) {
365 				dcand->dicitem->cand = cand;
366 				while (cand->nextcand != NULL)
367 					cand = cand->nextcand;
368 				cand->nextcand = dcand;
369 				dcand->prevcand = cand;
370 #ifdef DEBUG_MERGE
371 				change[n++] = dcand->dicitem;
372 #endif
373 			}
374 		}
375 	}
376 	fclose(f);
377 #ifdef DEBUG_MERGE
378 	for (i=0;i<n;i++) {
379 		printf("i=%d; ",i); fflush(stdout);
380 		printf("register(%d): %s (%x)",i,change[i]->kanaword,change[i]->cand); fflush(stdout);
381 		printCand(change[i]->cand,stdout,NOFREE_CAND); putchar('\n');
382 	}
383 #endif
384 	free(buf);
385 }
386 
387 
388 void
printCand(cl,f,fre)389 printCand(cl,f,fre)
390 CandList cl;
391 FILE *f;
392 int fre;
393 {
394 	CandList clist,clist2,cclist,cclist2;
395 
396 	fputc('/',f);
397 	for (clist = cl;
398 	     clist != NULL;
399 	     clist2 = clist, clist = clist->nextcand,
400 	     (fre ? (free(clist2),0) : 0)) {
401 		if (clist->okuri) {
402 			fprintf(f,"[%s/",clist->candword);
403 			for (cclist = clist->okuri;
404 			     cclist != NULL;
405 			     cclist2 = cclist,
406 			     cclist = cclist->nextcand,
407 			     (fre ? (free(cclist2),0) : 0)) {
408 				fprintf(f,"%s/",cclist->candword);
409 			}
410 			fputs("]/",f);
411 		}
412 		else
413 			fprintf(f,"%s/",clist->candword);
414 	}
415 	fputc('\n',f);
416 }
417 
418 /*
419 static char*
420 allocStr(s)
421 char *s;
422 {
423 	int l = strlen(s);
424 	char *p = malloc(l+1);
425 
426 	strcpy(p,s);
427 	return p;
428 }
429 */
430 
431 int
hashVal(s)432 hashVal(s)
433 char *s;
434 {
435 	int n = 0;
436 
437 	while (*s) {
438 		n += (*s)*(*s);
439 		s++;
440 	}
441 	return n%HASHSIZE;
442 }
443 
addHash(hash,ditem)444 void addHash(hash,ditem)
445 Hash *hash;
446 DicList ditem;
447 {
448 	Hash h;
449 	int v;
450 
451 	v = hashVal(ditem->kanaword);
452 	h = _NEW(Hash);
453 	h->h_index = ditem;
454 	h->length = strlen(ditem->kanaword);
455 	h->next = hash[v];
456 	hash[v] = h;
457 }
458 
459 CandList
getCand(dic,s)460 getCand(dic,s)
461 Dictionary dic;
462 char *s;
463 {
464 	int l,v;
465 	Hash h;
466 
467 	l = strlen(s);
468 	v = hashVal(s);
469 	for (h = dic->dhash[v]; h != NULL; h = h->next) {
470 		if (h->length != l ||
471 		    strcmp(h->h_index->kanaword,s)) continue;
472 		return h->h_index->cand;
473 	}
474 	return NULL;
475 }
476 
477 void
selectCand(first,cand)478 selectCand(first,cand)
479 CandList *first;
480 CandList cand;
481 {
482 	if (cand->prevcand) {
483 		cand->prevcand->nextcand = cand->nextcand;
484 		if (cand->nextcand)
485 			cand->nextcand->prevcand = cand->prevcand;
486 		cand->prevcand = NULL;
487 	}
488 	if (*first != cand) {
489 		(*first)->prevcand = cand;
490 		cand->nextcand = *first;
491 		*first = cand;
492 	}
493 }
494 
495 void
freeCand(cl)496 freeCand(cl)
497 CandList cl;
498 {
499 	CandList clist,clist2,cclist,cclist2;
500 
501 	for (clist = cl;
502 	     clist != NULL;
503 	     clist2 = clist, clist = clist->nextcand, free(clist2)) {
504 		if (clist->okuri) {
505 			for (cclist = clist->okuri;
506 			     cclist != NULL;
507 			     cclist2 = cclist,
508 			     cclist = cclist->nextcand,
509 			     free(cclist2)) ;
510 		}
511 	}
512 }
513 
514 CandList
deleteCand(frlist,itlist)515 deleteCand(frlist,itlist)
516 CandList frlist,itlist;
517 {
518 	CandList l;
519 	while (itlist != NULL) {
520 		for (l = frlist; l != NULL; l = l->nextcand) {
521 			if (!strcmp(itlist->candword, l->candword)) {
522 				if (l->prevcand == NULL) {
523 					frlist = l->nextcand;
524 					if (l->nextcand)
525 						l->nextcand->prevcand = NULL;
526 				}
527 				else {
528 					l->prevcand->nextcand = l->nextcand;
529 					if (l->nextcand)
530 						l->nextcand->prevcand = l->prevcand;
531 				}
532 				l->nextcand = NULL;
533 				freeCand(l);
534 				break;
535 			}
536 		}
537 		itlist = itlist->nextcand;
538 	}
539 	return frlist;
540 }
541 
542 CandList
firstCand(l)543 firstCand(l)
544 CandList l;
545 {
546 	while (l->prevcand)
547 		l = l->prevcand;
548 	return l;
549 }
550 
551 CandList
searchOkuri(cl,okuri,newfirst)552 searchOkuri(cl,okuri,newfirst)
553 CandList cl;
554 char *okuri;
555 CandList **newfirst;
556 {
557 	CandList ll;
558 
559 	for (ll = cl; ll != NULL; ll = ll->nextcand) {
560 		if (ll->okuri && !strcmp(ll->candword,okuri)) {
561 			if (newfirst)
562 				*newfirst = &(ll->okuri);
563 			return ll->okuri;
564 		}
565 	}
566 	if (newfirst && cl->dicitem) {
567 		if (cl->dicitem->cand->okuri) {
568 			return NULL;
569 		}
570 		*newfirst = &(cl->dicitem->cand);
571 	}
572 	return cl;
573 }
574 
575