1 /*
2 Copyright (C) 1995-2008 Edward Der-Hua Liu, Hsin-Chu, Taiwan
3 */
4
5 #include "gcin.h"
6 #include "pho.h"
7 #include "tsin.h"
8 #include "gtab.h"
9 #include "gst.h"
10 #include "lang.h"
11 #include <sys/stat.h>
12
13 TSIN_HANDLE tsin_hand, en_hand;
14
15 //int ph_key_sz; // bytes
16
17 #define PHIDX_SKIP (sizeof(tsin_hand.phcount) + sizeof(tsin_hand.hashidx))
18
19 #if 0
20 char *current_tsin_fname;
21 time_t current_modify_time;
22 #endif
23
24 void get_gcin_user_or_sys_fname(char *name, char fname[]);
25
26
/*
 * Store the modification time of the currently open phrase file in
 * ptsin_hand->modify_time.  The field is left untouched when fstat() fails.
 */
static void get_modify_time(TSIN_HANDLE *ptsin_hand)
{
  struct stat info;

  if (fstat(fileno(ptsin_hand->fph), &info) != 0)
    return;

  ptsin_hand->modify_time = info.st_mtime;
}
34
/*
 * Release everything a handle owns: the stored file name, both open streams
 * and (when compiled in) the in-memory copies of the data and index files.
 * Every released member is reset to NULL, so calling this twice is harmless.
 */
void free_tsin_ex(TSIN_HANDLE *th)
{
  free(th->tsin_fname);
  th->tsin_fname = NULL;

  if (th->fph != NULL) {
    fclose(th->fph);
    th->fph = NULL;
  }

  if (th->fp_phidx != NULL) {
    fclose(th->fp_phidx);
    th->fp_phidx = NULL;
  }

#if MEM_TSIN
  free(th->mem_tsin);
  th->mem_tsin = NULL;
#endif
#if MEM_PHIDX
  free(th->mem_phidx);
  th->mem_phidx = NULL;
#endif
}
53
/*
 * Open the phrase database file `infname` and, when `use_idx` is set, its
 * companion index file "<infname>.idx", and attach both to *ptsin_hand.
 *
 * ptsin_hand  handle to fill; if it already has `infname` open nothing is done.
 * infname     path of the phrase data file.
 * read_only   TRUE opens the files with "rb", FALSE with "rb+".
 * use_idx     TRUE also opens the index file and reads its header
 *             (phrase count + hash table).
 *
 * The key size (ph_key_sz) and the gtab flag are derived from the signature
 * found at the start of the data file: TSIN_GTAB_KEY -> 4 or 8 bytes,
 * TSIN_EN_WORD_KEY -> 1 byte, anything else -> 2 bytes.
 *
 * Returns TRUE on success (or when the file was already loaded) and FALSE
 * when the index file cannot be opened, its name does not fit the local
 * buffer, or an in-memory copy cannot be built.  Failure to open the data
 * file itself is reported through p_err().
 */
gboolean load_tsin_db_ex(TSIN_HANDLE *ptsin_hand, char *infname, gboolean read_only, gboolean use_idx)
{
  char tsidxfname[512];
  char *fmod = read_only?"rb":"rb+";

  if (ptsin_hand->fph) {
    if (!strcmp(ptsin_hand->tsin_fname, infname))
      return TRUE;
    free_tsin_ex(ptsin_hand);
  }

  // Duplicate before freeing so that a caller passing the handle's own
  // tsin_fname does not make us read freed memory.
  char *fname_copy = strdup(infname);
  free(ptsin_hand->tsin_fname);
  ptsin_hand->tsin_fname = fname_copy;

  // Bounded replacement for strcpy()+strcat(): never writes past tsidxfname.
  int idx_name_len = snprintf(tsidxfname, sizeof(tsidxfname), "%s.idx", infname);
  gboolean idx_name_ok = idx_name_len >= 0 && (size_t)idx_name_len < sizeof(tsidxfname);

#define BF_SZ (16 * 1024)

  FILE *fp_phidx = ptsin_hand->fp_phidx, *fph = ptsin_hand->fph;

  if (use_idx) {
    if (!idx_name_ok) {
      dbg("load_tsin_db_ex index file name too long for '%s'\n", infname);
      return FALSE;
    }

    // Do not leak an index stream left over from an earlier partial load.
    if (fp_phidx) {
      fclose(fp_phidx);
      ptsin_hand->fp_phidx = NULL;
    }

    if ((fp_phidx=fopen(tsidxfname, fmod))==NULL) {
      dbg("load_tsin_db_ex A Cannot open '%s'\n", tsidxfname);
      return FALSE;
    }
    ptsin_hand->fp_phidx = fp_phidx;

    // Header layout must match PHIDX_SKIP and what save_phrase_to_db() writes.
    size_t got_count = fread(&ptsin_hand->phcount, sizeof(ptsin_hand->phcount), 1, fp_phidx);
    size_t got_hash = fread(&ptsin_hand->hashidx, 1, sizeof(ptsin_hand->hashidx), fp_phidx);
    if (got_count != 1 || got_hash != sizeof(ptsin_hand->hashidx)) {
      // Short or empty index: behave as an empty database instead of keeping
      // whatever counts the handle held before.
      dbg("load_tsin_db_ex short index header in '%s'\n", tsidxfname);
      ptsin_hand->phcount = 0;
      memset(&ptsin_hand->hashidx, 0, sizeof(ptsin_hand->hashidx));
    }

    ptsin_hand->a_phcount=ptsin_hand->phcount+256;
  }

  if (fph)
    fclose(fph);

  if ((fph=fopen(infname, fmod))==NULL)
    p_err("load_tsin_db0 B Cannot open '%s'", infname);

  ptsin_hand->fph = fph;

  get_modify_time(ptsin_hand);

  gboolean is_gtab_i = FALSE;

  // Zero the header first: a data file without a header may be shorter than
  // TSIN_GTAB_HEAD, and the signature is compared below either way.
  TSIN_GTAB_HEAD head;
  memset(&head, 0, sizeof(head));
  size_t head_items = fread(&head, sizeof(head), 1, fph);
  (void)head_items;

  if (!strcmp(head.signature, TSIN_GTAB_KEY)) {
    is_gtab_i = TRUE;
    if (head.keybits*head.maxkey > 32)
      ptsin_hand->ph_key_sz = 8;
    else
      ptsin_hand->ph_key_sz = 4;
  } else if (!strcmp(head.signature, TSIN_EN_WORD_KEY)) {
    ptsin_hand->ph_key_sz = 1;
  } else {
    ptsin_hand->ph_key_sz = 2;
  }

  ptsin_hand->tsin_is_gtab = is_gtab_i;

#if MEM_PHIDX || MEM_TSIN
  // Shared by both in-memory sections, so either macro can be enabled alone.
  struct stat st;
  size_t n;
#endif

#if MEM_PHIDX
  if (fp_phidx) {
    if (fstat(fileno(fp_phidx), &st) ||
        (ptsin_hand->mem_phidx = (char *)malloc(st.st_size)) == NULL) {
      dbg("load_tsin_db_ex cannot load '%s' into memory\n", tsidxfname);
      free_tsin_ex(ptsin_hand);
      return FALSE;
    }
    rewind(fp_phidx);
    n = fread(ptsin_hand->mem_phidx, 1, st.st_size, fp_phidx);
    if (n != (size_t)st.st_size) {
      dbg("load_tsin_db_ex short read on '%s'\n", tsidxfname);
      free_tsin_ex(ptsin_hand);
      return FALSE;
    }
  }
#endif

#if MEM_TSIN
  if (fstat(fileno(fph), &st) ||
      (ptsin_hand->mem_tsin = (char *)malloc(st.st_size + 128)) == NULL) { // +128: CH_SZ problem
    dbg("load_tsin_db_ex cannot load '%s' into memory\n", infname);
    free_tsin_ex(ptsin_hand);
    return FALSE;
  }
  rewind(fph);
  n = fread(ptsin_hand->mem_tsin, 1, st.st_size, fph);
  if (n != (size_t)st.st_size) {
    dbg("load_tsin_db_ex short read on '%s'\n", infname);
    free_tsin_ex(ptsin_hand);
    return FALSE;
  }
#endif

  return TRUE;
}
162
163
/*
 * Load `infname` together with its index into the global tsin_hand, opened
 * for reading and writing.  is_gtab_i is accepted but not consulted here;
 * load_tsin_db_ex() derives the table kind from the file header.
 */
void load_tsin_db0(char *infname, gboolean is_gtab_i)
{
  TSIN_HANDLE *handle = &tsin_hand;

  load_tsin_db_ex(handle, infname, FALSE, TRUE);
}
168
load_en_db0(char * infname)169 void load_en_db0(char *infname)
170 {
171 load_tsin_db_ex(&en_hand, infname, FALSE, TRUE);
172 }
173
174
free_tsin()175 void free_tsin()
176 {
177 free_tsin_ex(&tsin_hand);
178 }
179
free_en()180 void free_en()
181 {
182 free_tsin_ex(&en_hand);
183 }
184
185
186 extern gboolean is_chs;
load_tsin_db()187 void load_tsin_db()
188 {
189 char tsfname[512];
190 char *fname = tsin32_f;
191
192 get_gcin_user_fname(fname, tsfname);
193 load_tsin_db0(tsfname, FALSE);
194 }
195
load_en_db()196 void load_en_db()
197 {
198 char tsfname[512];
199
200 get_gcin_user_fname(TSIN_EN_FILE, tsfname);
201 load_en_db0(tsfname);
202 }
203
204 #define PHIDX_INDEX(i) (PHIDX_SKIP + i*sizeof(int))
205
seek_fp_phidx(TSIN_HANDLE * ptsin_hand,int i)206 static void seek_fp_phidx(TSIN_HANDLE *ptsin_hand, int i)
207 {
208 fseek(ptsin_hand->fp_phidx, PHIDX_INDEX(i), SEEK_SET);
209 }
210
/*
 * Close and reopen the database a handle currently refers to (read/write,
 * with index).  Does nothing when the handle has no file name recorded.
 */
void reload_tsin_db_ex(TSIN_HANDLE *th)
{
  if (!th->tsin_fname)
    return;

  // Take ownership of the stored name instead of copying it into a fixed
  // 512-byte buffer: free_tsin_ex() then has nothing to free for the name,
  // and a long path can no longer overflow a local array.
  char *fname = th->tsin_fname;
  th->tsin_fname = NULL;
  free_tsin_ex(th);

  load_tsin_db_ex(th, fname, FALSE, TRUE);  // stores its own copy of the name
  free(fname);
}
225
226
reload_tsin_db()227 void reload_tsin_db()
228 {
229 reload_tsin_db_ex(&tsin_hand);
230 }
231
reload_en_db()232 void reload_en_db()
233 {
234 reload_tsin_db_ex(&en_hand);
235 }
236
237
/*
 * Return the byte offset, inside the data file, of phrase number `i`.
 *
 * The raw offset comes from the i-th slot of the index (memory copy or
 * file).  For gtab tables and 1-byte-key tables sizeof(TSIN_GTAB_HEAD) is
 * added on top of the stored value.  If the slot cannot be read the raw
 * offset is taken as 0 rather than left indeterminate.
 */
inline static int get_phidx(TSIN_HANDLE *ptsin_hand, int i)
{
  int t = 0;

#if MEM_PHIDX
  memcpy(&t, ptsin_hand->mem_phidx + PHIDX_INDEX(i), sizeof(t));
#else
  seek_fp_phidx(ptsin_hand, i);
  if (fread(&t, sizeof(t), 1, ptsin_hand->fp_phidx) != 1)
    t = 0;  // short read: keep a defined value
#endif

  if (ptsin_hand->tsin_is_gtab || ptsin_hand->ph_key_sz ==1)
    t += sizeof(TSIN_GTAB_HEAD);

  return t;
}
253
/* Compare the first `len` one-byte keys of a and b; result has memcmp() semantics. */
inline static int phokey_t_seq8(u_char *a, u_char *b, int len)
{
  int order = memcmp(a, b, len);

  return order;
}
258
259
/*
 * Compare the first `len` phokey_t keys of a and b.
 * Returns 1 / -1 at the first position where a is larger / smaller,
 * 0 when all `len` keys are equal.
 */
inline static int phokey_t_seq16(phokey_t *a, phokey_t *b, int len)
{
  for (int k = 0; k < len; k++) {
    if (a[k] == b[k])
      continue;
    return a[k] > b[k] ? 1 : -1;
  }

  return 0;
}
272
273
/*
 * Compare the first `len` 32-bit keys of a and b.
 * Returns 1 / -1 at the first position where a is larger / smaller,
 * 0 when all `len` keys are equal.
 */
inline static int phokey_t_seq32(u_int *a, u_int *b, int len)
{
  for (int k = 0; k < len; k++) {
    if (a[k] == b[k])
      continue;
    return a[k] > b[k] ? 1 : -1;
  }

  return 0;
}
286
287
/*
 * Compare the first `len` 64-bit keys of a and b.
 * Returns 1 / -1 at the first position where a is larger / smaller,
 * 0 when all `len` keys are equal.
 */
inline static int phokey_t_seq64(u_int64_t *a, u_int64_t *b, int len)
{
  for (int k = 0; k < len; k++) {
    if (a[k] == b[k])
      continue;
    return a[k] > b[k] ? 1 : -1;
  }

  return 0;
}
300
301
/*
 * Compare `len` keys of a and b using the comparator that matches the
 * handle's key width (1, 2, 4 or 8 bytes).  Any other width compares equal.
 */
static inline int phokey_t_seq(TSIN_HANDLE *th, void *a, void *b, int len)
{
  switch (th->ph_key_sz) {
  case 1:
    return phokey_t_seq8((u_char *)a, (u_char *)b, len);
  case 2:
    return phokey_t_seq16((phokey_t *)a, (phokey_t *)b, len);
  case 4:
    return phokey_t_seq32((u_int *)a, (u_int *)b, len);
  case 8:
    return phokey_t_seq64((u_int64_t*)a, (u_int64_t*)b, len);
  default:
    return 0;
  }
}
314
315
/*
 * Order two serialized phrase records.  Each record starts with a one-byte
 * length, followed by a usecount_t and then the keys.  The keys are compared
 * over the shorter of the two lengths; on a tie the longer record sorts last.
 * Returns >0, <0 or 0.
 */
static inline int phseq(TSIN_HANDLE *th, u_char *a, u_char *b)
{
  u_char lena = a[0];
  u_char lenb = b[0];
  u_char *keys_a = a + 1 + sizeof(usecount_t);  // step over length + usecount
  u_char *keys_b = b + 1 + sizeof(usecount_t);
  u_char mlen = Min(lena,lenb);

  // Copy into aligned storage before comparing as wider integers.
  u_int64_t ka[MAX_PHRASE_LEN], kb[MAX_PHRASE_LEN];
  memcpy(ka, keys_a, th->ph_key_sz * mlen);
  memcpy(kb, keys_b, th->ph_key_sz * mlen);

  int d = phokey_t_seq(th, ka, kb, mlen);
  if (d != 0)
    return d;

  return (lena > lenb) - (lena < lenb);
}
337
338 gboolean inc_tsin_use_count(TSIN_HANDLE *th, void *pho, char *ch, int N);
339
340 static gboolean saved_phrase;
341
342
/*
 * Reload the database when the data file's modification time differs from
 * the one recorded in the handle, or when the file cannot be stat'ed.
 */
static void reload_if_modified(TSIN_HANDLE *th)
{
  struct stat now;
  gboolean stale = fstat(fileno(th->fph), &now) != 0 ||
                   now.st_mtime != th->modify_time;

  if (stale)
    reload_tsin_db_ex(th);
}
350
351
save_phrase_to_db(TSIN_HANDLE * th,void * phkeys,char * utf8str,int len,usecount_t usecount)352 gboolean save_phrase_to_db(TSIN_HANDLE *th, void *phkeys, char *utf8str, int len, usecount_t usecount)
353 {
354 reload_if_modified(th);
355
356 int mid, ord = 0, ph_ofs, hashno;
357 u_char tbuf[MAX_PHRASE_LEN*(sizeof(u_int64_t)+CH_SZ) + 1 + sizeof(usecount_t)],
358 sbuf[MAX_PHRASE_LEN*(sizeof(u_int64_t)+CH_SZ) + 1 + sizeof(usecount_t)];
359
360 saved_phrase = TRUE;
361
362 tbuf[0]=len;
363 memcpy(&tbuf[1], &usecount, sizeof(usecount)); // usecount
364 int tlen = (utf8str && th->ph_key_sz != 1)?utf8_tlen(utf8str, len):0;
365 #if 0
366 dbg("tlen %d '", tlen);
367 for(i=0; i < tlen; i++)
368 putchar(utf8str[i]);
369 dbg("'\n");
370 #endif
371
372 dbg("save_phrase_to_db '%s' tlen:%d ph_key_sz:%d\n", utf8str, tlen, th->ph_key_sz);
373
374 memcpy(&tbuf[1 + sizeof(usecount_t)], phkeys, th->ph_key_sz * len);
375 if (th->ph_key_sz > 1)
376 memcpy(&tbuf[th->ph_key_sz*len + 1 + sizeof(usecount_t)], utf8str, tlen);
377
378 if (th->ph_key_sz==1)
379 hashno= *((u_char *)phkeys);
380 else if (th->ph_key_sz==2)
381 hashno= *((phokey_t *)phkeys) >> TSIN_HASH_SHIFT;
382 else if (th->ph_key_sz==4)
383 hashno= *((u_int *)phkeys) >> TSIN_HASH_SHIFT_32;
384 else
385 hashno= *((u_int64_t *)phkeys) >> TSIN_HASH_SHIFT_64;
386
387 // dbg("hashno %d\n", hashno);
388
389 if (hashno >= TSIN_HASH_N)
390 return FALSE;
391
392 for(mid=th->hashidx[hashno]; mid<th->hashidx[hashno+1]; mid++) {
393 ph_ofs=get_phidx(th, mid);
394
395 fseek(th->fph, ph_ofs, SEEK_SET);
396 int rn;
397 rn = fread(sbuf,1,1, th->fph);
398 rn = fread(&sbuf[1], sizeof(usecount_t), 1, th->fph); // use count
399 rn = fread(&sbuf[1+sizeof(usecount_t)], 1, th->ph_key_sz * sbuf[0] + tlen, th->fph);
400
401 if ((ord=phseq(th, sbuf,tbuf)) > 0)
402 break;
403
404 if (!ord && (th->ph_key_sz==1 || !memcmp(&sbuf[sbuf[0]*th->ph_key_sz+1+sizeof(usecount_t)], utf8str, tlen))) {
405 // bell();
406 dbg("Phrase already exists\n");
407 inc_tsin_use_count(th, phkeys, utf8str, len);
408 return FALSE;
409 }
410 }
411
412 int wN = th->phcount - mid;
413
414 // dbg("wN %d phcount:%d mid:%d\n", wN, phcount, mid);
415
416 if (wN > 0) {
417 int *phidx = tmalloc(int, wN);
418 seek_fp_phidx(th, mid);
419 int rn;
420 rn = fread(phidx, sizeof(int), wN, th->fp_phidx);
421 seek_fp_phidx(th, mid+1);
422 fwrite(phidx, sizeof(int), wN, th->fp_phidx);
423 free(phidx);
424 }
425
426 fseek(th->fph,0,SEEK_END);
427
428 ph_ofs=ftell(th->fph);
429 if (th->ph_key_sz !=2)
430 ph_ofs -= sizeof(TSIN_GTAB_HEAD);
431
432 // dbg("ph_ofs %d ph_key_sz:%d\n", ph_ofs, ph_key_sz);
433 seek_fp_phidx(th, mid);
434 fwrite(&ph_ofs, sizeof(int), 1, th->fp_phidx);
435 th->phcount++;
436
437 fwrite(tbuf, 1, th->ph_key_sz*len + tlen + 1+ sizeof(usecount_t), th->fph);
438 fflush(th->fph);
439
440 if (th->hashidx[hashno]>mid)
441 th->hashidx[hashno]=mid;
442
443 for(hashno++; hashno<TSIN_HASH_N; hashno++)
444 th->hashidx[hashno]++;
445
446 rewind(th->fp_phidx);
447 fwrite(&th->phcount, sizeof(th->phcount), 1, th->fp_phidx);
448 fwrite(th->hashidx,sizeof(th->hashidx),1, th->fp_phidx);
449 fflush(th->fp_phidx);
450
451 get_modify_time(th);
452 #if MEM_PHIDX
453 reload_tsin_db_ex(th);
454 #endif
455 // dbg("ofs %d\n", get_phidx(mid));
456 return TRUE;
457 }
458
459
460 #include <sys/stat.h>
461
462 #if MEM_TSIN
load_tsin_entry0_ex(TSIN_HANDLE * th,int ofs,char * len,usecount_t * usecount,void * pho,u_char * ch)463 void load_tsin_entry0_ex(TSIN_HANDLE *th, int ofs, char *len, usecount_t *usecount, void *pho, u_char *ch)
464 #else
465 void load_tsin_entry0_ex(TSIN_HANDLE *th, char *len, usecount_t *usecount, void *pho, u_char *ch)
466 #endif
467 {
468 *usecount = 0;
469 *len = 0;
470 #if MEM_TSIN
471 char *p = th->mem_tsin + ofs;
472 memcpy(len, p, 1);
473 p++;
474 #else
475 int rn;
476 rn = fread(len, 1, 1, th->fph);
477 #endif
478 // dbg("rn %d\n", rn);
479
480 if (*len > MAX_PHRASE_LEN /* || *len <= 0 */) {
481 dbg("err: tsin db changed reload len:%d\n", *len);
482 reload_tsin_db_ex(th); // probably db changed, reload;
483 *len = 0;
484 return;
485 }
486
487 gboolean en_has_str = FALSE;
488 if (ch)
489 ch[0]=0;
490
491 if (*len < 0) {
492 *len = - (*len);
493 en_has_str = TRUE;
494 }
495 #if MEM_TSIN
496 memcpy(usecount, p, sizeof(usecount_t));
497 p+=sizeof(usecount_t);
498 int tlen=(*len) * th->ph_key_sz;
499 memcpy(pho, p, tlen);
500 p+=tlen;
501 #else
502 rn = fread(usecount, sizeof(usecount_t), 1, th->fph); // use count
503 rn = fread(pho, th->ph_key_sz, (int)(*len), th->fph);
504 #endif
505 if (ch && (th->ph_key_sz!=1 || en_has_str)) {
506 #if MEM_TSIN
507 int tlen = utf8_tlen(p, *len);
508 memcpy(ch, p, tlen);
509 #else
510 rn = fread(ch, CH_SZ, (int)(*len), th->fph);
511 int tlen = utf8_tlen((char *)ch, *len);
512 #endif
513 ch[tlen]=0;
514 }
515 }
516
/*
 * Load phrase number `idx` of the database behind `th`.
 *
 * The record offset is looked up in the index and the record is decoded by
 * load_tsin_entry0_ex(); see there for the meaning of len/usecount/pho/ch.
 *
 * An index at or beyond th->phcount is treated as a sign that the database
 * changed on disk: the database of *this handle* is reloaded and *len is set
 * to 0.
 */
void load_tsin_entry_ex(TSIN_HANDLE *th, int idx, char *len, usecount_t *usecount, void *pho, u_char *ch)
{
  *usecount = 0;

  if (idx >= th->phcount) {
    // Reload the handle we were given; reload_tsin_db() would always reload
    // the global tsin_hand, even when `th` is en_hand.
    reload_tsin_db_ex(th);
    *len = 0;
    return;
  }

  int ph_ofs=get_phidx(th, idx);

#if MEM_TSIN
  load_tsin_entry0_ex(th, ph_ofs, len, usecount, pho, ch);
#else
  fseek(th->fph, ph_ofs , SEEK_SET);
  load_tsin_entry0_ex(th, len, usecount, pho, ch);
#endif
}
539
540
/* Load phrase number `idx` from the global tsin_hand; see load_tsin_entry_ex(). */
void load_tsin_entry(int idx, char *len, usecount_t *usecount, void *pho, u_char *ch)
{
  TSIN_HANDLE *handle = &tsin_hand;

  load_tsin_entry_ex(handle, idx, len, usecount, pho, ch);
}
545
/*
 * Clear the low three bits (the tone) of every key whose tone_mask entry is
 * zero.  tone_mask[i] != 0 means pho[i] carries a tone and is left alone.
 * A NULL tone_mask leaves all keys untouched.
 */
void mask_tone(phokey_t *pho, int plen, char *tone_mask)
{
  if (tone_mask == NULL)
    return;

  for (int k = 0; k < plen; k++) {
    if (tone_mask[k])
      continue;
    pho[k] &= (~7);
  }
}
559
560
/*
 * Reduce each key in pho to the parts that the reference key refpho[i]
 * actually specifies.  Only positions whose tone_mask entry is zero are
 * touched (tone_mask[i] != 0 means the tone is present and the key is kept):
 *   - the tone bits (low three bits) are always cleared;
 *   - if the reference has no K1 bits, the K1 bits are cleared too;
 *   - if, in addition, the reference has no K2 bits, the K2 bits are cleared.
 * A NULL tone_mask leaves all keys untouched.
 */
void mask_pho_ref(phokey_t *pho, phokey_t *refpho, int plen, char *tone_mask)
{
  if (tone_mask == NULL)
    return;

#define K1 (0xf<<3)  // bit group of the symbols ㄚ ㄛ ㄜ ...
#define K2 (0x3<<7)  // bit group of the symbols ㄧ ㄨ ㄩ

  for (int k = 0; k < plen; k++) {
    if (tone_mask[k])
      continue;

    pho[k] &= (~7);

    phokey_t ref = refpho[k];
    if ((ref & K1) != 0)
      continue;
    pho[k] &= ~K1;

    if ((ref & K2) == 0)
      pho[k] &= ~K2;
  }
}
584
585
586 // *** r_sti<= range < r_edi
tsin_seek_ex(TSIN_HANDLE * th,void * pho,int plen,int * r_sti,int * r_edi,char * tone_mask)587 gboolean tsin_seek_ex(TSIN_HANDLE *th, void *pho, int plen, int *r_sti, int *r_edi, char *tone_mask)
588 {
589 int mid, cmp;
590 u_int64_t ss[MAX_PHRASE_LEN], stk[MAX_PHRASE_LEN];
591 char len;
592 usecount_t usecount;
593 int hashi;
594
595 if (tone_mask) {
596 int i;
597 for(i=0;i<plen;i++)
598 if (!tone_mask[i])
599 break;
600 if (i==plen)
601 tone_mask=NULL;
602 }
603
604 #if 0
605 dbg("tsin_seek %d\n", plen);
606 dbg("> ");
607 prphs((phokey_t *)pho, plen);
608 dbg("\n");
609 #endif
610
611
612 if (th->ph_key_sz==1)
613 hashi= *((u_char *)pho);
614 else if (th->ph_key_sz==2)
615 hashi= *((phokey_t *)pho) >> TSIN_HASH_SHIFT;
616 else if (th->ph_key_sz==4)
617 hashi= *((u_int *)pho) >> TSIN_HASH_SHIFT_32;
618 else
619 hashi= *((u_int64_t *)pho) >> TSIN_HASH_SHIFT_64;
620
621 if (hashi >= TSIN_HASH_N) {
622 dbg("hashi >= TSIN_HASH_N\n");
623 return FALSE;
624 }
625
626 int top=th->hashidx[hashi];
627 int bot=th->hashidx[hashi+1];
628
629 // dbg("hashi:%d top:%d bot:%d\n", hashi, top, bot);
630
631 if (top>=th->phcount) {
632 // dbg("top>=phcount\n");
633 return FALSE;
634 }
635
636 while (top <= bot) {
637 mid=(top+bot)/ 2;
638 load_tsin_entry_ex(th, mid, &len, &usecount, ss, NULL);
639
640 u_char mlen;
641 if (len > plen)
642 mlen=plen;
643 else
644 mlen=len;
645
646 if (tone_mask)
647 mask_pho_ref((phokey_t *)ss, (phokey_t *)pho, mlen, tone_mask);
648
649 // prphs(ss, mlen);
650 // mask_tone((phokey_t *)ss, mlen, tone_mask);
651
652 #if DBG || 0
653 int j;
654 dbg("> ");
655 prphs(ss, len);
656 dbg("\n");
657 #endif
658
659 cmp=phokey_t_seq(th, ss, pho, mlen);
660
661 #if DEBUG && 0
662 if (th->ph_key_sz==1) {
663 dbg("mid %d ", mid);
664 utf8_putcharn((char*)ss, len);
665 dbg(" %d\n", cmp);
666 }
667 #endif
668
669 if (!cmp && len < plen) {
670 // dbg("-2\n");
671 cmp=-2;
672 }
673
674 if (cmp>0)
675 bot=mid-1;
676 else
677 if (cmp<0)
678 top=mid+1;
679 else
680 break;
681 }
682
683 if (cmp && !tone_mask) {
684 // dbg("no match %d\n", cmp);
685 return FALSE;
686 }
687
688 // dbg("<--\n");
689 // seek to the first match because binary search is used
690 // gboolean found=FALSE;
691
692 int sti;
693 for(sti = mid; sti>=0; sti--) {
694 load_tsin_entry_ex(th, sti, &len, &usecount, stk, NULL);
695
696 #if 0
697 int j;
698 dbg("%d] %d*> ", sti, len);
699 prphs((phokey_t *)stk, len);
700 dbg("\n");
701 #endif
702
703 u_char mlen;
704 if (len > plen)
705 mlen=plen;
706 else
707 mlen=len;
708 #if 0
709 prphs(stk, len);
710 #endif
711 #if 0
712 mask_tone((phokey_t *)stk, mlen, tone_mask);
713 #else
714 if (tone_mask) {
715 // mask_pho_ref((phokey_t *)stk, (phokey_t *)pho, mlen, tone_mask);
716 // for(int i=0; i < plen; i++)
717 for(int i=0; i < mlen; i++)
718 {
719 phokey_t r=((phokey_t*)stk)[i];
720 if ((r&K1)==0) {
721 r|=K1;
722 if ((r&K2)==0)
723 r|=K2;
724 }
725 r|=7;
726 ((phokey_t*)stk)[i]=r;
727 }
728 }
729 #endif
730
731 int v = phokey_t_seq(th, stk, pho, mlen);
732 // if (!v)
733 // found = TRUE;
734
735 if (!tone_mask) {
736 if (!v && len>=plen)
737 continue;
738 } else {
739 if (v>0 || !v && len >= plen)
740 continue;
741 }
742
743 break;
744 }
745 sti++;
746
747 // seek to the tail
748 #if 0
749 char tt[CH_SZ*MAX_PHRASE_LEN];
750 #define TTCH (u_char *)tt
751 #else
752 #define TTCH NULL
753 #endif
754
755 if (tone_mask) {
756 int top=th->hashidx[hashi];
757 int bot=th->hashidx[hashi+1];
758
759 if (top>=th->phcount) {
760 // dbg("top>=phcount\n");
761 return FALSE;
762 }
763
764 phokey_t tpho[MAX_PHRASE_LEN];
765
766 for(int i=0; i < plen; i++) {
767 phokey_t r = ((phokey_t*)pho)[i];
768 if ((r&K1)==0) {
769 r|=K1;
770 if ((r&K2)==0)
771 r|=K2;
772 }
773 r|=7;
774 // tpho[i]=((phokey_t*)pho)[i] | 7;
775 tpho[i]=r;
776 }
777
778 while (top <= bot) {
779 mid=(top+bot)/ 2;
780 load_tsin_entry_ex(th, mid, &len, &usecount, ss, NULL);
781
782 u_char mlen;
783 if (len > plen)
784 mlen=plen;
785 else
786 mlen=len;
787
788 // prphs(ss, mlen);
789
790 #if DBG || 0
791 int j;
792 dbg("> ");
793 prphs(ss, len);
794 dbg("\n");
795 #endif
796
797 // mask_pho_ref((phokey_t *)ss, (phokey_t *)pho, mlen, tone_mask);
798
799 cmp=phokey_t_seq(th, ss, tpho, mlen);
800
801 if (!cmp && len < plen)
802 cmp=-2;
803
804 if (cmp>0)
805 bot=mid-1;
806 else
807 if (cmp<0)
808 top=mid+1;
809 else
810 break;
811 }
812
813 int edi;
814 for(edi = mid; edi < th->phcount; edi++) {
815 load_tsin_entry_ex(th, edi, &len, &usecount, stk, TTCH);
816
817 u_char mlen;
818 if (len > plen)
819 mlen=plen;
820 else
821 mlen=len;
822 #if 0
823 prphs(stk, len);
824 #endif
825 #if 0
826 mask_tone((phokey_t *)stk, mlen, tone_mask);
827 #else
828 mask_pho_ref((phokey_t *)stk, (phokey_t *)tpho, mlen, tone_mask);
829 #endif
830
831 int v = phokey_t_seq(th, stk, tpho, mlen);
832
833 #if 0
834 dbg("^ %s edi%d -> ", tt, edi);
835 prphs((phokey_t *)stk, len);
836 dbg(" v:%d\n", v);
837 #endif
838 if (v<=0)
839 continue;
840 break;
841 }
842
843 #if 0
844 dbg("sti%d edi:%d\n", sti, edi);
845 #endif
846
847 *r_sti = sti;
848 *r_edi = edi;
849 return edi > sti;
850 }
851
852 int edi;
853 for(edi = mid; edi < th->phcount; edi++) {
854 load_tsin_entry_ex(th, edi, &len, &usecount, stk, TTCH);
855
856 u_char mlen;
857 if (len > plen)
858 mlen=plen;
859 else
860 mlen=len;
861 #if 0
862 prphs((phokey_t *)stk, len); dbg("%s\n", tt);
863 #endif
864
865 int v = phokey_t_seq(th, stk, pho, mlen);
866 // if (!v)
867 // found = TRUE;
868 #if 0
869 dbg("edi%d -> ", edi);
870 prphs((phokey_t *)stk, len);
871 dbg(" v:%d\n", v);
872 #endif
873
874 if (v <= 0)
875 continue;
876 break;
877 }
878
879 #if 0
880 dbg("@@ sti%d edi:%d\n", sti, edi);
881 #endif
882
883 *r_sti = sti;
884 *r_edi = edi;
885
886 return edi > sti;
887 }
888
tsin_seek_en_1(u_char * pho,int plen,int * ridx)889 static gboolean tsin_seek_en_1(u_char *pho, int plen, int *ridx)
890 {
891 TSIN_HANDLE *th = &en_hand;
892 int mid = -1, cmp;
893 u_char ss[MAX_PHRASE_STR_LEN];
894 char len;
895 usecount_t usecount;
896 int hashi;
897
898 #if 1
899 dbg("tsin_seek_en %d\n", plen);
900 #endif
901
902 hashi= *((u_char *)pho);
903
904 if (hashi >= TSIN_HASH_N) {
905 dbg("hashi >= TSIN_HASH_N\n");
906 *ridx = th->phcount;
907 return FALSE;
908 }
909
910 int top=th->hashidx[hashi];
911 int bot=th->hashidx[hashi+1];
912
913 dbg("hashi:%d top:%d bot:%d\n", hashi, top, bot);
914
915 if (top>=th->phcount) {
916 // dbg("top>=phcount\n");
917 *ridx = th->phcount;
918 return FALSE;
919 }
920
921 while (top <= bot) {
922 mid=(top+bot)/ 2;
923 load_tsin_entry_ex(th, mid, &len, &usecount, ss, NULL);
924
925 u_char mlen;
926 if (len > plen)
927 mlen=plen;
928 else
929 mlen=len;
930
931 cmp=phokey_t_seq8(ss, pho, mlen);
932
933 if (!cmp && len < plen) {
934 dbg("-2\n");
935 cmp=-2;
936 }
937
938 if (cmp>0)
939 bot=mid-1;
940 else
941 if (cmp<0)
942 top=mid+1;
943 else
944 break;
945 }
946
947 if (mid < 0)
948 mid = 0;
949
950 if (cmp) {
951 dbg("no match %d\n", cmp);
952 *ridx = mid;
953 return FALSE;
954 }
955
956 *ridx = mid;
957 return TRUE;
958 }
959
960
961
962 // *** r_sti<= range < r_edi
tsin_seek_en(u_char * pho,int plen,int * r_sti,int * r_edi)963 gboolean tsin_seek_en(u_char *pho, int plen, int *r_sti, int *r_edi)
964 {
965 TSIN_HANDLE *th = &en_hand;
966 // int cmp;
967 u_char ss[MAX_PHRASE_STR_LEN], stk[MAX_PHRASE_STR_LEN];
968 char len;
969 usecount_t usecount;
970
971 #if 0
972 dbg("tsin_seek_en %d\n", plen);
973 #endif
974
975 int eq_idx;
976 if (!tsin_seek_en_1(pho, plen, &eq_idx))
977 return FALSE;
978
979 int u_idx = -1; // upper bound
980 memcpy(ss, pho, plen);
981 // ss[plen-1]++;
982 ss[plen]=0; // fake upperbound string
983 tsin_seek_en_1(ss, plen+1, &u_idx);
984
985 // dbg("u_idx %d\n", u_idx);
986
987 // dbg("<--\n");
988 gboolean found=FALSE;
989 int sti;
990 for(sti = u_idx; sti>=0; sti--) {
991 load_tsin_entry_ex(th, sti, &len, &usecount, stk, NULL);
992
993 u_char mlen=0;
994 if (len > plen)
995 mlen=plen;
996 else
997 mlen=len;
998 #if 0
999 prphs(stk, len);
1000 #endif
1001
1002 int v = phokey_t_seq8(stk, pho, plen);
1003 if (v > 0)
1004 continue;
1005 // if (!v)
1006 // found = TRUE;
1007
1008 #if 0
1009 int j;
1010 dbg("%d] %d*> ", sti, mlen);
1011 prphs(stk, len);
1012 dbg(" v:%d\n", v);
1013 #endif
1014
1015 if (!v && len >= plen)
1016 continue;
1017 break;
1018 }
1019 sti++;
1020
1021 // seek to the tail
1022
1023 int l_idx = -1;
1024 memcpy(ss, pho, plen);
1025 // ss[plen-1]--;
1026 ss[plen]=127;
1027 tsin_seek_en_1(ss, plen+1, &l_idx);
1028
1029 // dbg("l_idx:%d\n", l_idx);
1030
1031 int edi;
1032 for(edi = l_idx; edi < th->phcount; edi++) {
1033 load_tsin_entry_ex(th, edi, &len, &usecount, stk, NULL);
1034
1035 u_char mlen=0;
1036 if (len > plen)
1037 mlen=plen;
1038 else
1039 mlen=len;
1040
1041 int v = phokey_t_seq8(stk, pho, mlen);
1042 if (v < 0)
1043 continue;
1044
1045 // if (!v)
1046 // found = TRUE;
1047 #if 0
1048 dbg("edi%d -> ", edi);
1049 dbg(" v:%d\n", v);
1050 #endif
1051
1052 if (!v && len >= plen)
1053 continue;
1054 break;
1055 }
1056
1057 #if 0
1058 dbg("sti%d edi:%d found:%d\n", sti, edi, found);
1059 #endif
1060
1061 *r_sti = sti;
1062 *r_edi = edi;
1063
1064 return edi > sti;
1065 }
1066
1067
tsin_seek(void * pho,int plen,int * r_sti,int * r_edi,char * tone_mask)1068 gboolean tsin_seek(void *pho, int plen, int *r_sti, int *r_edi, char *tone_mask)
1069 {
1070 return tsin_seek_ex(&tsin_hand, pho, plen, r_sti, r_edi, tone_mask);
1071 }
1072
inc_tsin_use_count(TSIN_HANDLE * th,void * pho,char * ch,int N)1073 gboolean inc_tsin_use_count(TSIN_HANDLE *th, void *pho, char *ch, int N)
1074 {
1075 int sti, edi;
1076
1077 reload_if_modified(th);
1078
1079 dbg("CH inc_dec_tsin_use_count '%s' N:%d\n", ch, N);
1080 #if 0
1081 if (th->ph_key_sz==2) {
1082 prphs(pho, N);
1083 }
1084 #endif
1085 if (!tsin_seek_ex(th, pho, N, &sti, &edi, NULL)) {
1086 dbg("inc_dec_tsin_use_count not found\n");
1087 return FALSE;
1088 }
1089
1090 int idx;
1091
1092 #if 0
1093 int tlen = ch?strlen(ch):0;
1094 dbg("otlen %d ", tlen);
1095 int i;
1096 for(i=0; i < tlen; i++)
1097 putchar(ch[i]);
1098 puts("");
1099 #endif
1100
1101 for(idx=sti; idx < edi; idx++) {
1102 char len;
1103 usecount_t usecount, n_usecount;
1104 u_int64_t phi[MAX_PHRASE_LEN];
1105 char stch[MAX_PHRASE_LEN * CH_SZ * 2];
1106
1107 load_tsin_entry_ex(th, idx, &len, &usecount, phi, (u_char *)stch);
1108 dbg("^^ %s %d\n", stch, usecount);
1109 n_usecount = usecount;
1110
1111 if (len!=N || phokey_t_seq(th, phi, pho, N))
1112 break;
1113 #if 0
1114 for(i=0; i < tlen; i++)
1115 putchar(stch[i]);
1116 dbg(" ppp\n");
1117 #endif
1118
1119 // dbg("stch %s\n", stch);
1120 if (th->ph_key_sz!=1 && strcmp(stch, ch))
1121 continue;
1122 #if 1
1123 dbg("found match %d\n", usecount);
1124 #endif
1125 int ph_ofs=get_phidx(th, idx);
1126 int sofs=ph_ofs + 1;
1127 fseek(th->fph, sofs, SEEK_SET);
1128
1129 if (usecount < 0x3fffffff)
1130 n_usecount++;
1131
1132 if (n_usecount != usecount) {
1133 fwrite(&n_usecount, sizeof(usecount_t), 1, th->fph); // use count
1134 fflush(th->fph);
1135 #if MEM_TSIN
1136 memcpy(th->mem_tsin + sofs, &n_usecount, sizeof(usecount_t));
1137 #endif
1138 }
1139 }
1140
1141 get_modify_time(th);
1142 return TRUE;
1143 }
1144
/*
 * Lower-case the first `len` bytes of u8 in place using tolower().
 * Each byte is passed as unsigned char, so bytes with the high bit set
 * (e.g. parts of a multi-byte UTF-8 sequence) never reach tolower() as
 * negative values.
 */
void strtolower(char *u8, int len)
{
  for (int j = 0; j < len; j++)
    u8[j] = (char)tolower((unsigned char)u8[j]);
}
1151
inc_tsin_use_count_en(char * s,int len)1152 gboolean inc_tsin_use_count_en(char *s, int len)
1153 {
1154 if (inc_tsin_use_count(&en_hand, s, NULL, len))
1155 return TRUE;
1156
1157 s[0]=tolower(s[0]);
1158 if (inc_tsin_use_count(&en_hand, s, NULL, len))
1159 return TRUE;
1160
1161 strtolower((char *)s,len);
1162 return inc_tsin_use_count(&en_hand, s, NULL, len);
1163 }
1164