1 /* Copyright 1994 NEC Corporation, Tokyo, Japan.
2 *
3 * Permission to use, copy, modify, distribute and sell this software
4 * and its documentation for any purpose is hereby granted without
5 * fee, provided that the above copyright notice appear in all copies
6 * and that both that copyright notice and this permission notice
7 * appear in supporting documentation, and that the name of NEC
8 * Corporation not be used in advertising or publicity pertaining to
9 * distribution of the software without specific, written prior
10 * permission. NEC Corporation makes no representations about the
11 * suitability of this software for any purpose. It is provided "as
12 * is" without express or implied warranty.
13 *
14 * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
16 * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20 * PERFORMANCE OF THIS SOFTWARE.
21 */
22
23 #if !defined(lint) && !defined(__CODECENTER__)
24 static char rcsid[]="$Id: nword.c,v 1.5 2003/07/31 19:03:51 aida_s Exp $";
25 #endif
26
27 /* LINTLIBRARY */
28 #include "RKintern.h"
29
#if defined(DEBUG_NWORD) || defined(RK_DEBUG) || defined(TEST)
#include <stdio.h>

/*
 * Debug aids: flag values that can be combined into debug_flags
 * (see the alternative settings listed below).
 * NOTE(review): D_SUCC and D_SUBST share the value 4 -- confirm that the
 * overlap is intended.
 */
#define D_CONC		1
#define D_PARSE		2
#define D_SUCC		4
#define D_SUBST		4

/*
 * Alternative settings kept for reference:
 *	int debug_flags = D_CONC|D_PARSE|D_SUCC;
 *	int debug_flags = D_PARSE|D_SUCC;
 */
int debug_flags = 0;

/* dump helpers; their definitions are not in this part of the file */
static void dumpSimpleWordRec();
static void dumpWordRec();
static void dumpAllBunq();
static void dumpXQH();
static void dumpXQ();

#else
/* without any of the debug switches rk_debug() expands to nothing */
#define rk_debug(file, fmt, a, b, c)
#endif
50
51 extern void usncopy();
52
53 static void
clearWord(w,bb)54 clearWord(w, bb) /* make word empty */
55 struct nword *w;
56 int bb;
57 {
58 if (w) {
59 w->nw_cache = (struct ncache *)0;
60 w->nw_rowcol = bb; /* ʸ�� */
61 w->nw_klen = w->nw_ylen = 0;
62 w->nw_class = ND_EMP;
63 w->nw_flags = 0;
64 w->nw_lit = 0;
65 w->nw_prio = 0L;
66 w->nw_count = 0;
67 w->nw_left = w->nw_next = (struct nword *)0;
68 w->nw_kanji = (Wrec *)0;
69 }
70 }
71
72 /*ARGSUSED*/
73 static void
setWord(w,rc,lit,yomi,ylen,kanji,klen,bb)74 setWord(w, rc, lit, yomi, ylen, kanji, klen, bb)
75 struct nword *w;
76 int rc;
77 int lit;
78 Wchar *yomi;
79 int ylen;
80 Wrec *kanji;
81 int klen;
82 int bb;
83 {
84 clearWord(w, bb);
85 w->nw_rowcol = rc;
86 w->nw_klen = klen;
87 w->nw_ylen = ylen;
88 w->nw_class = 0;
89 w->nw_flags = 0;
90 w->nw_lit = lit;
91 w->nw_kanji = kanji;
92 }
93
94 /* allocWord
95 * allocate a fresh word
96 */
97 /*ARGSUSED*/
98 static
99 struct nword *
allocWord(st,bb)100 allocWord(st, bb)
101 struct nstore *st;
102 int bb;
103 {
104 struct nword *new_word;
105
106 if (!SX.word) {
107 struct nword *new_page;
108 int i;
109 #define NW_PAGESIZE 1024
110 new_page = (struct nword *)malloc(sizeof(struct nword)*NW_PAGESIZE);
111 if (new_page) {
112 SX.page_in_use++;
113 new_page[0].nw_next = SX.page;
114 SX.page = &new_page[0];
115 SX.word = &new_page[1];
116 for (i = 1; i + 1 < NW_PAGESIZE; i++)
117 new_page[i].nw_next = &new_page[i + 1];
118 new_page[i].nw_next = (struct nword *)0;
119 };
120 };
121 new_word = SX.word;
122 if (new_word) {
123 SX.word = new_word->nw_next;
124 clearWord(new_word, bb);
125 st->word_in_use++;
126 SX.word_in_use++;
127 };
128 return new_word;
129 }
130
131 static void
derefWord(word)132 derefWord(word) /* decrease the reference counter */
133 struct nword *word;
134 {
135 for (; word; word = word->nw_next)
136 if (word->nw_cache)
137 (void)_RkDerefCache(word->nw_cache);
138 }
139
140 /*ARGSUSED*/
141 static void
killWord(st,word)142 killWord(st, word) /* dispose the unsed words */
143 struct nstore *st;
144 struct nword *word;
145 {
146 struct nword *p, *q;
147
148 if (word) {
149 for (p = q = word; p; q = p, p = p->nw_next) {
150 if (!p->nw_cache && p->nw_kanji) {
151 _Rkpanic("killWord this would never happen addr ", 0, 0, 0);
152 (void)free((char *)p->nw_kanji);
153 };
154 st->word_in_use--;
155 SX.word_in_use--;
156 }
157 q->nw_next = SX.word;
158 SX.word = word;
159 }
160 }
161
/*
 * freeWord -- release a word chain: drop the cache references first
 * (derefWord), then return the records to the free list (killWord).
 */
static void
freeWord(st, word)
     struct nstore *st;
     struct nword *word;
{
  derefWord(word);
  killWord(st, word);
}
170
171 void
_RkFreeBunq(st)172 _RkFreeBunq(st) /* freeWord = derefWord + killWord */
173 struct nstore *st;
174 {
175 struct nbun *bunq = &st->bunq[st->curbun];
176
177 freeWord(st, bunq->nb_cand);
178 bunq->nb_cand = (struct nword *)0;
179 bunq->nb_yoff = 0;
180 bunq->nb_curlen = bunq->nb_maxcand = bunq->nb_curcand = 0;
181 bunq->nb_flags = (unsigned short)0;
182 return;
183 }
184
185 extern unsigned searchRut();
186 extern int entryRut();
187
188 static
189 struct nword *
concWord(cx,p,q,loc,bb)190 concWord(cx, p, q, loc, bb) /* create the concatinated word p+q */
191 struct RkContext *cx;
192 struct nword *p, *q; /* prefix word list, and right word */
193 int loc;
194 int bb;
195 {
196 struct nword conc;
197 struct nword *pq;
198
199 /* create a concatinated word temoprally */
200 conc = *q;
201 conc.nw_klen += p->nw_klen;
202 conc.nw_ylen += p->nw_ylen;
203 conc.nw_flags = p->nw_flags&(NW_PRE|NW_SUC|NW_SWD|NW_LOWPRI);
204 conc.nw_count = p->nw_count + 1;
205 /* check limit conditions */
206 if (conc.nw_klen > RK_LEN_WMAX ||
207 conc.nw_ylen > RK_LEN_WMAX ||
208 conc.nw_count >= RK_CONC_NMAX)
209 return (struct nword *)0;
210 #ifdef LOGIC_HACK
211 if (conc.nw_count >= 3) {
212 switch (RkCheckNegGram(cx->gram->gramdic,
213 p->nw_left->nw_rowcol, p->nw_rowcol, q->nw_rowcol))
214 {
215 case 1:
216 return (struct nword *)0;
217 case 2:
218 conc.nw_flags |= NW_LOWPRI;
219 }
220 }
221 #endif /* LOGIC_HACK */
222 if (p->nw_ylen == 1 && q->nw_rowcol == cx->gram->P_Ftte)
223 conc.nw_flags |= NW_LOWPRI; /* FIXME: replace to something better */
224 conc.nw_prio = p->nw_prio;
225 conc.nw_next = (struct nword *)0;
226 conc.nw_left = p;
227 switch(q->nw_class) {
228 /* kakko, kutouten ha setuzoku kankei ni eikyou sinai */
229 case ND_OPN:
230 case ND_CLS:
231 conc.nw_rowcol = p->nw_rowcol;
232 if (p->nw_class != ND_EMP) {
233 conc.nw_class = p->nw_class;
234 conc.nw_flags = p->nw_flags;
235 } else {
236 conc.nw_class = q->nw_class;
237 conc.nw_flags = q->nw_flags;
238 };
239 break;
240 case ND_PUN:
241 /* avoid punctionations where prohibited */
242 if (!CanSplitWord(p))
243 return (struct nword *)0;
244 /* don't remove loc check or you get stuck when a punctionation comes */
245 if (loc > 0 && p->nw_class == ND_EMP)
246 return (struct nword *)0;
247 conc.nw_rowcol = p->nw_rowcol;
248 conc.nw_class = ND_SWD;
249 break;
250 case ND_MWD:
251 conc.nw_flags |= NW_MWD;
252 conc.nw_flags |= (q->nw_flags & NW_LOWPRI);
253 conc.nw_prio = q->nw_prio;
254 break;
255 case ND_SWD:
256 if (!(conc.nw_flags&NW_SWD))
257 conc.nw_flags |= NW_SWD;
258 break;
259 case ND_PRE:
260 conc.nw_flags |= NW_PRE;
261 break;
262 case ND_SUC:
263 conc.nw_flags |= NW_SUC;
264 break;
265 };
266 /* cache no sanshoudo wo kousinn suru */
267 pq = allocWord(cx->store, bb);
268 if (pq) {
269 *pq = conc;
270 p->nw_flags |= NW_FOLLOW;
271 if (pq->nw_cache)
272 _RkEnrefCache(pq->nw_cache);
273 };
274 return pq;
275 }
276
277 /* clearQue
278 * clear word tree queue
279 */
280 static void
clearQue(xq)281 clearQue(xq)
282 struct nqueue *xq;
283 {
284 xq->tree = (struct nword *)0;
285 xq->maxlen = 0;
286 xq->status = 0;
287 }
288 /* RkFreeQue
289 * free word tree stored in [s, e)
290 */
291 void
_RkFreeQue(st,s,e)292 _RkFreeQue(st, s, e)
293 struct nstore *st;
294 int s;
295 int e;
296 {
297 struct nqueue *xq = st->xq;
298
299 while (s < e) {
300 if (xq[s].tree)
301 freeWord(st, xq[s].tree);
302 clearQue(&xq[s]);
303 s++;
304 };
305 }
306
307 /*
308 * Literal
309 */
310 static
311 int
cvtNum(dst,maxdst,src,maxsrc,format)312 cvtNum(dst, maxdst, src, maxsrc, format)
313 Wchar *dst;
314 int maxdst;
315 Wchar *src;
316 int maxsrc;
317 int format;
318 {
319 return RkwCvtSuuji(dst, maxdst, src, maxsrc, format - 1);
320 }
321
322 static
323 int
cvtAlpha(dst,maxdst,src,maxsrc,format)324 cvtAlpha(dst, maxdst, src, maxsrc, format)
325 Wchar *dst;
326 int maxdst;
327 Wchar *src;
328 int maxsrc;
329 int format;
330 {
331 switch(format) {
332 #ifdef ALPHA_CONVERSION
333 case 1: return RkwCvtZen(dst, maxdst, src, maxsrc);
334 case 2: return RkwCvtHan(dst, maxdst, src, maxsrc);
335 case 3: return -1;
336 #else
337 case 1: return RkwCvtNone(dst, maxdst, src, maxsrc);
338 case 2: return -1;
339 #endif
340 default: return 0;
341 }
342 }
343
344 static
345 int
cvtHira(dst,maxdst,src,maxsrc,format)346 cvtHira(dst, maxdst, src, maxsrc, format)
347 Wchar *dst;
348 int maxdst;
349 Wchar *src;
350 int maxsrc;
351 int format;
352 {
353 switch(format) {
354 case 1: return RkwCvtHira(dst, maxdst, src, maxsrc);
355 case 2: return RkwCvtKana(dst, maxdst, src, maxsrc);
356 default: return 0;
357 }
358 }
359
360 static
361 int
cvtLit(dst,maxdst,src,maxsrc,format,mode)362 cvtLit(dst, maxdst, src, maxsrc, format, mode)
363 Wchar *dst;
364 int maxdst;
365 Wchar *src;
366 int maxsrc;
367 int format;
368 unsigned long mode;
369 {
370 switch(format >> 4) {
371 case LIT_NUM:
372 if (mode & RK_MAKE_KANSUUJI)
373 return cvtNum(dst, maxdst, src, maxsrc, format&15);
374 else
375 return RkwCvtNone(dst, maxdst, src, maxsrc);
376 case LIT_ALPHA: return cvtAlpha(dst, maxdst, src, maxsrc, format&15);
377 case LIT_HIRA: return cvtHira(dst, maxdst, src, maxsrc, format&15);
378 default: return 0;
379 }
380 }
381
382 /* setLit
383 * create the literals as many as the context requires
384 */
385 static
386 struct nword *
setLit(cx,word,maxword,rc,src,srclen,format)387 setLit(cx, word, maxword, rc, src, srclen, format)
388 struct RkContext *cx;
389 struct nword *word;
390 int maxword;
391 int rc;
392 Wchar *src;
393 int srclen;
394 int format;
395 {
396 struct nword *w = word;
397 int dstlen;
398 unsigned long mode;
399
400 if (!cx->litmode)
401 return 0;
402 for (mode = cx->litmode[format]; mode; mode >>= RK_XFERBITS)
403 if (w < word + maxword) {
404 int code = MAKELIT(format, mode&RK_XFERMASK);
405
406 dstlen = cvtLit((Wchar *)0, 9999, src, srclen, code, (unsigned long)cx->concmode);
407 if (0 < dstlen && dstlen <= RK_LEN_WMAX)
408 setWord(w++, rc, code, src, srclen, (Wrec *)0, dstlen, cx->gram->P_BB);
409 if (dstlen < 0)
410 setWord(w++, rc, code, src, srclen, (Wrec *)0, srclen, cx->gram->P_BB);
411 }
412 return (struct nword *) w;
413 }
414
415 #define READWORD_MAXCACHE 128
416 static
417 struct nword *
readWord(cx,yy,ys,ye,class,nword,maxword,doflush,douniq)418 readWord(cx, yy, ys, ye, class, nword, maxword, doflush, douniq)
419 struct RkContext *cx;
420 int yy, ys, ye;
421 int class;
422 struct nword *nword;
423 int maxword;
424 int doflush;
425 int douniq;
426 {
427 Wchar *key = cx->store->yomi + yy;
428 struct nword *wrds;
429 struct MD *head = cx->md[class], *md;
430 int maxcache = READWORD_MAXCACHE;
431 #ifndef USE_MALLOC_FOR_BIG_ARRAY
432 unsigned permutation[RK_CAND_NMAX];
433 unsigned char *candidates[RK_CAND_NMAX];
434 struct nread nread[READWORD_MAXCACHE];
435 #else
436 unsigned *permutation;
437 unsigned char **candidates;
438 struct nread *nread;
439
440 permutation = (unsigned *)malloc(sizeof(unsigned) * RK_CAND_NMAX);
441 candidates = (unsigned char **)
442 malloc(sizeof(unsigned char *) * RK_CAND_NMAX);
443 nread = (struct nread *)malloc(sizeof(struct nread) * READWORD_MAXCACHE);
444 if (!permutation || !candidates || !nread) {
445 if (permutation) (void)free((char *)permutation);
446 if (candidates) (void)free((char *)candidates);
447 if (nread) (void)free((char *)nread);
448 return nword;
449 }
450 #endif
451
452 wrds = nword;
453 for (md = head->md_next; md != head; md = md->md_next) {
454 struct DM *dm = md->md_dic;
455 struct DM *qm = md->md_freq;
456 struct nword *pp, *qq;
457 int c, nc, num, cf = 0, nl;
458
459 if (maxword <= 0)
460 break;
461 if (!dm)
462 continue;
463 if (qm && !qm->dm_qbits)
464 qm = (struct DM *)0;
465 nc = DST_SEARCH(cx, dm, key, ye, nread, maxcache, &cf);
466 for (c = 0; c < nc; c++) {
467 struct nread *thisRead = nread + c;
468 struct ncache *thisCache = thisRead->cache;
469 unsigned char *wp = thisCache->nc_word;
470 unsigned long offset;
471 int nk, cnt = 1;
472 unsigned long csnb;
473 int bitSize;
474
475 nk = _RkCandNumber(wp);
476 nl = (*wp >> 1) & 0x3f;
477 if (!doflush && (cf || thisRead->nk > ye || thisRead->nk > RK_KEY_WMAX))
478 cx->poss_cont++;
479 if (*wp & 0x80)
480 wp += 2;
481 wp += 2 + nl *2;
482 csnb = thisRead->csn;
483 offset = thisRead->offset;
484 if (ys < thisRead->nk && thisRead->nk <= ye && thisRead->nk <= RK_KEY_WMAX) {
485 for (num = 0; num < nk; num++) {
486 candidates[num] = wp;
487 wp += 2 * ((*wp >> 1) & 0x7f) + 2;
488 };
489 if (qm) {
490 int ecount, cval, i;
491
492 bitSize = _RkCalcLog2(nk + 1) + 1;
493 _RkUnpackBits(permutation, qm->dm_qbits, offset, bitSize, nk);
494 for (ecount = cval = i = 0; i < nk; i++) {
495 if ((int)permutation[i]/2 > nk) {
496 ecount++;
497 break;
498 };
499 cval += permutation[i];
500 }
501 if (ecount || cval < (nk-1)*(nk-2)) {
502 for (i = 0; i < nk; i++)
503 permutation[i] = 2*i;
504 _RkPackBits(qm->dm_qbits, offset, bitSize, permutation, nk);
505 };
506 };
507 pp = wrds;
508 for (num = 0; num < nk; num++) {
509 unsigned permed;
510
511 if (maxword <= 0)
512 break;
513 if (qm) {
514 permed = permutation[num]/2;
515 if ((int)permed > nk) {
516 break;
517 } else if ((int)permed == nk)
518 continue;
519 } else
520 permed = num;
521 wp = candidates[permed];
522 clearWord(wrds, cx->gram->P_BB);
523 wrds->nw_kanji = wp;
524 wrds->nw_freq = qm;
525 wrds->nw_rowcol = _RkRowNumber(wp);
526 wrds->nw_cache = thisCache;
527 wrds->nw_ylen = thisRead->nk;
528 wrds->nw_klen = (*wp >> 1) & 0x7f;
529 wrds->nw_class = class;
530 wrds->nw_csn = csnb + permed;
531 wrds->nw_prio = 0L;
532 if (class == ND_MWD) {
533 if (qm && qm->dm_rut) {
534 if (cnt)
535 cnt = wrds->nw_prio = searchRut(qm->dm_rut, wrds->nw_csn);
536 } else if (DM2TYPE(dm)) {
537 if (num < 2)
538 wrds->nw_prio = ((struct TW *)thisCache->nc_address)->lucks[num];
539 }
540 if (wrds->nw_prio) {
541 long t;
542
543 t = _RkGetTick(0) - wrds->nw_prio;
544 wrds->nw_prio = (0 <= t && t < 0x2000) ? (0x2000 - t) << 4 : 0;
545 };
546 switch(num) {
547 case 0: wrds->nw_prio += 15L; break;
548 case 1: wrds->nw_prio += 11L; break;
549 case 2: wrds->nw_prio += 7L; break;
550 case 3: wrds->nw_prio += 3L; break;
551 };
552 wrds->nw_prio |= 0x01;
553 };
554 if (douniq) {
555 for (qq = pp; qq < wrds; qq++)
556 if (qq->nw_rowcol == wrds->nw_rowcol)
557 break;
558 if (qq < wrds)
559 continue;
560 }
561 _RkEnrefCache(thisCache);
562 wrds++;
563 maxword--;
564 };
565 };
566 _RkDerefCache(thisCache);
567 };
568 maxcache -= nc;
569 };
570 #ifdef USE_MALLOC_FOR_BIG_ARRAY
571 (void)free((char *)permutation);
572 (void)free((char *)candidates);
573 (void)free((char *)nread);
574 #endif
575 return(wrds);
576 }
577
578 /* makeWord
579 * jisho ni nai katakana, suuji, tokushu moji wo tango to minasu
580 */
581 /*ARGSUSED*/
582 static
583 struct nword *
makeWord(cx,yy,ys,ye,class,word,maxword,doflush,douniq)584 makeWord(cx, yy, ys, ye, class, word, maxword, doflush, douniq)
585 struct RkContext *cx;
586 int yy, ys, ye;
587 int class; /* word class */
588 struct nword *word;
589 int maxword;
590 int doflush;
591 int douniq;
592 {
593 struct nstore *st = cx->store;
594 Wchar *key = st->yomi + yy;
595 Wchar *k, *z;
596 struct nword *w = word;
597 Wchar c;
598 int clen;
599 int hinshi = cx->gram->P_BB;
600 int literal = -1;
601 int punct = 0;
602 int gobeyond = 0;
603
604 if (ye <= 0)
605 return w;
606 z = (k = key) + ye;
607 /* sentou moji wo yomu */
608 c = *k++;
609 clen = 1;
610 if (us_iscodeG0(c)) { /* ascii string */
611 if ('0' <= c && '9' >= c) { /* numeral */
612 if (!(cx->concmode & RK_MAKE_EISUUJI)) {
613 doflush++;
614 } else {
615 for (; k < z; k++, clen++)
616 if (clen >= RK_KEY_WMAX || !('0' <= *k && *k <= '9')) {
617 doflush++;
618 break;
619 };
620 }
621 hinshi = cx->gram->P_NN; literal = LIT_NUM;
622 } else { /* others */
623 if (!(cx->concmode & RK_MAKE_EISUUJI)) {
624 doflush++;
625 } else {
626 for (; k < z; k++, clen++)
627 if (clen >= RK_KEY_WMAX || !us_iscodeG0(*k)) {
628 doflush++;
629 break;
630 };
631 }
632 hinshi = cx->gram->P_T35; literal = LIT_ALPHA;
633 }
634 } else if (us_iscodeG1(c)) {
635 if (0xb000 <= c) { /* kanji string */
636 for (; k < z; k++, clen++)
637 if (clen >= RK_KEY_WMAX || *k < 0xb000) {
638 doflush++;
639 break;
640 };
641 hinshi = cx->gram->P_T00;
642 } else if (0xa1a2 <= c && c <= 0xa1db) {
643 /*
644 * now multiple punctiation characters constitute a single punct
645 */
646 for (; k < z; k++, clen++)
647 if (clen >= RK_KEY_WMAX || !(0xa1a2 <= *k && *k <= 0xa1db)) {
648 doflush++;
649 break;
650 };
651 switch(c) {
652 case 0xa1a2: case 0xa1a3: case 0xa1a4:
653 case 0xa1a5: case 0xa1a6: case 0xa1a7:
654 case 0xa1a8: case 0xa1a9: case 0xa1aa:
655 case 0xa1c4:
656 punct = ND_PUN;
657 break;
658 case 0xa1c6: case 0xa1c8: case 0xa1ca:
659 case 0xa1cc: case 0xa1ce:
660 case 0xa1d0: case 0xa1d2: case 0xa1d4:
661 case 0xa1d6: case 0xa1d8: case 0xa1da:
662 punct = ND_OPN;
663 break;
664 case 0xa1c7: case 0xa1c9: case 0xa1cb:
665 case 0xa1cd: case 0xa1cf: case 0xa1d1:
666 case 0xa1d3: case 0xa1d5: case 0xa1d7:
667 case 0xa1d9: case 0xa1db:
668 punct = ND_CLS;
669 break;
670 default:
671 hinshi = cx->gram->P_T00;
672 doflush++;
673 };
674 } else if (0xa3b0 <= c && c <= 0xa3b9) { /* suuji */
675 if (!(cx->concmode & RK_MAKE_EISUUJI)) {
676 doflush++;
677 } else {
678 for (; k < z; k++, clen++)
679 if (clen >= RK_KEY_WMAX || !(0xa3b0 <= *k && *k <= 0xa3b9)) {
680 doflush++;
681 break;
682 };
683 }
684 hinshi = cx->gram->P_NN; literal = LIT_NUM;
685 } else if ((0xa3c1 <= c && c <= 0xa3da)
686 || (0xa3e1 <= c && c <= 0xa3fa)) { /* eiji */
687 if (!(cx->concmode & RK_MAKE_EISUUJI)) {
688 doflush++;
689 } else {
690 for (; k < z; k++, clen++)
691 if (clen >= RK_KEY_WMAX
692 || !((0xa3c1 <= (c = *k) && c <= 0xa3da)
693 || (0xa3e1 <= c && c <= 0xa3fa))) {
694 doflush++;
695 break;
696 };
697 }
698 hinshi = cx->gram->P_T35; literal = LIT_ALPHA;
699 } else if (0xa5a1 <= c && c <= 0xa5f6) { /* zenkaku katakana */
700 for (; k < z; k++, clen++)
701 if (clen >= RK_KEY_WMAX ||
702 ((0xa5a1 > (c = *k) || c > 0xa5f6) &&
703 (0xa1a1 > c || c > 0xa1f6))) {
704 doflush++;
705 break;
706 };
707 hinshi = cx->gram->P_T30;
708 } else if (0xa4a1 <= c && c <= 0xa4f3) { /* hiragana */
709 for (; k < z; k++, clen++) {
710 if (clen >= RK_KEY_WMAX) {
711 doflush++;
712 break;
713 };
714 switch (*k) {
715 #ifndef LOGIC_HACK
716 case 0xa4a1: case 0xa4a3: case 0xa4a5:
717 case 0xa4a7: case 0xa4a9:
718 case 0xa4e3: case 0xa4e5: case 0xa4e7:
719 case 0xa4c3: case 0xa4f3:
720 #endif
721 case 0xa1ab: case 0xa1ac: case 0xa1b3:
722 case 0xa1b4: case 0xa1b5: case 0xa1b6:
723 case 0xa1bc:
724 continue;
725 default:
726 doflush++;
727 gobeyond++;
728 goto hira;
729 };
730 };
731 hira:
732 hinshi = cx->gram->P_T35;
733 } else {
734 doflush++;
735 hinshi = cx->gram->P_T35;
736 };
737 } else if (us_iscodeG2(c)) { /* hankaku katakana */
738 for (; k < z; k++, clen++)
739 if (clen >= RK_KEY_WMAX || !us_iscodeG2(*k)) {
740 doflush++;
741 break;
742 };
743 hinshi = cx->gram->P_T30;
744 } else {
745 doflush++;
746 hinshi = cx->gram->P_T35;
747 }
748 if ((ys <= clen && clen <= ye) || gobeyond) {
749 if (class == ND_MWD || punct) {
750 if (!doflush && !gobeyond)
751 cx->poss_cont++;
752 if (literal != -1) {
753 if (doflush)
754 w= setLit(cx, w, maxword, hinshi, key, clen, literal);
755 } else if (w < word + maxword) {
756 if (doflush) {
757 setWord(w++, hinshi, 0, key, clen, (Wrec *)0,
758 clen, cx->gram->P_BB);
759 if (punct)
760 w[-1].nw_class = punct;
761 };
762 }
763 }
764 }
765 return w;
766 }
767
768 static int
determinate(y1,y2,l)769 determinate(y1, y2, l)
770 Wrec *y1, *y2;
771 int l;
772 {
773 if ((int)*y1 > l)
774 return(0);
775 for (l = *y1, y1 += 2; l; l--) {
776 Wchar *wy = (Wchar *) y2;
777 Wrec c1 = (Wrec) ((*wy & 0xff00) >> 8);
778 Wrec c2 = (Wrec) (*wy & 0xff);
779
780 y2 += 2;
781 if (*y1++ != c1 || *y1++ != c2) {
782 return(0);
783 }
784 }
785 return(1);
786 }
787
788 static
789 int
positive(y1,y2,l)790 positive(y1, y2, l)
791 Wrec *y1, *y2;
792 int l;
793 {
794 l = (int)*y1 < l ? (int)*y1 : l;
795 for (y1 += 2; l; l--) {
796 if (*y1++ != *y2++ || *y1++ != *y2++) {
797 return(0);
798 }
799 }
800 return(1);
801 }
802
803 static
804 int
positiveRev(y1,y2,l)805 positiveRev(y1, y2, l)
806 Wrec *y1, *y2;
807 int l;
808 {
809 l = (int)*y1 < l ? (int)*y1 : l;
810 for (y1 += 2; l; l--) {
811 Wchar *wy = (Wchar *) y2;
812 Wrec c1 = (Wrec) ((*wy & 0xff00) >> 8);
813 Wrec c2 = (Wrec) (*wy & 0xff);
814
815 y2 += 2;
816 if (*y1++ != c1 || *y1++ != c2) {
817 return(0);
818 }
819 }
820 return(1);
821 }
822
823 static
824 void
cancelNVE(nv,p)825 cancelNVE(nv, p)
826 struct NV *nv;
827 struct NVE *p;
828 {
829 unsigned char *s = p->data;
830
831 nv->csz -= *s * 2 + 2;
832 nv->cnt--;
833 p->right->left = p->left;
834 p->left->right = p->right;
835 (void)free((char *)s);
836 (void)free((char *)p);
837 }
838
839 static
840 struct NVE *
newNVE(nv,y,l,v)841 newNVE(nv, y, l, v)
842 struct NV *nv;
843 Wrec *y;
844 int l;
845 int v;
846 {
847 unsigned short w;
848 struct NVE *p, **q, *r;
849 struct NVE *nve;
850 unsigned char *s;
851
852 nve = (struct NVE *)calloc(1, sizeof(struct NVE));
853 if (nve) {
854 s = (unsigned char *)malloc(l * 2 + 2);
855 if (s) {
856 nve->data = s;
857 *s++ = l;
858 *s++ = v;
859
860 memcpy(s, y, l * 2);
861 nv->csz += l * 2 + 2;
862 nv->cnt++;
863 while ((p = nv->head.right) != &nv->head && nv->csz >= (long)nv->sz) {
864 w = bst2_to_s(p->data + 2);
865 q = nv->buf + w % nv->tsz;
866 while ((r = *q) != (struct NVE *)0) {
867 if (r == p) {
868 *q = r->next;
869 cancelNVE(nv, p);
870 break;
871 } else
872 q = &r->next;
873 }
874 }
875 if (nv->csz >= (long)nv->sz) {
876 nv->csz -= l * 2 + 2;
877 nv->cnt--;
878 (void)free((char *)nve->data);
879 (void)free((char *)nve);
880 return((struct NVE *)0);
881 }
882 } else {
883 (void)free((char *)nve);
884 nve = (struct NVE *)0;
885 }
886 }
887 return(nve);
888 }
889
890 int
_RkRegisterNV(nv,yomi,len,half)891 _RkRegisterNV(nv, yomi, len, half)
892 struct NV *nv;
893 Wrec *yomi;
894 int len;
895 int half;
896 {
897 unsigned short v;
898 struct NVE *p, **q, **r;
899
900 if (nv && nv->tsz && nv->buf) {
901 v = bst2_to_s(yomi);
902 q = r = nv->buf + v % nv->tsz;
903 for (p = *q; p; p = *q) {
904 if (positive(p->data, yomi, len)) {
905 *q = p->next;
906 cancelNVE(nv, p);
907 } else {
908 q = &p->next;
909 }
910 }
911 p = newNVE(nv, yomi, len, half);
912 if (p) {
913 p->next = *r;
914 *r = p;
915 p->left = nv->head.left;
916 p->left->right = p;
917 p->right = &nv->head;
918 nv->head.left = p;
919 }
920 }
921 return(0);
922 }
923
924 #define TAILSIZE 256
925 #define RIGHTSIZE (64 * 16)
926
927 /* parseWord
928 * bunsestu no ki wo seichou saseru.
929 */
930 static int
parseWord(cx,yy,ys,ye,class,xqh,maxclen,doflush,douniq)931 parseWord(cx, yy, ys, ye, class, xqh, maxclen, doflush, douniq)
932 struct RkContext *cx;
933 int yy, ys, ye;
934 int class;
935 struct nword *xqh[]; /* indexed by nw_ylen */
936 int maxclen; /* saishou yomi no nagasa */
937 int doflush;
938 int douniq;
939 {
940 struct RkKxGram *gram = cx->gram->gramdic;
941 int clen;
942 static unsigned classmask[] = { /* ���ˤĤʤ��륯�饹 */
943 (1 << ND_SWD) | (1 << ND_SUC), /* MWD --> SUC | SWD */
944 (1 << ND_SWD), /* SWD --> SWD */
945 (1 << ND_MWD) | (1 << ND_SWD), /* PRE --> MWD | SWD */
946 (1 << ND_SWD), /* SUC --> SWD */
947 (1 << ND_MWD) | (1 << ND_SWD) | (1 << ND_PRE),/* EMP --> MWD | SWD | PRE */
948 };
949 #ifndef USE_MALLOC_FOR_BIG_ARRAY
950 struct nword *tail[TAILSIZE];
951 struct nword right[RIGHTSIZE];
952 #else
953 struct nword **tail, *right;
954 tail = (struct nword **)malloc(sizeof(struct nword *) * TAILSIZE);
955 right = (struct nword *)malloc(sizeof(struct nword) * RIGHTSIZE);
956 if (!tail || !right) {
957 if (tail) (void)free((char *)tail);
958 if (right) (void)free((char *)right);
959 return maxclen;
960 }
961 #endif
962
963 for (clen = 0; (clen <= maxclen && clen < ye); clen++) {
964 int sameLen;
965 int t;
966 struct nword *p, *q, *r;
967 int ys1, ye1;
968
969 /* �ɤߤ�Ĺ�� clen ��ñ��Τ��������� class �ǻ��ꤵ�줿ñ�줬
970 �Ĥʤ����ǽ���������Τ�ꥹ�ȥ��åפ���tail �˵�Ͽ���� */
971 for (p = xqh[clen], sameLen = 0; p; p = p->nw_next) {
972 if (classmask[p->nw_class] & (1<<class)) {
973 /* p �θ��� class ��ñ�줬�Ĥʤ����ǽ�������� */
974 if (sameLen < TAILSIZE) { /* �ޤ� tail �ˤ��������� */
975 tail[sameLen++] = p;
976 }
977 }
978 }
979 if (!sameLen)
980 continue;
981 ys1 = ys - clen; if (ys1 < 0) ys1 = 0;
982 ye1 = ye - clen;
983 r = readWord(cx, yy + clen, ys1, ye1, class,
984 right, RIGHTSIZE - 1, doflush, douniq);
985 if (Is_Word_Make(cx))
986 r = makeWord(cx, yy + clen, ys1, ye1, class,
987 r, RIGHTSIZE -1 - (int)(r - right), doflush, douniq);
988 for (t = 0; t < sameLen; t++) {
989 p = tail[t];
990 for (q = right; q < r; q++)
991 if (Is_Word_Connect(cx) &&
992 (q->nw_class >= ND_OPN ||
993 RkTestGram(gram, p->nw_rowcol, q->nw_rowcol))) {
994 struct nword *pq = concWord(cx, p, q, clen, cx->gram->P_BB);
995 if (pq) {
996 struct nword *s;
997 if (gram && !IsShuutan(gram, pq->nw_rowcol)) {
998 #ifdef BUNMATU
999 /* ʸ�����ˤ����ʤ�ʤ� */
1000 if (IsBunmatu(gram, pq->nw_rowcol)) {
1001 /* �������ʤɤξ��ȡ��ɤߤ�Ԥ����Ƥ�����ˤ�
1002 ʸ�������������� */
1003 if (q->nw_class >= ND_OPN ||
1004 (doflush && yy + pq->nw_ylen == cx->store->nyomi))
1005 pq->nw_flags &= ~NW_BUNMATU;
1006 else
1007 pq->nw_flags |= NW_BUNMATU;
1008 } else
1009 #endif
1010 DontSplitWord(pq);
1011 }
1012 if ((unsigned long)maxclen < (unsigned long)pq->nw_ylen) {
1013 while (++maxclen < (int)pq->nw_ylen)
1014 xqh[maxclen] = (struct nword *)0;
1015 xqh[maxclen] = pq;
1016 }
1017 else {
1018 s = xqh[pq->nw_ylen];
1019 if (s) {
1020 while (s->nw_next)
1021 s = s->nw_next;
1022 s->nw_next = pq;
1023 }
1024 else
1025 xqh[pq->nw_ylen] = pq;
1026 }
1027 pq->nw_next = (struct nword *)0;
1028 }
1029 }
1030 }
1031 for (q = right; q < r; q++)
1032 if (q->nw_cache)
1033 _RkDerefCache(q->nw_cache);
1034 if (!gram)
1035 goto done;
1036 }
1037 done:
1038 #ifdef USE_MALLOC_FOR_BIG_ARRAY
1039 (void)free((char *)tail);
1040 (void)free((char *)right);
1041 #endif
1042 return maxclen;
1043 }
1044
1045 /*ARGSUSED*/
1046 static int
doParse(cx,yy,ys,ye,xqh,maxclen,doflush,douniq)1047 doParse(cx, yy, ys, ye, xqh, maxclen, doflush, douniq)
1048 struct RkContext *cx;
1049 int yy, ys, ye;
1050 struct nword *xqh[];
1051 int maxclen;
1052 int doflush;
1053 int douniq;
1054 {
1055 maxclen = parseWord(cx, yy, ys, ye, ND_PRE, xqh, maxclen, doflush, douniq);
1056 maxclen = parseWord(cx, yy, ys, ye, ND_MWD, xqh, maxclen, doflush, douniq);
1057 maxclen = parseWord(cx, yy, ys, ye, ND_SUC, xqh, maxclen, doflush, douniq);
1058 maxclen = parseWord(cx, yy, ys, ye, ND_SWD, xqh, maxclen, doflush, douniq);
1059 return maxclen;
1060 }
1061
1062 /* getKanji
1063 * get kanji in reverse order
1064 */
1065 Wchar *
_RkGetKanji(cw,key,mode)1066 _RkGetKanji(cw, key, mode)
1067 unsigned long mode;
1068 struct nword *cw;
1069 Wchar *key;
1070 {
1071 Wrec *str;
1072 static Wchar tmp[RK_LEN_WMAX+1]; /* static! */
1073 Wchar *p = tmp;
1074 int klen, ylen;
1075 struct nword *lw = cw->nw_left;
1076
1077 klen = cw->nw_klen - lw->nw_klen;
1078 ylen = cw->nw_ylen - lw->nw_ylen;
1079 /* nw_cache --> nw_kanji !nw_lit */
1080 /* !nw_cache --> !nw_kanji nw_lit */
1081
1082 if (cw->nw_cache) {
1083 if ((*(cw->nw_kanji) >> 1) & 0x7f) {
1084 str = cw->nw_kanji + NW_PREFIX;
1085 for (; klen-- ; str += 2)
1086 *p++ = S2TOS(str);
1087 return tmp;
1088 } else
1089 return key;
1090 } else if (cw->nw_kanji) {
1091 _Rkpanic("_RkGetKanji\n", 0, 0, 0);
1092 str = cw->nw_kanji + NW_PREFIX;
1093 for (; klen-- ; str += 2)
1094 *p++ = S2TOS(str);
1095 return tmp;
1096 } else if (cw->nw_lit) {
1097 if (cvtLit(tmp, klen + 1, key, ylen, cw->nw_lit, mode) > 0)
1098 return tmp;
1099 else
1100 return key;
1101 } else
1102 return key;
1103 }
1104
1105 static
1106 int
getKanji(w,key,d,mode)1107 getKanji(w, key, d, mode)
1108 struct nword *w;
1109 Wchar *key;
1110 Wchar *d;
1111 unsigned long mode;
1112 {
1113 struct nword *cw, *lw;
1114 int hash, klen;
1115
1116 hash = 0;
1117 for (cw = w; cw; cw = lw) {
1118 Wchar *s, *t;
1119
1120 if (!(lw = cw->nw_left))
1121 continue;
1122 klen = (cw->nw_klen - lw->nw_klen);
1123 s = _RkGetKanji(cw, key + lw->nw_ylen, mode);
1124 t = s + klen;
1125 /* copy */
1126 while (s < t) {
1127 *d++ = *--t;
1128 hash += *t;
1129 }
1130 }
1131 return hash;
1132 }
1133
1134 #define HEAPSIZE 512
1135
1136 /* uniqWord
1137 * unique word list
1138 */
1139 static void
uniqWord(key,words,ylen,mode)1140 uniqWord(key, words, ylen, mode)
1141 Wchar *key;
1142 struct nword *words;
1143 unsigned ylen;
1144 unsigned long mode;
1145 {
1146 struct nword *p;
1147 long hp = 0;
1148 long uniq[16];
1149 #ifndef USE_MALLOC_FOR_BIG_ARRAY
1150 long heap[HEAPSIZE];
1151 #else
1152 long *heap = (long *)malloc(sizeof(long) * HEAPSIZE);
1153 if (!heap) {
1154 return;
1155 }
1156 #endif
1157
1158 if (!(!key || ylen <= 0)) {
1159 /* clear hash table */
1160 uniq[ 0] = uniq[ 1] = uniq[ 2] = uniq[ 3] =
1161 uniq[ 4] = uniq[ 5] = uniq[ 6] = uniq[ 7] =
1162 uniq[ 8] = uniq[ 9] = uniq[10] = uniq[11] =
1163 uniq[12] = uniq[13] = uniq[14] = uniq[15] = -1;
1164 for (p = words; p; p = p->nw_next) {
1165 if (CanSplitWord(p) && p->nw_ylen == ylen) {
1166 int wsize;
1167 /* compute word size */
1168 wsize = (2*p->nw_klen + sizeof(long)-1)/sizeof(long);
1169 if (hp + 1 + wsize < HEAPSIZE) {
1170 long hno, h;
1171 /* put kanji string without EOS */
1172 heap[hp + wsize] = 0;
1173 hno = getKanji(p, key, (Wchar *)&heap[hp + 1], mode)&15;
1174 /* search on the hash list */
1175 for (h = uniq[hno]; h >= 0; h = heap[h&0xffff])
1176 if ((h >> 16) == p->nw_klen) { /* same length */
1177 long *p1 = &heap[(h&0xffff) + 1];
1178 long *p2 = &heap[hp + 1];
1179 int i;
1180 /* compare by word */
1181 switch(wsize) {
1182 case 3: if (*p1++ != *p2++) goto next;
1183 case 2: if (*p1++ != *p2++) goto next;
1184 case 1: if (*p1++ != *p2++) goto next;
1185 case 0: break;
1186 default:
1187 for (i = wsize; i--;)
1188 if (*p1++ != *p2++) goto next;
1189 break;
1190 }
1191 /* match */
1192 DontSplitWord(p);
1193 goto done;
1194 next:
1195 continue;
1196 }
1197 /* enter new entry */
1198 heap[hp + 0] = uniq[hno];
1199 uniq[hno] = (((unsigned long) (p->nw_klen))<<16)|hp;
1200 hp += 1 + wsize;
1201 }
1202 done:
1203 continue;
1204 }
1205 }
1206 }
1207 #ifdef USE_MALLOC_FOR_BIG_ARRAY
1208 (void)free((char *)heap);
1209 #endif
1210 }
1211
1212 /* sortWord
1213 * word list wo sort suru
1214 */
1215 struct compRec {
1216 struct nword *word;
1217 long prio;
1218 };
1219
1220 static compword pro((const struct compRec *, const struct compRec *));
1221
1222 static
1223 int
compword(x,y)1224 compword(x, y)
1225 const struct compRec *x, *y;
1226 {
1227 int lowdiff = (int)((unsigned char)y->word->nw_flags & NW_LOWPRI)
1228 - (int)((unsigned char)x->word->nw_flags & NW_LOWPRI);
1229 long d = ((long) y->word->nw_prio) - ((long) (x->word->nw_prio));
1230
1231 if (lowdiff > 0) return(-1);
1232 else if (lowdiff < 0) return(1);
1233 if (d > 0) return(1);
1234 else if(d < 0) return(-1);
1235 else {
1236 long dd = x->prio - y->prio;
1237
1238 if (dd > 0) return(1);
1239 else if (dd < 0) return(-1);
1240 else return(0);
1241 }
1242 }
1243
1244 static
1245 struct nword *
sortWord(words)1246 sortWord(words)
1247 struct nword *words;
1248 {
1249 unsigned long nwords, pos, neg;
1250 long i, p, n;
1251 struct compRec *wptr;
1252 struct nword *w;
1253 /* count number of words */
1254 pos = neg = 0L;
1255 for (w = words; w; w = w->nw_next)
1256 if (w->nw_prio > 0)
1257 pos++;
1258 else
1259 neg++;
1260 nwords = pos + neg;
1261 if (nwords <= 0)
1262 return words;
1263 /* sort word list using work space if possible */
1264 wptr = (struct compRec *)malloc(sizeof(struct compRec)*nwords);
1265 if (wptr) {
1266 p = 0L;
1267 n = pos;
1268 /* store pointers */
1269 for (w = words; w; w = w->nw_next)
1270 if (w->nw_prio > 0) { /* positive list */
1271 wptr[p].word = w;
1272 wptr[p].prio = p;
1273 p++;
1274 } else { /* negative list && null word */
1275 wptr[n].word = w;
1276 n++;
1277 }
1278 /* positive list no sakusei */
1279 if (pos > 1)
1280 (void)qsort((char *)wptr, (int)pos, sizeof(struct compRec),
1281 (int (*) pro((const void *, const void *)))compword);
1282 for (i = 1; i < (int)nwords; i++)
1283 wptr[i - 1].word->nw_next = wptr[i].word;
1284 words = wptr[0].word;
1285 (void)free((char *)wptr);
1286 }
1287 return words;
1288 }
1289
1290 static
1291 struct nword *
height2list(height,maxclen)1292 height2list(height, maxclen)
1293 struct nword *height[];
1294 int maxclen;
1295 {
1296 int i;
1297 struct nword *e, *p, *head, *tail;
1298
1299 e = height[0];
1300 tail = (struct nword *)0;
1301 for (i = 1; i <= maxclen; i++)
1302 if (height[i]) {
1303 for (p = height[i] ; p->nw_next ;) {
1304 p = p->nw_next;
1305 }
1306 if (tail)
1307 tail->nw_next = height[i];
1308 else
1309 head = height[i];
1310 tail = p;
1311 }
1312 if (tail)
1313 tail->nw_next = e;
1314 else
1315 head = e;
1316 return head;
1317 }
1318 static
1319 void
list2height(height,maxclen,parse)1320 list2height(height, maxclen, parse)
1321 struct nword *height[];
1322 int maxclen;
1323 struct nword *parse;
1324 {
1325 int i;
1326 struct nword *p, *q;
1327
1328 for (i = 0; i <= maxclen; i++)
1329 height[i] = (struct nword *)0;
1330 for (p = parse; p; p = p->nw_next)
1331 if ((unsigned long)p->nw_ylen <= (unsigned long)maxclen && !height[p->nw_ylen])
1332 height[p->nw_ylen] = p;
1333 for (i = 0; i <= maxclen; i++)
1334 if (height[i]) {
1335 for (p = height[i] ; (q = p->nw_next) != (struct nword *)0; p = q) {
1336 if (q->nw_ylen != i) {
1337 p->nw_next = (struct nword *)0;
1338 break;
1339 }
1340 }
1341 }
1342 }
1343
1344 /* parseBun
1345 * key yori hajimaru bunsetsu wo kaiseki suru
1346 */
1347 static
1348 struct nword *
parseBun(cx,yy,ys,ye,doflush,douniq,maxclen)1349 parseBun(cx, yy, ys, ye, doflush, douniq, maxclen)
1350 struct RkContext *cx;
1351 int yy, ys, ye; /* kaiseki seiyaku */
1352 int doflush;
1353 int douniq; /* unique shori sitei */
1354 int *maxclen; /* bunsetu saidai moji suu */
1355 {
1356 struct nstore *st = cx->store;
1357 struct nword **xqh = st->xqh;
1358
1359 #ifdef TEST
1360 printf("parseBun[yy = %d, ys = %d, ye = %d]\n", yy, ys, ye);
1361 #endif
1362
1363 xqh[0] = allocWord(st, cx->gram->P_BB);
1364 if (xqh[0]) {
1365 *maxclen = doParse(cx, yy, ys, ye, xqh, 0, doflush, douniq);
1366 return height2list(xqh, *maxclen);
1367 } else { /* kaiseki funou */
1368 *maxclen = 0;
1369 return (struct nword *)0;
1370 }
1371 }
1372
#ifdef BUNMATU
/*
 * modifyPrio
 *	Add 0x2000 << 4 to the priority of every word that has a positive
 *	priority and for which IsBunmatu() is false.  Returns words.
 */
static
struct nword *
modifyPrio(cx, words)
struct RkContext *cx;
struct nword *words;
{
  struct RkKxGram *grammar = cx->gram->gramdic;
  struct nword *cur = words;

  while (cur) {
    if (cur->nw_prio > 0 && !IsBunmatu(grammar, cur->nw_rowcol))
      cur->nw_prio += 0x2000 << 4;
    cur = cur->nw_next;
  }
  return words;
}
#endif
1389
1390 static
1391 void
storeBun(cx,yy,ys,ye,bun)1392 storeBun(cx, yy, ys, ye, bun)
1393 struct RkContext *cx;
1394 int yy, ys, ye;
1395 struct nbun *bun;
1396 {
1397 struct nword *full;
1398 struct nword *w;
1399 int maxclen;
1400
1401 #ifdef BUNMATU
1402 full = sortWord(modifyPrio(cx, parseBun(cx, yy, ys, ye, 1, 0, &maxclen)));
1403 #else
1404 full = sortWord(parseBun(cx, yy, ys, ye, 1, 0, &maxclen));
1405 #endif
1406 bun->nb_cand = full;
1407 bun->nb_yoff = yy;
1408 /* kouho wo unique ni suru */
1409 uniqWord(cx->store->yomi + yy, full, bun->nb_curlen, cx->concmode);
1410 bun->nb_curcand = (unsigned short)0;
1411 bun->nb_maxcand = (unsigned short)0;
1412 for (w = full; w; w = w->nw_next) {
1413 if (CanSplitWord(w) && w->nw_ylen == bun->nb_curlen)
1414 bun->nb_maxcand++;
1415 }
1416 }
1417
/*
 * SPLIT
 */

/* splitParm
 *	result of evalSplit for the words that may follow a split point
 */
struct splitParm {
  unsigned long u2;     /* highest nw_prio accepted by evalSplit */
  int l2;               /* longest nw_ylen accepted by evalSplit */
};
1425
1426 #ifdef LOGIC_HACK
1427 static
1428 void
evalSplit(cx,suc,ul)1429 evalSplit(cx, suc, ul)
1430 struct RkContext *cx;
1431 struct nword *suc;
1432 struct splitParm *ul;
1433 {
1434 struct nword *p;
1435 unsigned l2;
1436 unsigned long u2;
1437
1438 l2 = 0;
1439 u2 = 0L;
1440 for (p = suc; p; p = p->nw_next)
1441 {
1442 if (!CanSplitWord(p) || /* ʸ��ˤʤ�ʤ� */
1443 OnlyBunmatu(p) || /* ��ƥ���ľ���Ǥ���ʸ��ˤʤ�ʤ� */
1444 (p->nw_rowcol == cx->gram->P_KJ) || /* �� */
1445 (p->nw_flags & NW_LOWPRI) || /* ͥ���٤��㤤ʸ�� */
1446 (p->nw_flags & NW_SUC))
1447 continue;
1448 if (l2 <= p->nw_ylen) {
1449 l2 = p->nw_ylen;
1450 /* �ɤߤ���ʸ����ñ���ͥ���٤Ϲ�θ���ʤ� */
1451 if (u2 < p->nw_prio && p->nw_ylen > 1)
1452 u2 = p->nw_prio;
1453 }
1454 }
1455 ul->l2 = l2;
1456 ul->u2 = u2;
1457 }
1458 #else /* LOGIC_HACK */
1459 static
1460 void
evalSplit(cx,suc,ul)1461 evalSplit(cx, suc, ul)
1462 struct RkContext *cx;
1463 struct nword *suc;
1464 struct splitParm *ul;
1465 {
1466 struct nword *p;
1467 int l2;
1468 unsigned long u2;
1469
1470 l2 = 0;
1471 u2 = 0L;
1472 for (p = suc; p; p = p->nw_next)
1473 {
1474 if (!CanSplitWord(p) || (p->nw_flags & NW_SUC))
1475 continue;
1476 if ((unsigned long)l2 < (unsigned long)p->nw_ylen)
1477 l2 = p->nw_ylen;
1478 if (u2 < p->nw_prio)
1479 u2 = p->nw_prio;
1480 };
1481 ul->l2 = l2;
1482 ul->u2 = u2;
1483 }
1484 #endif /* LOGIC_HACK */
1485
1486 #define PARMSIZE 256
1487
1488 static
1489 int
calcSplit(cx,yy,top,xq,maxclen,flush)1490 calcSplit(cx, yy, top, xq, maxclen, flush)
1491 struct RkContext *cx;
1492 int yy;
1493 struct nword *top;
1494 struct nqueue xq[]; /* indexed by nw_ylen */
1495 int maxclen;
1496 int flush;
1497 {
1498 #ifdef LOGIC_HACK
1499 int L, L1 = 0, L2;
1500 unsigned long U;
1501 #else
1502 unsigned L, L1 = 0, L2;
1503 unsigned U2;
1504 #endif
1505 struct nword *w;
1506 int i;
1507 int maxary = PARMSIZE - 1;
1508 struct nstore *st = cx->store;
1509 struct NVE *p, **r;
1510 #ifndef USE_MALLOC_FOR_BIG_ARRAY
1511 struct splitParm ul2[PARMSIZE];
1512 #else
1513 struct splitParm *ul2 = (struct splitParm *)
1514 malloc(sizeof(struct splitParm) * PARMSIZE);
1515 if (!ul2) {
1516 return L1;
1517 }
1518 #endif
1519
1520 L2 = st->nyomi - yy;
1521 if (cx->nv && cx->nv->tsz && cx->nv->buf) {
1522 r = cx->nv->buf + *(st->yomi + yy) % cx->nv->tsz;
1523 for (p = *r; p; p = p->next) {
1524 if (determinate(p->data, (Wrec *)(st->yomi + yy), (int)L2)) {
1525 if (*(p->data+1) > L1)
1526 L1 = *(p->data + 1);
1527 }
1528 }
1529 }
1530 if (L1 == 0) {
1531 L = (L1 = 1)+ (L2 = 0);
1532 #ifdef LOGIC_HACK
1533 U = 0L;
1534 #else
1535 U2 = (unsigned)0;
1536 #endif
1537 if (maxary > maxclen)
1538 maxary = maxclen;
1539 for (i = 0; i <= maxary; i++)
1540 ul2[i].l2 = ul2[i].u2 = 0L;
1541 for (w = top; w; w = w->nw_next) {
1542 int l, l1;
1543 #ifdef LOGIC_HACK
1544 unsigned long u;
1545 #endif
1546 struct splitParm ul;
1547 /* ʸ��ˤʤ�ʤ� */
1548 if (!CanSplitWord(w)) {
1549 continue;
1550 }
1551 #ifdef LOGIC_HACK
1552 /* ͥ���٤��㤤ʸ��θ�Ǥ��ڤ�ʤ� */
1553 if (w->nw_flags & NW_LOWPRI) {
1554 DontSplitWord(w);
1555 continue;
1556 }
1557 #endif
1558 if ((w->nw_flags & NW_PRE) && (w->nw_flags & NW_SUC)) {
1559 continue;
1560 }
1561 /* �ɤߤ���Ƥ��ʤ� */
1562 l1 = w->nw_ylen;
1563 if (l1 <= 0) {
1564 continue;
1565 }
1566 /* ��ʸ��ˤ���Τ���Ĺ */
1567 if (flush && (unsigned)yy + w->nw_ylen == cx->store->nyomi) {
1568 L1 = l1;
1569 break;
1570 }
1571 #ifdef BUNMATU
1572 /* ³��ʸ���ƥ��Ǥʤ��ʤ�ʸ�����ʻ��ʸ������ˤʤ�ʤ� */
1573 else if (OnlyBunmatu(w) && xq[l1].tree->nw_lit == 0) {
1574 DontSplitWord(w);
1575 continue;
1576 }
1577 #endif
1578 #ifdef LOGIC_HACK
1579 /* ñ������ʸ��������о줷�ʤ� */
1580 if (w->nw_rowcol == cx->gram->P_KJ) {
1581 DontSplitWord(w);
1582 continue;
1583 }
1584 #endif
1585 /* ���٤�ʸ������ */
1586 if (l1 <= maxary) {
1587 if (!ul2[l1].l2)
1588 evalSplit(cx, xq[l1].tree, &ul2[l1]);
1589 ul = ul2[l1];
1590 }
1591 else {
1592 evalSplit(cx, xq[l1].tree, &ul);
1593 }
1594 /* hikaku */
1595 l = l1 + ul.l2;
1596 #ifdef LOGIC_HACK
1597 u = w->nw_prio + ul.u2;
1598 if ((L < l) || /* ��ʸ���Ĺ */
1599 ((L == l) &&
1600 (U < u || /* ͥ���٤ι�� */
1601 (U == u && (L2 < ul.l2))))) { /* ��ʸ���ܤ�Ĺ�� */
1602 L = l;
1603 U = u;
1604 L1 = l1;
1605 L2 = ul.l2;
1606 }
1607 #else
1608 if ((((int)L < l)) ||
1609 (((int)L == l) && (U2 < ul.u2)) ||
1610 (((int)L == l) && (U2 == ul.u2) && ((int)L2 < ul.l2))
1611 ) {
1612 L = l;
1613 L1 = l1;
1614 L2 = ul.l2;
1615 U2 = ul.u2;
1616 }
1617 #endif
1618 }
1619 }
1620 #ifdef USE_MALLOC_FOR_BIG_ARRAY
1621 (void)free((char *)ul2);
1622 #endif
1623 return L1;
1624 }
1625
1626 static
1627 int
splitBun(cx,yy,ys,ye)1628 splitBun(cx, yy, ys, ye)
1629 struct RkContext *cx;
1630 int yy, ys, ye;
1631 {
1632 struct nstore *st = cx->store;
1633 struct nqueue *xq = st->xq;
1634 struct nword *w;
1635 int maxclen;
1636 int i, count, junk;
1637
1638 /* create the initial bun-tree table */
1639 xq[0].tree = parseBun(cx, yy, ys, ye, 1, 1, &maxclen);
1640
1641 #ifdef TEST
1642 {
1643 printf("show splitBun [yy = %d, ys = %d, ye = %d, clen = %d]\n",
1644 yy, ys, ye, maxclen);
1645 #if 1
1646 showWord(xq[0].tree);
1647 #endif
1648 }
1649 #endif
1650
1651 for (i = 1; i <= maxclen; i++)
1652 clearQue(&xq[i]);
1653 /* create the following buns from every possible position */
1654 for (w = xq[0].tree; w; w = w->nw_next) {
1655 if (CanSplitWord(w) && !xq[w->nw_ylen].tree) {
1656 int len = w->nw_ylen;
1657 int ys1 = (ys >= len) ? (ys - len) : 0;
1658 int ye1 = (ye - len);
1659
1660 xq[w->nw_ylen].tree = parseBun(cx, yy+len, ys1, ye1, 1, 1, &junk);
1661 };
1662 };
1663
1664 /* compute the proper bunsetu length */
1665 count = calcSplit(cx, yy, xq[0].tree, xq, maxclen, 1);
1666 _RkFreeQue(st, 0, st->maxxq + 1);
1667
1668 #ifdef TEST
1669 printf("End SplitBun\n");
1670 #endif
1671
1672 return count;
1673 }
1674
1675 /* parseQue
1676 * queue jou de bunsetu wo kaiseki suru.
1677 */
1678
1679 static void parseQue pro((struct RkContext *, int, int, int, int, int));
1680
1681 static void
parseQue(cx,maxq,yy,ys,ye,doflush)1682 parseQue(cx, maxq, yy, ys, ye, doflush)
1683 struct RkContext *cx;
1684 int maxq;
1685 int yy, ys, ye;
1686 int doflush;
1687 {
1688 struct nstore *st = cx->store;
1689 struct nqueue *xq = st->xq;
1690 struct nword **xqh = st->xqh;
1691 int i, j;
1692
1693 /* put a new seed to start an analysis. */
1694 if (!xq[0].tree) {
1695 xq[0].tree = allocWord(st, cx->gram->P_BB);
1696 xq[0].maxlen = 0;
1697 xq[0].status = 0;
1698 }
1699 /* try to extend each tree in the queue. */
1700 for (i = 0; i <= maxq; i++) {
1701 if (xq[i].tree) {
1702 int old = cx->poss_cont;
1703 list2height(xqh, xq[i].maxlen, xq[i].tree);
1704 xq[i].maxlen = doParse(cx, yy, ys, ye, xqh, xq[i].maxlen, doflush, 1);
1705 /* set up new analysis points */
1706 for (j = 0; j <= xq[i].maxlen; j++)
1707 if (xqh[j] && !xq[i+j].tree) {
1708 xq[i+j].tree = allocWord(st, cx->gram->P_BB);
1709 xq[i+j].maxlen = 0;
1710 xq[i+j].status = 0;
1711 xq[i+j].status = 0x80;
1712 }
1713 xq[i].tree = height2list(xqh, xq[i].maxlen);
1714 if (cx->poss_cont != old)
1715 xq[i].status |= 0x80;
1716 else
1717 xq[i].status &= ~0x80;
1718 }
1719 ++yy;
1720 if (--ys < 0) ys = 0;
1721 --ye;
1722 }
1723 }
1724
/* Que2Bun
 *	take bunsetsu out of the queue.
 */
1728 static
1729 int
IsStableQue(cx,c,doflush)1730 IsStableQue(cx, c, doflush)
1731 struct RkContext *cx;
1732 int c;
1733 int doflush;
1734 {
1735 struct nqueue *xq = cx->store->xq;
1736 struct nword *w;
1737
1738 if (doflush)
1739 {
1740 if (xq[c].maxlen <= 0)
1741 return 0;
1742 else
1743 return 1;
1744 };
1745 if (xq[c].maxlen <= 0)
1746 return(!c ? 0 : 1);
1747
1748 for (w = xq[c].tree; w; w = w->nw_next)
1749 {
1750 if (xq[c + w->nw_ylen].status)
1751 return 0;
1752 if (!c && w->nw_ylen && !IsStableQue(cx, c + w->nw_ylen, doflush))
1753 return 0;
1754 };
1755 return 1;
1756 }
1757
1758 static
1759 int
Que2Bun(cx,yy,ys,ye,doflush)1760 Que2Bun(cx, yy, ys, ye, doflush)
1761 struct RkContext *cx;
1762 int yy, ys, ye;
1763 int doflush;
1764 {
1765 struct nstore *st = cx->store;
1766 struct nqueue *xq = st->xq;
1767 unsigned i;
1768 struct NVE *p, **r;
1769
1770 if (doflush)
1771 for (i = 0; (int)i <= st->maxxq; i++)
1772 xq[i].status = 0;
1773 while (IsStableQue(cx, 0, doflush)) {
1774 struct nbun *bun = &st->bunq[st->maxbun];
1775 int count;
1776
1777 i = 0;
1778 if (!doflush) {
1779 if (cx->nv && cx->nv->tsz && cx->nv->buf) {
1780 r = cx->nv->buf + *(st->yomi + yy) % cx->nv->tsz;
1781 for (p = *r; p; p = p->next) {
1782 if (positiveRev(p->data, (Wrec *)(st->yomi + yy), st->nyomi - yy)) {
1783 if (*(p->data + 1) > i)
1784 i = *(p->data + 1);
1785 }
1786 }
1787 }
1788 if (i > st->nyomi - yy)
1789 break;
1790 }
1791 if ((count = calcSplit(cx, yy, xq[0].tree, xq, xq[0].maxlen, 1)) > 0) {
1792 /* shift queue to left */
1793 _RkFreeQue(st, 0, count);
1794 for (i = count; (int)i <= st->maxxq; i++) {
1795 xq[i-count] = xq[i];
1796 clearQue(&xq[i]);
1797 };
1798 bun->nb_curlen = count;
1799 storeBun(cx, (int)bun->nb_yoff, 0, ye, bun);
1800 st->maxbun++;
1801 st->bunq[st->maxbun].nb_yoff = yy + bun->nb_curlen;
1802 }
1803 yy = yy + bun->nb_curlen;
1804 ys = ys - bun->nb_curlen;
1805 ye = ye - bun->nb_curlen;
1806 }
1807 return st->maxbun;
1808 }
1809
1810 /* _RkRenbun2
1811 * current bunsetsu kara migi wo saihenkan suru
1812 */
1813 int
_RkRenbun2(cx,firstlen)1814 _RkRenbun2(cx, firstlen)
1815 struct RkContext *cx;
1816 int firstlen; /* bunsetsu chou sitei(ow 0) */
1817 {
1818 struct nstore *st = cx->store;
1819 struct nbun *bun = &st->bunq[st->curbun];
1820 int count;
1821 int yy, ys, ye; /* yomi kensaku hani */
1822 int oldcurbun = st->curbun;
1823 int uyomi;
1824 int i;
1825
1826 yy = bun->nb_yoff;
1827 ys = 0;
1828 ye = st->nyomi - bun->nb_yoff;
1829 /* release queue */
1830 uyomi = st->nyomi - st->bunq[st->maxbun].nb_yoff;
1831 if (IS_XAUTCTX(cx)) {
1832 if (uyomi >= 0)
1833 _RkFreeQue(st, 0, uyomi+1);
1834 };
1835 /*
1836 *
1837 */
1838 for (count = 0; ye > 0; count++)
1839 {
1840 /* sudeni kaiseki zumi deareba, sono kekka wo mochiiru */
1841 if (count && !uyomi)
1842 {
1843 int b, c;
1844 for (b = st->curbun; b < (int)st->maxbun; b++)
1845 if (st->bunq[b].nb_yoff == yy) {
1846 /* dispose inbetween bun-trees */
1847 for (c = st->curbun; c < b; c++) {
1848 freeWord(st, st->bunq[c].nb_cand);
1849 st->bunq[c].nb_cand = (struct nword *)0;
1850 }
1851 /* shift bunq forward */
1852 while (b < (int)st->maxbun)
1853 st->bunq[st->curbun++] = st->bunq[b++];
1854 goto exit;
1855 }
1856 }
1857 /* dispose the current bun-tree */
1858 if (st->curbun < (int)st->maxbun) {
1859 freeWord(st, bun->nb_cand);
1860 bun->nb_cand = (struct nword *)0;
1861 }
1862 /* compute the length of bun */
1863 if (st->curbun >= (int)st->maxbunq) /* too many buns */
1864 bun->nb_curlen = ye;
1865 else {
1866 if (firstlen) { /* length specified */
1867 bun->nb_curlen = firstlen;
1868 firstlen = 0;
1869 } else {
1870 /* destroy */
1871 bun->nb_curlen = splitBun(cx, yy, ys, ye);
1872 if (!bun->nb_curlen) /* fail to split */
1873 bun->nb_curlen = ye;
1874 }
1875 }
1876 /* set up bun (xqh is destroyed */
1877 storeBun(cx, yy, ys, ye, bun);
1878 #if defined(TEST) && 0
1879 showWord(bun->nb_cand);
1880 #endif
1881 yy += bun->nb_curlen;
1882 if ((ys -= (int)bun->nb_curlen) < 0)
1883 ys = 0;
1884 ye -= bun->nb_curlen;
1885 bun++;
1886 st->curbun++;
1887 }
1888 /* free the remaining bun-trees */
1889 while ((int)st->maxbun > st->curbun) {
1890 freeWord(st, st->bunq[--st->maxbun].nb_cand);
1891 st->bunq[st->maxbun].nb_cand = (struct nword *)0;
1892 }
1893 /* do final settings */
1894 exit:
1895 st->maxbun = st->curbun;
1896 st->curbun = oldcurbun;
1897 st->bunq[st->maxbun].nb_yoff = 0;
1898 /* i hate this fake, ... */
1899 for (i = 0; i < (int)st->maxbun; i++)
1900 st->bunq[st->maxbun].nb_yoff += st->bunq[i].nb_curlen;
1901 /* this case will never happen */
1902 if (0 != (st->nyomi - st->bunq[st->maxbun].nb_yoff))
1903 _Rkpanic("Renbun2: uyomi destroyed %d %d\n",
1904 st->nyomi, st->bunq[st->maxbun].nb_yoff, 0);
1905 bun = &st->bunq[st->maxbun];
1906 if (IS_XAUTCTX(cx) && uyomi > 0)
1907 {
1908 _RkSubstYomi(cx, 0, uyomi, st->yomi + bun->nb_yoff, uyomi);
1909 st->curbun = oldcurbun;
1910 };
1911 return st->maxbun;
1912 }
1913
1914 /* RkSubstYomi
1915 */
1916 int
_RkSubstYomi(cx,ys,ye,yomi,newLen)1917 _RkSubstYomi(cx, ys, ye, yomi, newLen)
1918 struct RkContext *cx;
1919 int ys;
1920 int ye;
1921 Wchar *yomi;
1922 int newLen;
1923 {
1924 struct nstore *st = cx->store;
1925 extern struct nstore *_RkReallocBunStorage();
1926 struct nbun *bun;
1927 struct nqueue *xq;
1928 struct nword **xqh;
1929 int i, j;
1930 int count;
1931 int yf;
1932 int cs, ce, cf;
1933 Wchar *d, *s, *be;
1934 int nbun;
1935 int new_size;
1936
1937 yf = ys + newLen;
1938 cs = ys;
1939 ce = ye;
1940 /*
1941 * STEP 0: reallocate resources if needed
1942 * youmigana buffer should be reallocated as well.
1943 */
1944 new_size = st->nyomi + (newLen - (ye - ys));
1945 if (new_size > (int)st->maxyomi || new_size > (int)st->maxbunq ||
1946 new_size > (int)st->maxxq)
1947 {
1948 st = _RkReallocBunStorage(st, (int)(new_size*1.2+10));
1949 if (!st)
1950 return -1;
1951 cx->store = st;
1952 };
1953 /*
1954 * STEP 1: update yomigana buffer
1955 */
1956 /* move unchanged text portion [ye, ...) */
1957 bun = &st->bunq[st->maxbun];
1958 be = st->yomi + bun->nb_yoff;
1959 xq = st->xq;
1960 xqh = st->xqh;
1961 count = (st->nyomi - bun->nb_yoff) - ye;
1962 if (yf < ye) { /* shrunk */
1963 d = be + yf;
1964 s = be + ye;
1965 while (count--) *d++ = *s++;
1966 } else if (ye < yf) { /* enlarged */
1967 d = (s = st->yomi + st->nyomi) + count;
1968 while (count--)
1969 *--d = *--s;
1970 }
1971 /* replace the new text in [ys, yf) */
1972 usncopy(be + ys, yomi, newLen);
1973 st->nyomi += (yf - ye);
1974 cf = yf;
1975 /*
1976 * STEP 2: remove affected words from XQ
1977 */
1978 /* Trim the words which terminate in [cs, ...) */
1979
1980 for (i = 0; i < cs; i++)
1981 if (xq[i].tree && cs - i <= xq[i].maxlen) {
1982 list2height(xqh, xq[i].maxlen, xq[i].tree);
1983 for (j = cs - i; j < xq[i].maxlen; j++)
1984 if (xqh[j + 1]) {
1985 freeWord(st, xqh[j + 1]);
1986 xqh[j + 1] = (struct nword *)0;
1987 }
1988 xq[i].maxlen = 0;
1989 for (j = cs - i ; j >= 0 && !xqh[j] ;) {
1990 j--;
1991 }
1992 if (j > 0)
1993 xq[i].maxlen = j;
1994 else {
1995 xq[i].maxlen = 0;
1996 if (!j) {
1997 freeWord(st, xqh[0]);
1998 xqh[0] = (struct nword *)0;
1999 }
2000 }
2001 xq[i].tree = height2list(xqh, xq[i].maxlen);
2002 xq[i].status = 0;
2003 }
2004 /* Kill the whole trees in [cs, ce) and shift XQ to fill it. */
2005 _RkFreeQue(st, cs, ce);
2006 if (cf < ce)
2007 for (i = cf, j = ce; j <= st->maxxq; i++, j++) {
2008 xq[i] = xq[j];
2009 clearQue(&xq[j]);
2010 }
2011 if (ce < cf)
2012 for (i = st->maxxq, j = st->maxxq - (cf - ce); j >= ce; i--, j--) {
2013 xq[i] = xq[j];
2014 clearQue(&xq[j]);
2015 }
2016 /*
2017 * STEP 3 restore queues by parsing yomigana after ys.
2018 */
2019 nbun = st->maxbun;
2020 count = (st->nyomi - bun->nb_yoff) - ys;
2021 while (count > 0) {
2022 int yy;
2023 yy = st->bunq[st->maxbun].nb_yoff;
2024 ys = st->nyomi - yy - count;
2025 parseQue(cx, cf-1, yy, ys, ys + 1, 0);
2026 nbun = Que2Bun(cx, yy, ys, ys + 1, 0);
2027 ys++;
2028 count--;
2029 }
2030 st->curbun = 0;
2031 return nbun;
2032 }
2033
2034 /* RkFlushYomi
2035 */
2036 int
_RkFlushYomi(cx)2037 _RkFlushYomi(cx)
2038 struct RkContext *cx;
2039 {
2040 int yy = cx->store->bunq[cx->store->maxbun].nb_yoff;
2041 int ys = cx->store->nyomi - yy;
2042 int ret;
2043
2044 parseQue(cx, cx->store->maxxq, yy, ys, ys, 1);
2045 if ((ret = Que2Bun(cx, yy, ys, ys, 1)) != -1)
2046 cx->store->curbun = 0;
2047 return(ret);
2048 }
2049
/* _RkLearnBun
 *	learn from the bunsetsu information,
 *	and then release the words
 */
/*
 * blkcpy
 *	Copy the bytes in [s, e) to d, one byte at a time from the lowest
 *	address upwards.  Nothing is copied when s >= e.
 */
static
void blkcpy(d, s, e)
unsigned char *d;
unsigned char *s, *e;
{
  for (; s < e; s++, d++)
    *d = *s;
}
2059
2060 static
2061 void
doLearn(cx,thisW)2062 doLearn(cx, thisW)
2063 struct RkContext *cx;
2064 struct nword *thisW;
2065 {
2066 struct nword *leftW;
2067 #ifndef USE_MALLOC_FOR_BIG_ARRAY
2068 unsigned char *candidates[RK_CAND_NMAX];
2069 unsigned permutation[RK_CAND_NMAX];
2070 unsigned char tmp[RK_WREC_BMAX];
2071 #else
2072 unsigned char **candidates, *tmp;
2073 unsigned *permutation;
2074 candidates = (unsigned char **)
2075 malloc(sizeof(unsigned char *) * RK_CAND_NMAX);
2076 permutation = (unsigned *)malloc(sizeof(unsigned) * RK_CAND_NMAX);
2077 tmp = (unsigned char *)malloc(RK_WREC_BMAX);
2078 if (!candidates || !permutation || !tmp) {
2079 if (candidates) (void)free((char *)candidates);
2080 if (permutation) (void)free((char *)permutation);
2081 if (tmp) (void)free((char *)tmp);
2082 return;
2083 }
2084 #endif
2085
2086 for (; (leftW = thisW->nw_left) != (struct nword *)0 ; thisW = leftW) {
2087 struct ncache *thisCache = thisW->nw_cache;
2088
2089 if (thisCache) {
2090 struct DM *dm = thisCache->nc_dic;
2091 struct DM *qm = thisW->nw_freq;
2092 unsigned char *wp;
2093 int ncands;
2094 int nl;
2095 unsigned long offset;
2096 int i;
2097 int current;
2098 unsigned long _RkGetOffset();
2099
2100 cx->time = _RkGetTick(1);
2101 if (thisCache->nc_flags & NC_ERROR)
2102 continue;
2103 if (!(wp = thisCache->nc_word))
2104 continue;
2105 ncands = _RkCandNumber(wp);
2106 nl = (*wp >> 1) & 0x3f;
2107 if (qm && qm->dm_qbits)
2108 offset = _RkGetOffset((struct ND *)dm->dm_extdata.var, wp);
2109 else
2110 offset = 0L;
2111 if (*wp & 0x80)
2112 wp += 2;
2113 wp += 2 + nl * 2;
2114 for (i = 0; i < ncands; i++) {
2115 candidates[i] = wp;
2116 wp += 2 * ((*wp >> 1) & 0x7f) + 2;
2117 };
2118 /*
2119 if (thisCache->nc_count)
2120 continue;
2121 */
2122 if (qm && qm->dm_qbits) {
2123 int bits;
2124
2125 if (!(qm->dm_flags & DM_WRITABLE))
2126 continue;
2127 bits = _RkCalcLog2(ncands + 1) + 1;
2128 _RkUnpackBits(permutation, qm->dm_qbits, offset, bits, ncands);
2129 for (current = 0; current < ncands; current++)
2130 if (ncands > (int)permutation[current]/2 &&
2131 candidates[permutation[current]/2] == thisW->nw_kanji)
2132 break;
2133 if (current < ncands) {
2134 entryRut(qm->dm_rut, thisW->nw_csn, cx->time);
2135 if (0 < current) {
2136 _RkCopyBits(tmp, (unsigned long) 0L, bits,
2137 qm->dm_qbits, (unsigned long) offset, current);
2138 _RkCopyBits(qm->dm_qbits, (unsigned long) (offset + 0L), bits,
2139 qm->dm_qbits, (unsigned long) (offset + current*bits),
2140 1);
2141 _RkCopyBits(qm->dm_qbits, (unsigned long) (offset + bits), bits,
2142 tmp, (unsigned long) 0L, current);
2143
2144 };
2145 qm->dm_flags |= DM_UPDATED;
2146 }
2147 } else {
2148 if (!(dm->dm_flags & DM_WRITABLE))
2149 continue;
2150 for (current = 0; current < ncands; current++)
2151 if (candidates[current] == thisW->nw_kanji)
2152 break;
2153 if (DM2TYPE(dm)) {
2154 if (current) {
2155 unsigned char *t = candidates[0];
2156 unsigned char *l = candidates[current];
2157 unsigned char *c = l + 2 * ((*l >> 1) & 0x7f) + 2;
2158
2159 ((struct TW *)thisCache->nc_address)->lucks[1]
2160 = ((struct TW *)thisCache->nc_address)->lucks[0];
2161 blkcpy(tmp, t, l);
2162 blkcpy(t, l, c);
2163 blkcpy(t + (int)(c - l), tmp, tmp + (int)(l - t));
2164 thisCache->nc_flags |= NC_DIRTY;
2165 }
2166 ((struct TW *)thisCache->nc_address)->lucks[0] = cx->time;
2167 dm->dm_flags |= DM_UPDATED;
2168 }
2169 }
2170 }
2171 }
2172 #ifdef USE_MALLOC_FOR_BIG_ARRAY
2173 (void)free((char *)candidates);
2174 (void)free((char *)permutation);
2175 (void)free((char *)tmp);
2176 #endif
2177 }
2178
2179 void
_RkLearnBun(cx,cur,mode)2180 _RkLearnBun(cx, cur, mode)
2181 struct RkContext *cx;
2182 int cur, mode;
2183 {
2184 struct nstore *st = cx->store;
2185 struct nbun *bun = &st->bunq[cur];
2186 struct nword *w;
2187 int count = bun->nb_curcand;
2188 Wchar *yomi = st->yomi + bun->nb_yoff;
2189 int ylen;
2190 int pos;
2191
2192 derefWord(bun->nb_cand);
2193 if (mode) {
2194 if (bun->nb_flags & RK_REARRANGED) {
2195 ylen = bun->nb_curlen
2196 + (cur < (int)st->maxbun - 1 ? (bun + 1)->nb_curlen : 0);
2197 pos = bun->nb_curlen;
2198 if (ylen < 32) {
2199 Wchar *ey = yomi + ylen, *p;
2200 #ifndef USE_MALLOC_FOR_BIG_ARRAY
2201 Wrec yomwrec[32 * sizeof(Wchar)];
2202 Wrec *dp = yomwrec;
2203 #else
2204 Wrec *dp;
2205 Wrec *yomwrec = (Wrec *)malloc(sizeof(Wrec) * 32 * sizeof(Wchar));
2206 if (!yomwrec) {
2207 return;
2208 }
2209 dp = yomwrec;
2210 #endif
2211 for (p = yomi ; p < ey ; p++) {
2212 *dp++ = (unsigned)*p >> 8;
2213 *dp++ = (unsigned)*p & 0x0ff;
2214 }
2215 _RkRegisterNV(cx->nv, yomwrec, ylen, pos);
2216 #ifdef USE_MALLOC_FOR_BIG_ARRAY
2217 (void)free((char *)yomwrec);
2218 #endif
2219 }
2220 }
2221 for (w = bun->nb_cand; w; w = w->nw_next) {
2222 if (CanSplitWord(w) && w->nw_ylen == bun->nb_curlen) {
2223 if (count-- <= 0) {
2224 doLearn(cx, w);
2225 break;
2226 }
2227 }
2228 }
2229 }
2230 killWord(st, bun->nb_cand);
2231 }
2232
2233 /* vim: set sw=2: */
2234