1 /* Copyright 1994 NEC Corporation, Tokyo, Japan.
2  *
3  * Permission to use, copy, modify, distribute and sell this software
4  * and its documentation for any purpose is hereby granted without
5  * fee, provided that the above copyright notice appear in all copies
6  * and that both that copyright notice and this permission notice
7  * appear in supporting documentation, and that the name of NEC
8  * Corporation not be used in advertising or publicity pertaining to
9  * distribution of the software without specific, written prior
10  * permission.  NEC Corporation makes no representations about the
11  * suitability of this software for any purpose.  It is provided "as
12  * is" without express or implied warranty.
13  *
14  * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
15  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
16  * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
17  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
18  * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
19  * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20  * PERFORMANCE OF THIS SOFTWARE.
21  */
22 
/* RCS identification string; not compiled under lint or CodeCenter. */
#if !defined(lint) && !defined(__CODECENTER__)
static char rcsid[] =
  "$Id: nword.c,v 1.5 2003/07/31 19:03:51 aida_s Exp $";
#endif
26 
27 /* LINTLIBRARY */
28 #include	"RKintern.h"
29 
#if defined(DEBUG_NWORD) || defined(RK_DEBUG) || defined(TEST)
#include <stdio.h>
/*
 * Debug aids: category bits for debug_flags.
 */
#define	D_CONC	1
#define	D_PARSE	2
#define	D_SUCC	4
#define	D_SUBST	4	/* NOTE(review): same value as D_SUCC -- confirm intended */
/*
 * Alternative settings kept for reference:
 *	debug_flags = D_CONC|D_PARSE|D_SUCC;
 *	debug_flags = D_PARSE|D_SUCC;
 */
int		debug_flags = 0;
static void	dumpSimpleWordRec(), dumpWordRec(), dumpAllBunq();
static void	dumpXQH(), dumpXQ();

#else
/* Without any of the debug switches rk_debug() expands to nothing. */
#define rk_debug(file, fmt, a, b, c)
#endif
50 
/* Declared without a prototype; the definition is not in this file section. */
extern void	usncopy();
52 
53 static void
clearWord(w,bb)54 clearWord(w, bb)			/* make word empty */
55      struct nword	*w;
56      int bb;
57 {
58   if (w) {
59     w->nw_cache = (struct ncache *)0;
60     w->nw_rowcol = bb; /* ʸ�� */
61     w->nw_klen = w->nw_ylen = 0;
62     w->nw_class = ND_EMP;
63     w->nw_flags = 0;
64     w->nw_lit = 0;
65     w->nw_prio = 0L;
66     w->nw_count = 0;
67     w->nw_left = w->nw_next = (struct nword *)0;
68     w->nw_kanji = (Wrec *)0;
69   }
70 }
71 
72 /*ARGSUSED*/
73 static void
setWord(w,rc,lit,yomi,ylen,kanji,klen,bb)74 setWord(w, rc, lit, yomi, ylen, kanji, klen, bb)
75      struct nword	*w;
76      int		rc;
77      int		lit;
78      Wchar		*yomi;
79      int		ylen;
80      Wrec		*kanji;
81      int		klen;
82      int		bb;
83 {
84   clearWord(w, bb);
85   w->nw_rowcol = rc;
86   w->nw_klen = klen;
87   w->nw_ylen = ylen;
88   w->nw_class = 0;
89   w->nw_flags = 0;
90   w->nw_lit = lit;
91   w->nw_kanji = kanji;
92 }
93 
94 /* allocWord
95  *	allocate a fresh word
96  */
97 /*ARGSUSED*/
98 static
99 struct nword *
allocWord(st,bb)100 allocWord(st, bb)
101      struct nstore *st;
102      int bb;
103 {
104   struct nword 	*new_word;
105 
106   if (!SX.word) {
107     struct nword	*new_page;
108     int			i;
109 #define	NW_PAGESIZE	1024
110     new_page = (struct nword *)malloc(sizeof(struct nword)*NW_PAGESIZE);
111     if (new_page) {
112       SX.page_in_use++;
113       new_page[0].nw_next = SX.page;
114       SX.page = &new_page[0];
115       SX.word = &new_page[1];
116       for (i = 1; i + 1 < NW_PAGESIZE; i++)
117 	new_page[i].nw_next = &new_page[i + 1];
118       new_page[i].nw_next = (struct nword *)0;
119     };
120   };
121   new_word = SX.word;
122   if (new_word) {
123     SX.word = new_word->nw_next;
124     clearWord(new_word, bb);
125     st->word_in_use++;
126     SX.word_in_use++;
127   };
128   return new_word;
129 }
130 
131 static void
derefWord(word)132 derefWord(word)			/* decrease the reference counter */
133      struct nword 	*word;
134 {
135   for (; word; word = word->nw_next)
136     if (word->nw_cache)
137       (void)_RkDerefCache(word->nw_cache);
138 }
139 
140 /*ARGSUSED*/
141 static void
killWord(st,word)142 killWord(st, word)			/* dispose the unsed words */
143 struct nstore	*st;
144 struct nword	*word;
145 {
146   struct nword *p, *q;
147 
148   if (word) {
149     for (p = q = word; p; q = p, p = p->nw_next) {
150       if (!p->nw_cache && p->nw_kanji) {
151         _Rkpanic("killWord this would never happen addr ", 0, 0, 0);
152 	(void)free((char *)p->nw_kanji);
153       };
154       st->word_in_use--;
155       SX.word_in_use--;
156     }
157     q->nw_next = SX.word;
158     SX.word = word;
159   }
160 }
161 
/*
 * freeWord
 *	derefWord() followed by killWord(): release the cache references held
 *	by the chain, then return the words themselves to the free list.
 */
static void
freeWord(st, word)
struct nstore 	*st;
struct nword 	*word;
{
  derefWord(word);
  killWord(st, word);
}
170 
171 void
_RkFreeBunq(st)172 _RkFreeBunq(st)			/* freeWord = derefWord + killWord */
173      struct nstore *st;
174 {
175   struct nbun *bunq = &st->bunq[st->curbun];
176 
177   freeWord(st, bunq->nb_cand);
178   bunq->nb_cand = (struct nword *)0;
179   bunq->nb_yoff = 0;
180   bunq->nb_curlen = bunq->nb_maxcand = bunq->nb_curcand = 0;
181   bunq->nb_flags = (unsigned short)0;
182   return;
183 }
184 
/* Helpers declared without prototypes; their definitions are not shown here. */
extern unsigned	searchRut();
extern int	entryRut();
187 
188 static
189 struct nword	*
concWord(cx,p,q,loc,bb)190 concWord(cx, p, q, loc, bb) 		/* create the concatinated word p+q */
191   struct RkContext	*cx;
192   struct nword		*p, *q;	/* prefix word list, and right word */
193   int			loc;
194   int			bb;
195 {
196     struct nword	conc;
197     struct nword	*pq;
198 
199 /* create a concatinated word temoprally */
200     conc = *q;
201     conc.nw_klen  += p->nw_klen;
202     conc.nw_ylen  += p->nw_ylen;
203     conc.nw_flags = p->nw_flags&(NW_PRE|NW_SUC|NW_SWD|NW_LOWPRI);
204     conc.nw_count = p->nw_count + 1;
205 /* check limit conditions */
206     if (conc.nw_klen > RK_LEN_WMAX ||
207 	conc.nw_ylen > RK_LEN_WMAX ||
208 	conc.nw_count >= RK_CONC_NMAX)
209 	return (struct nword *)0;
210 #ifdef LOGIC_HACK
211     if (conc.nw_count >= 3) {
212       switch (RkCheckNegGram(cx->gram->gramdic,
213 	    p->nw_left->nw_rowcol, p->nw_rowcol, q->nw_rowcol))
214       {
215       case 1:
216 	return (struct nword *)0;
217       case 2:
218 	conc.nw_flags |= NW_LOWPRI;
219       }
220     }
221 #endif /* LOGIC_HACK */
222     if (p->nw_ylen == 1 && q->nw_rowcol == cx->gram->P_Ftte)
223       conc.nw_flags |= NW_LOWPRI; /* FIXME: replace to something better */
224     conc.nw_prio = p->nw_prio;
225     conc.nw_next = (struct nword *)0;
226     conc.nw_left = p;
227     switch(q->nw_class)  {
228 /* kakko, kutouten ha setuzoku kankei ni eikyou sinai */
229     case	ND_OPN:
230     case	ND_CLS:
231 	conc.nw_rowcol = p->nw_rowcol;
232 	if (p->nw_class != ND_EMP) {
233 	    conc.nw_class = p->nw_class;
234 	    conc.nw_flags = p->nw_flags;
235 	} else  {
236 	    conc.nw_class = q->nw_class;
237 	    conc.nw_flags = q->nw_flags;
238 	};
239 	break;
240     case	ND_PUN:
241     /* avoid punctionations where prohibited */
242         if (!CanSplitWord(p))
243 	    return (struct nword *)0;
244     /* don't remove loc check or you get stuck when a punctionation comes */
245         if (loc > 0 && p->nw_class == ND_EMP)
246 	    return (struct nword *)0;
247 	conc.nw_rowcol = p->nw_rowcol;
248 	conc.nw_class = ND_SWD;
249 	break;
250     case	ND_MWD:
251 	conc.nw_flags |= NW_MWD;
252 	conc.nw_flags |= (q->nw_flags & NW_LOWPRI);
253 	conc.nw_prio = q->nw_prio;
254 	break;
255     case	ND_SWD:
256 	if (!(conc.nw_flags&NW_SWD))
257 	    conc.nw_flags |= NW_SWD;
258 	break;
259     case	ND_PRE:
260 	conc.nw_flags |= NW_PRE;
261 	break;
262     case	ND_SUC:
263 	conc.nw_flags |= NW_SUC;
264 	break;
265     };
266 /* cache no sanshoudo wo kousinn suru */
267     pq = allocWord(cx->store, bb);
268     if (pq) {
269 	*pq = conc;
270         p->nw_flags |= NW_FOLLOW;
271         if (pq->nw_cache)
272 	  _RkEnrefCache(pq->nw_cache);
273     };
274     return pq;
275 }
276 
277 /* clearQue
278  *	clear word tree queue
279  */
280 static void
clearQue(xq)281 clearQue(xq)
282 struct nqueue	*xq;
283 {
284   xq->tree = (struct nword *)0;
285   xq->maxlen = 0;
286   xq->status = 0;
287 }
288 /* RkFreeQue
289  *	free word tree stored in [s, e)
290  */
291 void
_RkFreeQue(st,s,e)292 _RkFreeQue(st, s, e)
293 struct nstore *st;
294 int 	      s;
295 int	      e;
296 {
297   struct nqueue *xq = st->xq;
298 
299   while (s < e) {
300     if (xq[s].tree)
301       freeWord(st, xq[s].tree);
302     clearQue(&xq[s]);
303     s++;
304   };
305 }
306 
307 /*
308  * Literal
309  */
310 static
311 int
cvtNum(dst,maxdst,src,maxsrc,format)312 cvtNum(dst, maxdst, src, maxsrc, format)
313      Wchar	*dst;
314      int	maxdst;
315      Wchar	*src;
316      int	maxsrc;
317      int	format;
318 {
319   return RkwCvtSuuji(dst, maxdst, src, maxsrc, format - 1);
320 }
321 
322 static
323 int
cvtAlpha(dst,maxdst,src,maxsrc,format)324 cvtAlpha(dst, maxdst, src, maxsrc, format)
325      Wchar	*dst;
326      int	maxdst;
327      Wchar	*src;
328      int	maxsrc;
329      int	format;
330 {
331     switch(format) {
332 #ifdef ALPHA_CONVERSION
333     case 1: 	return RkwCvtZen(dst, maxdst, src, maxsrc);
334     case 2: 	return RkwCvtHan(dst, maxdst, src, maxsrc);
335     case 3: 	return -1;
336 #else
337     case 1: 	return RkwCvtNone(dst, maxdst, src, maxsrc);
338     case 2: 	return -1;
339 #endif
340     default: 	return 0;
341     }
342 }
343 
344 static
345 int
cvtHira(dst,maxdst,src,maxsrc,format)346 cvtHira(dst, maxdst, src, maxsrc, format)
347      Wchar	*dst;
348      int	maxdst;
349      Wchar	*src;
350      int	maxsrc;
351      int	format;
352 {
353   switch(format) {
354   case 1: 	return RkwCvtHira(dst, maxdst, src, maxsrc);
355   case 2: 	return RkwCvtKana(dst, maxdst, src, maxsrc);
356   default: 	return 0;
357   }
358 }
359 
360 static
361 int
cvtLit(dst,maxdst,src,maxsrc,format,mode)362 cvtLit(dst, maxdst, src, maxsrc, format, mode)
363      Wchar		*dst;
364      int		maxdst;
365      Wchar		*src;
366      int		maxsrc;
367      int		format;
368      unsigned long	mode;
369 {
370   switch(format >> 4) {
371   case LIT_NUM:
372     if (mode & RK_MAKE_KANSUUJI)
373       return cvtNum(dst, maxdst, src, maxsrc, format&15);
374     else
375       return RkwCvtNone(dst, maxdst, src, maxsrc);
376   case LIT_ALPHA: 	return cvtAlpha(dst, maxdst, src, maxsrc, format&15);
377   case LIT_HIRA: 	return cvtHira(dst, maxdst, src, maxsrc, format&15);
378   default:		return 0;
379   }
380 }
381 
382 /* setLit
383  *	create the literals as many as the context requires
384  */
385 static
386 struct nword	*
setLit(cx,word,maxword,rc,src,srclen,format)387 setLit(cx, word, maxword, rc, src, srclen, format)
388      struct RkContext	*cx;
389      struct nword	*word;
390      int		maxword;
391      int		rc;
392      Wchar		*src;
393      int		srclen;
394      int		format;
395 {
396   struct nword	*w = word;
397   int 		dstlen;
398   unsigned long	mode;
399 
400   if (!cx->litmode)
401     return 0;
402   for (mode = cx->litmode[format]; mode; mode >>= RK_XFERBITS)
403     if (w < word + maxword) {
404       int	code = MAKELIT(format, mode&RK_XFERMASK);
405 
406       dstlen = cvtLit((Wchar *)0, 9999, src, srclen, code, (unsigned long)cx->concmode);
407       if (0 < dstlen  && dstlen <= RK_LEN_WMAX)
408 	setWord(w++, rc, code, src, srclen, (Wrec *)0, dstlen, cx->gram->P_BB);
409       if (dstlen < 0)
410 	setWord(w++, rc, code, src, srclen, (Wrec *)0, srclen, cx->gram->P_BB);
411     }
412   return (struct nword *) w;
413 }
414 
415 #define READWORD_MAXCACHE 128
416 static
417 struct nword	*
readWord(cx,yy,ys,ye,class,nword,maxword,doflush,douniq)418 readWord(cx, yy, ys, ye, class, nword, maxword, doflush, douniq)
419      struct RkContext	*cx;
420      int		yy, ys, ye;
421      int		class;
422      struct nword	*nword;
423      int		maxword;
424      int		doflush;
425      int		douniq;
426 {
427   Wchar		*key = cx->store->yomi + yy;
428   struct nword	*wrds;
429   struct MD	*head = cx->md[class], *md;
430   int		maxcache = READWORD_MAXCACHE;
431 #ifndef USE_MALLOC_FOR_BIG_ARRAY
432   unsigned 	permutation[RK_CAND_NMAX];
433   unsigned char	*candidates[RK_CAND_NMAX];
434   struct nread	nread[READWORD_MAXCACHE];
435 #else
436   unsigned *permutation;
437   unsigned char **candidates;
438   struct nread *nread;
439 
440   permutation = (unsigned *)malloc(sizeof(unsigned) * RK_CAND_NMAX);
441   candidates = (unsigned char **)
442     malloc(sizeof(unsigned char *) * RK_CAND_NMAX);
443   nread = (struct nread *)malloc(sizeof(struct nread) * READWORD_MAXCACHE);
444   if (!permutation || !candidates || !nread) {
445     if (permutation) (void)free((char *)permutation);
446     if (candidates) (void)free((char *)candidates);
447     if (nread) (void)free((char *)nread);
448     return nword;
449   }
450 #endif
451 
452   wrds = nword;
453   for (md = head->md_next; md != head; md = md->md_next) {
454     struct DM		*dm = md->md_dic;
455     struct DM		*qm = md->md_freq;
456     struct nword	*pp, *qq;
457     int c, nc, num, cf = 0, nl;
458 
459     if (maxword <=  0)
460       break;
461     if (!dm)
462       continue;
463     if (qm && !qm->dm_qbits)
464       qm = (struct DM *)0;
465     nc = DST_SEARCH(cx, dm, key, ye, nread, maxcache, &cf);
466     for (c = 0; c < nc; c++) {
467       struct nread	*thisRead = nread + c;
468       struct ncache	*thisCache = thisRead->cache;
469       unsigned char	*wp = thisCache->nc_word;
470       unsigned long	offset;
471       int nk, cnt = 1;
472       unsigned long	csnb;
473       int		bitSize;
474 
475       nk = _RkCandNumber(wp);
476       nl = (*wp >> 1) & 0x3f;
477       if (!doflush && (cf || thisRead->nk > ye || thisRead->nk > RK_KEY_WMAX))
478 	cx->poss_cont++;
479       if (*wp & 0x80)
480 	wp += 2;
481       wp += 2 + nl *2;
482       csnb = thisRead->csn;
483       offset = thisRead->offset;
484       if (ys < thisRead->nk && thisRead->nk <= ye && thisRead->nk <= RK_KEY_WMAX)  {
485 	for (num = 0; num < nk; num++) {
486 	  candidates[num] = wp;
487 	  wp += 2 * ((*wp >> 1) & 0x7f) + 2;
488 	};
489 	if (qm) {
490 	  int	ecount, cval, i;
491 
492 	  bitSize = _RkCalcLog2(nk + 1) + 1;
493 	  _RkUnpackBits(permutation, qm->dm_qbits, offset, bitSize, nk);
494 	  for (ecount = cval = i = 0; i < nk; i++) {
495 	    if ((int)permutation[i]/2 >  nk) {
496 	      ecount++;
497 	      break;
498 	    };
499 	    cval += permutation[i];
500 	  }
501 	  if (ecount || cval < (nk-1)*(nk-2)) {
502 	    for (i = 0; i < nk; i++)
503 	      permutation[i] = 2*i;
504 	    _RkPackBits(qm->dm_qbits, offset, bitSize, permutation, nk);
505 	  };
506 	};
507 	pp = wrds;
508 	for (num = 0; num < nk; num++) {
509 	  unsigned permed;
510 
511 	  if (maxword <=  0)
512 	    break;
513 	  if (qm) {
514 	    permed = permutation[num]/2;
515 	    if ((int)permed > nk) {
516 	      break;
517 	    }  else if ((int)permed == nk)
518 	      continue;
519 	  } else
520 	    permed = num;
521 	  wp = candidates[permed];
522 	  clearWord(wrds, cx->gram->P_BB);
523 	  wrds->nw_kanji = wp;
524 	  wrds->nw_freq = qm;
525 	  wrds->nw_rowcol = _RkRowNumber(wp);
526 	  wrds->nw_cache = thisCache;
527 	  wrds->nw_ylen = thisRead->nk;
528 	  wrds->nw_klen = (*wp >> 1) & 0x7f;
529 	  wrds->nw_class = class;
530 	  wrds->nw_csn = csnb + permed;
531 	  wrds->nw_prio = 0L;
532 	  if (class == ND_MWD) {
533 	    if (qm && qm->dm_rut) {
534 	      if (cnt)
535 		cnt = wrds->nw_prio = searchRut(qm->dm_rut, wrds->nw_csn);
536 	    } else if (DM2TYPE(dm)) {
537 	      if (num < 2)
538 		wrds->nw_prio = ((struct TW *)thisCache->nc_address)->lucks[num];
539 	    }
540 	    if (wrds->nw_prio) {
541 	      long    t;
542 
543 	      t = _RkGetTick(0) - wrds->nw_prio;
544 	      wrds->nw_prio = (0 <= t && t < 0x2000) ? (0x2000 - t) << 4 : 0;
545 	    };
546 	    switch(num) {
547 	    case 0: wrds->nw_prio += 15L; break;
548 	    case 1: wrds->nw_prio += 11L; break;
549 	    case 2: wrds->nw_prio += 7L; break;
550 	    case 3: wrds->nw_prio += 3L; break;
551 	    };
552 	    wrds->nw_prio |= 0x01;
553 	  };
554 	  if (douniq) {
555 	    for (qq = pp; qq < wrds; qq++)
556 	      if (qq->nw_rowcol == wrds->nw_rowcol)
557 		break;
558 	    if (qq < wrds)
559 	      continue;
560 	  }
561 	  _RkEnrefCache(thisCache);
562 	  wrds++;
563 	  maxword--;
564 	};
565       };
566       _RkDerefCache(thisCache);
567     };
568     maxcache -= nc;
569   };
570 #ifdef USE_MALLOC_FOR_BIG_ARRAY
571   (void)free((char *)permutation);
572   (void)free((char *)candidates);
573   (void)free((char *)nread);
574 #endif
575   return(wrds);
576 }
577 
578 /* makeWord
579  *	jisho ni nai katakana, suuji, tokushu moji wo tango to minasu
580  */
581 /*ARGSUSED*/
582 static
583 struct nword	*
makeWord(cx,yy,ys,ye,class,word,maxword,doflush,douniq)584 makeWord(cx, yy, ys, ye, class, word, maxword, doflush, douniq)
585      struct RkContext	*cx;
586      int		yy, ys, ye;
587      int		class;		/* word class */
588      struct nword	*word;
589      int		maxword;
590      int		doflush;
591      int		douniq;
592 {
593   struct nstore	*st = cx->store;
594   Wchar		*key = st->yomi + yy;
595   Wchar		*k, *z;
596   struct nword	*w = word;
597   Wchar		c;
598   int			clen;
599   int			hinshi = cx->gram->P_BB;
600   int			literal = -1;
601   int			punct = 0;
602   int			gobeyond = 0;
603 
604   if (ye <= 0)
605     return w;
606   z = (k = key) + ye;
607   /* sentou moji wo yomu */
608   c = *k++;
609   clen = 1;
610   if (us_iscodeG0(c)) {		/* ascii string */
611     if ('0' <= c && '9' >= c) {	/* numeral */
612       if (!(cx->concmode & RK_MAKE_EISUUJI)) {
613 	doflush++;
614       } else {
615 	for (; k < z; k++, clen++)
616 	  if (clen >= RK_KEY_WMAX || !('0' <= *k && *k <= '9')) {
617 	    doflush++;
618 	    break;
619 	  };
620       }
621       hinshi = cx->gram->P_NN; literal = LIT_NUM;
622     } else {				/* others */
623       if (!(cx->concmode & RK_MAKE_EISUUJI)) {
624 	doflush++;
625       } else {
626 	for (; k < z; k++, clen++)
627 	  if (clen >= RK_KEY_WMAX || !us_iscodeG0(*k)) {
628 	    doflush++;
629 	    break;
630 	  };
631       }
632       hinshi = cx->gram->P_T35; literal = LIT_ALPHA;
633     }
634   } else if (us_iscodeG1(c)) {
635     if (0xb000 <= c) {		/* kanji string */
636       for (; k < z; k++, clen++)
637 	if (clen >= RK_KEY_WMAX || *k < 0xb000) {
638 	  doflush++;
639 	  break;
640 	};
641       hinshi = cx->gram->P_T00;
642     } else if (0xa1a2 <= c && c <= 0xa1db) {
643       /*
644        *	now multiple punctiation characters constitute a single punct
645        */
646       for (; k < z; k++, clen++)
647 	if (clen >= RK_KEY_WMAX || !(0xa1a2 <= *k && *k <= 0xa1db)) {
648 	  doflush++;
649 	  break;
650 	};
651       switch(c) {
652       case	0xa1a2:	case	0xa1a3:	case	0xa1a4:
653       case	0xa1a5: case	0xa1a6:	case	0xa1a7:
654       case	0xa1a8:	case	0xa1a9:	case	0xa1aa:
655       case	0xa1c4:
656 	punct = ND_PUN;
657 	break;
658       case	0xa1c6:	case	0xa1c8:	case	0xa1ca:
659       case	0xa1cc: case	0xa1ce:
660       case	0xa1d0:	case	0xa1d2:	case	0xa1d4:
661       case	0xa1d6:	case	0xa1d8:	case	0xa1da:
662 	punct = ND_OPN;
663 	break;
664       case	0xa1c7:	case	0xa1c9:	case	0xa1cb:
665       case	0xa1cd:	case	0xa1cf:	case	0xa1d1:
666       case	0xa1d3:	case	0xa1d5:	case	0xa1d7:
667       case	0xa1d9:	case	0xa1db:
668 	punct = ND_CLS;
669 	break;
670       default:
671 	hinshi = cx->gram->P_T00;
672 	doflush++;
673       };
674     } else if (0xa3b0 <= c && c <= 0xa3b9) {	/* suuji */
675       if (!(cx->concmode & RK_MAKE_EISUUJI)) {
676 	doflush++;
677       } else {
678 	for (; k < z; k++, clen++)
679 	  if (clen >= RK_KEY_WMAX || !(0xa3b0 <= *k && *k <= 0xa3b9)) {
680 	    doflush++;
681 	    break;
682 	  };
683       }
684       hinshi = cx->gram->P_NN; literal = LIT_NUM;
685     } else if ((0xa3c1 <= c && c <= 0xa3da)
686 	       || (0xa3e1 <= c && c <= 0xa3fa)) {	/* eiji */
687       if (!(cx->concmode & RK_MAKE_EISUUJI)) {
688 	doflush++;
689       } else {
690 	for (; k < z; k++, clen++)
691 	  if (clen >= RK_KEY_WMAX
692 	      || !((0xa3c1 <= (c = *k) && c <= 0xa3da)
693 		   || (0xa3e1 <= c && c <= 0xa3fa))) {
694 	    doflush++;
695 	    break;
696 	  };
697       }
698       hinshi = cx->gram->P_T35; literal = LIT_ALPHA;
699     } else if (0xa5a1 <= c && c <= 0xa5f6) {	/* zenkaku katakana */
700       for (; k < z; k++, clen++)
701 	if (clen >= RK_KEY_WMAX ||
702 	    ((0xa5a1 > (c = *k) || c > 0xa5f6) &&
703 	     (0xa1a1 > c || c > 0xa1f6))) {
704 	  doflush++;
705 	  break;
706 	};
707       hinshi = cx->gram->P_T30;
708     } else if (0xa4a1 <= c && c <= 0xa4f3) {	/* hiragana */
709       for (; k < z; k++, clen++) {
710 	if (clen >= RK_KEY_WMAX) {
711 	  doflush++;
712 	  break;
713 	};
714 	switch (*k) {
715 #ifndef LOGIC_HACK
716 	case 0xa4a1: case 0xa4a3: case 0xa4a5:
717 	case 0xa4a7: case 0xa4a9:
718 	case 0xa4e3: case 0xa4e5: case 0xa4e7:
719 	case 0xa4c3: case 0xa4f3:
720 #endif
721 	case 0xa1ab: case 0xa1ac: case 0xa1b3:
722 	case 0xa1b4: case 0xa1b5: case 0xa1b6:
723 	case 0xa1bc:
724 	  continue;
725 	default:
726 	  doflush++;
727 	  gobeyond++;
728 	  goto hira;
729 	};
730       };
731     hira:
732       hinshi = cx->gram->P_T35;
733     } else {
734       doflush++;
735       hinshi = cx->gram->P_T35;
736     };
737   } else if (us_iscodeG2(c)) {	/* hankaku katakana */
738     for (; k < z; k++, clen++)
739       if (clen >= RK_KEY_WMAX || !us_iscodeG2(*k)) {
740 	doflush++;
741 	break;
742       };
743     hinshi = cx->gram->P_T30;
744   } else {
745     doflush++;
746     hinshi = cx->gram->P_T35;
747   }
748   if ((ys <= clen && clen <= ye) || gobeyond) {
749     if (class == ND_MWD || punct) {
750       if (!doflush && !gobeyond)
751 	cx->poss_cont++;
752       if (literal != -1) {
753 	if (doflush)
754 	  w= setLit(cx, w, maxword, hinshi, key, clen, literal);
755       } else if (w < word + maxword) {
756 	  if (doflush) {
757 	    setWord(w++, hinshi, 0, key, clen, (Wrec *)0,
758 		    clen, cx->gram->P_BB);
759 	    if (punct)
760 	      w[-1].nw_class = punct;
761 	  };
762       }
763     }
764   }
765   return w;
766 }
767 
768 static int
determinate(y1,y2,l)769 determinate(y1, y2, l)
770      Wrec	*y1, *y2;
771      int	l;
772 {
773   if ((int)*y1 > l)
774     return(0);
775   for (l = *y1, y1 += 2; l; l--) {
776     Wchar *wy = (Wchar *) y2;
777     Wrec c1 = (Wrec) ((*wy & 0xff00) >> 8);
778     Wrec c2 = (Wrec) (*wy & 0xff);
779 
780     y2 += 2;
781     if (*y1++ != c1 || *y1++ != c2) {
782       return(0);
783     }
784   }
785   return(1);
786 }
787 
788 static
789 int
positive(y1,y2,l)790 positive(y1, y2, l)
791      Wrec	*y1, *y2;
792      int	l;
793 {
794   l = (int)*y1 < l ? (int)*y1 : l;
795   for (y1 += 2; l; l--) {
796     if (*y1++ != *y2++ || *y1++ != *y2++) {
797       return(0);
798     }
799   }
800   return(1);
801 }
802 
803 static
804 int
positiveRev(y1,y2,l)805 positiveRev(y1, y2, l)
806      Wrec	*y1, *y2;
807      int	l;
808 {
809   l = (int)*y1 < l ? (int)*y1 : l;
810   for (y1 += 2; l; l--) {
811     Wchar *wy = (Wchar *) y2;
812     Wrec c1 = (Wrec) ((*wy & 0xff00) >> 8);
813     Wrec c2 = (Wrec) (*wy & 0xff);
814 
815     y2 += 2;
816     if (*y1++ != c1 || *y1++ != c2) {
817       return(0);
818     }
819   }
820   return(1);
821 }
822 
823 static
824 void
cancelNVE(nv,p)825 cancelNVE(nv, p)
826      struct NV	*nv;
827      struct NVE *p;
828 {
829   unsigned char	*s = p->data;
830 
831   nv->csz -= *s * 2 + 2;
832   nv->cnt--;
833   p->right->left = p->left;
834   p->left->right = p->right;
835   (void)free((char *)s);
836   (void)free((char *)p);
837 }
838 
839 static
840 struct NVE *
newNVE(nv,y,l,v)841 newNVE(nv, y, l, v)
842      struct NV	*nv;
843      Wrec	*y;
844      int	l;
845      int	v;
846 {
847   unsigned short	w;
848   struct NVE		*p, **q, *r;
849   struct NVE		*nve;
850   unsigned char		*s;
851 
852   nve = (struct NVE *)calloc(1, sizeof(struct NVE));
853   if (nve) {
854     s = (unsigned char *)malloc(l * 2 + 2);
855     if (s) {
856       nve->data = s;
857       *s++ = l;
858       *s++ = v;
859 
860       memcpy(s, y, l * 2);
861       nv->csz += l * 2 + 2;
862       nv->cnt++;
863       while ((p = nv->head.right) != &nv->head && nv->csz >= (long)nv->sz) {
864 	w = bst2_to_s(p->data + 2);
865 	q =  nv->buf + w % nv->tsz;
866 	while ((r = *q) != (struct NVE *)0) {
867 	  if (r == p) {
868 	    *q = r->next;
869 	    cancelNVE(nv, p);
870 	    break;
871 	  } else
872 	    q = &r->next;
873 	}
874       }
875       if (nv->csz >= (long)nv->sz) {
876 	nv->csz -= l * 2 + 2;
877 	nv->cnt--;
878 	(void)free((char *)nve->data);
879 	(void)free((char *)nve);
880 	return((struct NVE *)0);
881       }
882     } else {
883       (void)free((char *)nve);
884       nve = (struct NVE *)0;
885     }
886   }
887   return(nve);
888 }
889 
890 int
_RkRegisterNV(nv,yomi,len,half)891 _RkRegisterNV(nv, yomi, len, half)
892      struct NV	*nv;
893      Wrec	*yomi;
894      int	len;
895      int	half;
896 {
897   unsigned short	v;
898   struct NVE		*p, **q, **r;
899 
900   if (nv && nv->tsz && nv->buf) {
901     v = bst2_to_s(yomi);
902     q = r = nv->buf + v % nv->tsz;
903     for (p = *q; p; p = *q) {
904       if (positive(p->data, yomi, len)) {
905 	*q = p->next;
906 	cancelNVE(nv, p);
907       } else {
908 	q = &p->next;
909       }
910     }
911     p = newNVE(nv, yomi, len, half);
912     if (p) {
913       p->next = *r;
914       *r = p;
915       p->left = nv->head.left;
916       p->left->right = p;
917       p->right = &nv->head;
918       nv->head.left = p;
919     }
920   }
921   return(0);
922 }
923 
924 #define TAILSIZE 256
925 #define RIGHTSIZE (64 * 16)
926 
927 /* parseWord
928  *	bunsestu no ki wo seichou saseru.
929  */
930 static int
parseWord(cx,yy,ys,ye,class,xqh,maxclen,doflush,douniq)931 parseWord(cx, yy, ys, ye, class, xqh, maxclen, doflush, douniq)
932      struct RkContext	*cx;
933      int		yy, ys, ye;
934      int		class;
935      struct nword	*xqh[];	/* indexed by nw_ylen */
936      int		maxclen;	/* saishou yomi no nagasa */
937      int		doflush;
938      int		douniq;
939 {
940   struct RkKxGram	*gram = cx->gram->gramdic;
941   int			clen;
942   static unsigned	classmask[] = { /* ���ˤĤʤ��륯�饹 */
943     (1 << ND_SWD) | (1 << ND_SUC),	/* MWD --> SUC | SWD */
944     (1 << ND_SWD),			/* SWD --> SWD */
945     (1 << ND_MWD) | (1 << ND_SWD),	/* PRE --> MWD | SWD */
946     (1 << ND_SWD),			/* SUC --> SWD */
947     (1 << ND_MWD) | (1 << ND_SWD) | (1 << ND_PRE),/* EMP --> MWD | SWD | PRE */
948   };
949 #ifndef USE_MALLOC_FOR_BIG_ARRAY
950   struct nword	*tail[TAILSIZE];
951   struct nword	right[RIGHTSIZE];
952 #else
953   struct nword **tail, *right;
954   tail = (struct nword **)malloc(sizeof(struct nword *) * TAILSIZE);
955   right = (struct nword *)malloc(sizeof(struct nword) * RIGHTSIZE);
956   if (!tail || !right) {
957     if (tail) (void)free((char *)tail);
958     if (right) (void)free((char *)right);
959     return maxclen;
960   }
961 #endif
962 
963   for (clen = 0; (clen <= maxclen && clen < ye); clen++) {
964     int			sameLen;
965     int			t;
966     struct nword	*p, *q, *r;
967     int			ys1, ye1;
968 
969     /* �ɤߤ�Ĺ�� clen ��ñ��Τ��������� class �ǻ��ꤵ�줿ñ�줬
970        �Ĥʤ����ǽ���������Τ�ꥹ�ȥ��åפ���tail �˵�Ͽ���� */
971     for (p = xqh[clen], sameLen = 0; p; p = p->nw_next) {
972       if (classmask[p->nw_class] & (1<<class)) {
973 	/* p �θ��� class ��ñ�줬�Ĥʤ����ǽ�������� */
974 	if (sameLen < TAILSIZE) { /* �ޤ� tail �ˤ��������� */
975 	  tail[sameLen++] = p;
976 	}
977       }
978     }
979     if (!sameLen)
980       continue;
981     ys1 = ys - clen; if (ys1 < 0)  ys1 = 0;
982     ye1 = ye - clen;
983     r = readWord(cx, yy + clen, ys1, ye1, class,
984 		 right, RIGHTSIZE - 1, doflush, douniq);
985     if (Is_Word_Make(cx))
986       r = makeWord(cx, yy + clen, ys1, ye1, class,
987 		   r, RIGHTSIZE -1 - (int)(r - right), doflush, douniq);
988     for (t = 0; t < sameLen; t++) {
989       p = tail[t];
990       for (q = right; q < r; q++)
991 	if (Is_Word_Connect(cx) &&
992 	    (q->nw_class >= ND_OPN ||
993 	     RkTestGram(gram, p->nw_rowcol, q->nw_rowcol)))  {
994 	  struct nword	*pq = concWord(cx, p, q, clen, cx->gram->P_BB);
995 	  if (pq) {
996 	    struct nword	*s;
997 	    if (gram && !IsShuutan(gram, pq->nw_rowcol)) {
998 #ifdef BUNMATU
999 	      /* ʸ�����ˤ����ʤ�ʤ� */
1000 	      if (IsBunmatu(gram, pq->nw_rowcol)) {
1001 		/* �������ʤɤξ��ȡ��ɤߤ�Ԥ����Ƥ�����ˤ�
1002 		   ʸ�������������� */
1003 		if (q->nw_class >= ND_OPN ||
1004 		    (doflush && yy + pq->nw_ylen == cx->store->nyomi))
1005 		  pq->nw_flags &= ~NW_BUNMATU;
1006 		else
1007 		  pq->nw_flags |= NW_BUNMATU;
1008 	      } else
1009 #endif
1010 		DontSplitWord(pq);
1011 	    }
1012 	    if ((unsigned long)maxclen < (unsigned long)pq->nw_ylen) {
1013 	      while (++maxclen < (int)pq->nw_ylen)
1014 		xqh[maxclen] = (struct nword *)0;
1015 	      xqh[maxclen] = pq;
1016 	    }
1017 	    else {
1018 	      s = xqh[pq->nw_ylen];
1019 	      if (s) {
1020 		while (s->nw_next)
1021 		  s = s->nw_next;
1022 		s->nw_next = pq;
1023 	      }
1024 	      else
1025 		xqh[pq->nw_ylen] = pq;
1026 	    }
1027 	    pq->nw_next = (struct nword *)0;
1028 	  }
1029 	}
1030     }
1031     for (q = right; q < r; q++)
1032       if (q->nw_cache)
1033 	_RkDerefCache(q->nw_cache);
1034     if (!gram)
1035       goto done;
1036   }
1037  done:
1038 #ifdef USE_MALLOC_FOR_BIG_ARRAY
1039   (void)free((char *)tail);
1040   (void)free((char *)right);
1041 #endif
1042   return maxclen;
1043 }
1044 
1045 /*ARGSUSED*/
1046 static int
doParse(cx,yy,ys,ye,xqh,maxclen,doflush,douniq)1047 doParse(cx, yy, ys, ye, xqh, maxclen, doflush, douniq)
1048      struct RkContext	*cx;
1049      int		yy, ys, ye;
1050      struct nword	*xqh[];
1051      int		maxclen;
1052      int		doflush;
1053      int		douniq;
1054 {
1055   maxclen = parseWord(cx, yy, ys, ye, ND_PRE, xqh, maxclen, doflush, douniq);
1056   maxclen = parseWord(cx, yy, ys, ye, ND_MWD, xqh, maxclen, doflush, douniq);
1057   maxclen = parseWord(cx, yy, ys, ye, ND_SUC, xqh, maxclen, doflush, douniq);
1058   maxclen = parseWord(cx, yy, ys, ye, ND_SWD, xqh, maxclen, doflush, douniq);
1059   return maxclen;
1060 }
1061 
1062 /* getKanji
1063  *	get kanji in reverse order
1064  */
1065 Wchar *
_RkGetKanji(cw,key,mode)1066 _RkGetKanji(cw, key, mode)
1067      unsigned long	mode;
1068      struct nword	*cw;
1069      Wchar		*key;
1070 {
1071   Wrec			 *str;
1072   static Wchar		tmp[RK_LEN_WMAX+1]; /* static! */
1073   Wchar	 		*p = tmp;
1074   int		   	klen, ylen;
1075   struct nword		*lw = cw->nw_left;
1076 
1077   klen = cw->nw_klen - lw->nw_klen;
1078   ylen = cw->nw_ylen - lw->nw_ylen;
1079 /* nw_cache --> nw_kanji !nw_lit */
1080 /* !nw_cache --> !nw_kanji nw_lit */
1081 
1082   if (cw->nw_cache) {
1083     if ((*(cw->nw_kanji) >> 1) & 0x7f) {
1084       str = cw->nw_kanji + NW_PREFIX;
1085       for (; klen-- ; str += 2)
1086 	*p++ = S2TOS(str);
1087       return tmp;
1088     } else
1089       return key;
1090   } else if (cw->nw_kanji) {
1091     _Rkpanic("_RkGetKanji\n", 0, 0, 0);
1092     str = cw->nw_kanji + NW_PREFIX;
1093     for (; klen-- ; str += 2)
1094       *p++ = S2TOS(str);
1095     return tmp;
1096   } else if (cw->nw_lit) {
1097     if (cvtLit(tmp, klen + 1, key, ylen, cw->nw_lit, mode) > 0)
1098       return tmp;
1099     else
1100       return key;
1101   } else
1102     return key;
1103 }
1104 
1105 static
1106 int
getKanji(w,key,d,mode)1107 getKanji(w, key, d, mode)
1108      struct nword	*w;
1109      Wchar		*key;
1110      Wchar		*d;
1111      unsigned long	mode;
1112 {
1113   struct nword	*cw, *lw;
1114   int			hash, klen;
1115 
1116   hash = 0;
1117   for (cw = w; cw; cw = lw) {
1118     Wchar	*s, *t;
1119 
1120     if (!(lw = cw->nw_left))
1121       continue;
1122     klen = (cw->nw_klen - lw->nw_klen);
1123     s = _RkGetKanji(cw, key + lw->nw_ylen, mode);
1124     t = s + klen;
1125     /* copy */
1126     while (s < t) {
1127       *d++ = *--t;
1128 	hash += *t;
1129     }
1130   }
1131   return hash;
1132 }
1133 
1134 #define HEAPSIZE 512
1135 
1136 /* uniqWord
1137  *	unique word list
1138  */
1139 static void
uniqWord(key,words,ylen,mode)1140 uniqWord(key, words, ylen, mode)
1141      Wchar		*key;
1142      struct nword	*words;
1143      unsigned		ylen;
1144      unsigned long	mode;
1145 {
1146   struct nword	*p;
1147   long			hp = 0;
1148   long uniq[16];
1149 #ifndef USE_MALLOC_FOR_BIG_ARRAY
1150   long heap[HEAPSIZE];
1151 #else
1152   long *heap = (long *)malloc(sizeof(long) * HEAPSIZE);
1153   if (!heap) {
1154     return;
1155   }
1156 #endif
1157 
1158   if (!(!key || ylen <= 0)) {
1159     /* clear hash table */
1160     uniq[ 0] = uniq[ 1] = uniq[ 2] = uniq[ 3] =
1161       uniq[ 4] = uniq[ 5] = uniq[ 6] = uniq[ 7] =
1162 	uniq[ 8] = uniq[ 9] = uniq[10] = uniq[11] =
1163 	  uniq[12] = uniq[13] = uniq[14] = uniq[15] = -1;
1164     for (p = words; p; p = p->nw_next) {
1165       if (CanSplitWord(p) && p->nw_ylen == ylen) {
1166 	int			wsize;
1167 	/* compute word size */
1168 	wsize = (2*p->nw_klen + sizeof(long)-1)/sizeof(long);
1169 	if (hp + 1 + wsize < HEAPSIZE) {
1170 	  long	hno, h;
1171 	  /* put kanji string without EOS */
1172 	  heap[hp + wsize] = 0;
1173 	  hno = getKanji(p, key, (Wchar *)&heap[hp + 1], mode)&15;
1174 	  /* search on the hash list */
1175 	  for (h = uniq[hno]; h >= 0; h = heap[h&0xffff])
1176 	    if ((h >> 16) == p->nw_klen) { /* same length */
1177 	      long *p1 = &heap[(h&0xffff) + 1];
1178 	      long *p2 = &heap[hp + 1];
1179 	      int		 i;
1180 	      /* compare by word */
1181 	      switch(wsize) {
1182 	      case 3:	if (*p1++ != *p2++) goto next;
1183 		      case 2:	if (*p1++ != *p2++) goto next;
1184 		      case 1:	if (*p1++ != *p2++) goto next;
1185 		      case 0:	break;
1186 		      default:
1187 			for (i = wsize; i--;)
1188 			  if (*p1++ != *p2++) goto next;
1189 			break;
1190 		      }
1191 	      /* match */
1192 	      DontSplitWord(p);
1193 	      goto  done;
1194 	    next:
1195 	      continue;
1196 	    }
1197 	  /* enter new entry */
1198 	  heap[hp + 0] = uniq[hno];
1199 	  uniq[hno] = (((unsigned long) (p->nw_klen))<<16)|hp;
1200 	  hp += 1 + wsize;
1201 	}
1202       done:
1203 	continue;
1204       }
1205     }
1206   }
1207 #ifdef USE_MALLOC_FOR_BIG_ARRAY
1208   (void)free((char *)heap);
1209 #endif
1210 }
1211 
1212 /* sortWord
1213  *	word list wo sort suru
1214  */
1215 struct compRec {
1216     struct nword	*word;
1217     long			prio;
1218 };
1219 
1220 static compword pro((const struct compRec *, const struct compRec *));
1221 
1222 static
1223 int
compword(x,y)1224 compword(x, y)
1225 const struct compRec *x, *y;
1226 {
1227   int lowdiff = (int)((unsigned char)y->word->nw_flags & NW_LOWPRI)
1228     - (int)((unsigned char)x->word->nw_flags & NW_LOWPRI);
1229   long	d =  ((long) y->word->nw_prio) - ((long) (x->word->nw_prio));
1230 
1231   if (lowdiff > 0) return(-1);
1232   else if (lowdiff < 0) return(1);
1233   if (d > 0) return(1);
1234   else if(d < 0) return(-1);
1235   else {
1236     long dd = x->prio - y->prio;
1237 
1238     if (dd > 0) return(1);
1239     else if (dd < 0) return(-1);
1240     else return(0);
1241   }
1242 }
1243 
1244 static
1245 struct nword	*
sortWord(words)1246 sortWord(words)
1247      struct nword	*words;
1248 {
1249   unsigned long 	nwords, pos, neg;
1250   long			i, p, n;
1251   struct compRec	*wptr;
1252   struct nword		*w;
1253 /* count number of words */
1254   pos = neg = 0L;
1255   for (w = words; w; w = w->nw_next)
1256     if (w->nw_prio > 0)
1257       pos++;
1258     else
1259       neg++;
1260   nwords = pos + neg;
1261   if (nwords <= 0)
1262     return words;
1263   /* sort word list using work space if possible */
1264   wptr = (struct compRec *)malloc(sizeof(struct compRec)*nwords);
1265   if (wptr) {
1266     p = 0L;
1267     n = pos;
1268     /* store pointers */
1269     for (w = words; w; w = w->nw_next)
1270       if (w->nw_prio > 0) {	/* positive list */
1271 	wptr[p].word = w;
1272 	wptr[p].prio = p;
1273 	p++;
1274       } else {			/* negative list && null word */
1275 	wptr[n].word = w;
1276 	n++;
1277       }
1278     /* positive list no sakusei */
1279     if (pos > 1)
1280 	(void)qsort((char *)wptr, (int)pos, sizeof(struct compRec),
1281                     (int (*) pro((const void *, const void *)))compword);
1282     for (i = 1; i < (int)nwords; i++)
1283       wptr[i - 1].word->nw_next = wptr[i].word;
1284     words = wptr[0].word;
1285     (void)free((char *)wptr);
1286   }
1287   return words;
1288 }
1289 
1290 static
1291 struct nword	*
height2list(height,maxclen)1292 height2list(height, maxclen)
1293      struct nword *height[];
1294      int maxclen;
1295 {
1296   int			i;
1297   struct nword		*e, *p, *head, *tail;
1298 
1299   e = height[0];
1300   tail = (struct nword *)0;
1301   for (i = 1; i <= maxclen; i++)
1302     if (height[i]) {
1303       for (p = height[i] ; p->nw_next ;) {
1304 	p = p->nw_next;
1305       }
1306       if (tail)
1307 	tail->nw_next = height[i];
1308       else
1309 	head = height[i];
1310       tail = p;
1311     }
1312   if (tail)
1313     tail->nw_next = e;
1314   else
1315     head = e;
1316   return head;
1317 }
1318 static
1319 void
list2height(height,maxclen,parse)1320 list2height(height, maxclen, parse)
1321      struct nword	*height[];
1322      int	maxclen;
1323      struct nword	*parse;
1324 {
1325   int		i;
1326   struct nword	*p, *q;
1327 
1328   for (i = 0; i <= maxclen; i++)
1329     height[i] = (struct nword *)0;
1330   for (p = parse; p; p = p->nw_next)
1331     if ((unsigned long)p->nw_ylen <= (unsigned long)maxclen && !height[p->nw_ylen])
1332       height[p->nw_ylen] = p;
1333   for (i = 0; i <= maxclen; i++)
1334     if (height[i]) {
1335       for (p = height[i] ; (q = p->nw_next) != (struct nword *)0; p = q) {
1336 	if (q->nw_ylen != i) {
1337 	  p->nw_next = (struct nword *)0;
1338 	  break;
1339 	}
1340       }
1341     }
1342 }
1343 
1344 /* parseBun
1345  *	key yori hajimaru bunsetsu wo kaiseki suru
1346  */
1347 static
1348 struct nword	*
parseBun(cx,yy,ys,ye,doflush,douniq,maxclen)1349 parseBun(cx, yy, ys, ye, doflush, douniq, maxclen)
1350      struct RkContext	*cx;
1351      int		yy, ys, ye;	/* kaiseki seiyaku */
1352      int		doflush;
1353      int		douniq;		/* unique shori sitei */
1354      int		*maxclen;	/* bunsetu saidai moji suu */
1355 {
1356   struct nstore	*st = cx->store;
1357   struct nword	**xqh = st->xqh;
1358 
1359 #ifdef TEST
1360   printf("parseBun[yy = %d, ys = %d, ye = %d]\n", yy, ys, ye);
1361 #endif
1362 
1363   xqh[0] = allocWord(st, cx->gram->P_BB);
1364   if (xqh[0]) {
1365     *maxclen = doParse(cx, yy, ys, ye, xqh, 0, doflush, douniq);
1366     return  height2list(xqh, *maxclen);
1367   } else {	/* kaiseki funou */
1368     *maxclen = 0;
1369     return  (struct nword *)0;
1370   }
1371 }
1372 
#ifdef BUNMATU
/* modifyPrio
 *	Raise the priority of every word that has a positive priority and
 *	for which IsBunmatu() is false by 0x2000 << 4.  Returns WORDS.
 */
static
struct nword	*
modifyPrio(cx, words)
    struct RkContext	*cx;
    struct nword	*words;
{
  struct RkKxGram	*gram = cx->gram->gramdic;
  struct nword		*cur = words;

  while (cur) {
    if (cur->nw_prio > 0 && !IsBunmatu(gram, cur->nw_rowcol))
      cur->nw_prio += 0x2000 << 4;
    cur = cur->nw_next;
  }
  return words;
}
#endif
1389 
1390 static
1391 void
storeBun(cx,yy,ys,ye,bun)1392 storeBun(cx, yy, ys, ye, bun)
1393      struct RkContext	*cx;
1394      int			yy, ys, ye;
1395      struct nbun	*bun;
1396 {
1397   struct nword	*full;
1398   struct nword	*w;
1399   int		maxclen;
1400 
1401 #ifdef BUNMATU
1402   full = sortWord(modifyPrio(cx, parseBun(cx, yy, ys, ye, 1, 0, &maxclen)));
1403 #else
1404   full = sortWord(parseBun(cx, yy, ys, ye, 1, 0, &maxclen));
1405 #endif
1406   bun->nb_cand = full;
1407   bun->nb_yoff = yy;
1408 /* kouho wo unique ni suru */
1409   uniqWord(cx->store->yomi + yy, full, bun->nb_curlen, cx->concmode);
1410   bun->nb_curcand = (unsigned short)0;
1411   bun->nb_maxcand = (unsigned short)0;
1412   for (w = full; w; w = w->nw_next) {
1413     if (CanSplitWord(w) && w->nw_ylen == bun->nb_curlen)
1414       bun->nb_maxcand++;
1415   }
1416 }
1417 
1418 /*
1419  * SPLIT
1420  */
1421 struct splitParm {
1422   unsigned long	u2;
1423   int		l2;
1424 };
1425 
1426 #ifdef LOGIC_HACK
1427 static
1428 void
evalSplit(cx,suc,ul)1429 evalSplit(cx, suc, ul)
1430      struct RkContext	*cx;
1431      struct nword	*suc;
1432      struct splitParm	*ul;
1433 {
1434   struct nword	*p;
1435   unsigned	l2;
1436   unsigned long	u2;
1437 
1438   l2 = 0;
1439   u2 = 0L;
1440   for (p = suc; p; p = p->nw_next)
1441   {
1442     if (!CanSplitWord(p) || /* ʸ��ˤʤ�ʤ� */
1443 	OnlyBunmatu(p) || /* ��ƥ���ľ���Ǥ���ʸ��ˤʤ�ʤ� */
1444 	(p->nw_rowcol == cx->gram->P_KJ) || /* ñ���� */
1445 	(p->nw_flags & NW_LOWPRI) || /* ͥ���٤��㤤ʸ�� */
1446 	(p->nw_flags & NW_SUC))
1447       continue;
1448     if (l2 <= p->nw_ylen) {
1449       l2 = p->nw_ylen;
1450       /* �ɤߤ���ʸ����ñ���ͥ���٤Ϲ�θ���ʤ� */
1451       if (u2 < p->nw_prio && p->nw_ylen > 1)
1452         u2 = p->nw_prio;
1453     }
1454   }
1455   ul->l2 = l2;
1456   ul->u2 = u2;
1457 }
1458 #else /* LOGIC_HACK */
1459 static
1460 void
evalSplit(cx,suc,ul)1461 evalSplit(cx, suc, ul)
1462      struct RkContext	*cx;
1463      struct nword	*suc;
1464      struct splitParm	*ul;
1465 {
1466   struct nword	*p;
1467   int		l2;
1468   unsigned long	u2;
1469 
1470   l2 = 0;
1471   u2 = 0L;
1472   for (p = suc; p; p = p->nw_next)
1473   {
1474     if (!CanSplitWord(p) || (p->nw_flags & NW_SUC))
1475       continue;
1476     if ((unsigned long)l2 < (unsigned long)p->nw_ylen)
1477       l2 = p->nw_ylen;
1478     if (u2 < p->nw_prio)
1479       u2 = p->nw_prio;
1480   };
1481   ul->l2 = l2;
1482   ul->u2 = u2;
1483 }
1484 #endif /* LOGIC_HACK */
1485 
#define PARMSIZE 256

/* calcSplit
 *	Decide how long the first bunsetsu starting at reading offset YY
 *	should be and return that length.
 *
 *	cx	context (store, grammar, and the cx->nv table)
 *	yy	offset of the bunsetsu in the reading
 *	top	candidate words for the first bunsetsu
 *	xq	queue table; xq[n].tree holds the candidates for the
 *		bunsetsu that follows a first bunsetsu of length n
 *	maxclen	largest index of xq[] worth caching
 *	flush	when non-zero, a candidate that reaches the end of the
 *		reading is accepted immediately
 *
 *	Step 1: look the reading up in cx->nv.  If determinate() accepts an
 *	entry, the value in its second data byte is taken as the length
 *	(the largest one wins) and no further evaluation is done.
 *	NOTE(review): presumably these are learned split positions
 *	registered through _RkRegisterNV() -- confirm.
 *
 *	Step 2 (only when step 1 found nothing): for every usable first
 *	candidate of length l1, evalSplit() summarises the following
 *	candidates (l2 = longest length, u2 = best priority).  The first
 *	candidate that maximises, in this order,
 *	  LOGIC_HACK:	l1 + l2, then nw_prio + u2, then l2
 *	  otherwise:	l1 + l2, then u2, then l2
 *	wins.  If no candidate qualifies the result is 1.
 *
 *	Side effect: some rejected candidates are marked with
 *	DontSplitWord().  With USE_MALLOC_FOR_BIG_ARRAY a failed
 *	allocation returns 0.
 */
static
int
calcSplit(cx, yy, top, xq, maxclen, flush)
     struct RkContext	*cx;
     int		yy;
     struct nword	*top;
     struct nqueue	xq[];		/* indexed by nw_ylen */
     int		maxclen;
     int		flush;
{
  /* L: best l1 + l2 so far, L1: chosen first length (the result),
   * L2: l2 belonging to the best choice, U/U2: its priority value */
#ifdef LOGIC_HACK
  int			L, L1 = 0, L2;
  unsigned long		U;
#else
  unsigned		L, L1 = 0, L2;
  unsigned		U2;
#endif
  struct nword	*w;
  int			i;
  int			maxary = PARMSIZE - 1;	/* last usable index of ul2[] */
  struct nstore		*st = cx->store;
  struct NVE		*p, **r;
  /* ul2[n] caches the evalSplit() result for xq[n].tree */
#ifndef USE_MALLOC_FOR_BIG_ARRAY
  struct splitParm	ul2[PARMSIZE];
#else
  struct splitParm *ul2 = (struct splitParm *)
    malloc(sizeof(struct splitParm) * PARMSIZE);
  if (!ul2) {
    return L1;
  }
#endif

  /* step 1: L2 temporarily holds the number of remaining characters */
  L2 = st->nyomi - yy;
  if (cx->nv && cx->nv->tsz && cx->nv->buf) {
    /* hash chain selected by the first character of the reading */
    r = cx->nv->buf + *(st->yomi + yy) % cx->nv->tsz;
    for (p = *r; p; p = p->next) {
      if (determinate(p->data, (Wrec *)(st->yomi + yy), (int)L2)) {
	if (*(p->data+1) > L1)
	  L1 = *(p->data + 1);
      }
    }
  }
  /* step 2: nothing found in the table, evaluate the candidates */
  if (L1 == 0) {
    /* defaults: first length 1, nothing following */
    L = (L1 = 1)+ (L2 = 0);
#ifdef LOGIC_HACK
    U = 0L;
#else
    U2 = (unsigned)0;
#endif
    if (maxary > maxclen)
      maxary = maxclen;
    /* l2 == 0 marks a cache slot as "not evaluated yet" */
    for (i = 0; i <= maxary; i++)
      ul2[i].l2 = ul2[i].u2 = 0L;
    for (w = top; w; w = w->nw_next) {
      int			l, l1;
#ifdef LOGIC_HACK
      unsigned long		u;
#endif
      struct splitParm		ul;
      /* cannot form a bunsetsu */
      if (!CanSplitWord(w)) {
	continue;
      }
#ifdef LOGIC_HACK
      /* never split right after a low-priority bunsetsu */
      if (w->nw_flags & NW_LOWPRI) {
	  DontSplitWord(w);
	  continue;
      }
#endif
      /* skip words carrying both NW_PRE and NW_SUC */
      if ((w->nw_flags & NW_PRE) && (w->nw_flags & NW_SUC)) {
	continue;
      }
      /* consumes no reading */
      l1 = w->nw_ylen;
      if (l1 <= 0) {
	continue;
      }
      /* the candidate covers the rest of the reading: when flushing,
       * a single bunsetsu is taken as the longest choice */
      if (flush && (unsigned)yy + w->nw_ylen == cx->store->nyomi) {
	L1 = l1;
	break;
      }
#ifdef BUNMATU
      /* a word for which OnlyBunmatu() holds cannot end the bunsetsu
       * unless the first following candidate is a literal
       * NOTE(review): xq[l1].tree is dereferenced without a null check */
      else if (OnlyBunmatu(w) && xq[l1].tree->nw_lit == 0) {
	DontSplitWord(w);
	continue;
      }
#endif
#ifdef LOGIC_HACK
      /* P_KJ words (presumably single kanji) take no part in splitting */
      if (w->nw_rowcol == cx->gram->P_KJ) {
	  DontSplitWord(w);
	  continue;
      }
#endif
      /* summarise the following bunsetsu, through the cache if the
       * length fits into it */
      if (l1 <= maxary) {
	if (!ul2[l1].l2)
	  evalSplit(cx, xq[l1].tree, &ul2[l1]);
	ul = ul2[l1];
      }
      else {
	evalSplit(cx, xq[l1].tree, &ul);
      }
      /* compare with the best choice so far */
      l = l1 + ul.l2;
#ifdef LOGIC_HACK
      u = w->nw_prio + ul.u2;
      if ((L < l) || /* two bunsetsu together are longer */
	  ((L == l) &&
	   (U < u || /* larger sum of priorities */
	    (U == u && (L2 < ul.l2))))) { /* longer second bunsetsu */
	  L = l;
	  U = u;
	  L1 = l1;
	  L2 = ul.l2;
      }
#else
      if ((((int)L < l)) ||
	  (((int)L == l) &&  (U2 < ul.u2)) ||
	  (((int)L == l) &&  (U2 == ul.u2) && ((int)L2 < ul.l2))
	  ) {
	L = l;
	L1 = l1;
	L2 = ul.l2;
	U2 = ul.u2;
      }
#endif
    }
  }
#ifdef USE_MALLOC_FOR_BIG_ARRAY
  (void)free((char *)ul2);
#endif
  return L1;
}
1625 
1626 static
1627 int
splitBun(cx,yy,ys,ye)1628 splitBun(cx, yy, ys, ye)
1629      struct RkContext	*cx;
1630      int			yy, ys, ye;
1631 {
1632   struct nstore			*st = cx->store;
1633   struct nqueue	*xq = st->xq;
1634   struct nword		*w;
1635   int		 		maxclen;
1636   int				i, count, junk;
1637 
1638 /* create the initial bun-tree table */
1639     xq[0].tree = parseBun(cx, yy, ys, ye, 1, 1, &maxclen);
1640 
1641 #ifdef TEST
1642   {
1643     printf("show splitBun [yy = %d, ys = %d, ye = %d, clen = %d]\n",
1644 	   yy, ys, ye, maxclen);
1645 #if 1
1646     showWord(xq[0].tree);
1647 #endif
1648   }
1649 #endif
1650 
1651     for (i = 1; i <= maxclen; i++)
1652 	clearQue(&xq[i]);
1653 /* create the following buns from every possible position */
1654     for (w = xq[0].tree; w; w = w->nw_next) {
1655 	 if (CanSplitWord(w) && !xq[w->nw_ylen].tree) {
1656 	     int	len = w->nw_ylen;
1657 	     int	ys1 = (ys >= len) ? (ys - len) : 0;
1658 	     int	ye1 = (ye - len);
1659 
1660 	     xq[w->nw_ylen].tree = parseBun(cx, yy+len, ys1, ye1, 1, 1, &junk);
1661 	   };
1662      };
1663 
1664 /* compute the proper bunsetu length */
1665     count = calcSplit(cx, yy, xq[0].tree, xq, maxclen, 1);
1666     _RkFreeQue(st, 0, st->maxxq + 1);
1667 
1668 #ifdef TEST
1669   printf("End SplitBun\n");
1670 #endif
1671 
1672     return count;
1673 }
1674 
1675 /* parseQue
1676  *	queue jou de bunsetu wo kaiseki suru.
1677  */
1678 
1679 static void parseQue pro((struct RkContext *, int, int, int, int, int));
1680 
1681 static void
parseQue(cx,maxq,yy,ys,ye,doflush)1682 parseQue(cx, maxq, yy, ys, ye, doflush)
1683      struct RkContext	*cx;
1684      int		maxq;
1685      int		yy, ys, ye;
1686      int		doflush;
1687 {
1688   struct nstore		 *st = cx->store;
1689   struct nqueue *xq = st->xq;
1690   struct nword	 **xqh = st->xqh;
1691   int		 i, j;
1692 
1693 /* put a new seed to start an analysis. */
1694     if (!xq[0].tree) {
1695 	xq[0].tree = allocWord(st, cx->gram->P_BB);
1696 	xq[0].maxlen = 0;
1697 	xq[0].status = 0;
1698     }
1699 /* try to extend each tree in the queue. */
1700   for (i = 0; i <= maxq; i++) {
1701     if (xq[i].tree) {
1702       int old = cx->poss_cont;
1703       list2height(xqh, xq[i].maxlen, xq[i].tree);
1704       xq[i].maxlen = doParse(cx, yy, ys, ye, xqh, xq[i].maxlen, doflush, 1);
1705       /* set up new analysis points */
1706       for (j = 0; j <= xq[i].maxlen; j++)
1707 	if (xqh[j] && !xq[i+j].tree) {
1708 	  xq[i+j].tree = allocWord(st, cx->gram->P_BB);
1709 	  xq[i+j].maxlen = 0;
1710 	  xq[i+j].status = 0;
1711 	  xq[i+j].status = 0x80;
1712 	}
1713       xq[i].tree = height2list(xqh, xq[i].maxlen);
1714       if (cx->poss_cont != old)
1715          xq[i].status |=  0x80;
1716       else
1717          xq[i].status &= ~0x80;
1718     }
1719     ++yy;
1720     if (--ys < 0)  ys = 0;
1721     --ye;
1722   }
1723 }
1724 
1725 /* Que2Bun
1726  *	queue kara bunsetu wo toridasu.
1727  */
1728 static
1729 int
IsStableQue(cx,c,doflush)1730 IsStableQue(cx, c, doflush)
1731      struct RkContext	*cx;
1732      int		c;
1733      int		doflush;
1734 {
1735   struct nqueue	*xq = cx->store->xq;
1736   struct nword	*w;
1737 
1738   if (doflush)
1739   {
1740     if (xq[c].maxlen <= 0)
1741       return 0;
1742     else
1743       return 1;
1744   };
1745   if (xq[c].maxlen <= 0)
1746     return(!c ? 0 : 1);
1747 
1748   for (w = xq[c].tree; w; w = w->nw_next)
1749   {
1750      if (xq[c + w->nw_ylen].status)
1751        return 0;
1752      if (!c && w->nw_ylen && !IsStableQue(cx, c + w->nw_ylen, doflush))
1753        return 0;
1754   };
1755   return 1;
1756 }
1757 
1758 static
1759 int
Que2Bun(cx,yy,ys,ye,doflush)1760 Que2Bun(cx, yy, ys, ye, doflush)
1761      struct RkContext	*cx;
1762      int		yy, ys, ye;
1763      int		doflush;
1764 {
1765   struct nstore	*st = cx->store;
1766   struct nqueue	*xq = st->xq;
1767   unsigned	i;
1768   struct NVE	*p, **r;
1769 
1770   if (doflush)
1771     for (i = 0; (int)i <= st->maxxq; i++)
1772       xq[i].status = 0;
1773   while (IsStableQue(cx, 0, doflush)) {
1774     struct nbun	*bun = &st->bunq[st->maxbun];
1775     int				count;
1776 
1777     i = 0;
1778     if (!doflush) {
1779       if (cx->nv && cx->nv->tsz && cx->nv->buf) {
1780 	r = cx->nv->buf + *(st->yomi + yy) % cx->nv->tsz;
1781 	for (p = *r; p; p = p->next) {
1782 	  if (positiveRev(p->data, (Wrec *)(st->yomi + yy), st->nyomi - yy)) {
1783 	    if (*(p->data + 1) > i)
1784 	      i = *(p->data + 1);
1785 	  }
1786 	}
1787       }
1788       if (i > st->nyomi - yy)
1789 	break;
1790     }
1791     if ((count = calcSplit(cx, yy, xq[0].tree, xq, xq[0].maxlen, 1)) > 0) {
1792       /* shift queue to left */
1793       _RkFreeQue(st, 0, count);
1794       for (i = count; (int)i <= st->maxxq; i++) {
1795 	xq[i-count] = xq[i];
1796 	clearQue(&xq[i]);
1797       };
1798       bun->nb_curlen = count;
1799       storeBun(cx, (int)bun->nb_yoff, 0, ye, bun);
1800       st->maxbun++;
1801       st->bunq[st->maxbun].nb_yoff = yy + bun->nb_curlen;
1802     }
1803     yy = yy + bun->nb_curlen;
1804     ys = ys - bun->nb_curlen;
1805     ye = ye - bun->nb_curlen;
1806   }
1807   return st->maxbun;
1808 }
1809 
/* _RkRenbun2
 *	Re-convert everything from the current bunsetsu to the right.
 *
 *	cx		context; works on cx->store
 *	firstlen	length to force on the first bunsetsu, 0 to let
 *			splitBun() decide
 *
 *	Starting at bunq[curbun], bunsetsu are rebuilt one after another
 *	until the reading is used up.  From the second bunsetsu on, and
 *	only when no unconverted reading is pending, an old bunsetsu that
 *	starts at the same offset is reused and the rest of the old result
 *	is kept.  st->curbun is restored before returning.
 *	Returns the new number of bunsetsu (st->maxbun).
 */
int
_RkRenbun2(cx, firstlen)
     struct RkContext	*cx;
     int		firstlen;  /* forced bunsetsu length (or 0) */
{
  struct nstore		*st = cx->store;
  struct nbun	*bun = &st->bunq[st->curbun];
  int			count;
  int			yy, ys, ye;		/* range of the reading to search */
  int			oldcurbun = st->curbun;
  int			uyomi;			/* reading behind the last bunsetsu */
  int			i;

  yy = bun->nb_yoff;
  ys = 0;
  ye = st->nyomi - bun->nb_yoff;
/* release queue */
  uyomi = st->nyomi - st->bunq[st->maxbun].nb_yoff;
  if (IS_XAUTCTX(cx)) {
    if (uyomi >= 0)
      _RkFreeQue(st, 0, uyomi+1);
  };
/*
 * one iteration per bunsetsu; ye is the number of characters left
 */
  for (count = 0; ye > 0; count++)
  {
/* if this position was analysed before, reuse that result */
    if (count && !uyomi)
    {
      int	b, c;
      for (b = st->curbun; b < (int)st->maxbun; b++)
	if (st->bunq[b].nb_yoff == yy) {
	  /* dispose inbetween bun-trees  */
	  for (c = st->curbun; c < b; c++) {
	    freeWord(st, st->bunq[c].nb_cand);
	    st->bunq[c].nb_cand = (struct nword *)0;
	  }
	  /* shift bunq forward */
	  while (b < (int)st->maxbun)
	    st->bunq[st->curbun++] = st->bunq[b++];
	  goto	exit;
	}
    }
/* dispose the current bun-tree */
    if (st->curbun < (int)st->maxbun) {
      freeWord(st, bun->nb_cand);
      bun->nb_cand = (struct nword *)0;
    }
    /* compute the length of bun */
    if (st->curbun >= (int)st->maxbunq)	/* too many buns */
      bun->nb_curlen = ye;
    else {
      if (firstlen) { 			/* length specified */
	bun->nb_curlen = firstlen;
	firstlen = 0;			/* applies to the first bunsetsu only */
      } else {
      /* splitBun() overwrites the queue */
	bun->nb_curlen = splitBun(cx, yy, ys, ye);
	if (!bun->nb_curlen)		/* fail to split */
	  bun->nb_curlen = ye;
      }
    }
/* set up bun (xqh is destroyed) */
    storeBun(cx, yy, ys, ye, bun);
#if defined(TEST) && 0
	showWord(bun->nb_cand);
#endif
    /* advance to the position behind this bunsetsu */
    yy += bun->nb_curlen;
    if ((ys -= (int)bun->nb_curlen) < 0)
      ys = 0;
    ye -= bun->nb_curlen;
    bun++;
    st->curbun++;
  }
/* free the remaining bun-trees */
  while ((int)st->maxbun > st->curbun) {
    freeWord(st, st->bunq[--st->maxbun].nb_cand);
    st->bunq[st->maxbun].nb_cand = (struct nword *)0;
  }
/* do final settings */
 exit:
    st->maxbun = st->curbun;
    st->curbun = oldcurbun;
    /* recompute the offset behind the last bunsetsu as the sum of all
     * bunsetsu lengths */
    st->bunq[st->maxbun].nb_yoff = 0;
/* i hate this fake, ... */
    for (i = 0; i < (int)st->maxbun; i++)
      st->bunq[st->maxbun].nb_yoff += st->bunq[i].nb_curlen;
/* this case will never happen */
    if (0 != (st->nyomi - st->bunq[st->maxbun].nb_yoff))
	_Rkpanic("Renbun2: uyomi destroyed %d %d\n",
		st->nyomi, st->bunq[st->maxbun].nb_yoff, 0);
    bun = &st->bunq[st->maxbun];
    /* NOTE(review): _RkSubstYomi() may replace cx->store; st is used
     * afterwards without being reloaded -- confirm this cannot happen
     * for a same-length substitution */
    if (IS_XAUTCTX(cx) && uyomi > 0)
    {
      _RkSubstYomi(cx, 0, uyomi, st->yomi + bun->nb_yoff, uyomi);
      st->curbun = oldcurbun;
    };
    return st->maxbun;
}
1913 
1914 /* RkSubstYomi
1915  */
1916 int
_RkSubstYomi(cx,ys,ye,yomi,newLen)1917 _RkSubstYomi(cx, ys, ye, yomi, newLen)
1918      struct RkContext	*cx;
1919      int		ys;
1920      int		ye;
1921      Wchar		*yomi;
1922      int		newLen;
1923 {
1924   struct nstore		*st = cx->store;
1925   extern struct nstore	*_RkReallocBunStorage();
1926   struct nbun	*bun;
1927   struct nqueue		*xq;
1928   struct nword		**xqh;
1929   int			i, j;
1930   int			count;
1931   int			yf;
1932   int			cs, ce, cf;
1933   Wchar			*d, *s, *be;
1934   int			nbun;
1935   int			new_size;
1936 
1937   yf = ys + newLen;
1938   cs = ys;
1939   ce = ye;
1940   /*
1941    * STEP 0:	reallocate resources if needed
1942    *		youmigana buffer should be reallocated as well.
1943    */
1944   new_size = st->nyomi + (newLen - (ye - ys));
1945   if (new_size > (int)st->maxyomi || new_size > (int)st->maxbunq ||
1946       new_size > (int)st->maxxq)
1947   {
1948       st = _RkReallocBunStorage(st, (int)(new_size*1.2+10));
1949       if (!st)
1950 	  return -1;
1951       cx->store = st;
1952   };
1953   /*
1954    * STEP 1:	update yomigana buffer
1955    */
1956   /* move unchanged text portion [ye, ...) */
1957   bun = &st->bunq[st->maxbun];
1958   be = st->yomi + bun->nb_yoff;
1959   xq = st->xq;
1960   xqh = st->xqh;
1961   count = (st->nyomi - bun->nb_yoff) - ye;
1962   if (yf < ye) {	/* shrunk */
1963     d = be + yf;
1964     s = be + ye;
1965     while (count--) *d++ = *s++;
1966   } else if (ye < yf) {	/* enlarged */
1967     d = (s = st->yomi + st->nyomi) + count;
1968     while (count--)
1969       *--d = *--s;
1970   }
1971   /* replace the new text in [ys, yf) */
1972   usncopy(be + ys, yomi, newLen);
1973   st->nyomi += (yf - ye);
1974   cf = yf;
1975   /*
1976    *  STEP 2:	remove affected words from XQ
1977    */
1978 /* Trim the words which terminate in [cs, ...) */
1979 
1980   for (i = 0; i < cs; i++)
1981     if (xq[i].tree && cs - i <= xq[i].maxlen) {
1982       list2height(xqh, xq[i].maxlen, xq[i].tree);
1983       for (j = cs - i; j < xq[i].maxlen; j++)
1984 	if (xqh[j + 1]) {
1985 	  freeWord(st, xqh[j + 1]);
1986 	  xqh[j + 1] = (struct nword *)0;
1987 	}
1988       xq[i].maxlen = 0;
1989       for (j = cs - i ; j >= 0 && !xqh[j] ;) {
1990 	j--;
1991       }
1992       if (j > 0)
1993 	xq[i].maxlen = j;
1994       else {
1995 	xq[i].maxlen = 0;
1996 	if (!j) {
1997 	  freeWord(st, xqh[0]);
1998 	  xqh[0] = (struct nword *)0;
1999 	}
2000       }
2001       xq[i].tree = height2list(xqh, xq[i].maxlen);
2002       xq[i].status = 0;
2003     }
2004   /*  Kill the whole trees in  [cs, ce) and shift XQ to fill it. */
2005   _RkFreeQue(st, cs, ce);
2006   if (cf < ce)
2007     for (i = cf, j = ce; j <= st->maxxq; i++, j++) {
2008       xq[i] = xq[j];
2009       clearQue(&xq[j]);
2010     }
2011   if (ce < cf)
2012     for (i = st->maxxq, j = st->maxxq - (cf - ce); j >= ce; i--, j--) {
2013       xq[i] = xq[j];
2014       clearQue(&xq[j]);
2015     }
2016   /*
2017    * STEP 3	restore queues by parsing yomigana after ys.
2018    */
2019   nbun = st->maxbun;
2020   count = (st->nyomi - bun->nb_yoff) - ys;
2021   while (count > 0) {
2022     int		yy;
2023     yy = st->bunq[st->maxbun].nb_yoff;
2024     ys = st->nyomi - yy - count;
2025     parseQue(cx, cf-1, yy, ys, ys + 1, 0);
2026     nbun = Que2Bun(cx, yy, ys, ys + 1, 0);
2027     ys++;
2028     count--;
2029   }
2030   st->curbun = 0;
2031   return nbun;
2032 }
2033 
2034 /* RkFlushYomi
2035  */
2036 int
_RkFlushYomi(cx)2037 _RkFlushYomi(cx)
2038      struct RkContext	*cx;
2039 {
2040     int		yy = cx->store->bunq[cx->store->maxbun].nb_yoff;
2041     int		ys = cx->store->nyomi - yy;
2042     int		ret;
2043 
2044     parseQue(cx, cx->store->maxxq, yy, ys, ys, 1);
2045     if ((ret = Que2Bun(cx, yy, ys, ys, 1)) != -1)
2046       cx->store->curbun = 0;
2047     return(ret);
2048 }
2049 
/* _RkLearnBun
 *	Learn from the bunsetsu information,
 *	and in addition release its words.
 */
/* blkcpy
 *	Copy the bytes [s, e) to d, lowest address first.  Nothing is
 *	copied when s >= e.
 */
static
void	blkcpy(d, s, e)
     unsigned char	*d;
     unsigned char	*s, *e;
{
  for (; s < e; s++, d++)
    *d = *s;
}
2059 
2060 static
2061 void
doLearn(cx,thisW)2062 doLearn(cx, thisW)
2063      struct RkContext	*cx;
2064      struct nword	*thisW;
2065 {
2066   struct nword	*leftW;
2067 #ifndef USE_MALLOC_FOR_BIG_ARRAY
2068   unsigned char	*candidates[RK_CAND_NMAX];
2069   unsigned 	permutation[RK_CAND_NMAX];
2070   unsigned char	tmp[RK_WREC_BMAX];
2071 #else
2072   unsigned char **candidates, *tmp;
2073   unsigned *permutation;
2074   candidates = (unsigned char **)
2075     malloc(sizeof(unsigned char *) * RK_CAND_NMAX);
2076   permutation = (unsigned *)malloc(sizeof(unsigned) * RK_CAND_NMAX);
2077   tmp = (unsigned char *)malloc(RK_WREC_BMAX);
2078   if (!candidates || !permutation || !tmp) {
2079     if (candidates) (void)free((char *)candidates);
2080     if (permutation) (void)free((char *)permutation);
2081     if (tmp) (void)free((char *)tmp);
2082     return;
2083   }
2084 #endif
2085 
2086   for (; (leftW = thisW->nw_left) != (struct nword *)0 ; thisW = leftW) {
2087     struct ncache	*thisCache = thisW->nw_cache;
2088 
2089     if (thisCache) {
2090       struct DM		*dm = thisCache->nc_dic;
2091       struct DM		*qm = thisW->nw_freq;
2092       unsigned char	*wp;
2093       int		ncands;
2094       int		nl;
2095       unsigned long	offset;
2096       int		i;
2097       int		current;
2098       unsigned long	_RkGetOffset();
2099 
2100       cx->time = _RkGetTick(1);
2101       if (thisCache->nc_flags & NC_ERROR)
2102 	continue;
2103       if (!(wp = thisCache->nc_word))
2104 	continue;
2105       ncands = _RkCandNumber(wp);
2106       nl = (*wp >> 1) & 0x3f;
2107       if (qm && qm->dm_qbits)
2108 	offset = _RkGetOffset((struct ND *)dm->dm_extdata.var, wp);
2109       else
2110 	offset = 0L;
2111       if (*wp & 0x80)
2112 	wp += 2;
2113       wp += 2 + nl * 2;
2114       for (i = 0;  i < ncands;  i++) {
2115 	candidates[i] = wp;
2116 	wp += 2 * ((*wp >> 1) & 0x7f) + 2;
2117       };
2118 /*
2119       if (thisCache->nc_count)
2120 	continue;
2121 */
2122       if (qm && qm->dm_qbits) {
2123 	int		bits;
2124 
2125 	if (!(qm->dm_flags & DM_WRITABLE))
2126 	  continue;
2127 	bits = _RkCalcLog2(ncands + 1) + 1;
2128 	_RkUnpackBits(permutation, qm->dm_qbits, offset, bits, ncands);
2129 	for (current = 0; current < ncands; current++)
2130 	  if (ncands > (int)permutation[current]/2 &&
2131 	      candidates[permutation[current]/2] == thisW->nw_kanji)
2132 	    break;
2133 	if (current < ncands) {
2134 	  entryRut(qm->dm_rut, thisW->nw_csn, cx->time);
2135 	  if (0 < current) {
2136             _RkCopyBits(tmp, (unsigned long) 0L, bits,
2137                         qm->dm_qbits, (unsigned long) offset, current);
2138             _RkCopyBits(qm->dm_qbits, (unsigned long) (offset + 0L), bits,
2139                         qm->dm_qbits, (unsigned long) (offset + current*bits),
2140 			1);
2141             _RkCopyBits(qm->dm_qbits, (unsigned long) (offset + bits), bits,
2142                         tmp, (unsigned long) 0L, current);
2143 
2144 	  };
2145 	  qm->dm_flags |= DM_UPDATED;
2146 	}
2147       } else {
2148 	if (!(dm->dm_flags & DM_WRITABLE))
2149 	  continue;
2150 	for (current = 0; current < ncands; current++)
2151 	  if (candidates[current] == thisW->nw_kanji)
2152 	    break;
2153 	if (DM2TYPE(dm)) {
2154 	  if (current) {
2155 	    unsigned char	*t = candidates[0];
2156 	    unsigned char	*l = candidates[current];
2157 	    unsigned char	*c = l + 2 * ((*l >> 1) & 0x7f) + 2;
2158 
2159 	    ((struct TW *)thisCache->nc_address)->lucks[1]
2160 	      = ((struct TW *)thisCache->nc_address)->lucks[0];
2161 	    blkcpy(tmp, t, l);
2162 	    blkcpy(t, l, c);
2163 	    blkcpy(t + (int)(c - l), tmp, tmp + (int)(l - t));
2164 	    thisCache->nc_flags |= NC_DIRTY;
2165 	  }
2166 	  ((struct TW *)thisCache->nc_address)->lucks[0] = cx->time;
2167 	  dm->dm_flags |= DM_UPDATED;
2168 	}
2169       }
2170     }
2171   }
2172 #ifdef USE_MALLOC_FOR_BIG_ARRAY
2173   (void)free((char *)candidates);
2174   (void)free((char *)permutation);
2175   (void)free((char *)tmp);
2176 #endif
2177 }
2178 
2179 void
_RkLearnBun(cx,cur,mode)2180 _RkLearnBun(cx, cur, mode)
2181      struct RkContext	*cx;
2182      int		cur, mode;
2183 {
2184   struct nstore	*st = cx->store;
2185   struct nbun	*bun = &st->bunq[cur];
2186   struct nword	*w;
2187   int		count = bun->nb_curcand;
2188   Wchar		*yomi = st->yomi + bun->nb_yoff;
2189   int		ylen;
2190   int		pos;
2191 
2192   derefWord(bun->nb_cand);
2193   if (mode) {
2194     if (bun->nb_flags & RK_REARRANGED) {
2195       ylen = bun->nb_curlen
2196 	+ (cur < (int)st->maxbun - 1 ? (bun + 1)->nb_curlen : 0);
2197       pos = bun->nb_curlen;
2198       if (ylen < 32) {
2199 	Wchar *ey = yomi + ylen, *p;
2200 #ifndef USE_MALLOC_FOR_BIG_ARRAY
2201 	Wrec yomwrec[32 * sizeof(Wchar)];
2202 	Wrec *dp = yomwrec;
2203 #else
2204 	Wrec *dp;
2205 	Wrec *yomwrec = (Wrec *)malloc(sizeof(Wrec) * 32 * sizeof(Wchar));
2206 	if (!yomwrec) {
2207 	  return;
2208 	}
2209 	dp = yomwrec;
2210 #endif
2211 	for (p = yomi ; p < ey ; p++) {
2212 	  *dp++ = (unsigned)*p >> 8;
2213 	  *dp++ = (unsigned)*p & 0x0ff;
2214 	}
2215 	_RkRegisterNV(cx->nv, yomwrec, ylen, pos);
2216 #ifdef USE_MALLOC_FOR_BIG_ARRAY
2217 	(void)free((char *)yomwrec);
2218 #endif
2219       }
2220     }
2221     for (w = bun->nb_cand; w; w = w->nw_next) {
2222       if (CanSplitWord(w) && w->nw_ylen == bun->nb_curlen) {
2223 	if (count-- <= 0) {
2224 	  doLearn(cx, w);
2225 	    break;
2226 	}
2227       }
2228     }
2229   }
2230   killWord(st, bun->nb_cand);
2231 }
2232 
2233 /* vim: set sw=2: */
2234