1 /**
2  * @file   factoring_sub.c
3  *
4  * <JA>
5  * @brief  ���쥹������factoring�׻�����1�ѥ���
6  *
7  * ���Υե�����ˤϡ��裱�ѥ��ˤ����Ƹ��쥹������ factoring ��Ԥ������
8  * �ؿ����ޤޤ�Ƥ��ޤ�. �ڹ�¤�������ǤΥ��֥ĥ꡼���ñ��ꥹ��
9  * (successor list) �ι��ۡ������ǧ����θ��쥹�����׻��롼����
10  * �ޤޤ�ޤ�.
11  *
12  * successor list �ϡ��ڹ�¤������γƥΡ��ɤ˳���դ����롤
13  * ���ΥΡ��ɤ�ͭ����ñ��Υꥹ�ȤǤ�. �ڹ�¤������ˤ����ơ�
14  * ����ʬ�μ��ΥΡ��ɤ����Υꥹ�Ȥ��ݻ����ޤ�. �ºݤˤϥꥹ�Ȥ��Ѳ�����
15  * ��ꡤ���ʤ���ڹ�¤������λޤ�ʬ�����˳���դ����ޤ�.
16  * �㤨�С��ʲ��Τ褦���ڹ�¤������ξ�硤�����ν��Ƥ���Ρ��ɤ�
17  * successor list ������դ����ޤ�.
18  * <pre>
19  *
20  *        2-o-o - o-o-o - o-o-o          word "A"
21  *       /
22  *  1-o-o
23  *       \       4-o-o                   word "B"
24  *        \     /
25  *         3-o-o - 5-o-o - 7-o-o         word "C"
26  *              \        \
27  *               \        8-o-o          word "D"
28  *                6-o-o                  word "E"
29  * </pre>
30  *
31  * �� successor list �Ϥ��Υ��֥ĥ꡼�˴ޤޤ��ñ��Υꥹ�ȤǤ�.
32  * ������Ǥϰʲ��Τ褦�ˤʤ�ޤ�.
33  *
34  * <pre>
35  *   node  | successor list (wchmm->state[node].sc)
36  *   =======================
37  *     1   | A B C D E
38  *     2   | A
39  *     3   |   B C D E
40  *     4   |   B
41  *     5   |     C D
42  *     6   |         E
43  *     7   |     C
44  *     8   |       D
45  * </pre>
46  *
47  * ���� successor list �˴ޤޤ��ñ�줬���Ĥˤʤä��Ȥ������λ�����
48  * ñ�줬���ꤹ��. �嵭�ξ�硤ñ�� "A" �ϥΡ��� 2 �ΰ��֤Ǥ��Ǥ�
49  * ���θ�³ñ��Ȥ��� "A" �ʳ�̵���Τǡ������dz��ꤹ��.
50  * ���ʤ����ñ�� A �����Τʸ��쥹�����ϡ�ñ�콪ü���Ԥ����Ρ��� 2 �Ƿ�ޤ�.
51  *
52  * �裱�ѥ��ˤ����� factoring �η׻��ϡ��ºݤˤ� beam.c �ǹԤʤ���.
53  * 2-gram factoring�ξ�硤���Ρ��ɤ� successor list ��¸�ߤ����,
54  * ���� successor list ��ñ��� 2-gram �κ����ͤ���, ���¤��Ƥ��Ƥ���
55  * factoring �ͤ�������. successor list ��ñ�줬1�ĤΥΡ��ɤǤϡ�
56  * ������2-gram����ưŪ�˳�����Ƥ���.
57  * 1-gram factoring�ξ�硤���Ρ��ɤ� successor list ��¸�ߤ����硤
58  * ���� successor list ��ñ��� 1-gram �κ����ͤ��ᡤ���¤��Ƥ��Ƥ���
59  * factoring �ͤ�������. successor list ��ñ�줬���ĤΥΡ��ɤǡ��Ϥ����
60  * 2-gram ��׻�����.
61  *
62  * �ºݤǤ� 1-gram factoring �Ǥϳ� successor list �ˤ����� factoring ��
63  * ��ñ����������¸�ʤΤǡ�successor list ���ۻ������Ƥ��餫����׻�����
64  * ����. ���ʤ����������ư�����ڹ�¤��������۸塤successor list
65  * ���ۤ����顤ñ���2�İʾ�ޤ� successor list �ˤĤ��ƤϤ��� 1-gram ��
66  * �����ͤ�׻����ơ�������ΥΡ��ɤ� fscore ���Ф˳�Ǽ���Ƥ���������
67  * successor list �� free ���Ƥ��ޤ��Ф褤. ñ�줬���ĤΤߤ� successor list
68  * �ˤĤ��ƤϤ���ñ��ID��Ĥ��Ƥ�����õ�����˥ѥ�����������ã������
69  * ���Τ�2-gram��׻�������ɤ�.
70  *
71  * DFAʸˡ���ѻ��ϡ��ǥե���ȤǤϸ�������(���ƥ���������)��
72  * ���ƥ���ñ�̤��ڤ��ۤ��뤳�Ȥ���Ū��ɽ������. ���Τ��ᡤ
73  * ������ factoring �������Ѥ����ʤ�. ��������
74  * CATEGORY_TREE �� undefined �Ǥ���С�����Ū factoring ���Ѥ�����������
75  * Ŭ�Ѥ�Ԥ����Ȥ��ǽ�Ǥ���.
76  * ���ʤ�������Ρ��ɤ� successor list ��¸�ߤ����,
77  * ���� successor list ��γ�ñ���ľ��ñ���ñ���������Ĵ��,
78  * ���Τ�����ĤǤ���³��ǽ��ñ�줬����С��������ܤ��������Ĥ�
79  * �ʤ�������ܤ����ʤ�. ���ε�ǽ�ϵ��ѻ��ͤΤ���˻Ĥ���Ƥ���ΤߤǤ���.
80  * </JA>
81  *
82  * <EN>
83  * @brief  LM factoring on 1st pass.
84  * </EN>
85  *
86  * This file contains functions to do language score factoring on the 1st
87  * pass.  They build a successor lists which holds the successive words in
88  * each sub tree on the tree lexicon, and also provide a factored LM
89  * probability on each nodes on the tree lexicon.
90  *
91  * The "successor list" will be assigned for each lexicon tree node to
92  * represent a list of words that exist in the sub-tree and share the node.
93  * Actually they will be assigned to the branch node.
94  * Below is the example of successor lists on a tree lexicon, in which
95  * the lists is assigned to the numbered nodes.
96  *
97  * <pre>
98  *         2-o-o - o-o-o - o-o-o          word "A"
99  *        /
100  *   1-o-o
101  *        \       4-o-o                   word "B"
102  *         \     /
103  *          3-o-o - 5-o-o - 7-o-o         word "C"
104  *           \            \
105  *            \            8-o-o          word "D"
106  *             6-o-o                      word "E"
107  * </pre>
108  *
109  * The contents of the successor lists are the following:
110  *
111  * <pre>
112  *  node  | successor list (wchmm->state[node].sc)
113  *  =======================
114  *    1   | A B C D E
115  *    2   | A
116  *    3   |   B C D E
117  *    4   |   B
118  *    5   |     C D
119  *    6   |         E
120  *    7   |     C
121  *    8   |       D
122  * </pre>
123  *
124  * When the 1st pass proceeds, if the next going node has a successor list,
125  * all the word 2-gram scores in the successor list on the next node
126  * will be computed, and the propagating LM value in the token on
127  * the current node will be replaced by the maximum value of the scores
128  * when copied to the next node.  Appearently, if the successor list has
129  * only one word, it means that the word can be determined on that point,
130  * and the precise 2-gram value will be assigned as is.
131  *
132  * When using 1-gram factoring, the computation will be slightly different.
133  * Since the factoring value (maximum value of 1-gram scores on each successor
134  * list) is independent of the word context, they can be computed statically
135  * before the search.  Thus, for all the successor lists that have more than
136  * two words, the maximum 1-gram value is computed and stored to
137  * "fscore" member in tree lexicon, and the successor lists will be freed.
138  * The successor lists with only one word should still remain in the
139  * tree lexicon, to compute the precise 2-gram scores for the words.
140  *
141  *
142  * When using DFA grammar, Julian builds separated lexicon trees for every
143  * word categories, to statically express the catergory-pair constraint.
144  * Thus these factoring scheme is not used by default.
145  * However you can still force Julian to use the grammar-based
146  * deterministic factoring scheme by undefining CATEGORY_TREE.
147  * If CATEGORY_TREE is undefined, the word connection constraint will be
148  * performed based on the successor list at the middle of tree lexicon.
149  * This enables single tree search on Julian.  This function is left
150  * only for technical reference.
151  *
152  * @author Akinobu LEE
153  * @date   Mon Mar  7 23:20:26 2005
154  *
155  * $Revision: 1.3 $
156  *
157  */
158 
159 /*
160  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
161  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
162  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
163  * All rights reserved
164  */
165 
166 #include <julius/julius.h>
167 
168 /*----------------------------------------------------------------------*/
169 
170 /**
171  * <JA>
172  * @brief  �ڹ�¤�������Τ���Ρ��ɤ� successor list ��ñ����ɲä���.
173  *
174  * ���Ǥ�Ʊ��ñ�줬��Ͽ����Ƥ���С���������Ͽ�Ϥ���ʤ�.
175  * ñ���ID�Ǿ������¸�����.
176  *
177  * @param wchmm [i/o] �ڹ�¤������
178  * @param node [in] �Ρ����ֹ�
179  * @param w [in] ñ��ID
180  * </JA>
181  * <EN>
182  * @brief  Add a word to the successor list on a node in tree lexicon.
183  * Words in lists should be ordered by ID.
184  *
185  * @param wchmm [i/o] tree lexicon
186  * @param node [in] node id
187  * @param w [in] word id
188  * </EN>
189  */
190 static void
add_successor(WCHMM_INFO * wchmm,int node,WORD_ID w)191 add_successor(WCHMM_INFO *wchmm, int node, WORD_ID w)
192 {
193   S_CELL *sctmp, *sc;
194 
195   /* malloc a new successor list element */
196   sctmp=(S_CELL *) mymalloc(sizeof(S_CELL));
197   /* assign word ID to the new element */
198   sctmp->word = w;
199   /* add the new element to existing list (keeping order) */
200   if (wchmm->state[node].scid == 0) {
201     j_internal_error("add_successor: sclist id not assigned to branch node?\n");
202   }
203   sc = wchmm->sclist[wchmm->state[node].scid];
204   if (sc == NULL || sctmp->word < sc->word) {
205     sctmp->next = sc;
206     wchmm->sclist[wchmm->state[node].scid] = sctmp;
207   } else {
208     for(;sc;sc=sc->next) {
209       if (sc->next == NULL || sctmp->word < (sc->next)->word) {
210 	if (sctmp->word == sc->word) break; /* avoid duplication */
211 	sctmp->next = sc->next;
212 	sc->next = sctmp;
213 	break;
214       }
215     }
216   }
217 }
218 
219 /**
220  * <JA>
221  * ���ĤΥΡ��ɾ�� successor list �����פ��뤫�ɤ��������å�����
222  *
223  * @param wchmm [in] �ڹ�¤������
224  * @param node1 [in] ���Ĥ�ΥΡ���ID
225  * @param node2 [in] ���Ĥ�ΥΡ���ID
226  *
227  * @return �����˰��פ���� TRUE�����פ��ʤ���� FALSE.
228  * </JA>
229  * <EN>
230  * Check if successor lists on two nodes are the same.
231  *
232  * @param wchmm [in] tree lexicon
233  * @param node1 [in] 1st node id
234  * @param node2 [in] 2nd node id
235  *
236  * @return TRUE if they have the same successor list, or FALSE if they differ.
237  * </EN>
238  */
239 static boolean
match_successor(WCHMM_INFO * wchmm,int node1,int node2)240 match_successor(WCHMM_INFO *wchmm, int node1, int node2)
241 {
242   S_CELL *sc1,*sc2;
243 
244   /* assume successor is sorted by ID */
245   if (wchmm->state[node1].scid == 0 || wchmm->state[node2].scid == 0) {
246     j_internal_error("match_successor: sclist id not assigned to branch node?\n");
247   }
248   sc1 = wchmm->sclist[wchmm->state[node1].scid];
249   sc2 = wchmm->sclist[wchmm->state[node2].scid];
250   for (;;) {
251     if (sc1 == NULL || sc2 == NULL) {
252       if (sc1 == NULL && sc2 == NULL) {
253 	return TRUE;
254       } else {
255 	return FALSE;
256       }
257     } else if (sc1->word != sc2->word) {
258       return FALSE;
259     }
260     sc1 = sc1->next;
261     sc2 = sc2->next;
262   }
263 }
264 
265 /**
266  * <JA>
267  * ����Ρ��ɾ�� successor list ����ˤ���.
268  *
269  * @param wchmm [i/o] �ڹ�¤������
270  * @param scid [in] node id
271  * </JA>
272  * <EN>
273  * Free successor list at the node
274  *
275  * @param wchmm [i/o] tree lexicon
276  * @param scid [in] node id
277  * </EN>
278  */
279 static void
free_successor(WCHMM_INFO * wchmm,int scid)280 free_successor(WCHMM_INFO *wchmm, int scid)
281 {
282   S_CELL *sc;
283   S_CELL *sctmp;
284 
285   /* free sclist */
286   sc = wchmm->sclist[scid];
287   while (sc != NULL) {
288     sctmp = sc;
289     sc = sc->next;
290     free(sctmp);
291   }
292 }
293 
294 /**
295  * <JA>
296  * �ڹ�¤������夫�������ä��줿 successor list �ˤĤ��ơ�
297  * ���μ��Τ������ƥꥹ�Ȥ�ͤ�륬���١������쥯������Ԥ�.
298  *
299  * @param wchmm [i/o] �ڹ�¤������
300  * </JA>
301  * <EN>
302  * Garbage collection of the successor list, by deleting successor lists
303  * to which the link was deleted on the lexicon tree.
304  *
305  * @param wchmm [i/o] tree lexiton
306  * </EN>
307  */
308 static void
compaction_successor(WCHMM_INFO * wchmm)309 compaction_successor(WCHMM_INFO *wchmm)
310 {
311   int src, dst;
312 
313   dst = 1;
314   for(src=1;src<wchmm->scnum;src++) {
315     if (wchmm->state[wchmm->sclist2node[src]].scid <= 0) {
316       /* already freed, skip */
317       continue;
318     }
319     if (dst != src) {
320       wchmm->sclist[dst] = wchmm->sclist[src];
321       wchmm->sclist2node[dst] = wchmm->sclist2node[src];
322       wchmm->state[wchmm->sclist2node[dst]].scid = dst;
323     }
324     dst++;
325   }
326   if (debug2_flag) {
327     jlog("DEBUG: successor list shrinked from %d to %d\n", wchmm->scnum, dst);
328   }
329   wchmm->scnum = dst;
330 }
331 
332 /**
333  * <JA>
334  * successor list �Ѥ˳���դ���줿�����ΰ��ͭ����Ĺ���˽̤��.
335  * ������ۻ��䡤1-gram factoring �Τ���˺�����줿 successor list ʬ��
336  * ������������.
337  *
338  * @param wchmm [i/o] �ڹ�¤������
339  * </JA>
340  * <EN>
341  * Shrink the memory area that has been allocated for building successor list.
342  *
343  * @param wchmm [i/o] tree lexicon
344  * </EN>
345  */
346 static void
shrink_successor(WCHMM_INFO * wchmm)347 shrink_successor(WCHMM_INFO *wchmm)
348 {
349   if (wchmm->sclist) {
350     wchmm->sclist = (S_CELL **)myrealloc(wchmm->sclist, sizeof(S_CELL *) * wchmm->scnum);
351   }
352   if (wchmm->sclist2node) {
353     wchmm->sclist2node = (int *)myrealloc(wchmm->sclist2node, sizeof(int) * wchmm->scnum);
354   }
355 }
356 
357 /**
358  * <JA>
359  * �ڹ�¤�����������Ρ��ɤ� successor list ���ۤ���ᥤ��ؿ�
360  *
361  * @param wchmm [i/o] �ڹ�¤������
362  * </JA>
363  * <EN>
364  * Main function to build whole successor list to lexicon tree.
365  *
366  * @param wchmm [i/o] tree lexicon
367  * </EN>
368  *
369  * @callgraph
370  * @callergraph
371  *
372  */
373 void
make_successor_list(WCHMM_INFO * wchmm)374 make_successor_list(WCHMM_INFO *wchmm)
375 {
376   int node;
377   WORD_ID w;
378   int i;
379   boolean *freemark;
380   int s;
381 
382   jlog("STAT: make successor lists for factoring\n");
383 
384   /* 1. initialize */
385   /* initialize node->sclist index on wchmm tree */
386   for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
387 
388   /* parse the tree to get the maximum size of successor list */
389   s = 1;
390   for (w=0;w<wchmm->winfo->num;w++) {
391     for (i=0;i<wchmm->winfo->wlen[w];i++) {
392       if (wchmm->state[wchmm->offset[w][i]].scid == 0) {
393 	wchmm->state[wchmm->offset[w][i]].scid = s;
394 	s++;
395       }
396     }
397     if (wchmm->state[wchmm->wordend[w]].scid == 0) {
398       wchmm->state[wchmm->wordend[w]].scid = s;
399       s++;
400     }
401   }
402   wchmm->scnum = s;
403   if (debug2_flag) {
404     jlog("DEBUG: initial successor list size = %d\n", wchmm->scnum);
405   }
406 
407   /* allocate successor list for the maximum size */
408   wchmm->sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum);
409   for (i=1;i<wchmm->scnum;i++) wchmm->sclist[i] = NULL;
410   wchmm->sclist2node = (int *)mymalloc(sizeof(int) * wchmm->scnum);
411 
412   /* allocate misc. work area */
413   freemark = (boolean *)mymalloc(sizeof(boolean) * wchmm->scnum);
414   for (i=1;i<wchmm->scnum;i++) freemark[i] = FALSE;
415 
416   /* 2. make initial successor list: assign at all possible nodes */
417   for (w=0;w<wchmm->winfo->num;w++) {
418     /* at each start node of phonemes */
419     for (i=0;i<wchmm->winfo->wlen[w];i++) {
420       wchmm->sclist2node[wchmm->state[wchmm->offset[w][i]].scid] = wchmm->offset[w][i];
421       add_successor(wchmm, wchmm->offset[w][i], w);
422     }
423     /* at word end */
424     wchmm->sclist2node[wchmm->state[wchmm->wordend[w]].scid] = wchmm->wordend[w];
425     add_successor(wchmm, wchmm->wordend[w], w);
426   }
427 
428   /* 3. erase unnecessary successor list */
429   /* sucessor list same as the previous node is not needed, so */
430   /* parse lexicon tree from every leaf to find the same succesor list */
431   for (w=0;w<wchmm->winfo->num;w++) {
432     node = wchmm->wordend[w];	/* begin from the word end node */
433     i = wchmm->winfo->wlen[w]-1;
434     while (i >= 0) {		/* for each phoneme start node */
435       if (node == wchmm->offset[w][i]) {
436 	/* word with only 1 state: skip */
437 	i--;
438 	continue;
439       }
440       if (match_successor(wchmm, node, wchmm->offset[w][i])) {
441 	freemark[wchmm->state[node].scid] = TRUE;	/* mark the node */
442       }
443 /*
444  *	 if (freemark[wchmm->offset[w][i]] != FALSE) {
445  *	   break;
446  *	 }
447  */
448       node = wchmm->offset[w][i];
449       i--;
450     }
451   }
452   /* really free */
453   for (i=1;i<wchmm->scnum;i++) {
454     if (freemark[i] == TRUE) {
455       free_successor(wchmm, i);
456       /* reset node -> sclist link */
457       wchmm->state[wchmm->sclist2node[i]].scid = 0;
458     }
459   }
460   /* garbage collection of deleted sclist */
461   compaction_successor(wchmm);
462 
463   free(freemark);
464 
465   jlog("STAT: done\n");
466 }
467 
468 #ifdef UNIGRAM_FACTORING
469 
470 /**
471  * <JA>
472  * �ڹ�¤�����������Ρ��ɤ� successor list ���ۤ���ᥤ��ؿ�(unigram factoring ��
473  *
474  * @param wchmm [i/o] �ڹ�¤������
475  * </JA>
476  * <EN>
477  * Main function to build whole successor list to lexicon tree for unigram factoring
478  *
479  * @param wchmm [i/o] tree lexicon
480  * </EN>
481  *
482  * @callgraph
483  * @callergraph
484  *
485  */
486 void
make_successor_list_unigram_factoring(WCHMM_INFO * wchmm)487 make_successor_list_unigram_factoring(WCHMM_INFO *wchmm)
488 {
489 
490 #ifndef FAST_FACTOR1_SUCCESSOR_LIST
491 
492   /* old way */
493   make_successor_list(wchmm);
494   calc_all_unigram_factoring_values(wchmm);
495 
496 #else  /* ~FAST_FACTOR1_SUCCESSOR_LIST */
497 
498   /* new way */
499 
500   int node, node2;
501   WORD_ID w, w2;
502   int i, j, n, f;
503   int s;
504   LOGPROB tmpprob;
505 
506   jlog("STAT: make successor lists for unigram factoring\n");
507 
508   /* 1. initialize */
509   /* initialize node->sclist index on wchmm tree */
510   for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
511 
512   /* in unigram factoring, number of successor = vocabulary size */
513   wchmm->scnum = wchmm->winfo->num + 1;
514   if (debug2_flag) {
515     jlog("DEBUG: successor list size = %d\n", wchmm->scnum);
516   }
517 
518   /* allocate successor list */
519   wchmm->sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum);
520   for (i=1;i<wchmm->scnum;i++) wchmm->sclist[i] = NULL;
521   /* sclist2node is not used */
522 
523   /* 2. make successor list, and count needed fscore num */
524   f = 1;
525   s = 1;
526   for (w=0;w<wchmm->winfo->num;w++) {
527     for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
528       if (i < wchmm->winfo->wlen[w]) {
529 	node = wchmm->offset[w][i];
530       } else {
531 	node = wchmm->wordend[w];
532       }
533       if (wchmm->state[node].scid == 0) { /* not assigned */
534 	/* new node found, assign new and exit here */
535 	wchmm->state[node].scid = s++;
536 	if (s > wchmm->scnum) {
537 	  jlog("InternalError: make_successor_list_unigram_factoring: scid num exceeded?\n");
538 	  return;
539 	}
540 	add_successor(wchmm, node, w);
541 	break;
542       } else if (wchmm->state[node].scid > 0) {
543 	/* that node has sclist */
544 	/* move it to the current first isolated node in that word */
545 	w2 = wchmm->sclist[wchmm->state[node].scid]->word;
546 	for(j=i+1;j<wchmm->winfo->wlen[w2] + 1;j++) {
547 	  if (j < wchmm->winfo->wlen[w2]) {
548 	    node2 = wchmm->offset[w2][j];
549 	  } else {
550 	    node2 = wchmm->wordend[w2];
551 	  }
552 	  if (wchmm->state[node2].scid == 0) { /* not assigned */
553 	    /* move sclist to there */
554 	    wchmm->state[node2].scid = wchmm->state[node].scid;
555 	    break;
556 	  }
557 	}
558 	if (j >= wchmm->winfo->wlen[w2] + 1) {
559 	  /* not found? */
560 	  jlog("InternalError: make_successor_list_unigram_factoring: no isolated word for %d\n", w2);
561 	  return;
562 	}
563 	/* make current node as fscore node */
564 	n = f++;
565 	wchmm->state[node].scid = -n;
566 	/* not compute unigram factoring value yet */
567       }
568 
569     }
570   }
571 
572   /* 2. allocate fscore buffer */
573   wchmm->fsnum = f;
574   wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
575   for(n=0;n<wchmm->fsnum;n++) wchmm->fscore[n] = LOG_ZERO;
576 
577   /* 3. parse again to assign fscore values */
578   for (w=0;w<wchmm->winfo->num;w++) {
579     for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
580       if (i < wchmm->winfo->wlen[w]) {
581 	node = wchmm->offset[w][i];
582       } else {
583 	node = wchmm->wordend[w];
584       }
585       if (wchmm->state[node].scid < 0) {
586 	/* update max */
587 	if (wchmm->ngram) {
588 	  tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[w])
589 #ifdef CLASS_NGRAM
590 	    + wchmm->winfo->cprob[w]
591 #endif
592 	    ;
593 	} else {
594 	  tmpprob = LOG_ZERO;
595 	}
596 	if (wchmm->lmvar == LM_NGRAM_USER) {
597 	  tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, w, tmpprob);
598 	}
599 	n = - wchmm->state[node].scid;
600 	if (wchmm->fscore[n] < tmpprob) {
601 	  wchmm->fscore[n] = tmpprob;
602 	}
603       }
604 
605     }
606   }
607 
608 #endif  /* ~FAST_FACTOR1_SUCCESSOR_LIST */
609 
610   jlog("STAT: done\n");
611 }
612 
613 #endif /* UNIGRAM_FACTORING */
614 
615 
616 /**
617  * <JA>
618  * ���ۤ��줿 factoring ����� multipath �Ѥ�Ĵ������. factoring �����,
619  * ��ǥ����Τ����åפ������ܤ�������Ϥ�����β��Ǥإ��ԡ�����.
620  * �ޤ���(���Ϥ�����ʤ�)ʸƬʸˡ�Ρ��ɤ�ñ����Ƭ�Ρ��ɤ��饳�ԡ�����.
621  *
622  * @param wchmm [in] �ڹ�¤������
623  * </JA>
624  * <EN>
625  * Adjust factoring data in tree lexicon for multipath transition handling.
626  *
627  * @param wchmm [in] tree lexicon
628  * </EN>
629  *
630  * @callgraph
631  * @callergraph
632  *
633  */
634 void
adjust_sc_index(WCHMM_INFO * wchmm)635 adjust_sc_index(WCHMM_INFO *wchmm)
636 {
637   WORD_ID w;
638   int i,j,k;
639   HMM_Logical *ltmp;
640   int ltmp_state_num;
641   int ato;
642   LOGPROB prob;
643   int node, scid;
644   A_CELL2 *ac;
645 
646   /* duplicate scid for HMMs with more than one arc from initial state */
647   for(w=0;w<wchmm->winfo->num;w++) {
648     for(k=0;k<wchmm->winfo->wlen[w];k++) {
649       node = wchmm->offset[w][k];
650       scid = wchmm->state[node].scid;
651       if (scid == 0) continue;
652       ltmp = wchmm->winfo->wseq[w][k];
653       ltmp_state_num = hmm_logical_state_num(ltmp);
654       if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) {
655 	j = k + 1;
656 	if (j == wchmm->winfo->wlen[w]) {
657 	  if (wchmm->state[wchmm->wordend[w]].scid == 0) {
658 	    jlog("STAT: word %d: factoring node copied for skip phone\n", w);
659 	    wchmm->state[wchmm->wordend[w]].scid = scid;
660 	  }
661 	} else {
662 	  if (wchmm->state[wchmm->offset[w][j]].scid == 0) {
663 	    jlog("STAT: word %d: factoring node copied for skip phone\n", w);
664 	    wchmm->state[wchmm->offset[w][j]].scid = scid;
665 	  }
666 	}
667       }
668       for(ato=1;ato<ltmp_state_num;ato++) {
669 	prob = (hmm_logical_trans(ltmp))->a[0][ato];
670 	if (prob != LOG_ZERO) {
671 	  wchmm->state[node+ato-1].scid = scid;
672 	}
673       }
674     }
675   }
676 
677   /* move scid and fscore on the head state to the head grammar state */
678   for(i=0;i<wchmm->startnum;i++) {
679     node = wchmm->startnode[i];
680     if (wchmm->state[node].out.state != NULL) {
681       j_internal_error("adjust_sc_index: outprob exist in word-head node??\n");
682     }
683     if (wchmm->next_a[node] != LOG_ZERO) {
684       if (wchmm->state[node+1].scid != 0) {
685 	if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[node+1].scid) {
686 	  j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
687 	}
688 	wchmm->state[node].scid = wchmm->state[node+1].scid;
689 	wchmm->state[node+1].scid = 0;
690       }
691     }
692     for(ac=wchmm->ac[node];ac;ac=ac->next) {
693       for(k=0;k<ac->n;k++) {
694 	if (wchmm->state[ac->arc[k]].scid != 0) {
695 	  if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[ac->arc[k]].scid) {
696 	    j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
697 	  }
698 	  wchmm->state[node].scid = wchmm->state[ac->arc[k]].scid;
699 	  wchmm->state[ac->arc[k]].scid = 0;
700 	}
701       }
702     }
703   }
704 }
705 
706 
707 /* -------------------------------------------------------------------- */
708 /* factoring computation */
709 
710 /**
711  * <JA>
712  * �ڹ�¤�������Ѥ� factoring ����å����������դ����ƽ��������.
713  * ���δؿ��ϥץ���೫�ϻ��˰��٤����ƤФ��.
714  *
715  * @param wchmm [i/o] �ڹ�¤������
716  * </JA>
717  * <EN>
718  * Initialize factoring cache for a tree lexicon, allocating memory for
719  * cache.  This should be called only once on start up.
720  *
721  * @param wchmm [i/o] tree lexicon
722  * </EN>
723  *
724  * @callgraph
725  * @callergraph
726  *
727  */
728 void
max_successor_cache_init(WCHMM_INFO * wchmm)729 max_successor_cache_init(WCHMM_INFO *wchmm)
730 {
731   int i;
732   LM_PROB_CACHE *l;
733   WORD_ID wnum;
734 
735   /* finally shrink the memory area of successor list here */
736   shrink_successor(wchmm);
737 
738   /* for word-internal */
739   l = &(wchmm->lmcache);
740 
741   l->probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum);
742   l->lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum);
743   for (i=0;i<wchmm->scnum;i++) {
744     l->lastwcache[i] = WORD_INVALID;
745   }
746   /* for cross-word */
747   if (wchmm->ngram) {
748     wnum = wchmm->ngram->max_word_num;
749   } else {
750     wnum = wchmm->winfo->num;
751   }
752 #ifdef HASH_CACHE_IW
753   l->iw_cache_num = wnum * jconf.search.pass1.iw_cache_rate / 100;
754   if (l->iw_cache_num < 10) l->iw_cache_num = 10;
755 #else
756   l->iw_cache_num = wnum;
757 #endif /* HASH_CACHE_IW */
758   l->iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * l->iw_cache_num);
759   for (i=0;i<l->iw_cache_num;i++) {
760     l->iw_sc_cache[i] = NULL;
761   }
762 #ifdef HASH_CACHE_IW
763   l->iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * l->iw_cache_num);
764   for (i=0;i<l->iw_cache_num;i++) {
765     l->iw_lw_cache[i] = WORD_INVALID;
766   }
767 #endif
768 }
769 
770 /**
771  * <JA>
772  * ñ��֤� factoring cache �Υ����ΰ���������.
773  *
774  * @param wchmm [i/o] �ڹ�¤������
775  * </JA>
776  * <EN>
777  * Free cross-word factoring cache.
778  *
779  * @param wchmm [i/o] tree lexicon
780  * </EN>
781  */
782 static void
max_successor_prob_iw_free(WCHMM_INFO * wchmm)783 max_successor_prob_iw_free(WCHMM_INFO *wchmm)
784 {
785   int i;
786   LM_PROB_CACHE *l;
787   l = &(wchmm->lmcache);
788   for (i=0;i<l->iw_cache_num;i++) {
789     if (l->iw_sc_cache[i] != NULL) free(l->iw_sc_cache[i]);
790     l->iw_sc_cache[i] = NULL;
791   }
792 }
793 
794 /**
795  * <JA>
796  * factoring �� cache �Υ����ΰ�����Ʋ�������.
797  *
798  * @param wchmm [i/o] �ڹ�¤������
799  * </JA>
800  * <EN>
801  * Free all memory for  factoring cache.
802  *
803  * @param wchmm [i/o] tree lexicon
804  * </EN>
805  *
806  * @callgraph
807  * @callergraph
808  *
809  */
810 void
max_successor_cache_free(WCHMM_INFO * wchmm)811 max_successor_cache_free(WCHMM_INFO *wchmm)
812 {
813   free(wchmm->lmcache.probcache);
814   free(wchmm->lmcache.lastwcache);
815   max_successor_prob_iw_free(wchmm);
816   free(wchmm->lmcache.iw_sc_cache);
817 #ifdef HASH_CACHE_IW
818   free(wchmm->lmcache.iw_lw_cache);
819 #endif
820 }
821 
822 #ifdef UNIGRAM_FACTORING
823 
824 /**
825  * <JA>
826  * @brief  ñ����Ƭ�Ρ��ɤΤ���Factoring �ˤ����ƥ���å��夬ɬ�פʥΡ��ɤ�
827  * �ꥹ�Ȥ��������.
828  *
829  * 1-gram factoring �ϡ��ޥΡ��ɤˤ�����ľ��ñ��˰�¸���ʤ�������
830  * (unigram�κ�����)��Ϳ����. ���Τ��ᡤñ��֤� factoring �׻��ˤ����ơ�
831  * �ڹ�¤��������ʣ����ñ��Ƕ�ͭ����Ƥ���ñ����Ƭ�Ρ��ɤˤĤ��Ƥϡ�
832  * �����ͤ�ľ��ñ��ˤ�餺�����ͤǤ��ꡤǧ������ñ��֥���å�����ݻ�
833  * ����ɬ�פϤʤ�.
834  *
835  * ���δؿ��Ǥϡ�ñ����Ƭ�Ρ��ɤΥꥹ�Ȥ��餽�Τ褦�� factoring ����å��夬
836  * ���פʥΡ��ɤ�������ơ�1-gram factoring ����ñ��֥���å��夬ɬ�פ�
837  * ñ����Ƭ�Ρ��ɡʡ�¾��ñ��ȶ�ͭ����Ƥ��ʤ���Ω����ñ����Ƭ�Ρ��ɡˤ�
838  * �ꥹ�Ȥ��������wchmm->start2isolate ����� wchmm->isolatenum �˳�Ǽ����.
839  *
840  * @param wchmm [i/o] �ڹ�¤������
841  * </JA>
842  * <EN>
843  * @brief  Make a list of word head nodes on which cross-word factoring cache
844  * is needed.
845  *
846  * On 1-gram factoring, the branch nodes on tree lexicon has a fixed
847  * factoring value (maximum 1-gram score of all sub-tree words).  Thus, when
848  * computing cross-word factoring at word head nodes on inter-word
849  * transition, such 1-gram factoring nodes on word head, shared by several
850  * words, need not be cached in inter-word factoring cache.
851  *
852  * This function make a list of word-head nodes which requires inter-word
853  * factoring caching (i.e. isolated word head nodes, does not shared by other
854  * words) from the existing list of word head nodes, and set it to
855  * wchmm->start2isolate and wchmm->isolatenum.
856  *
857  * @param wchmm [i/o] tree lexicon
858  * </EN>
859  *
860  * @callgraph
861  * @callergraph
862  *
863  */
864 void
make_iwcache_index(WCHMM_INFO * wchmm)865 make_iwcache_index(WCHMM_INFO *wchmm)
866 {
867   int i, node, num;
868 
869   wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum);
870   num = 0;
871   for(i=0;i<wchmm->startnum;i++) {
872     node = wchmm->startnode[i];
873     if (wchmm->state[node].scid >= 0) {	/* not a factoring node (isolated node, has no 1-gram factoring value) */
874       wchmm->start2isolate[i] = num;
875       num++;
876     } else {			/* factoring node (shared) */
877       wchmm->start2isolate[i] = -1;
878     }
879   }
880   wchmm->isolatenum = num;
881 }
882 
883 /**
884  * <JA>
885  * @brief  �ڹ�¤�������� 1-gram factoring �ͤ�׻����Ƴ�Ǽ����.
886  *
887  * 1-gram factoring �Ǥ�ñ��֤Ƕ�ͭ����Ƥ���ޥΡ��ɤǤ� 1-gram �κ�����
888  * ��Ϳ����. ñ������ˤ��ʤ����ᡤ�����ͤ�ǧ����������
889  * �׻����Ƥ������Ȥ��Ǥ���. ���δؿ����ڹ�¤������
890  * ���ΤˤĤ��ơ���ͭ����Ƥ����successor list �ˣ��İʾ��ñ�����ĥΡ��ɡ�
891  * �Ρ��ɤ� 1-gram factoring �ͤ�׻����Ƴ�Ǽ����. 1-gram factoring�ͤ�
892  * �׻���ϡ����ΥΡ��ɤ� successor list �Ϥ�Ϥ����פǤ��뤿�ᡤ������
893  * �������.
894  *
895  * �ºݤˤϡ�factoring �ͤ� wchmm->fscore �˽缡��¸���졤�Ρ��ɤ�
896  * scid �ˤ�����¸�ͤؤΥ���ǥå���(1-)������ͤ���Ǽ�����. ���פˤʤä�
897  * successor list �ϡ��ºݤˤ� compaction_successor ��ǡ��б�����Ρ��ɤ�
898  * scid ����ˤʤäƤ��� successor list �������뤳�ȤǹԤʤ���.
899  *
900  * @param wchmm [i/o] �ڹ�¤������
901  * </JA>
902  * <EN>
903  * @brief  Calculate all the 1-gram factoring values on tree lexicon.
904  *
905  * On 1-gram factoring, the shared nodes on branch has fixed factoring score
906  * from 1-gram values, independent of the word context on recognition.  So
907  * the values are fixed for all recognition and can be calculated before
908  * search.  This function stores all the neede 1-gram factoring value by
909  * traversing tree lexicon with successor lists and compute maximum 1-gram
910  * for each successor lists that has more than two words (=shared).
911  * Since a successor list is no more neede after the 1-gram value is computed,
912  * they will be freed.
913  *
914  * Actually, computed factoring scores will be stored in wchmm->fscore
915  * sequencially, and the index value, starting from 1,
916  * to the fscore list is stored in scid of each nodes as a negative value.
917  * The free will be performed in compaction_successor() by checking if a
918  * successor's corresponding scid on tree lexicon has negative value.
919  *
920  * @param wchmm [i/o] tree lexicon
921  * </EN>
922  *
923  * @callgraph
924  * @callergraph
925  *
926  */
927 void
calc_all_unigram_factoring_values(WCHMM_INFO * wchmm)928 calc_all_unigram_factoring_values(WCHMM_INFO *wchmm)
929 {
930   S_CELL *sc, *sctmp;
931   LOGPROB tmpprob, maxprob;
932   int i, n;
933 
934   /* count needed number of 1-gram factoring nodes */
935   n = 0;
936   for (i=1;i<wchmm->scnum;i++) {
937     sc = wchmm->sclist[i];
938     if (sc == NULL) {
939       j_internal_error("call_all_unigram_factoring_values: sclist has no sc?\n");
940     }
941     if (sc->next != NULL) {
942       /* more than two words, so compute maximum 1-gram probability */
943       n++;
944     }
945   }
946   wchmm->fsnum = n + 1;
947   /* allocate area */
948   wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
949   /* assign values */
950   n = 1;
951   for (i=1;i<wchmm->scnum;i++) {
952     sc = wchmm->sclist[i];
953     if (sc->next != NULL) {
954       maxprob = LOG_ZERO;
955       for (sctmp = sc; sctmp; sctmp = sctmp->next) {
956 	if (wchmm->ngram) {
957 	  tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[sctmp->word])
958 #ifdef CLASS_NGRAM
959 	    + wchmm->winfo->cprob[sctmp->word]
960 #endif
961 	    ;
962 	} else {
963 	  tmpprob = LOG_ZERO;
964 	}
965 	if (wchmm->lmvar == LM_NGRAM_USER) {
966 	  tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, sctmp->word, tmpprob);
967 	}
968 	if (maxprob < tmpprob) maxprob = tmpprob;
969       }
970       wchmm->fscore[n] = maxprob;
971       free_successor(wchmm, i);
972       wchmm->state[wchmm->sclist2node[i]].scid = - n;
973       n++;
974     }
975   }
976   /* garbage collection of factored sclist */
977   compaction_successor(wchmm);
978 }
979 
980 #else  /* ~UNIGRAM_FACTORING */
981 
982 /**
983  * <JA>
984  * �ڹ�¤�������Τ���Ρ��ɤˤĤ��ơ�Ϳ����줿ñ��������Ф���2-gram
985  * ��������׻�����.
986  *
987  * @param wchmm [in] �ڹ�¤������
988  * @param lastword [in] ľ��ñ��
989  * @param node [in] @a wchmm ��ΥΡ����ֹ�
990  *
991  * @return 2-gram ��Ψ.
992  * </JA>
993  * <EN>
994  * Compute 2-gram factoring value for the node and return the probability.
995  *
996  * @param wchmm [in] tree lexicon
997  * @param lastword [in] the last context word
998  * @param node [in] node ID on @a wchmm
999  *
1000  * @return the log probability of 2-gram on that node.
1001  * </EN>
1002  *
1003  */
1004 static LOGPROB
calc_successor_prob(WCHMM_INFO * wchmm,WORD_ID lastword,int node)1005 calc_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
1006 {
1007   S_CELL *sc;
1008   LOGPROB tmpprob, maxprob;
1009   WORD_ID lw;
1010 
1011   maxprob = LOG_ZERO;
1012   if (wchmm->ngram) {
1013     lw = wchmm->winfo->wton[lastword];
1014   }
1015 
1016   for (sc = wchmm->sclist[wchmm->state[node].scid]; sc; sc = sc->next) {
1017     if (wchmm->ngram) {
1018       tmpprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, lw , wchmm->winfo->wton[sc->word])
1019 #ifdef CLASS_NGRAM
1020 	+ wchmm->winfo->cprob[sc->word]
1021 #endif
1022 	;
1023     } else {
1024       tmpprob = LOG_ZERO;
1025     }
1026     if (wchmm->lmvar == LM_NGRAM_USER) {
1027       tmpprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, sc->word, tmpprob);
1028     }
1029     if (maxprob < tmpprob) maxprob = tmpprob;
1030   }
1031 
1032   return(maxprob);
1033 }
1034 
1035 #endif  /* ~UNIGRAM_FACTORING */
1036 
1037 /**
1038  * <JA>
1039  * @brief  ñ����Τ���Ρ��ɤˤĤ��� factoring �ͤ�׻�����.
1040  *
1041  * 1-gram factoring �Ǹ���factoring�ͤ�������Ϥ����ͤ�¨�¤��֤����.
1042  * ¾�ξ��ϡ����ΥΡ��ɤΥ��֥ĥ꡼���ñ��� 2-gram��Ψ�ʤκ����͡ˤ�
1043  * �׻������.
1044  *
1045  * ñ���� factoring ����å��夬��θ�����. ���ʤ���ƥΡ��ɤˤĤ���
1046  * ľ��ñ�줬�������������줿�Ȥ���Ʊ���Ǥ���С�
1047  * ������ͤ��֤��졤�����Ǥʤ�����ͤ�׻���������å��夬���������.
1048  *
1049  * @param wchmm [in] �ڹ�¤������
1050  * @param lastword [in] ľ��ñ���ID
1051  * @param node [in] �Ρ����ֹ�
1052  *
1053  * @return �����ǥ륹����
1054  * </JA>
1055  * <EN>
1056  * @brief  compute factoring LM score for the given word-internal node.
1057  *
1058  * If it is a shared branch node and 1-gram factoring is used, the
1059  * constant factoring value which has already been assigned before search
1060  * will be returned immediately.  Else, the maximum 2-gram probability
1061  * of corresponding successor words are computed.
1062  *
1063  * The word-internal factoring cache is consulted within this function.
1064  * If the given last word is the same as the last call on that node,
1065  * the last computed value will be returned, else the maximum value
1066  * will be computed update the cache with the last word and value.
1067  *
1068  * @param wchmm [in] tree lexicon
1069  * @param lastword [in] word ID of last context word
1070  * @param node [in] node ID
1071  *
1072  * @return the LM factoring score.
1073  * </EN>
1074  *
1075  * @callgraph
1076  * @callergraph
1077  *
1078  */
1079 LOGPROB
max_successor_prob(WCHMM_INFO * wchmm,WORD_ID lastword,int node)1080 max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
1081 {
1082   LOGPROB maxprob;
1083   WORD_ID last_nword, w;
1084   int scid;
1085   LM_PROB_CACHE *l;
1086 
1087   l = &(wchmm->lmcache);
1088 
1089   if (lastword != WORD_INVALID) { /* return nothing if no previous word */
1090     if (wchmm->ngram) {
1091       last_nword = wchmm->winfo->wton[lastword];
1092     } else {
1093       last_nword = lastword;
1094     }
1095     scid = wchmm->state[node].scid;
1096 #ifdef UNIGRAM_FACTORING
1097     if (scid < 0) {
1098       /* return 1-gram factoring value already calced */
1099       return(wchmm->fscore[(- scid)]);
1100     } else {
1101       /* this node has only one successor */
1102       /* return precise 2-gram score */
1103       if (last_nword != l->lastwcache[scid]) {
1104 	/* calc and cache */
1105 	w = (wchmm->sclist[scid])->word;
1106 	if (wchmm->ngram) {
1107 	  maxprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
1108 #ifdef CLASS_NGRAM
1109 	    + wchmm->winfo->cprob[w]
1110 #endif
1111 	    ;
1112 	} else {
1113 	  maxprob = LOG_ZERO;
1114 	}
1115 	if (wchmm->lmvar == LM_NGRAM_USER) {
1116 	  maxprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, maxprob);
1117 	}
1118 	l->lastwcache[scid] = last_nword;
1119 	l->probcache[scid] = maxprob;
1120 	return(maxprob);
1121       } else {
1122 	/* return cached */
1123 	return (l->probcache[scid]);
1124       }
1125     }
1126 #else  /* UNIGRAM_FACTORING */
1127     /* 2-gram */
1128     if (last_nword != l->lastwcache[scid]) {
1129       maxprob = calc_successor_prob(wchmm, lastword, node);
1130       /* store to cache */
1131       l->lastwcache[scid] = last_nword;
1132       l->probcache[scid] = maxprob;
1133       return(maxprob);
1134     } else {
1135       return (l->probcache[scid]);
1136     }
1137 #endif /* UNIGRAM_FACTORING */
1138   } else {
1139     return(0.0);
1140 #if 0
1141     maxprob = LOG_ZERO;
1142     for (sc=wchmm->state[node].sc;sc;sc=sc->next) {
1143       tmpprob = uni_prob(wchmm->ngram, sc->word);
1144       if (maxprob < tmpprob) maxprob = tmpprob;
1145     }
1146     return(maxprob);
1147 #endif
1148   }
1149 
1150 }
1151 
1152 /**
1153  * <JA>
1154  * @brief  ñ��֤� factoring �ͤΥꥹ�Ȥ��֤�.
1155  *
1156  * Ϳ����줿ľ��ñ����Ф��ơ�factoring�ͤ�׻����٤����Ƥ�ñ����Ƭ�ؤ�
1157  * factoring �ͤ�׻��������Υꥹ�Ȥ��֤�. ����factoring�ͤ�
1158  * ľ��ñ�줴�Ȥ˥ꥹ��ñ�̤ǥ���å��夵���. ���ʤ��������ľ��ñ�줬
1159  * ����ޤǤ˰��٤Ǥ�ľ��ñ��Ȥ��ƽи����Ƥ�����硤���Υꥹ�Ȥ��Τޤ�
1160  * �֤�.
1161  *
1162  * @param wchmm [in] �ڹ�¤������
1163  * @param lastword [in] ľ��ñ��
1164  *
1165  * @return ��ñ����Ƭ�Ρ��ɤؤ� factoring �������Υꥹ��
1166  * </JA>
1167  * <EN>
1168  * @brief  Compute cross-word facgtoring values for word head nodes and return
1169  * the list.
1170  *
1171  * Given a last word, this function compute the factoring LM scores for all
1172  * the word head node to which the context-dependent (not 1-gram) factoring
1173  * values should be computed.  The resulting list of factoring values are
1174  * cached within this function per the last word.
1175  *
1176  * @param wchmm [in] tree lexicon
1177  * @param lastword [in] last word
1178  *
1179  * @return the list of factoring LM scores for all the needed word-head nodes.
1180  * </EN>
1181  *
1182  * @callgraph
1183  * @callergraph
1184  *
1185  */
1186 LOGPROB *
max_successor_prob_iw(WCHMM_INFO * wchmm,WORD_ID lastword)1187 max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword)
1188 {
1189   int i, j, x, node;
1190   int last_nword;
1191   WORD_ID w;
1192   LM_PROB_CACHE *l;
1193   LOGPROB p;
1194 
1195   l = &(wchmm->lmcache);
1196 
1197   if (wchmm->ngram) {
1198     last_nword = wchmm->winfo->wton[lastword];
1199   } else {
1200     last_nword = lastword;
1201   }
1202 
1203 #ifdef HASH_CACHE_IW
1204   x = last_nword % l->iw_cache_num;
1205   if (l->iw_lw_cache[x] == last_nword) { /* cache hit */
1206     return(l->iw_sc_cache[x]);
1207   }
1208 #else  /* full cache */
1209   if (l->iw_sc_cache[last_nword] != NULL) { /* cache hit */
1210     return(l->iw_sc_cache[last_nword]);
1211   }
1212   x = last_nword;
1213   /* cache mis-hit, calc probs and cache them as new */
1214 #endif
1215   /* allocate cache memory */
1216   if (l->iw_sc_cache[x] == NULL) {
1217 #ifdef UNIGRAM_FACTORING
1218     l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
1219 #else
1220     l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
1221 #endif
1222     if (l->iw_sc_cache[x] == NULL) { /* malloc failed */
1223       /* clear existing cache, and retry */
1224       max_successor_prob_iw_free(wchmm);
1225       jlog("STAT: inter-word LM cache (%dMB) rehashed\n",
1226 	       (l->iw_cache_num *
1227 #ifdef UNIGRAM_FACTORING
1228 		wchmm->isolatenum
1229 #else
1230 		wchmm->startnum
1231 #endif
1232 		) / 1000 * sizeof(LOGPROB) / 1000);
1233 #ifdef UNIGRAM_FACTORING
1234       l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
1235 #else
1236       l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
1237 #endif
1238       if (l->iw_sc_cache[x] == NULL) { /* malloc failed again? */
1239 	j_internal_error("max_successor_prob_iw: cannot malloc\n");
1240       }
1241     }
1242   }
1243 
1244   /* calc prob for all startid */
1245 #ifdef UNIGRAM_FACTORING
1246   for (j=0;j<wchmm->startnum;j++) {
1247     i = wchmm->start2isolate[j];
1248     if (i == -1) continue;
1249     node = wchmm->startnode[j];
1250     if (wchmm->state[node].scid <= 0) {
1251       /* should not happen!!! below is just for debugging */
1252       j_internal_error("max_successor_prob_iw: isolated (not shared) tree root node has unigram factoring value??\n");
1253     } else {
1254       w = (wchmm->sclist[wchmm->state[node].scid])->word;
1255       if (wchmm->ngram) {
1256 	p = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
1257 #ifdef CLASS_NGRAM
1258 	  + wchmm->winfo->cprob[w]
1259 #endif
1260 	  ;
1261       } else {
1262 	p = LOG_ZERO;
1263       }
1264       if (wchmm->lmvar == LM_NGRAM_USER) {
1265 	p = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, p);
1266       }
1267       l->iw_sc_cache[x][i] = p;
1268     }
1269   }
1270 #else  /* ~UNIGRAM_FACTORING */
1271   for (i=0;i<wchmm->startnum;i++) {
1272     node = wchmm->startnode[i];
1273     l->iw_sc_cache[x][i] = calc_successor_prob(wchmm, lastword, node);
1274   }
1275 #endif
1276 #ifdef HASH_CACHE_IW
1277   l->iw_lw_cache[x] = last_nword;
1278 #endif
1279 
1280   return(l->iw_sc_cache[x]);
1281 }
1282 
1283 /**
1284  * <JA>
1285  * @brief  ʸˡ�ˤ��ñ�������Ū factoring
1286  *
1287  * Julian �ˤ����� CATEGORY_TREE ���������Ƥ���Ȥ��ʥǥե���ȡˡ�
1288  * �ڹ�¤������ϥ��ƥ���ñ�̡ʤ��ʤ����ʸ����ε���ñ�̡ˤǹ��ۤ���뤿�ᡤ
1289  * ��1�ѥ��Ǥθ����ǥ�Ǥ��륫�ƥ����������ñ��λϽ�ü��Ŭ�ѤǤ���.
1290  *
1291  * ���� CATEGORY_TREE ���������Ƥ��ʤ���硤�ڹ�¤�������
1292  * �������Τ�ñ����ڤ�����뤿�ᡤ���ƥ���������� N-gram (Julius) ��
1293  * Ʊ�ͤ�ñ����� factoring ��Ʊ�ͤε�����Ŭ�Ѥ����ɬ�פ�����.
1294  *
1295  * ���δؿ��� CATEGORY_TREE ���������Ƥ��ʤ��Ȥ��ˡ��嵭�� factoring
1296  * �ʷ���Ū factoring �ȸƤФ��ˤ�Ԥʤ������������Ƥ���.
1297  *
1298  * @param wchmm [in] �ڹ�¤������
1299  * @param lastword [in] ľ��ñ��
1300  * @param node [in] �Ρ����ֹ�
1301  *
1302  * @return ���ƥ�������夽�λޤؤ����ܤ��������� TRUE, �Բ�ǽ�Ǥ���� FALSE
1303  * </JA>
1304  * <EN>
1305  * @brief  Deterministic factoring for grammar-based recognition (Julian)
1306  *
1307  * If CATEGORY_TREE is defined (this is default) on Julian, the tree lexicon
1308  * will be organized per category and the category-pair constraint used
1309  * in the 1st pass can be applied statically at cross-word transition.
1310  *
1311  * If the CATEGORY_TREE is not defined, a single tree lexicon will be
1312  * constucted for a whole dictionary.  In this case, the category-pair
1313  * constraint should be applied dynamically in the word-internal transition,
1314  * like the factoring scheme with N-gram (Julius).
1315  *
1316  * This function provides such word-internal factoring for grammar-based
1317  * recognition (called deterministic factoring) when CATEGORY_TREE is
1318  * undefined in Julian.
1319  *
1320  * @param wchmm [in] tree lexicon
1321  * @param lastword [in] last word
1322  * @param node [in] node ID to check the constraint
1323  *
1324  * @return TRUE if the transition to the branch is allowed on the category-pair
1325  * constraint, or FALSE if not allowed.
1326  * </EN>
1327  *
1328  * @callgraph
1329  * @callergraph
1330  *
1331  */
1332 boolean
can_succeed(WCHMM_INFO * wchmm,WORD_ID lastword,int node)1333 can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
1334 {
1335   int lc;
1336   S_CELL *sc;
1337 
1338   /* return TRUE if at least one subtree word can connect */
1339 
1340   if (lastword == WORD_INVALID) { /* case at beginning-of-word */
1341     for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
1342       if (dfa_cp_begin(wchmm->dfa, sc->word) == TRUE) return(TRUE);
1343     }
1344     return(FALSE);
1345   } else {
1346     lc = wchmm->winfo->wton[lastword];
1347     for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
1348       if (dfa_cp(wchmm->dfa, lc, sc->word) == TRUE) return(TRUE);
1349     }
1350     return(FALSE);
1351   }
1352 }
1353 
1354 /* end of file */
1355