1 /**
2  * @file   backtrellis.c
3  *
4  * <JA>
5  * @brief  ñ��ȥ�ꥹ����¸������
6  *
7  * ��1�ѥ��η�̤�ñ��ȥ�ꥹ�Ȥ�����¸������2�ѥ��ǻ��Ȥ��뤿��δؿ���
8  * �Ǥ�. Julius �Ǥϡ��裱�ѥ���õ����˽�ü�������ĤäƤ���ñ������ơ�
9  * ���λϽ�ü�ե졼�ࡤ��ü������������٤����ñ������ȤȤ��
10  * ��¸���졤�裲�ѥ��Ǥ��ν�����椫���õ�����Ԥ��ޤ�.
11  * �����裱�ѥ��ǥե졼�ऴ�Ȥ˻Ĥ����ñ�����Τ��Ȥ�֥ȥ�ꥹñ��ס�
12  * �ȥ�ꥹñ��ν������Τ��ñ��ȥ�ꥹ�פȸƤӤޤ�.
13  *
14  * �ȥ�ꥹñ��ϡ��裱�ѥ���ǧ����˳ƥե졼�ऴ�Ȥ���¸����ޤ�.
15  * �裱�ѥ���λ�塤�ȥ�ꥹ���Τ������������֤ȥե졼�ऴ�ȤΥ���ǥå���
16  * ��������ޤ�.
17  *
18  * �裲�ѥ��Ǥϡ�����ñ��ȥ�ꥹ���Ȥ���
19  * �ƻ��֡����ϥե졼��ˤˤ�����Ÿ����ǽ�ʲ���Υꥹ�Ȥ�����ȤȤ�ˡ�
20  * �����裱�ѥ��Ǥ�(��������)�������٤��裲�ѥ��ˤ����벾���̤Ÿ����ʬ��
21  * ���ꥹ�����Ȥ����Ѥ��ޤ�. ���Τ����ߤ��顤ñ��ȥ�ꥹ�ϡ֥Хå��ȥ�ꥹ��
22  * �Ȥ�ƤФ�Ƥ��ޤ�.
23  * </JA>
24  *
25  * <EN>
26  * @brief  Word trellis operations
27  *
28  * Functions to store the result of the 1st pass as "word trellis",
29  * and functions to access them from the 2nd pass are defined in this
30  * file.  On the 1st pass of Julius, all the promising words whose
31  * word end has been survived at the 1st pass will be stored as "word
32  * trellis", which consists of surviving words: word boundary,
33  * accumulated score and word history.
34  *
35  * The trellis word will be stored per frame at the 1st pass.  After
36  * the 1st pass ended, the word trellis will be re-organized and
37  * indexed by frame to prepare for access at the 2nd pass.
38  *
39  * In the 2nd pass of reverse stack decoding, this word trellis will be
40  * used to constrain the word hypothesis, and also used to estimate
41  * the score of unseen area by the obtained backward scores in the 1st pass.
42  * Thus the word trellis information is also called as "back trellis" in
43  * Julius.
44  * </EN>
45  *
46  * @author Akinobu LEE
47  * @date   Tue Feb 22 15:40:01 2005
48  *
49  * $Revision: 1.2 $
50  *
51  */
52 /*
53  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
54  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
55  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
56  * All rights reserved
57  */
58 
59 #include <julius/julius.h>
60 
61 /**
62  * <JA>
63  * ñ��ȥ�ꥹ���ݻ����� ñ��ȥ�ꥹ ��¤�Τ���������(��ư����1������¹�)
64  *
65  * @param bt [in] ��������� ñ��ȥ�ꥹ ��¤�ΤؤΥݥ���
66  * </JA>
67  * <EN>
68  * Initialize backtrellis that will hold the whole word trellis
69  * (called once on startup).
70  *
71  * @param bt [in] pointer to the backtrellis structure to initialize
72  * </EN>
73  *
74  * @callergraph
75  * @callgraph
76  *
77  */
78 void
bt_init(BACKTRELLIS * bt)79 bt_init(BACKTRELLIS *bt)
80 {
81   bt->num  = NULL;
82   bt->rw   = NULL;
83   bt->list = NULL;
84   bt->root = NULL;
85 }
86 
87 /**
88  * <JA>
89  * �����ǧ���Ѥ� ñ��ȥ�ꥹ ��¤�Τ�������� (ǧ�����ϻ����Ȥ˼¹�).
90  *
91  * @param bt [in] �оݤȤ���ñ��ȥ�ꥹ��¤�ΤؤΥݥ���
92  * </JA>
93  * <EN>
94  * Prepare backtrellis for the next input (called at beginning of each
95  * speech segment).
96  *
97  * @param bt [in] pointer to the word trellis structure
98  * </EN>
99  * @callergraph
100  * @callgraph
101  *
102  */
103 void
bt_prepare(BACKTRELLIS * bt)104 bt_prepare(BACKTRELLIS *bt)
105 {
106   /* free previously allocated data */
107   mybfree2(&(bt->root));
108 
109   /* reset entry point */
110   bt->num = NULL;
111   bt->rw = NULL;
112   bt->list = NULL;
113   bt->root = NULL;
114 }
115 
116 /**
117  * <EN>
118  * Free memories of backtrellis.
119  * </EN>
120  * <JA>
121  * ñ��ȥ�ꥹ�Υ����������.
122  * </JA>
123  *
124  * @param bt [out] pointer to the word trellis structure.
125  *
126  * @callergraph
127  * @callgraph
128  *
129  */
130 void
bt_free(BACKTRELLIS * bt)131 bt_free(BACKTRELLIS *bt)
132 {
133   if (bt->root) mybfree2(&(bt->root));
134   free(bt);
135 }
136 
137 /**
138  * <EN>
139  * Allocate a new trellis word atom.
140  * </EN>
141  * <JA>
142  * �ȥ�ꥹñ������˳���դ���.
143  * </JA>
144  *
145  * @param bt [out] pointer to the word trellis structure.
146  *
147  * @return pointer to the newly allocated trellis word.
148  *
149  * @callergraph
150  * @callgraph
151  *
152  */
153 TRELLIS_ATOM *
bt_new(BACKTRELLIS * bt)154 bt_new(BACKTRELLIS *bt)
155 {
156   TRELLIS_ATOM *new;
157 
158   new = (TRELLIS_ATOM *)mybmalloc2(sizeof(TRELLIS_ATOM), &(bt->root));
159   return new;
160 }
161 
162 
163 
164 /**
165  * <JA>
166  * ��1�ѥ��ǽи������ȥ�ꥹñ���ñ�콪ü�Υȥ�ꥹ����ˤ��Ǽ����.
167  *
168  * �����Ǥϳ�Ǽ�����Ԥ�����1�ѥ���λ��� bt_relocate_rw() ��
169  * �ե졼���˺����֤���.
170  *
171  * @param bt [i/o] �ȥ�ꥹñ����Ǽ����Хå��ȥ�ꥹ��¤��
172  * @param tatom [in] �и������ȥ�ꥹñ��ؤΥݥ���
173  * </JA>
174  * <EN>
175  * Store a trellis word generated on the 1st pass for the 2nd pass.
176  *
177  * This function just store the new atom into backtrellis.
178  * They will be re-located per frame after 1st pass for quick access
179  * in the 2nd pass.
180  *
181  * @param bt [i/o] backtrellis structure to store the trellis word
182  * @param tatom [in] the trellis word to be stored
183  * </EN>
184  *
185  * @callergraph
186  * @callgraph
187  *
188  */
189 void
bt_store(BACKTRELLIS * bt,TRELLIS_ATOM * tatom)190 bt_store(BACKTRELLIS *bt, TRELLIS_ATOM *tatom)
191 {
192 #ifdef WORD_GRAPH
193   tatom->within_context = FALSE;
194   tatom->within_wordgraph = FALSE;
195 #endif
196   tatom->next = bt->list;
197   bt->list = tatom;
198 }
199 
200 /**
201  * <JA>
202  * ��1�ѥ���λ��, ��Ǽ���줿ñ��ȥ�ꥹ�����ե졼���˺����֤���.
203  *
204  * @param bt [i/o] ñ��ȥ�ꥹ��¤��
205  * </JA>
206  * <EN>
207  * Re-locate the stored atom lists per frame (will be called after the
208  * 1st pass).
209  *
210  * @param bt [i/o] word trellis structure
211  * </EN>
212  *
213  * @callergraph
214  * @callgraph
215  *
216  */
217 void
bt_relocate_rw(BACKTRELLIS * bt)218 bt_relocate_rw(BACKTRELLIS *bt)
219 {
220   TRELLIS_ATOM *tre;
221   int t;
222   int totalnum, n;
223   TRELLIS_ATOM **tmp;
224 
225   if (bt->framelen == 0) {
226     bt->num = NULL;
227     return;
228   }
229 
230   bt->num = (int *)mybmalloc2(sizeof(int) * bt->framelen, &(bt->root));
231 
232   /* count number of trellis atom (= survived word end) for each frame */
233   for (t=0;t<bt->framelen;t++) bt->num[t] = 0;
234   totalnum = 0;
235   for (tre=bt->list;tre;tre=tre->next) {
236     /* the last frame (when triggered from sp to non-sp) should be discarded */
237     if (tre->endtime >= bt->framelen) continue;
238     bt->num[tre->endtime]++;
239     totalnum++;
240   }
241   /* if no atom found, return here with all bt->num[t] set to 0 */
242   if (totalnum <= 0) {
243     bt->num = NULL;
244     return;
245   }
246 
247   /* allocate area */
248   bt->rw  = (TRELLIS_ATOM ***)mybmalloc2(sizeof(TRELLIS_ATOM **) * bt->framelen, &(bt->root));
249   tmp = (TRELLIS_ATOM **)mybmalloc2(sizeof(TRELLIS_ATOM *) * totalnum, &(bt->root));
250   n = 0;
251   for (t=0;t<bt->framelen;t++) {
252     if (bt->num[t] > 0) {
253       bt->rw[t] = (TRELLIS_ATOM **)&(tmp[n]);
254       n += bt->num[t];
255     }
256   }
257   /* then store the atoms */
258   for (t=0;t<bt->framelen;t++) bt->num[t] = 0;
259   for (tre=bt->list;tre;tre=tre->next) {
260     /* the last frame (when triggered from sp to non-sp) should be discarded */
261     if (tre->endtime >= bt->framelen) continue;
262     t = tre->endtime;
263 
264     bt->rw[t][bt->num[t]] = tre;
265     bt->num[t]++;
266   }
267 }
268 
269 
270 /* �ʲ��δؿ��� bt_relocate_rw �¹Ը�ˤΤ߻��Ѳ�ǽ�Ȥʤ�. */
271 /* functions below this line should be called after bt_relocate_rw() */
272 
273 /**
274  * <JA>
275  * �༡�ǥ����ǥ�����, �裱�ѥ���λ��ˡ�
276  * ���ϥ������Ȥ�ξü�˻Ĥä�����ñ�첾�����Ф�, ������
277  * ��2�ѥ��ˤ�������/�ǽ�����Ȥ��ƥ��åȤ���.
278  *
279  * @param r [in] ǧ��������������
280  * </JA>
281  * <EN>
282  * When using progressive decoding with short pause segmentation,
283  * This function extracts the best word hypothesis on head and tail of
284  * the current input segment just after the 1st pass ends,
285  * and store them as start/end word in the following 2nd pass.
286  *
287  * @param r [in] recognition process instance
288  * </EN>
289  *
290  * @callergraph
291  * @callgraph
292  *
293  */
294 void
set_terminal_words(RecogProcess * r)295 set_terminal_words(RecogProcess *r)
296 {
297   LOGPROB maxscore;
298   int i,t;
299   BACKTRELLIS *bt;
300 
301   bt = r->backtrellis;
302 
303   if (bt->num == NULL) return;
304 
305   maxscore = LOG_ZERO;
306   /* find last frame where a word exists */
307   for(t=bt->framelen-1;t>=0;t--) {
308     if (bt->num[t] > 0) break;
309   }
310   /* get maximum word hypothesis at that frame */
311   for(i=0;i<bt->num[t];i++) {
312     if (maxscore < (bt->rw[t][i])->backscore) {
313       maxscore = (bt->rw[t][i])->backscore;
314       r->sp_break_2_begin_word = (bt->rw[t][i])->wid;
315     }
316   }
317   maxscore = LOG_ZERO;
318   /* find first frame where a word exists */
319   for(t=0;t<bt->framelen;t++) {
320     if (bt->num[t] > 0) break;
321   }
322   /* get maximum word hypothesis at that frame */
323   for(i=0;i<bt->num[t];i++) {
324     if (maxscore < (bt->rw[t][i])->backscore) {
325       maxscore = (bt->rw[t][i])->backscore;
326       r->sp_break_2_end_word = (bt->rw[t][i])->wid;
327     }
328   }
329 #ifdef SP_BREAK_DEBUG
330   jlog("DEBUG: 2nd pass begin word: %s\n",
331 	 (r->sp_break_2_begin_word == WORD_INVALID) ? "WORD_INVALID" : r->lm->winfo->wname[r->sp_break_2_begin_word]);
332   jlog("DEBUG: 2nd pass end word: %s\n",
333 	 (r->sp_break_2_end_word == WORD_INVALID) ? "WORD_INVALID" : r->lm->winfo->wname[r->sp_break_2_end_word]);
334 #endif
335 }
336 
337 
338 /* the outprob on the trellis connection point should be discounted */
339 /**
340  * <JA>
341  * ��1�ѥ���λ��, ��2�ѥ��ǤΥȥ�ꥹ����³�׻��Τ���ˡ�
342  * �����֤��ϤäƳƥȥ�ꥹñ��ν�ü�κǽ����֤ν������٤�Ʒ׻���,
343  * ��������Ѥ��麹�������Ƥ���. �裲�ѥ��Ǥϡ�������³���ˤ�
344  * ��³������θ������³���ξ��֤����٤��Ʒ׻������.
345  *
346  * @param wchmm [in] �ڹ�¤������
347  * @param bt [in] ñ��ȥ�ꥹ��¤��
348  * @param param [in] ���ϥѥ�᡼������
349  * </JA>
350  * <EN>
351  * Discount the output probabilities of the last state from the accumulated
352  * score on word edge for all trellis words survived on the 1st pass,
353  * for the acoustic re-computation on the 2nd pass.
354  * The acousitic likelihood of the word edge state will be re-computed
355  * when the next word hypotheses are expanded on the next 2nd pass.
356  *
357  * @param wchmm [in] tree lexicon
358  * @param bt [in] word trellis structure
359  * @param param [in] input parameter
360  * </EN>
361  *
362  * @callergraph
363  * @callgraph
364  *
365  */
366 void
bt_discount_pescore(WCHMM_INFO * wchmm,BACKTRELLIS * bt,HTK_Param * param)367 bt_discount_pescore(WCHMM_INFO *wchmm, BACKTRELLIS *bt, HTK_Param *param)
368 {
369   int t,i;
370   TRELLIS_ATOM *tre;
371 
372   if (bt->num == NULL) return;
373 
374   for (t=0; t<bt->framelen; t++) {
375     for (i=0; i<bt->num[t]; i++) {
376       tre = bt->rw[t][i];
377       /* On normal version, both language score and the output prob. score
378 	 at the connection point should removed on the trellis for the
379 	 later connection.  On multi-path mode, removing only the language
380 	 score is enough. */
381       tre->backscore -= outprob_style(wchmm, wchmm->wordend[tre->wid], tre->last_tre->wid, t, param);
382     }
383   }
384 }
385 
386 /**
387  * <EN>
388  * Subtract 2-gram scores at each trellis word for the 2nd pass.
389  * </EN>
390  * <JA>
391  * ��2�ѥ��Τ����2-gram��������ȥ�ꥹ���ñ�줫�麹������.
392  * </JA>
393  *
394  * @param bt [in] word trellis
395  *
396  * @callergraph
397  * @callgraph
398  *
399  */
400 void
bt_discount_lm(BACKTRELLIS * bt)401 bt_discount_lm(BACKTRELLIS *bt)
402 {
403   int t,i;
404   TRELLIS_ATOM *tre;
405 
406   if (bt->num == NULL) return;
407 
408   /* the LM score of the last word should be subtracted, because
409      their LM will be assigned by 3-gram on the 2nd pass. */
410   for (t=0; t<bt->framelen; t++) {
411     for (i=0; i<bt->num[t]; i++) {
412       tre = bt->rw[t][i];
413       tre->backscore -= tre->lscore;
414     }
415   }
416 }
417 
418 /**
419  * <JA>
420  * bt_sort_rw()�Ѥ�qsort������Хå�.
421  *
422  * @param a [in] ���ǣ�
423  * @param b [in] ���ǣ�
424  *
425  * @return ���祽���Ȥ�ɬ�פ���
426  * </JA>
427  * <EN>
428  * qsort callback for bt_sort_rw().
429  *
430  * @param a [in] first element
431  * @param b [in] second element
432  *
433  * @return a value needed to do upward sort.
434  * </EN>
435  *
436  *
437  */
438 static int
compare_wid(TRELLIS_ATOM ** a,TRELLIS_ATOM ** b)439 compare_wid(TRELLIS_ATOM **a, TRELLIS_ATOM **b)
440 {
441   if ((*a)->wid > (*b)->wid) return 1;
442   if ((*a)->wid < (*b)->wid) return -1;
443   return 0;
444 }
445 
446 /**
447  * <JA>
448  * bt_relocate_rw() ��λ��, ��®���������Τ����
449  * �Хå��ȥ�ꥹ��¤����Υȥ�ꥹñ���ե졼�ऴ�Ȥ�
450  * ñ��ID�ǥ����Ȥ��Ƥ���.
451  *
452  * @param bt [i/o] ñ��ȥ�ꥹ��¤��
453  *
454  * </JA>
455  * <EN>
456  * Sort the trellis words in the backtrellis by the word IDs per each frame,
457  * for rapid access on the 2nd pass.  This should be called just after
458  * bt_relocate_rw() was called.
459  *
460  * @param bt [i/o] word trellis structure
461  *
462  * </EN>
463  * @callergraph
464  * @callgraph
465  *
466  */
467 void
bt_sort_rw(BACKTRELLIS * bt)468 bt_sort_rw(BACKTRELLIS *bt)
469 {
470   int t;
471 
472   if (bt->num == NULL) return;
473 
474   for (t=0;t<bt->framelen;t++) {
475     qsort(bt->rw[t], bt->num[t], sizeof(TRELLIS_ATOM *),
476 	  (int (*)(const void *,const void *))compare_wid);
477   }
478 }
479 
480 /* �ʲ��δؿ��ϻ�����bt_sort_rw() ���ƤФ�Ƥ��뤳��(��2�ѥ���) */
481 /* functions below should be called after bt_sort_rw() */
482 
483 /**
484  * <JA>
485  * ñ��ȥ�ꥹ��λ������ե졼���ˡ�����ñ��ν�ü�����뤫�ɤ�����
486  * ��������.
487  *
488  * @param bt [in] ñ��ȥ�ꥹ��¤��
489  * @param t [in] �������뽪ü����ʥե졼���
490  * @param wkey [in] ��������ñ���ñ��ɣ�
491  *
492  * @return ���Ĥ��ä���礽�Υȥ�ꥹñ��ؤΥݥ��������Ĥ���ʤ���� NULL.
493  * </JA>
494  * <EN>
495  * Search a word on the specified frame in a word trellis data.
496  *
497  * @param bt [in] word trellis structure
498  * @param t [in] word end frame on which to search
499  * @param wkey [in] word ID to search
500  *
501  * @return pointer to the found trellis word, or NULL if not found.
502  * </EN>
503  *
504  * @callergraph
505  * @callgraph
506  *
507  */
508 TRELLIS_ATOM *
bt_binsearch_atom(BACKTRELLIS * bt,int t,WORD_ID wkey)509 bt_binsearch_atom(BACKTRELLIS *bt, int t, WORD_ID wkey)
510 {
511   /* do binary search */
512   /* assume rw are ordered by wid */
513   int left, right, mid;
514   TRELLIS_ATOM *tmp;
515 #ifdef WPAIR
516   int i;
517   LOGPROB maxscore;
518   TRELLIS_ATOM *maxtre;
519 #endif
520 
521   if (bt->num[t] == 0) return(NULL);
522 
523   left = 0;
524   right = bt->num[t] - 1;
525   while (left < right) {
526     mid = (left + right) / 2;
527     if ((bt->rw[t][mid])->wid < wkey) {
528       left = mid + 1;
529     } else {
530       right = mid;
531     }
532   }
533   tmp = bt->rw[t][left];
534   if (tmp->wid == wkey) {
535 #ifdef WPAIR
536     /* same word with different context will be found:
537        most likely one will be returned */
538     maxscore = LOG_ZERO;
539     maxtre = NULL;
540     i = left;
541     while (i >= 0) {
542       tmp = bt->rw[t][i];
543       if (tmp->wid != wkey) break;
544 #ifdef WORD_GRAPH
545       /* only words on a graph path should be counted */
546       if (!tmp->within_wordgraph) {
547 	i--; continue;
548       }
549 #endif
550       if (maxscore < tmp->backscore) {
551 	maxscore = tmp->backscore;
552 	maxtre = tmp;
553       }
554       i--;
555     }
556     i = left;
557     while (i < bt->num[t]) {
558       tmp = bt->rw[t][i];
559       if (tmp->wid != wkey) break;
560 #ifdef WORD_GRAPH
561       /* only words on a graph path should be counted */
562       if (!tmp->within_wordgraph) {
563 	i++; continue;
564       }
565 #endif
566       if (maxscore < tmp->backscore) {
567 	maxscore = tmp->backscore;
568 	maxtre = tmp;
569       }
570       i++;
571     }
572     tmp = maxtre;
573 #else
574 #ifdef WORD_GRAPH
575     /* treat only words on a graph path */
576     if (! tmp->within_wordgraph) {
577       return NULL;
578     }
579 #endif
580 #endif /* WPAIR */
581 
582     return(tmp);
583   } else {
584     return(NULL);
585   }
586 }
587 
588 /* end of file */
589