1 /**
2 * @file backtrellis.c
3 *
4 * <JA>
5 * @brief ñ��ȥ�ꥹ����¸������
6 *
7 * ��1�ѥ��η�̤�ñ��ȥ�ꥹ�Ȥ�����¸������2�ѥ��ǻ��Ȥ��뤿��δؿ���
8 * �Ǥ�. Julius �Ǥϡ��裱�ѥ���õ����˽�ü�������ĤäƤ���ñ������ơ�
9 * ���λϽ�ü�ե졼�ࡤ��ü������������٤����ñ������ȤȤ��
10 * ��¸���졤�裲�ѥ��Ǥ��ν�����椫���õ�����Ԥ��ޤ�.
11 * �����裱�ѥ��ǥե졼�ऴ�Ȥ˻Ĥ����ñ�����Τ��Ȥ�֥ȥ�ꥹñ��ס�
12 * �ȥ�ꥹñ��ν������Τ��ñ��ȥ�ꥹ�פȸƤӤޤ�.
13 *
14 * �ȥ�ꥹñ��ϡ��裱�ѥ���ǧ����˳ƥե졼�ऴ�Ȥ���¸����ޤ�.
15 * �裱�ѥ���λ�塤�ȥ�ꥹ���Τ������������֤ȥե졼�ऴ�ȤΥ���ǥå���
16 * ��������ޤ�.
17 *
18 * �裲�ѥ��Ǥϡ�����ñ��ȥ�ꥹ�Ȥ���
19 * �ƻ��֡����ϥե졼��ˤˤ�����Ÿ����ǽ�ʲ���Υꥹ�Ȥ�����ȤȤ�ˡ�
20 * �����裱�ѥ��Ǥ�(��������)�������٤��裲�ѥ��ˤ����벾���̤Ÿ����ʬ��
21 * ���ꥹ�����Ȥ����Ѥ��ޤ�. ���Τ����ߤ��顤ñ��ȥ�ꥹ�ϡ֥Хå��ȥ�ꥹ��
22 * �Ȥ�ƤФ�Ƥ��ޤ�.
23 * </JA>
24 *
25 * <EN>
26 * @brief Word trellis operations
27 *
28 * Functions to store the result of the 1st pass as "word trellis",
29 * and functions to access them from the 2nd pass are defined in this
30 * file. On the 1st pass of Julius, all the promising words whose
31 * word end has been survived at the 1st pass will be stored as "word
32 * trellis", which consists of surviving words: word boundary,
33 * accumulated score and word history.
34 *
35 * The trellis word will be stored per frame at the 1st pass. After
36 * the 1st pass ended, the word trellis will be re-organized and
37 * indexed by frame to prepare for access at the 2nd pass.
38 *
39 * In the 2nd pass of reverse stack decoding, this word trellis will be
40 * used to constrain the word hypothesis, and also used to estimate
41 * the score of unseen area by the obtained backward scores in the 1st pass.
42 * Thus the word trellis information is also called as "back trellis" in
43 * Julius.
44 * </EN>
45 *
46 * @author Akinobu LEE
47 * @date Tue Feb 22 15:40:01 2005
48 *
49 * $Revision: 1.2 $
50 *
51 */
52 /*
53 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
54 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
55 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
56 * All rights reserved
57 */
58
59 #include <julius/julius.h>
60
61 /**
62 * <JA>
63 * ñ��ȥ�ꥹ���ݻ����� ñ��ȥ�ꥹ ��¤�Τ���������(��ư����1������¹�)
64 *
65 * @param bt [in] ��������� ñ��ȥ�ꥹ ��¤�ΤؤΥݥ���
66 * </JA>
67 * <EN>
68 * Initialize backtrellis that will hold the whole word trellis
69 * (called once on startup).
70 *
71 * @param bt [in] pointer to the backtrellis structure to initialize
72 * </EN>
73 *
74 * @callergraph
75 * @callgraph
76 *
77 */
78 void
bt_init(BACKTRELLIS * bt)79 bt_init(BACKTRELLIS *bt)
80 {
81 bt->num = NULL;
82 bt->rw = NULL;
83 bt->list = NULL;
84 bt->root = NULL;
85 }
86
87 /**
88 * <JA>
89 * �����ǧ���Ѥ� ñ��ȥ�ꥹ ��¤�Τ�������� (ǧ�����ϻ����Ȥ˼¹�).
90 *
91 * @param bt [in] �оݤȤ���ñ��ȥ�ꥹ��¤�ΤؤΥݥ���
92 * </JA>
93 * <EN>
94 * Prepare backtrellis for the next input (called at beginning of each
95 * speech segment).
96 *
97 * @param bt [in] pointer to the word trellis structure
98 * </EN>
99 * @callergraph
100 * @callgraph
101 *
102 */
103 void
bt_prepare(BACKTRELLIS * bt)104 bt_prepare(BACKTRELLIS *bt)
105 {
106 /* free previously allocated data */
107 mybfree2(&(bt->root));
108
109 /* reset entry point */
110 bt->num = NULL;
111 bt->rw = NULL;
112 bt->list = NULL;
113 bt->root = NULL;
114 }
115
116 /**
117 * <EN>
118 * Free memories of backtrellis.
119 * </EN>
120 * <JA>
121 * ñ��ȥ�ꥹ�Υ����������.
122 * </JA>
123 *
124 * @param bt [out] pointer to the word trellis structure.
125 *
126 * @callergraph
127 * @callgraph
128 *
129 */
130 void
bt_free(BACKTRELLIS * bt)131 bt_free(BACKTRELLIS *bt)
132 {
133 if (bt->root) mybfree2(&(bt->root));
134 free(bt);
135 }
136
137 /**
138 * <EN>
139 * Allocate a new trellis word atom.
140 * </EN>
141 * <JA>
142 * �ȥ�ꥹñ����˳���դ���.
143 * </JA>
144 *
145 * @param bt [out] pointer to the word trellis structure.
146 *
147 * @return pointer to the newly allocated trellis word.
148 *
149 * @callergraph
150 * @callgraph
151 *
152 */
153 TRELLIS_ATOM *
bt_new(BACKTRELLIS * bt)154 bt_new(BACKTRELLIS *bt)
155 {
156 TRELLIS_ATOM *new;
157
158 new = (TRELLIS_ATOM *)mybmalloc2(sizeof(TRELLIS_ATOM), &(bt->root));
159 return new;
160 }
161
162
163
164 /**
165 * <JA>
166 * ��1�ѥ��ǽи������ȥ�ꥹñ���ñ�콪ü�Υȥ�ꥹ����ˤ��Ǽ����.
167 *
168 * �����Ǥϳ�Ǽ�����Ԥ�����1�ѥ���λ��� bt_relocate_rw() ��
169 * �ե졼���˺����֤���.
170 *
171 * @param bt [i/o] �ȥ�ꥹñ����Ǽ����Хå��ȥ�ꥹ��¤��
172 * @param tatom [in] �и������ȥ�ꥹñ��ؤΥݥ���
173 * </JA>
174 * <EN>
175 * Store a trellis word generated on the 1st pass for the 2nd pass.
176 *
177 * This function just store the new atom into backtrellis.
178 * They will be re-located per frame after 1st pass for quick access
179 * in the 2nd pass.
180 *
181 * @param bt [i/o] backtrellis structure to store the trellis word
182 * @param tatom [in] the trellis word to be stored
183 * </EN>
184 *
185 * @callergraph
186 * @callgraph
187 *
188 */
189 void
bt_store(BACKTRELLIS * bt,TRELLIS_ATOM * tatom)190 bt_store(BACKTRELLIS *bt, TRELLIS_ATOM *tatom)
191 {
192 #ifdef WORD_GRAPH
193 tatom->within_context = FALSE;
194 tatom->within_wordgraph = FALSE;
195 #endif
196 tatom->next = bt->list;
197 bt->list = tatom;
198 }
199
200 /**
201 * <JA>
202 * ��1�ѥ���λ��, ��Ǽ���줿ñ��ȥ�ꥹ�����ե졼���˺����֤���.
203 *
204 * @param bt [i/o] ñ��ȥ�ꥹ��¤��
205 * </JA>
206 * <EN>
207 * Re-locate the stored atom lists per frame (will be called after the
208 * 1st pass).
209 *
210 * @param bt [i/o] word trellis structure
211 * </EN>
212 *
213 * @callergraph
214 * @callgraph
215 *
216 */
217 void
bt_relocate_rw(BACKTRELLIS * bt)218 bt_relocate_rw(BACKTRELLIS *bt)
219 {
220 TRELLIS_ATOM *tre;
221 int t;
222 int totalnum, n;
223 TRELLIS_ATOM **tmp;
224
225 if (bt->framelen == 0) {
226 bt->num = NULL;
227 return;
228 }
229
230 bt->num = (int *)mybmalloc2(sizeof(int) * bt->framelen, &(bt->root));
231
232 /* count number of trellis atom (= survived word end) for each frame */
233 for (t=0;t<bt->framelen;t++) bt->num[t] = 0;
234 totalnum = 0;
235 for (tre=bt->list;tre;tre=tre->next) {
236 /* the last frame (when triggered from sp to non-sp) should be discarded */
237 if (tre->endtime >= bt->framelen) continue;
238 bt->num[tre->endtime]++;
239 totalnum++;
240 }
241 /* if no atom found, return here with all bt->num[t] set to 0 */
242 if (totalnum <= 0) {
243 bt->num = NULL;
244 return;
245 }
246
247 /* allocate area */
248 bt->rw = (TRELLIS_ATOM ***)mybmalloc2(sizeof(TRELLIS_ATOM **) * bt->framelen, &(bt->root));
249 tmp = (TRELLIS_ATOM **)mybmalloc2(sizeof(TRELLIS_ATOM *) * totalnum, &(bt->root));
250 n = 0;
251 for (t=0;t<bt->framelen;t++) {
252 if (bt->num[t] > 0) {
253 bt->rw[t] = (TRELLIS_ATOM **)&(tmp[n]);
254 n += bt->num[t];
255 }
256 }
257 /* then store the atoms */
258 for (t=0;t<bt->framelen;t++) bt->num[t] = 0;
259 for (tre=bt->list;tre;tre=tre->next) {
260 /* the last frame (when triggered from sp to non-sp) should be discarded */
261 if (tre->endtime >= bt->framelen) continue;
262 t = tre->endtime;
263
264 bt->rw[t][bt->num[t]] = tre;
265 bt->num[t]++;
266 }
267 }
268
269
270 /* �ʲ��δؿ��� bt_relocate_rw �¹Ը�ˤΤ��Ѳ�ǽ�Ȥʤ�. */
271 /* functions below this line should be called after bt_relocate_rw() */
272
273 /**
274 * <JA>
275 * �༡�ǥ����ǥ���, �裱�ѥ���λ��ˡ�
276 * ���ϥ������Ȥ�ξü�˻Ĥä�����ñ�첾�����Ф�, ������
277 * ��2�ѥ��ˤ�������/�ǽ�����Ȥ��ƥ��åȤ���.
278 *
279 * @param r [in] ǧ������������
280 * </JA>
281 * <EN>
282 * When using progressive decoding with short pause segmentation,
283 * This function extracts the best word hypothesis on head and tail of
284 * the current input segment just after the 1st pass ends,
285 * and store them as start/end word in the following 2nd pass.
286 *
287 * @param r [in] recognition process instance
288 * </EN>
289 *
290 * @callergraph
291 * @callgraph
292 *
293 */
294 void
set_terminal_words(RecogProcess * r)295 set_terminal_words(RecogProcess *r)
296 {
297 LOGPROB maxscore;
298 int i,t;
299 BACKTRELLIS *bt;
300
301 bt = r->backtrellis;
302
303 if (bt->num == NULL) return;
304
305 maxscore = LOG_ZERO;
306 /* find last frame where a word exists */
307 for(t=bt->framelen-1;t>=0;t--) {
308 if (bt->num[t] > 0) break;
309 }
310 /* get maximum word hypothesis at that frame */
311 for(i=0;i<bt->num[t];i++) {
312 if (maxscore < (bt->rw[t][i])->backscore) {
313 maxscore = (bt->rw[t][i])->backscore;
314 r->sp_break_2_begin_word = (bt->rw[t][i])->wid;
315 }
316 }
317 maxscore = LOG_ZERO;
318 /* find first frame where a word exists */
319 for(t=0;t<bt->framelen;t++) {
320 if (bt->num[t] > 0) break;
321 }
322 /* get maximum word hypothesis at that frame */
323 for(i=0;i<bt->num[t];i++) {
324 if (maxscore < (bt->rw[t][i])->backscore) {
325 maxscore = (bt->rw[t][i])->backscore;
326 r->sp_break_2_end_word = (bt->rw[t][i])->wid;
327 }
328 }
329 #ifdef SP_BREAK_DEBUG
330 jlog("DEBUG: 2nd pass begin word: %s\n",
331 (r->sp_break_2_begin_word == WORD_INVALID) ? "WORD_INVALID" : r->lm->winfo->wname[r->sp_break_2_begin_word]);
332 jlog("DEBUG: 2nd pass end word: %s\n",
333 (r->sp_break_2_end_word == WORD_INVALID) ? "WORD_INVALID" : r->lm->winfo->wname[r->sp_break_2_end_word]);
334 #endif
335 }
336
337
338 /* the outprob on the trellis connection point should be discounted */
339 /**
340 * <JA>
341 * ��1�ѥ���λ��, ��2�ѥ��ǤΥȥ�ꥹ����³���Τ���ˡ�
342 * �����֤��ϤäƳƥȥ�ꥹñ��ν�ü�κǽ����֤ν������٤�Ʒ���,
343 * ��������Ѥ��麹�������Ƥ���. �裲�ѥ��Ǥϡ�������³���ˤ�
344 * ��³������θ������³���ξ��֤����٤��Ʒ������.
345 *
346 * @param wchmm [in] �ڹ�¤������
347 * @param bt [in] ñ��ȥ�ꥹ��¤��
348 * @param param [in] ���ϥѥ�������
349 * </JA>
350 * <EN>
351 * Discount the output probabilities of the last state from the accumulated
352 * score on word edge for all trellis words survived on the 1st pass,
353 * for the acoustic re-computation on the 2nd pass.
354 * The acousitic likelihood of the word edge state will be re-computed
355 * when the next word hypotheses are expanded on the next 2nd pass.
356 *
357 * @param wchmm [in] tree lexicon
358 * @param bt [in] word trellis structure
359 * @param param [in] input parameter
360 * </EN>
361 *
362 * @callergraph
363 * @callgraph
364 *
365 */
366 void
bt_discount_pescore(WCHMM_INFO * wchmm,BACKTRELLIS * bt,HTK_Param * param)367 bt_discount_pescore(WCHMM_INFO *wchmm, BACKTRELLIS *bt, HTK_Param *param)
368 {
369 int t,i;
370 TRELLIS_ATOM *tre;
371
372 if (bt->num == NULL) return;
373
374 for (t=0; t<bt->framelen; t++) {
375 for (i=0; i<bt->num[t]; i++) {
376 tre = bt->rw[t][i];
377 /* On normal version, both language score and the output prob. score
378 at the connection point should removed on the trellis for the
379 later connection. On multi-path mode, removing only the language
380 score is enough. */
381 tre->backscore -= outprob_style(wchmm, wchmm->wordend[tre->wid], tre->last_tre->wid, t, param);
382 }
383 }
384 }
385
386 /**
387 * <EN>
388 * Subtract 2-gram scores at each trellis word for the 2nd pass.
389 * </EN>
390 * <JA>
391 * ��2�ѥ��Τ����2-gram��������ȥ�ꥹ���ñ�줫�麹������.
392 * </JA>
393 *
394 * @param bt [in] word trellis
395 *
396 * @callergraph
397 * @callgraph
398 *
399 */
400 void
bt_discount_lm(BACKTRELLIS * bt)401 bt_discount_lm(BACKTRELLIS *bt)
402 {
403 int t,i;
404 TRELLIS_ATOM *tre;
405
406 if (bt->num == NULL) return;
407
408 /* the LM score of the last word should be subtracted, because
409 their LM will be assigned by 3-gram on the 2nd pass. */
410 for (t=0; t<bt->framelen; t++) {
411 for (i=0; i<bt->num[t]; i++) {
412 tre = bt->rw[t][i];
413 tre->backscore -= tre->lscore;
414 }
415 }
416 }
417
418 /**
419 * <JA>
420 * bt_sort_rw()�Ѥ�qsort������Хå�.
421 *
422 * @param a [in] ���ǣ�
423 * @param b [in] ���ǣ�
424 *
425 * @return ���祽���Ȥ�ɬ�פ���
426 * </JA>
427 * <EN>
428 * qsort callback for bt_sort_rw().
429 *
430 * @param a [in] first element
431 * @param b [in] second element
432 *
433 * @return a value needed to do upward sort.
434 * </EN>
435 *
436 *
437 */
438 static int
compare_wid(TRELLIS_ATOM ** a,TRELLIS_ATOM ** b)439 compare_wid(TRELLIS_ATOM **a, TRELLIS_ATOM **b)
440 {
441 if ((*a)->wid > (*b)->wid) return 1;
442 if ((*a)->wid < (*b)->wid) return -1;
443 return 0;
444 }
445
446 /**
447 * <JA>
448 * bt_relocate_rw() ��λ��, ��®���������Τ����
449 * �Хå��ȥ�ꥹ��¤����Υȥ�ꥹñ���ե졼�ऴ�Ȥ�
450 * ñ��ID�ǥ����Ȥ��Ƥ���.
451 *
452 * @param bt [i/o] ñ��ȥ�ꥹ��¤��
453 *
454 * </JA>
455 * <EN>
456 * Sort the trellis words in the backtrellis by the word IDs per each frame,
457 * for rapid access on the 2nd pass. This should be called just after
458 * bt_relocate_rw() was called.
459 *
460 * @param bt [i/o] word trellis structure
461 *
462 * </EN>
463 * @callergraph
464 * @callgraph
465 *
466 */
467 void
bt_sort_rw(BACKTRELLIS * bt)468 bt_sort_rw(BACKTRELLIS *bt)
469 {
470 int t;
471
472 if (bt->num == NULL) return;
473
474 for (t=0;t<bt->framelen;t++) {
475 qsort(bt->rw[t], bt->num[t], sizeof(TRELLIS_ATOM *),
476 (int (*)(const void *,const void *))compare_wid);
477 }
478 }
479
480 /* �ʲ��δؿ��ϻ�����bt_sort_rw() ���ƤФ�Ƥ��뤳��(��2�ѥ���) */
481 /* functions below should be called after bt_sort_rw() */
482
483 /**
484 * <JA>
485 * ñ��ȥ�ꥹ��λ������ե졼���ˡ�����ñ��ν�ü�����뤫�ɤ�����
486 * ��������.
487 *
488 * @param bt [in] ñ��ȥ�ꥹ��¤��
489 * @param t [in] �������뽪ü����ʥե졼���
490 * @param wkey [in] ��������ñ���ñ��ɣ�
491 *
492 * @return ���Ĥ��ä���礽�Υȥ�ꥹñ��ؤΥݥ������Ĥ���ʤ���� NULL.
493 * </JA>
494 * <EN>
495 * Search a word on the specified frame in a word trellis data.
496 *
497 * @param bt [in] word trellis structure
498 * @param t [in] word end frame on which to search
499 * @param wkey [in] word ID to search
500 *
501 * @return pointer to the found trellis word, or NULL if not found.
502 * </EN>
503 *
504 * @callergraph
505 * @callgraph
506 *
507 */
508 TRELLIS_ATOM *
bt_binsearch_atom(BACKTRELLIS * bt,int t,WORD_ID wkey)509 bt_binsearch_atom(BACKTRELLIS *bt, int t, WORD_ID wkey)
510 {
511 /* do binary search */
512 /* assume rw are ordered by wid */
513 int left, right, mid;
514 TRELLIS_ATOM *tmp;
515 #ifdef WPAIR
516 int i;
517 LOGPROB maxscore;
518 TRELLIS_ATOM *maxtre;
519 #endif
520
521 if (bt->num[t] == 0) return(NULL);
522
523 left = 0;
524 right = bt->num[t] - 1;
525 while (left < right) {
526 mid = (left + right) / 2;
527 if ((bt->rw[t][mid])->wid < wkey) {
528 left = mid + 1;
529 } else {
530 right = mid;
531 }
532 }
533 tmp = bt->rw[t][left];
534 if (tmp->wid == wkey) {
535 #ifdef WPAIR
536 /* same word with different context will be found:
537 most likely one will be returned */
538 maxscore = LOG_ZERO;
539 maxtre = NULL;
540 i = left;
541 while (i >= 0) {
542 tmp = bt->rw[t][i];
543 if (tmp->wid != wkey) break;
544 #ifdef WORD_GRAPH
545 /* only words on a graph path should be counted */
546 if (!tmp->within_wordgraph) {
547 i--; continue;
548 }
549 #endif
550 if (maxscore < tmp->backscore) {
551 maxscore = tmp->backscore;
552 maxtre = tmp;
553 }
554 i--;
555 }
556 i = left;
557 while (i < bt->num[t]) {
558 tmp = bt->rw[t][i];
559 if (tmp->wid != wkey) break;
560 #ifdef WORD_GRAPH
561 /* only words on a graph path should be counted */
562 if (!tmp->within_wordgraph) {
563 i++; continue;
564 }
565 #endif
566 if (maxscore < tmp->backscore) {
567 maxscore = tmp->backscore;
568 maxtre = tmp;
569 }
570 i++;
571 }
572 tmp = maxtre;
573 #else
574 #ifdef WORD_GRAPH
575 /* treat only words on a graph path */
576 if (! tmp->within_wordgraph) {
577 return NULL;
578 }
579 #endif
580 #endif /* WPAIR */
581
582 return(tmp);
583 } else {
584 return(NULL);
585 }
586 }
587
588 /* end of file */
589