/**
* @file ngram_decode.c
*
*
* @brief N-gram確率に基づく次単語予測(第2パス)
*
* Julius のN-gramを用いたスタックデコーディング(第2パス)において,
* 次に接続しうる単語の集合を決定する.
*
* 与えられた展開元仮説の始端フレームを予測し,単語トレリス上で
* その予測フレーム周辺に終端が存在する単語の集合を,
* そのN-gram出現確率とともに返す.
*
* Julius では ngram_firstwords(), ngram_nextwords(), ngram_acceptable() が
* それぞれ第2パスのメイン関数 wchmm_fbs() から呼び出される. なお,
* Julian ではこれらの関数の代わりに dfa_decode.c の関数が用いられる.
*
*
*
* @brief N-gram based word prediction for the 2nd pass.
*
 * These functions return next word candidates in the 2nd recognition
* pass of Julius, i.e. N-gram based stack decoding.
*
 * Given a partial sentence hypothesis, it first estimates the beginning frame
* of the hypothesis based on the word trellis. Then the words in the word
* trellis around the estimated frame are extracted from the word trellis.
* They will be returned with their N-gram probabilities.
*
* In Julius, ngram_firstwords(), ngram_nextwords() and ngram_acceptable()
* are called from main search function wchmm_fbs(). In Julian,
* corresponding functions in dfa_decode.c will be used instead.
*
*
* @author Akinobu Lee
* @date Fri Jul 8 14:57:51 2005
*
* $Revision: 1.3 $
*
*/
/*
* Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include <julius/julius.h>
/**
*
* 次単語候補ソート用 qsort コールバック関数.
*
* @param a [in] 要素1
* @param b [in] 要素2
*
* @return aの単語ID > bの単語ID なら1, 逆なら -1, 同じなら 0 を返す.
*
*
* qsort callback function to sort next word candidates by their word ID.
*
* @param a [in] element 1
* @param b [in] element 2
*
* @return 1 if word id of a > that of b, -1 if negative, 0 if equal.
*
*/
static int
compare_nw(NEXTWORD **a, NEXTWORD **b)
{
  /* order next word candidates by ascending word ID */
  WORD_ID wa = (*a)->id;
  WORD_ID wb = (*b)->id;

  if (wa == wb) return 0;
  return (wa > wb) ? 1 : -1;
}
/**
*
* 次単語候補リスト内から単語を検索する.
*
* @param nw [in] 次単語候補リスト
* @param w [in] 検索する単語のID
* @param num [in] 次単語候補リストの長さ
*
* @return 見つかった場合その次単語候補構造体へのポインタ,見つからなければ
* NULL を返す.
*
*
* Find a word from list of next word candidates.
*
* @param nw [in] list of next word candidates
* @param w [in] word id to search for
* @param num [in] length of @a nw
*
* @return the pointer to the NEXTWORD data if found, or NULL if not found.
*
*/
/* find next word candidate whose id is 'w' */
static NEXTWORD *
search_nw(NEXTWORD **nw, WORD_ID w, int num)
{
  /* binary search over nw[], which is kept sorted by word ID */
  int lo, hi;

  if (num == 0) return NULL;

  lo = 0;
  hi = num - 1;
  while (lo < hi) {
    int center = (lo + hi) / 2;
    if (nw[center]->id < w) {
      lo = center + 1;		/* target is in the upper half */
    } else {
      hi = center;		/* target is at center or below */
    }
  }
  /* lo is the leftmost candidate position; verify the ID matches */
  return (nw[lo]->id == w) ? nw[lo] : NULL;
}
/**
*
* Compute backward N-gram score from forward N-gram.
*
*
* 後向きの N-gram スコアを前向き N-gram から算出する.
*
*
* @param ngram [in] N-gram data structure
* @param w [in] word sequence
* @param wlen [in] length of @a w
*
* @return the backward probability of the word w[0].
*
*/
static LOGPROB
ngram_forw2back(NGRAM_INFO *ngram, WORD_ID *w, int wlen)
{
  int i;
  LOGPROB context_sum, full_sum;

  /* sum of forward N-gram probabilities over the context words w[1..] */
  context_sum = 0.0;
  for (i = 1; i < ngram->n && i < wlen; i++) {
    context_sum += ngram_prob(ngram, i, &(w[1]));
  }
  /* sum of forward N-gram probabilities over sequences including w[0] */
  full_sum = 0.0;
  for (i = 0; i < ngram->n && i < wlen; i++) {
    full_sum += ngram_prob(ngram, i + 1, w);
  }
  /* the backward probability of w[0] is the difference of the two sums */
  return full_sum - context_sum;
}
/**
*
* @brief 単語トレリスから次単語候補を抽出する.
*
* 単語トレリス上の指定したフレーム上に終端が存在するトレリス単語
* のリストを抽出し,それらの次単語としての N-gram 接続確率を計算する.
* そのリストを次単語情報構造体に追加して返す.
*
* @param r [in] 認識処理インスタンス
* @param nw [i/o] 次単語候補リスト(抽出結果は @a oldnum 以降に追加される)
* @param oldnum [in] @a nw にすでに格納されている次単語の数
* @param hypo [in] 展開元の文仮説
* @param t [in] 指定フレーム
*
* @return 抽出リストを追加したあとの @a nw に含まれる次単語の総数.
*
*
* @brief Extract next word candidates from word trellis.
*
* This function extracts the list of trellis words whose word end
* has survived in the word trellis at the specified frame.
* The N-gram probabilities of them are then computed and added to
* the current next word candidates data.
*
* @param r [in] recognition process instance
* @param nw [in] list of next word candidates (new words will be appended at @a oldnum)
* @param oldnum [in] number of words already stored in @a nw
* @param hypo [in] the source sentence hypothesis
* @param t [in] specified frame
*
* @return the total number of words currently stored in the @a nw.
*
*/
static int
pick_backtrellis_words(RecogProcess *r, NEXTWORD **nw, int oldnum, NODE *hypo, short t)
{
  int i;
  WORD_ID w;
  LOGPROB rawscore;
#ifdef WPAIR
  int w_old = WORD_INVALID;
#endif
  int num;
  WORD_ID cnword[MAX_N];	///< Last non-transparent context words, stored from index 1
  WORD_ID cnwordrev[MAX_N];	///< The same context in reverse order (for backward N-gram)
  int cnnum;		///< Num of found non-transparent words (<= n-1)
  int last_trans;	///< Num of skipped transparent words
  BACKTRELLIS *bt;
  WORD_INFO *winfo;
  NGRAM_INFO *ngram;
  LOGPROB lm_weight2, lm_penalty2, lm_penalty_trans;

  num = oldnum;
  bt = r->backtrellis;
  winfo = r->lm->winfo;
  ngram = r->lm->ngram;
  lm_weight2 = r->config->lmp.lm_weight2;
  lm_penalty2 = r->config->lmp.lm_penalty2;
  lm_penalty_trans = r->config->lmp.lm_penalty_trans;

  /* set word contexts to cnword[] from index 1, skipping transparent words */
  if (ngram) {
    cnnum = 0;
    last_trans = 0;
    for(i=hypo->seqnum-1;i>=0;i--) {
      if (! winfo->is_transparent[hypo->seq[i]]) {
	cnword[cnnum+1] = hypo->seq[i];
	cnnum++;
	if (cnnum >= ngram->n - 1) break;
      } else {
	last_trans++;
      }
    }
    /* NOTE(review): the loops below were reconstructed from a garbled
       source following the upstream Julius implementation — confirm
       against the original ngram_decode.c */
    if (ngram->dir == DIR_RL) {
      /* backward N-gram: set context words in reverse order */
      for(i=0;i<cnnum;i++) cnwordrev[cnnum-1-i] = cnword[i+1];
    }
    /* convert word IDs to class N-gram entry IDs */
    if (ngram->dir == DIR_RL) {
      for(i=0;i<cnnum;i++) cnwordrev[i] = winfo->wton[cnwordrev[i]];
    } else {
      for(i=0;i<cnnum;i++) cnword[i+1] = winfo->wton[cnword[i+1]];
    }
  }

  /* lookup survived words in backtrellis on time frame 't' */
  for (i=0;i<bt->num[t];i++) {
    w = (bt->rw[t][i])->wid;
#ifdef WORD_GRAPH
    /* only words on the word graphs are expanded */
    if (!(bt->rw[t][i])->within_wordgraph) continue;
#endif /* not WORD_GRAPH */
#ifdef WPAIR
    /* some word have same word ID with different previous word, so
       only one will be opened (best word will be selected later
       by next_word() */
    if (w == w_old) continue;	/* backtrellis is sorted by word ID */
    else w_old = w;
#endif /* WPAIR */
    /* skip if already exist */
    if (search_nw(nw, w, oldnum) != NULL) continue;

    /* compute LM probability of the word */
    if (ngram) {
      /* compute N-gram probability */
      if (ngram->dir == DIR_RL) {
	/* just compute N-gram prob of the word candidate */
	cnwordrev[cnnum] = winfo->wton[w];
	rawscore = ngram_prob(ngram, cnnum + 1, cnwordrev);
      } else {
	cnword[0] = winfo->wton[w];
	rawscore = ngram_forw2back(ngram, cnword, cnnum + 1);
      }
#ifdef CLASS_NGRAM
      rawscore += winfo->cprob[w];	/* add class probability */
#endif
    }
    /* NOTE(review): if ngram is NULL and lmvar is not LM_NGRAM_USER,
       rawscore below is used uninitialized — presumably callers
       guarantee one of the two holds; confirm upstream */
    if (r->lmvar == LM_NGRAM_USER) {
      /* call user-defined function */
      /* be careful that the word context is ordered in backward direction */
      rawscore = (*(r->lm->lmfunc.lmprob))(winfo, hypo->seq, hypo->seqnum, w, rawscore);
    }

    /* append the word to the next word candidate list */
    nw[num]->tre = bt->rw[t][i];
    nw[num]->id = w;
    nw[num]->lscore = rawscore * lm_weight2 + lm_penalty2;
    if (winfo->is_transparent[w]) {
      /*nw[num]->lscore -= (LOGPROB)last_trans * TRANS_RENZOKU_PENALTY;*/
      /* apply insertion penalty for consecutive transparent words */
      if (winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) {
	nw[num]->lscore += lm_penalty_trans;
      }
    }
    /* j_printf("%d: %s added\n", num, winfo->wname[nw[num]->id]); */
    num++;
  }
  return num;
}
/**
*
* @brief 単語トレリスから次単語集合を決定する.
*
* 指定フレームの前後 lookup_range 分に終端があるトレリス上の単語を集め,
* 次単語構造体を構築する. 同じ単語が上記の範囲内に複数ある場合,
* 指定フレームにもっとも近いトレリス上の単語が選択される.
*
* @param r [in] 認識処理インスタンス
* @param nw [out] 次単語集合を格納する構造体へのポインタ
* @param hypo [in] 展開元の部分文仮説
* @param tm [in] 単語を探す中心となる指定フレーム
* @param t_end [in] 単語を探すフレームの右端
*
* @return @a nw に格納された次単語候補の数を返す.
*
*
* @brief Determine next word candidates from the word trellis.
*
* This function builds a list of next word candidates by looking up
* the word trellis at specified frame, with lookup_range frame margin.
 * If the same word exists in nearby frames, only the one nearest to the
* specified frame will be chosen.
*
* @param r [in] recognition process instance
* @param nw [out] pointer to hold the extracted words as list of next word candidates
* @param hypo [in] partial sentence hypothesis from which the words will be expanded
* @param tm [in] center time frame to look up the words
* @param t_end [in] right frame boundary for the lookup.
*
* @return the number of next words candidates stored in @a nw.
*
*/
/** 
 * Pick up words ending at frame @a t, append them to @a nw, and keep
 * the list sorted by word ID.  Returns the new total count in @a nw.
 */
static int
gbw_pick_and_sort(RecogProcess *r, NEXTWORD **nw, int stored, NODE *hypo, short t)
{
  int total;

  total = pick_backtrellis_words(r, nw, stored, hypo, t);
  if (total > stored) {
    qsort(nw, total, sizeof(NEXTWORD *),
	  (int (*)(const void *, const void *))compare_nw);
  }
  return total;
}

static int
get_backtrellis_words(RecogProcess *r, NEXTWORD **nw, NODE *hypo, short tm, short t_end)
{
  BACKTRELLIS *bt;
  int lookup_range;
  int stored = 0;
  int t;

  if (tm < 0) return(0);

  bt = r->backtrellis;
  lookup_range = r->config->pass2.lookup_range;

#ifdef PREFER_CENTER_ON_TRELLIS_LOOKUP
  /* fix for 3.2 (01/10/18 by ri) */
  /* walk outward from the center frame so that frames nearer to the
     center get higher priority */
  {
    int step;
    for (step = 0; step < lookup_range; step++) {
      /* center or before */
      t = tm - step;
      if (t >= 0 && t <= bt->framelen - 1 && t < t_end) {
	stored = gbw_pick_and_sort(r, nw, stored, hypo, t);
      }
      if (step == 0) continue;	/* center frame has no mirror */
      /* after */
      t = tm + step;
      if (t >= 0 && t <= bt->framelen - 1 && t < t_end) {
	stored = gbw_pick_and_sort(r, nw, stored, hypo, t);
      }
    }
  }
#else
  /* frames at and before the center frame */
  for (t = tm; t >= tm - lookup_range; t--) {
    if (t < 0) break;
    stored = gbw_pick_and_sort(r, nw, stored, hypo, t);
  }
  /* frames after the center frame */
  for (t = tm + 1; t < tm + lookup_range; t++) {
    if (t > bt->framelen - 1 || t >= t_end) break;
    stored = gbw_pick_and_sort(r, nw, stored, hypo, t);
  }
#endif
  return stored;
}
/**
*
* @brief 非展開単語を除去.
*
* 制約により展開対象とならない単語をリストから消去する.
*
* @param nw [i/o] 次単語集合(集合中の展開できない単語が消去される)
* @param hypo [in] 展開元の部分文仮説
* @param num [in] @a nw に現在格納されている単語数
* @param winfo [in] 単語辞書
*
* @return 新たに nw に含まれる次単語数
*
*
* @brief Remove non-expansion word from list.
*
* Remove words in the nextword list which should not be expanded.
*
* @param nw [i/o] list of next word candidates (will be shrinked by removing some words)
* @param hypo [in] partial sentence hypothesis from which the words will be expanded
* @param num [in] current number of next words in @a nw
* @param winfo [in] word dictionary
*
* @return the new number of words in @a nw
*
*/
static int
limit_nw(NEXTWORD **nw, NODE *hypo, int num, WORD_INFO *winfo)
{
  int src,dst;
  int newnum;

  /* the 2nd pass proceeds backward, so a hypothesis ending with the
     beginning-of-sentence silence word is complete: expand nothing */
  /* no hypothesis will be generated after "<s>" */
  if (hypo->seq[hypo->seqnum-1] == winfo->head_silwid) {
    return(0);
  }

  /* compact the list in place, dropping non-expandable words */
  dst = 0;
  for (src = 0; src < num; src++) {
    if (nw[src]->id == winfo->tail_silwid) {
      /* do not expand "</s>" (it only appears at start of search) */
      continue;
    }
#ifdef FIX_35_INHIBIT_SAME_WORD_EXPANSION
    /* inhibit expanding exactly the same trellis word twice */
    if (nw[src]->tre == hypo->tre) continue;
#endif
    if (src != dst) memcpy(nw[dst], nw[src], sizeof(NEXTWORD));
    dst++;
  }
  newnum = dst;
  return newnum;
}
/**
*
* @brief 初期単語仮説集合を求める.
*
* N-gramベースの探索では,初期仮説は単語末尾の無音単語に固定されている.
* ただし,ショートポーズセグメンテーション時は,第1パスで最終フレームに終端が
* 残った単語の中で尤度最大の単語となる.
*
* @param nw [out] 次単語候補リスト(得られた初期単語仮説を格納する)
* @param peseqlen [in] 入力フレーム長
* @param maxnw [in] @a nw に格納できる単語の最大数
* @param r [in] 認識処理インスタンス
*
* @return @a nw に格納された単語候補数を返す.
*
*
* @brief Get initial word hypotheses at the beginning.
*
 * On N-gram based recognition, the initial hypothesis is fixed to the tail
* silence word. Exception is that, in short-pause segmentation mode, the
* initial hypothesis will be chosen from survived words on the last input
* frame in the first pass.
*
* @param nw [out] pointer to hold the initial word candidates
* @param peseqlen [in] input frame length
* @param maxnw [in] maximum number of words that can be stored in @a nw
* @param r [in] recognition process instance
*
* @return the number of words extracted and stored to @a nw.
*
*
* @callgraph
* @callergraph
*/
int
ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r)
{
  WORD_ID first;

  if (r->config->successive.enabled
      && r->sp_break_2_begin_word != WORD_INVALID) {
    /* short-pause segmentation: start from the best word that survived
       on the last trellis frame of the segment */
    first = r->sp_break_2_begin_word;
  } else {
    /* otherwise the initial hypothesis is the word-end silence word */
    first = r->lm->winfo->tail_silwid;
  }
  nw[0]->id = first;
#ifdef FIX_PENALTY
  nw[0]->lscore = 0.0;
#else
  nw[0]->lscore = r->config->lmp.lm_penalty2;
#endif
  return 1;			/* number of initial hypotheses is always 1 */
}
/**
*
* @brief 次単語仮説集合を返す.
*
* 与えられた部分文仮説から,次に接続しうる単語の集合を返す. 実際には,
* 第1パスの結果であるトレリス単語集合 bt 上で,展開元の部分文仮説の最終単語の
* (推定された)始端フレーム hypo->estimated_next_t の前後に存在する
* 単語集合を取出し,それらの N-gram 接続確率を計算して返す.
* 取り出された次単語仮説は,あらかじめ maxnm の長さだけ
* 領域が確保されている nw に格納される.
*
* @param hypo [in] 展開元の文仮説
* @param nw [out] 次単語候補リストを格納する領域へのポインタ
* @param maxnw [in] @a nw の最大長
* @param r [in] 認識処理インスタンス
*
* @return 抽出され nw に格納された次単語仮説の数を返す.
*
*
* @brief Return the list of next word candidate.
*
* Given a partial sentence hypothesis "hypo", it returns the list of
* next word candidates. Actually, it extracts from word trellis the
* list of words whose word-end node has survived near the estimated
* beginning-of-word frame of last word "hypo->estimated_next_t", and store
* them to "nw" with their N-gram probabilities.
*
* @param hypo [in] source partial sentence hypothesis
* @param nw [out] pointer to store the list of next word candidates (should be already allocated)
* @param maxnw [in] maximum number of words that can be stored to @a nw
* @param r [in] recognition process instance
*
* @return the number of extracted next word candidates in @a nw.
*
* @callgraph
* @callergraph
*/
int
ngram_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r)
{
  int extracted, accepted;

  if (hypo->seqnum == 0) {
    j_internal_error("ngram_nextwords: hypo contains no word\n");
  }

  /* collect words surviving in the backtrellis around the estimated
     begin frame of the hypothesis */
  extracted = get_backtrellis_words(r, nw, hypo, hypo->estimated_next_t, hypo->bestt);

  /* drop words that must not be expanded */
  accepted = limit_nw(nw, hypo, extracted, r->lm->winfo);

  if (debug2_flag) jlog("DEBUG: ngram_decode: %d-%d=%d unfolded\n", extracted, extracted - accepted, accepted);

  return(accepted);
}
/**
*
* @brief 受理判定
*
* 与えられた部分文仮説が,文(すなわち探索終了)として
* 受理可能であるかどうかを返す. N-gram では文頭に対応する無音単語
* (silhead) であれば受理する.
*
* @param hypo [in] 部分文仮説
* @param r [in] 認識処理インスタンス
*
* @return 文として受理可能であれば TRUE,不可能なら FALSE を返す.
*
*
* @brief Acceptance check.
*
* Return whether the given partial hypothesis is acceptable as a sentence
* and can be treated as a final search candidate. In N-gram mode, it checks
* whether the last word is the beginning-of-sentence silence (silhead).
*
* @param hypo [in] partial sentence hypothesis to be examined
* @param r [in] recognition process instance
*
* @return TRUE if acceptable as a sentence, or FALSE if not.
*
* @callgraph
* @callergraph
*/
boolean
ngram_acceptable(NODE *hypo, RecogProcess *r)
{
  WORD_ID last = hypo->seq[hypo->seqnum-1];

  if (r->config->successive.enabled) {
    /* in short-pause segmentation mode, the last word must match the
       first word of the best hypothesis on the 1st pass */
    if (last == r->sp_break_2_end_word) return TRUE;
    return FALSE;
  }
  /* otherwise the last word must be the beginning-of-sentence silence word */
  if (last == r->lm->winfo->head_silwid) return TRUE;
  return FALSE;
}
/* end of file */