1 /**
2 * @file vsegment.c
3 *
4 * <JA>
5 * @brief ���Ϥ��Ф���Viterbi ���饤���Ȥμ¹�
6 * </JA>
7 *
8 * <EN>
9 * @brief Do viterbi alignment for the input
10 * </EN>
11 *
12 * @author Akinobu LEE
13 * @date Fri Feb 18 19:29:22 2005
14 *
15 * $Revision: 1.2 $
16 *
17 */
18 /*
19 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
20 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
21 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
22 * All rights reserved
23 */
24
25 #include <sent/stddefs.h>
26 #include <sent/htk_param.h>
27 #include <sent/hmm.h>
28
29 /**
30 * @brief Perform Viterbi alignment.
31 *
32 * This function performs viterbi alignment for the given sentence %HMM,
33 * input parameter and unit definition. Any segmentatino unit (word, phoneme
34 * state, etc.) is allowed: the segmentation unit should be specified by
35 * specifying a list of state id which are the end of each unit.
36 * For example, if you want to obtain phoneme alignment, the list of state
37 * number that exist at the end of phones should be specified by @a endstates.
38 *
39 * @param hmm [in] sentence HMM to be matched
40 * @param param [in] input parameter data
41 * @param wrk [i/o] HMM computation work area
42 * @param multipath [in] TRUE if need multi-path handling
43 * @param endstates [in] list of state id that corrsponds to the ends of units
44 * @param ulen [in] total number of units in the @a hmm
45 * @param id_ret [out] Pointer to store the newly allocated array of the resulting id sequence of units on the best path.
46 * @param seg_ret [out] Pointer to store the newly allocated array of the resulting end frame of each unit on the best path.
47 * @param uscore_ret [out] Pointer to store the newly allocated array of the resulting score at the end frame of each unit on the best path.
48 * @param slen_ret [out] Pointer to store the total number of units on the best path.
49 *
50 * @return the total acoustic score for the whole input.
51 */
52 LOGPROB
viterbi_segment(HMM * hmm,HTK_Param * param,HMMWork * wrk,boolean multipath,int * endstates,int ulen,int ** id_ret,int ** seg_ret,LOGPROB ** uscore_ret,int * slen_ret)53 viterbi_segment(HMM *hmm, HTK_Param *param, HMMWork *wrk, boolean multipath, int *endstates, int ulen, int **id_ret, int **seg_ret, LOGPROB **uscore_ret, int *slen_ret)
54 {
55 /* for viterbi */
56 LOGPROB *nodescore[2]; /* node buffer */
57 SEGTOKEN **tokenp[2]; /* propagating token which holds segment info */
58 int startt, endt;
59 int *from_node;
60 int *u_end, *u_start; /* the node is an end of the word, or -1 for non-multipath mode*/
61 int i, n;
62 unsigned int t;
63 int tl,tn;
64 LOGPROB tmpsum;
65 A_CELL *ac;
66 SEGTOKEN *newtoken, *token, *tmptoken, *root;
67 LOGPROB result_score;
68 LOGPROB maxscore, minscore; /* for debug */
69 int maxnode; /* for debug */
70 int *id, *seg, slen;
71 LOGPROB *uscore;
72
73 /* assume more than 1 units */
74 if (ulen < 1) {
75 jlog("Error: vsegment: no unit?\n");
76 return LOG_ZERO;
77 }
78
79 if (!multipath) {
80 /* initialize unit start/end marker */
81 u_start = (int *)mymalloc(hmm->len * sizeof(int));
82 u_end = (int *)mymalloc(hmm->len * sizeof(int));
83 for (n = 0; n < hmm->len; n++) {
84 u_start[n] = -1;
85 u_end[n] = -1;
86 }
87 u_start[0] = 0;
88 u_end[endstates[0]] = 0;
89 for (i=1;i<ulen;i++) {
90 u_start[endstates[i-1]+1] = i;
91 u_end[endstates[i]] = i;
92 }
93 #if 0
94 for (i=0;i<hmm->len;i++) {
95 printf("unit %d: start=%d, end=%d\n", i, u_start[i], u_end[i]);
96 }
97 #endif
98 }
99
100 /* initialize node buffers */
101 tn = 0;
102 tl = 1;
103 root = NULL;
104 for (i=0;i<2;i++){
105 nodescore[i] = (LOGPROB *)mymalloc(hmm->len * sizeof(LOGPROB));
106 tokenp[i] = (SEGTOKEN **)mymalloc(hmm->len * sizeof(SEGTOKEN *));
107 for (n = 0; n < hmm->len; n++) {
108 tokenp[i][n] = NULL;
109 }
110 }
111 for (n = 0; n < hmm->len; n++) {
112 nodescore[tn][n] = LOG_ZERO;
113 newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
114 newtoken->last_id = -1;
115 newtoken->last_end_frame = -1;
116 newtoken->last_end_score = 0.0;
117 newtoken->list = root;
118 root = newtoken;
119 newtoken->next = NULL;
120 tokenp[tn][n] = newtoken;
121 }
122 from_node = (int *)mymalloc(sizeof(int) * hmm->len);
123
124 /* first frame: only set initial score */
125 /*if (hmm->state[0].is_pseudo_state) {
126 jlog("Warning: state %d: pseudo state?\n", 0);
127 }*/
128 if (multipath) {
129 nodescore[tn][0] = 0.0;
130 } else {
131 nodescore[tn][0] = outprob(wrk, 0, &(hmm->state[0]), param);
132 }
133
134 /* do viterbi for rest frame */
135 if (multipath) {
136 startt = 0; endt = param->samplenum;
137 } else {
138 startt = 1; endt = param->samplenum - 1;
139 }
140 for (t = startt; t <= endt; t++) {
141 i = tl;
142 tl = tn;
143 tn = i;
144 maxscore = LOG_ZERO;
145 minscore = 0.0;
146
147 /* clear next scores */
148 for (i=0;i<hmm->len;i++) {
149 nodescore[tn][i] = LOG_ZERO;
150 from_node[i] = -1;
151 }
152
153 /* select viterbi path for each node */
154 for (n = 0; n < hmm->len; n++) {
155 if (nodescore[tl][n] <= LOG_ZERO) continue;
156 for (ac = hmm->state[n].ac; ac; ac = ac->next) {
157 tmpsum = nodescore[tl][n] + ac->a;
158 if (nodescore[tn][ac->arc] < tmpsum) {
159 nodescore[tn][ac->arc] = tmpsum;
160 from_node[ac->arc] = n;
161 }
162 }
163 }
164 /* propagate token, appending new if path was selected between units */
165 if (multipath) {
166 for (n = 0; n < hmm->len; n++) {
167 if (from_node[n] == -1 || nodescore[tn][n] <= LOG_ZERO) {
168 /*tokenp[tn][n] = NULL;*/
169 } else {
170 i=0;
171 while (from_node[n] > endstates[i]) i++;
172 if (n > endstates[i]) {
173 newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
174 newtoken->last_id = i;
175 newtoken->last_end_frame = t-1;
176 newtoken->last_end_score = nodescore[tl][from_node[n]];
177 newtoken->list = root;
178 root = newtoken;
179 newtoken->next = tokenp[tl][from_node[n]];
180 tokenp[tn][n] = newtoken;
181 } else {
182 tokenp[tn][n] = tokenp[tl][from_node[n]];
183 }
184 }
185 }
186 } else { /* not multipath */
187 for (n = 0; n < hmm->len; n++) {
188 if (from_node[n] == -1) {
189 tokenp[tn][n] = NULL;
190 } else if (nodescore[tn][n] <= LOG_ZERO) {
191 tokenp[tn][n] = tokenp[tl][from_node[n]];
192 } else {
193 if (u_end[from_node[n]] != -1 && u_start[n] != -1
194 && from_node[n] != n) {
195 newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
196 newtoken->last_id = u_end[from_node[n]];
197 newtoken->last_end_frame = t-1;
198 newtoken->last_end_score = nodescore[tl][from_node[n]];
199 newtoken->list = root;
200 root = newtoken;
201 newtoken->next = tokenp[tl][from_node[n]];
202 tokenp[tn][n] = newtoken;
203 } else {
204 tokenp[tn][n] = tokenp[tl][from_node[n]];
205 }
206 }
207 }
208 }
209
210 if (multipath) {
211 /* if this is next of last frame, loop ends here */
212 if (t == param->samplenum) break;
213 }
214
215 /* calc outprob to new nodes */
216 for (n = 0; n < hmm->len; n++) {
217 if (multipath) {
218 if (hmm->state[n].out.state == NULL) continue;
219 }
220 if (nodescore[tn][n] > LOG_ZERO) {
221 if (hmm->state[n].is_pseudo_state) {
222 jlog("Warning: vsegment: state %d: pseudo state?\n", n);
223 }
224 nodescore[tn][n] += outprob(wrk, t, &(hmm->state[n]), param);
225 }
226 if (nodescore[tn][n] > maxscore) { /* for debug */
227 maxscore = nodescore[tn][n];
228 maxnode = n;
229 }
230 }
231
232 #if 0
233 for (i=0;i<ulen;i++) {
234 printf("%d: unit %d(%d-%d): begin_frame = %d\n", t - 1, i,
235 (i > 0) ? endstates[i-1]+1 : 0, endstates[i],
236 (multipath && tokenp[tl][endstates[i]] == NULL) ? -1 : tokenp[tl][endstates[i]]->last_end_frame + 1);
237 }
238 #endif
239
240 /* printf("t=%3d max=%f n=%d\n",t,maxscore, maxnode); */
241
242 }
243
244 result_score = nodescore[tn][hmm->len-1];
245
246 /* parse back the last token to see the trail of best viterbi path */
247 /* and store the informations to returning buffer */
248 slen = 0;
249 if (!multipath) slen++;
250 for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
251 if (token->last_end_frame == -1) break;
252 slen++;
253 }
254 id = (int *)mymalloc(sizeof(int)*slen);
255 seg = (int *)mymalloc(sizeof(int)*slen);
256 uscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*slen);
257
258 if (multipath) {
259 i = slen - 1;
260 } else {
261 id[slen-1] = ulen - 1;
262 seg[slen-1] = t - 1;
263 uscore[slen-1] = result_score;
264 i = slen - 2;
265 }
266 for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
267 if (i < 0 || token->last_end_frame == -1) break;
268 id[i] = token->last_id;
269 seg[i] = token->last_end_frame;
270 uscore[i] = token->last_end_score;
271 i--;
272 }
273
274 /* normalize scores by frame */
275 for (i=slen-1;i>0;i--) {
276 uscore[i] = (uscore[i] - uscore[i-1]) / (seg[i] - seg[i-1]);
277 }
278 uscore[0] = uscore[0] / (seg[0] + 1);
279
280 /* set return value */
281 *id_ret = id;
282 *seg_ret = seg;
283 *uscore_ret = uscore;
284 *slen_ret = slen;
285
286 /* free memory */
287 if (!multipath) {
288 free(u_start);
289 free(u_end);
290 }
291 free(from_node);
292 token = root;
293 while(token) {
294 tmptoken = token->list;
295 free(token);
296 token = tmptoken;
297 }
298 for (i=0;i<2;i++) {
299 free(nodescore[i]);
300 free(tokenp[i]);
301 }
302
303 return(result_score);
304
305 }
306