1 /**
2  * @file   vsegment.c
3  *
4  * <JA>
5  * @brief  ���Ϥ��Ф���Viterbi ���饤���Ȥμ¹�
6  * </JA>
7  *
8  * <EN>
9  * @brief  Do viterbi alignment for the input
10  * </EN>
11  *
12  * @author Akinobu LEE
13  * @date   Fri Feb 18 19:29:22 2005
14  *
15  * $Revision: 1.2 $
16  *
17  */
18 /*
19  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
20  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
21  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
22  * All rights reserved
23  */
24 
25 #include <sent/stddefs.h>
26 #include <sent/htk_param.h>
27 #include <sent/hmm.h>
28 
29 /**
30  * @brief  Perform Viterbi alignment.
31  *
32  * This function performs viterbi alignment for the given sentence %HMM,
33  * input parameter and unit definition.  Any segmentatino unit (word, phoneme
34  * state, etc.) is allowed: the segmentation unit should be specified by
35  * specifying a list of state id which are the end of each unit.
36  * For example, if you want to obtain phoneme alignment, the list of state
37  * number that exist at the end of phones should be specified by @a endstates.
38  *
39  * @param hmm [in] sentence HMM to be matched
40  * @param param [in] input parameter data
41  * @param wrk [i/o] HMM computation work area
42  * @param multipath [in] TRUE if need multi-path handling
43  * @param endstates [in] list of state id that corrsponds to the ends of units
44  * @param ulen [in] total number of units in the @a hmm
45  * @param id_ret [out] Pointer to store the newly allocated array of the resulting id sequence of units on the best path.
46  * @param seg_ret [out] Pointer to store the newly allocated array of the resulting end frame of each unit on the best path.
47  * @param uscore_ret [out] Pointer to store the newly allocated array of the resulting score at the end frame of each unit on the best path.
48  * @param slen_ret [out] Pointer to store the total number of units on the best path.
49  *
50  * @return the total acoustic score for the whole input.
51  */
52 LOGPROB
viterbi_segment(HMM * hmm,HTK_Param * param,HMMWork * wrk,boolean multipath,int * endstates,int ulen,int ** id_ret,int ** seg_ret,LOGPROB ** uscore_ret,int * slen_ret)53 viterbi_segment(HMM *hmm, HTK_Param *param, HMMWork *wrk, boolean multipath, int *endstates, int ulen, int **id_ret, int **seg_ret, LOGPROB **uscore_ret, int *slen_ret)
54 {
55   /* for viterbi */
56   LOGPROB *nodescore[2];	/* node buffer */
57   SEGTOKEN **tokenp[2];		/* propagating token which holds segment info */
58   int startt, endt;
59   int *from_node;
60   int *u_end, *u_start;	/* the node is an end of the word, or -1 for non-multipath mode*/
61   int i, n;
62   unsigned int t;
63   int tl,tn;
64   LOGPROB tmpsum;
65   A_CELL *ac;
66   SEGTOKEN *newtoken, *token, *tmptoken, *root;
67   LOGPROB result_score;
68   LOGPROB maxscore, minscore;	/* for debug */
69   int maxnode;			/* for debug */
70   int *id, *seg, slen;
71   LOGPROB *uscore;
72 
73   /* assume more than 1 units */
74   if (ulen < 1) {
75     jlog("Error: vsegment: no unit?\n");
76     return LOG_ZERO;
77   }
78 
79   if (!multipath) {
80     /* initialize unit start/end marker */
81     u_start = (int *)mymalloc(hmm->len * sizeof(int));
82     u_end   = (int *)mymalloc(hmm->len * sizeof(int));
83     for (n = 0; n < hmm->len; n++) {
84       u_start[n] = -1;
85       u_end[n] = -1;
86     }
87     u_start[0] = 0;
88     u_end[endstates[0]] = 0;
89     for (i=1;i<ulen;i++) {
90       u_start[endstates[i-1]+1] = i;
91       u_end[endstates[i]] = i;
92     }
93 #if 0
94     for (i=0;i<hmm->len;i++) {
95       printf("unit %d: start=%d, end=%d\n", i, u_start[i], u_end[i]);
96     }
97 #endif
98   }
99 
100   /* initialize node buffers */
101   tn = 0;
102   tl = 1;
103   root = NULL;
104   for (i=0;i<2;i++){
105     nodescore[i] = (LOGPROB *)mymalloc(hmm->len * sizeof(LOGPROB));
106     tokenp[i] = (SEGTOKEN **)mymalloc(hmm->len * sizeof(SEGTOKEN *));
107     for (n = 0; n < hmm->len; n++) {
108       tokenp[i][n] = NULL;
109     }
110   }
111   for (n = 0; n < hmm->len; n++) {
112     nodescore[tn][n] = LOG_ZERO;
113     newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
114     newtoken->last_id = -1;
115     newtoken->last_end_frame = -1;
116     newtoken->last_end_score = 0.0;
117     newtoken->list = root;
118     root = newtoken;
119     newtoken->next = NULL;
120     tokenp[tn][n] = newtoken;
121   }
122   from_node = (int *)mymalloc(sizeof(int) * hmm->len);
123 
124   /* first frame: only set initial score */
125   /*if (hmm->state[0].is_pseudo_state) {
126     jlog("Warning: state %d: pseudo state?\n", 0);
127     }*/
128   if (multipath) {
129     nodescore[tn][0] = 0.0;
130   } else {
131     nodescore[tn][0] = outprob(wrk, 0, &(hmm->state[0]), param);
132   }
133 
134   /* do viterbi for rest frame */
135   if (multipath) {
136     startt = 0;  endt = param->samplenum;
137   } else {
138     startt = 1;  endt = param->samplenum - 1;
139   }
140   for (t = startt; t <= endt; t++) {
141     i = tl;
142     tl = tn;
143     tn = i;
144     maxscore = LOG_ZERO;
145     minscore = 0.0;
146 
147     /* clear next scores */
148     for (i=0;i<hmm->len;i++) {
149       nodescore[tn][i] = LOG_ZERO;
150       from_node[i] = -1;
151     }
152 
153     /* select viterbi path for each node */
154     for (n = 0; n < hmm->len; n++) {
155       if (nodescore[tl][n] <= LOG_ZERO) continue;
156       for (ac = hmm->state[n].ac; ac; ac = ac->next) {
157         tmpsum = nodescore[tl][n] + ac->a;
158         if (nodescore[tn][ac->arc] < tmpsum) {
159           nodescore[tn][ac->arc] = tmpsum;
160 	  from_node[ac->arc] = n;
161 	}
162       }
163     }
164     /* propagate token, appending new if path was selected between units */
165     if (multipath) {
166       for (n = 0; n < hmm->len; n++) {
167 	if (from_node[n] == -1 || nodescore[tn][n] <= LOG_ZERO) {
168 	  /*tokenp[tn][n] = NULL;*/
169 	} else {
170 	  i=0;
171 	  while (from_node[n] > endstates[i]) i++;
172 	  if (n > endstates[i]) {
173 	    newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
174 	    newtoken->last_id = i;
175 	    newtoken->last_end_frame = t-1;
176 	    newtoken->last_end_score = nodescore[tl][from_node[n]];
177 	    newtoken->list = root;
178 	    root = newtoken;
179 	    newtoken->next = tokenp[tl][from_node[n]];
180 	    tokenp[tn][n] = newtoken;
181 	  } else {
182 	    tokenp[tn][n] = tokenp[tl][from_node[n]];
183 	  }
184 	}
185       }
186     } else {			/* not multipath */
187       for (n = 0; n < hmm->len; n++) {
188 	if (from_node[n] == -1) {
189 	  tokenp[tn][n] = NULL;
190 	} else if (nodescore[tn][n] <= LOG_ZERO) {
191 	  tokenp[tn][n] = tokenp[tl][from_node[n]];
192 	} else {
193 	  if (u_end[from_node[n]] != -1 && u_start[n] != -1
194 	      && from_node[n] !=  n) {
195 	    newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
196 	    newtoken->last_id = u_end[from_node[n]];
197 	    newtoken->last_end_frame = t-1;
198 	    newtoken->last_end_score = nodescore[tl][from_node[n]];
199 	    newtoken->list = root;
200 	    root = newtoken;
201 	    newtoken->next = tokenp[tl][from_node[n]];
202 	    tokenp[tn][n] = newtoken;
203 	  } else {
204 	    tokenp[tn][n] = tokenp[tl][from_node[n]];
205 	  }
206 	}
207       }
208     }
209 
210     if (multipath) {
211       /* if this is next of last frame, loop ends here */
212       if (t == param->samplenum) break;
213     }
214 
215     /* calc outprob to new nodes */
216     for (n = 0; n < hmm->len; n++) {
217       if (multipath) {
218 	if (hmm->state[n].out.state == NULL) continue;
219       }
220       if (nodescore[tn][n] > LOG_ZERO) {
221 	if (hmm->state[n].is_pseudo_state) {
222 	  jlog("Warning: vsegment: state %d: pseudo state?\n", n);
223 	}
224 	nodescore[tn][n] += outprob(wrk, t, &(hmm->state[n]), param);
225       }
226       if (nodescore[tn][n] > maxscore) { /* for debug */
227 	maxscore = nodescore[tn][n];
228 	maxnode = n;
229       }
230     }
231 
232 #if 0
233     for (i=0;i<ulen;i++) {
234       printf("%d: unit %d(%d-%d): begin_frame = %d\n", t - 1, i,
235 	     (i > 0) ? endstates[i-1]+1 : 0, endstates[i],
236 	     (multipath && tokenp[tl][endstates[i]] == NULL) ? -1 : tokenp[tl][endstates[i]]->last_end_frame + 1);
237     }
238 #endif
239 
240     /* printf("t=%3d max=%f n=%d\n",t,maxscore, maxnode); */
241 
242   }
243 
244   result_score = nodescore[tn][hmm->len-1];
245 
246   /* parse back the last token to see the trail of best viterbi path */
247   /* and store the informations to returning buffer */
248   slen = 0;
249   if (!multipath) slen++;
250   for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
251     if (token->last_end_frame == -1) break;
252     slen++;
253   }
254   id = (int *)mymalloc(sizeof(int)*slen);
255   seg = (int *)mymalloc(sizeof(int)*slen);
256   uscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*slen);
257 
258   if (multipath) {
259     i = slen - 1;
260   } else {
261     id[slen-1] = ulen - 1;
262     seg[slen-1] = t - 1;
263     uscore[slen-1] = result_score;
264     i = slen - 2;
265   }
266   for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
267     if (i < 0 || token->last_end_frame == -1) break;
268     id[i] = token->last_id;
269     seg[i] = token->last_end_frame;
270     uscore[i] = token->last_end_score;
271     i--;
272   }
273 
274   /* normalize scores by frame */
275   for (i=slen-1;i>0;i--) {
276     uscore[i] = (uscore[i] - uscore[i-1]) / (seg[i] - seg[i-1]);
277   }
278   uscore[0] = uscore[0] / (seg[0] + 1);
279 
280   /* set return value */
281   *id_ret = id;
282   *seg_ret = seg;
283   *uscore_ret = uscore;
284   *slen_ret = slen;
285 
286   /* free memory */
287   if (!multipath) {
288     free(u_start);
289     free(u_end);
290   }
291   free(from_node);
292   token = root;
293   while(token) {
294     tmptoken = token->list;
295     free(token);
296     token = tmptoken;
297   }
298   for (i=0;i<2;i++) {
299     free(nodescore[i]);
300     free(tokenp[i]);
301   }
302 
303   return(result_score);
304 
305 }
306