1 /* ====================================================================
2 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
3 * reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * This work was supported in part by funding from the Defense Advanced
18 * Research Projects Agency and the National Science Foundation of the
19 * United States of America, and the CMU Sphinx Speech Consortium.
20 *
21 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * ====================================================================
34 *
35 */
36 /*
37 * ms_mgau.c -- Essentially a wrapper around gauden and
38 * senone. It supports multiple streams.
39 *
40 *
41 * **********************************************
42 * CMU ARPA Speech Project
43 *
44 * Copyright (c) 1997 Carnegie Mellon University.
45 * ALL RIGHTS RESERVED.
46 * **********************************************
47 * HISTORY
48 * $Log$
49 * Revision 1.2 2006/02/22 16:56:01 arthchan2003
50 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone
51 *
52 * Revision 1.1.2.4 2005/09/25 18:55:19 arthchan2003
53 * Added a flag to turn on and off precomputation.
54 *
55 * Revision 1.1.2.3 2005/08/03 18:53:44 dhdfu
56 * Add memory deallocation functions. Also move all the initialization
57 * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
58 * from decode_anytopo and friends.
59 *
60 * Revision 1.1.2.2 2005/08/02 21:05:38 arthchan2003
61 * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc.
62 *
63 * Revision 1.1.2.1 2005/07/20 19:37:09 arthchan2003
64 * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone. Add ms_mgau_init and model_set_mllr. This allow eliminating 600 lines of code in decode_anytopo/align/allphone.
65 *
66 *
67 *
68 */
69
70 #include <string.h>
71
72 #include <ms_mgau.h>
73 #include <ms_mllr.h>
74 #include <cb2mllr_io.h>
75 #include <cmd_ln.h>
76
77 /* Wrong place to put it */
78 int32
model_set_mllr(ms_mgau_model_t * msg,const char * mllrfile,const char * cb2mllrfile,feat_t * fcb,mdef_t * mdef,cmd_ln_t * config)79 model_set_mllr(ms_mgau_model_t * msg, const char *mllrfile,
80 const char *cb2mllrfile, feat_t * fcb, mdef_t * mdef,
81 cmd_ln_t *config)
82 {
83 float32 ****A, ***B;
84 int32 *cb2mllr;
85 int32 gid, sid, nclass;
86 uint8 *mgau_xform;
87
88 gauden_mean_reload(msg->g, cmd_ln_str_r(config, "-mean"));
89
90 if (ms_mllr_read_regmat(mllrfile, &A, &B,
91 fcb->stream_len, feat_n_stream(fcb),
92 &nclass) < 0)
93 E_FATAL("ms_mllr_read_regmat failed\n");
94
95 if (cb2mllrfile && strcmp(cb2mllrfile, ".1cls.") != 0) {
96 int32 ncb, nmllr;
97
98 cb2mllr_read(cb2mllrfile, &cb2mllr, &ncb, &nmllr);
99 if (nmllr != nclass)
100 E_FATAL
101 ("Number of classes in cb2mllr does not match mllr (%d != %d)\n",
102 ncb, nclass);
103 if (ncb != msg->s->n_sen)
104 E_FATAL
105 ("Number of senones in cb2mllr does not match mdef (%d != %d)\n",
106 ncb, msg->s->n_sen);
107 }
108 else
109 cb2mllr = NULL;
110
111
112 mgau_xform = (uint8 *) ckd_calloc(msg->g->n_mgau, sizeof(uint8));
113
114 /* Transform each non-CI mixture Gaussian */
115 for (sid = 0; sid < msg->s->n_sen; sid++) {
116 int32 class = 0;
117
118 if (cb2mllr)
119 class = cb2mllr[sid];
120 if (class == -1)
121 continue;
122
123 if (mdef->cd2cisen[sid] != sid) { /* Otherwise it's a CI senone */
124 gid = msg->s->mgau[sid];
125 if (!mgau_xform[gid]) {
126 ms_mllr_norm_mgau(msg->g->mean[gid], msg->g->n_density, A,
127 B, fcb->stream_len, feat_n_stream(fcb),
128 class);
129 mgau_xform[gid] = 1;
130 }
131 }
132 }
133
134 ckd_free(mgau_xform);
135
136 ms_mllr_free_regmat(A, B, feat_n_stream(fcb));
137 ckd_free(cb2mllr);
138
139 return S3_SUCCESS;
140 }
141
142 ms_mgau_model_t *
ms_mgau_init(const char * meanfile,const char * varfile,float64 varfloor,const char * mixwfile,float64 mixwfloor,int32 precomp,const char * senmgau,const char * lambdafile,int32 _topn,logmath_t * logmath)143 ms_mgau_init(const char *meanfile,
144 const char *varfile, float64 varfloor,
145 const char *mixwfile, float64 mixwfloor,
146 int32 precomp, const char *senmgau, const char *lambdafile,
147 int32 _topn, logmath_t *logmath)
148 {
149 /* Codebooks */
150 int32 i;
151 ms_mgau_model_t *msg;
152 gauden_t *g;
153 senone_t *s;
154 mgau2sen_t *m2s;
155
156 msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
157
158
159 msg->g = NULL;
160 msg->s = NULL;
161 msg->i = NULL;
162
163 msg->g = gauden_init(meanfile, varfile, varfloor, precomp, logmath);
164
165 msg->s = senone_init(mixwfile, senmgau, mixwfloor, logmath);
166
167 g = ms_mgau_gauden(msg);
168 s = ms_mgau_senone(msg);
169
170 /* Verify senone parameters against gauden parameters */
171 if (s->n_feat != g->n_feat)
172 E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
173 s->n_feat);
174 if (s->n_cw != g->n_density)
175 E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
176 g->n_density, s->n_cw);
177 if (s->n_gauden > g->n_mgau)
178 E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
179 s->n_gauden, g->n_mgau);
180 if (s->n_gauden < g->n_mgau)
181 E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
182 s->n_gauden, g->n_mgau);
183 /* Initialize mapping from mixture Gaussian to senones */
184 msg->mgau2sen =
185 (mgau2sen_t **) ckd_calloc(g->n_mgau, sizeof(mgau2sen_t *));
186 for (i = 0; i < s->n_sen; i++) {
187 m2s = (mgau2sen_t *) ckd_calloc(1, sizeof(mgau2sen_t));
188 m2s->sen = i;
189 m2s->next = msg->mgau2sen[s->mgau[i]];
190 msg->mgau2sen[s->mgau[i]] = m2s;
191 }
192
193 /* CD/CI senone interpolation weights file, if present */
194 if (lambdafile != NULL) {
195 msg->i = interp_init(lambdafile, logmath);
196 /* Verify interpolation weights size with senones */
197 if (msg->i->n_sen != s->n_sen)
198 E_FATAL("Interpolation file has %d weights; but #senone= %d\n",
199 msg->i->n_sen, s->n_sen);
200 }
201 else
202 msg->i = NULL;
203
204
205 msg->topn = _topn;
206 E_INFO("The value of topn: %d\n", msg->topn);
207 if (msg->topn == 0 || msg->topn > msg->g->n_density) {
208 E_WARN
209 ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
210 msg->topn, msg->g->n_density);
211 msg->topn = msg->g->n_density;
212 }
213
214 msg->dist = (gauden_dist_t ***)
215 ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
216 sizeof(gauden_dist_t));
217 msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8));
218
219 return msg;
220 }
221
222 void
ms_mgau_free(ms_mgau_model_t * msg)223 ms_mgau_free(ms_mgau_model_t * msg)
224 {
225 int32 i;
226 gauden_t *g;
227
228 if (msg == NULL)
229 return;
230
231 g = ms_mgau_gauden(msg);
232 for (i = 0; i < g->n_mgau; ++i)
233 ckd_free(msg->mgau2sen[i]);
234 ckd_free(msg->mgau2sen);
235 gauden_free(msg->g);
236 senone_free(msg->s);
237 ckd_free_3d((void *) msg->dist);
238 ckd_free(msg->mgau_active);
239 ckd_free(msg);
240 }
241
242 int32
ms_cont_mgau_frame_eval(ascr_t * ascr,ms_mgau_model_t * msg,mdef_t * mdef,float32 ** feat)243 ms_cont_mgau_frame_eval(ascr_t * ascr,
244 ms_mgau_model_t * msg,
245 mdef_t * mdef, float32 ** feat)
246 {
247 int32 gid;
248 int32 s;
249 int32 topn;
250 int32 best;
251 gauden_t *g;
252 senone_t *sen;
253 interp_t *interp;
254
255 topn = ms_mgau_topn(msg);
256 g = ms_mgau_gauden(msg);
257 sen = ms_mgau_senone(msg);
258 interp = ms_mgau_interp(msg);
259
260 /*
261 * Evaluate gaussian density codebooks and senone scores for input codeword.
262 * Evaluate only active codebooks and senones.
263 */
264
265 if (interp) {
266 for (s = 0; s < mdef->n_ci_sen; s++)
267 ascr->sen_active[s] = 1;
268 }
269
270 /* Flag all active mixture-gaussian codebooks */
271
272 for (gid = 0; gid < g->n_mgau; gid++)
273 msg->mgau_active[gid] = 0;
274
275 for (s = 0; s < ascr->n_sen; s++) {
276 if (ascr->sen_active[s]) {
277 msg->mgau_active[sen->mgau[s]] = 1;
278 }
279 }
280
281 /* Compute topn gaussian density values (for active codebooks) */
282 for (gid = 0; gid < g->n_mgau; gid++) {
283 if (msg->mgau_active[gid])
284 gauden_dist(g, gid, topn, feat, msg->dist[gid]);
285 }
286
287 if (interp) {
288 for (s = 0; s < ascr->n_sen; s++) {
289 if (ascr->sen_active[s]) {
290 if (s >= mdef->n_ci_sen) {
291 interp_cd_ci(interp, ascr->senscr, s,
292 mdef->cd2cisen[s]);
293 }
294 }
295 }
296 }
297
298 best = (int32) 0x80000000;
299 for (s = 0; s < ascr->n_sen; s++) {
300 if (ascr->sen_active[s]) {
301 ascr->senscr[s] =
302 senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
303 if (best < ascr->senscr[s])
304 best = ascr->senscr[s];
305 }
306 }
307
308
309 /* Normalize senone scores (interpolation above can only lower best score) */
310 for (s = 0; s < ascr->n_sen; s++) {
311 if (ascr->sen_active[s])
312 ascr->senscr[s] -= best;
313 }
314
315 return best;
316 }
317