1 /* ====================================================================
2  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * This work was supported in part by funding from the Defense Advanced
18  * Research Projects Agency and the National Science Foundation of the
19  * United States of America, and the CMU Sphinx Speech Consortium.
20  *
21  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * ====================================================================
34  *
35  */
36 /*
 * ms_mgau.c -- Essentially a wrapper that wraps up gauden and
 * senone. It supports multiple streams.
39  *
40  *
41  * **********************************************
42  * CMU ARPA Speech Project
43  *
44  * Copyright (c) 1997 Carnegie Mellon University.
45  * ALL RIGHTS RESERVED.
46  * **********************************************
47  * HISTORY
48  * $Log$
49  * Revision 1.2  2006/02/22  16:56:01  arthchan2003
50  * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone
51  *
52  * Revision 1.1.2.4  2005/09/25 18:55:19  arthchan2003
53  * Added a flag to turn on and off precomputation.
54  *
55  * Revision 1.1.2.3  2005/08/03 18:53:44  dhdfu
56  * Add memory deallocation functions.  Also move all the initialization
57  * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
58  * from decode_anytopo and friends.
59  *
60  * Revision 1.1.2.2  2005/08/02 21:05:38  arthchan2003
61  * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc.
62  *
63  * Revision 1.1.2.1  2005/07/20 19:37:09  arthchan2003
64  * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone.  Add ms_mgau_init and model_set_mllr.  This allow eliminating 600 lines of code in decode_anytopo/align/allphone.
65  *
66  *
67  *
68  */
69 
70 #include <string.h>
71 
72 #include <ms_mgau.h>
73 #include <ms_mllr.h>
74 #include <cb2mllr_io.h>
75 #include <cmd_ln.h>
76 
/* FIXME: model_set_mllr() does not really belong in this file. */
78 int32
model_set_mllr(ms_mgau_model_t * msg,const char * mllrfile,const char * cb2mllrfile,feat_t * fcb,mdef_t * mdef,cmd_ln_t * config)79 model_set_mllr(ms_mgau_model_t * msg, const char *mllrfile,
80 	       const char *cb2mllrfile, feat_t * fcb, mdef_t * mdef,
81 	       cmd_ln_t *config)
82 {
83     float32 ****A, ***B;
84     int32 *cb2mllr;
85     int32 gid, sid, nclass;
86     uint8 *mgau_xform;
87 
88     gauden_mean_reload(msg->g, cmd_ln_str_r(config, "-mean"));
89 
90     if (ms_mllr_read_regmat(mllrfile, &A, &B,
91                             fcb->stream_len, feat_n_stream(fcb),
92                             &nclass) < 0)
93         E_FATAL("ms_mllr_read_regmat failed\n");
94 
95     if (cb2mllrfile && strcmp(cb2mllrfile, ".1cls.") != 0) {
96         int32 ncb, nmllr;
97 
98         cb2mllr_read(cb2mllrfile, &cb2mllr, &ncb, &nmllr);
99         if (nmllr != nclass)
100             E_FATAL
101                 ("Number of classes in cb2mllr does not match mllr (%d != %d)\n",
102                  ncb, nclass);
103         if (ncb != msg->s->n_sen)
104             E_FATAL
105                 ("Number of senones in cb2mllr does not match mdef (%d != %d)\n",
106                  ncb, msg->s->n_sen);
107     }
108     else
109         cb2mllr = NULL;
110 
111 
112     mgau_xform = (uint8 *) ckd_calloc(msg->g->n_mgau, sizeof(uint8));
113 
114     /* Transform each non-CI mixture Gaussian */
115     for (sid = 0; sid < msg->s->n_sen; sid++) {
116         int32 class = 0;
117 
118         if (cb2mllr)
119             class = cb2mllr[sid];
120         if (class == -1)
121             continue;
122 
123         if (mdef->cd2cisen[sid] != sid) {       /* Otherwise it's a CI senone */
124             gid = msg->s->mgau[sid];
125             if (!mgau_xform[gid]) {
126                 ms_mllr_norm_mgau(msg->g->mean[gid], msg->g->n_density, A,
127                                   B, fcb->stream_len, feat_n_stream(fcb),
128                                   class);
129                 mgau_xform[gid] = 1;
130             }
131         }
132     }
133 
134     ckd_free(mgau_xform);
135 
136     ms_mllr_free_regmat(A, B, feat_n_stream(fcb));
137     ckd_free(cb2mllr);
138 
139     return S3_SUCCESS;
140 }
141 
142 ms_mgau_model_t *
ms_mgau_init(const char * meanfile,const char * varfile,float64 varfloor,const char * mixwfile,float64 mixwfloor,int32 precomp,const char * senmgau,const char * lambdafile,int32 _topn,logmath_t * logmath)143 ms_mgau_init(const char *meanfile,
144              const char *varfile, float64 varfloor,
145              const char *mixwfile, float64 mixwfloor,
146              int32 precomp, const char *senmgau, const char *lambdafile,
147 	     int32 _topn, logmath_t *logmath)
148 {
149     /* Codebooks */
150     int32 i;
151     ms_mgau_model_t *msg;
152     gauden_t *g;
153     senone_t *s;
154     mgau2sen_t *m2s;
155 
156     msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
157 
158 
159     msg->g = NULL;
160     msg->s = NULL;
161     msg->i = NULL;
162 
163     msg->g = gauden_init(meanfile, varfile, varfloor, precomp, logmath);
164 
165     msg->s = senone_init(mixwfile, senmgau, mixwfloor, logmath);
166 
167     g = ms_mgau_gauden(msg);
168     s = ms_mgau_senone(msg);
169 
170     /* Verify senone parameters against gauden parameters */
171     if (s->n_feat != g->n_feat)
172         E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
173                 s->n_feat);
174     if (s->n_cw != g->n_density)
175         E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
176                 g->n_density, s->n_cw);
177     if (s->n_gauden > g->n_mgau)
178         E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
179                 s->n_gauden, g->n_mgau);
180     if (s->n_gauden < g->n_mgau)
181         E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
182                 s->n_gauden, g->n_mgau);
183     /* Initialize mapping from mixture Gaussian to senones */
184     msg->mgau2sen =
185         (mgau2sen_t **) ckd_calloc(g->n_mgau, sizeof(mgau2sen_t *));
186     for (i = 0; i < s->n_sen; i++) {
187         m2s = (mgau2sen_t *) ckd_calloc(1, sizeof(mgau2sen_t));
188         m2s->sen = i;
189         m2s->next = msg->mgau2sen[s->mgau[i]];
190         msg->mgau2sen[s->mgau[i]] = m2s;
191     }
192 
193     /* CD/CI senone interpolation weights file, if present */
194     if (lambdafile != NULL) {
195         msg->i = interp_init(lambdafile, logmath);
196         /* Verify interpolation weights size with senones */
197         if (msg->i->n_sen != s->n_sen)
198             E_FATAL("Interpolation file has %d weights; but #senone= %d\n",
199                     msg->i->n_sen, s->n_sen);
200     }
201     else
202         msg->i = NULL;
203 
204 
205     msg->topn = _topn;
206     E_INFO("The value of topn: %d\n", msg->topn);
207     if (msg->topn == 0 || msg->topn > msg->g->n_density) {
208         E_WARN
209             ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
210              msg->topn, msg->g->n_density);
211         msg->topn = msg->g->n_density;
212     }
213 
214     msg->dist = (gauden_dist_t ***)
215         ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
216                       sizeof(gauden_dist_t));
217     msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8));
218 
219     return msg;
220 }
221 
222 void
ms_mgau_free(ms_mgau_model_t * msg)223 ms_mgau_free(ms_mgau_model_t * msg)
224 {
225     int32 i;
226     gauden_t *g;
227 
228     if (msg == NULL)
229         return;
230 
231     g = ms_mgau_gauden(msg);
232     for (i = 0; i < g->n_mgau; ++i)
233         ckd_free(msg->mgau2sen[i]);
234     ckd_free(msg->mgau2sen);
235     gauden_free(msg->g);
236     senone_free(msg->s);
237     ckd_free_3d((void *) msg->dist);
238     ckd_free(msg->mgau_active);
239     ckd_free(msg);
240 }
241 
242 int32
ms_cont_mgau_frame_eval(ascr_t * ascr,ms_mgau_model_t * msg,mdef_t * mdef,float32 ** feat)243 ms_cont_mgau_frame_eval(ascr_t * ascr,
244                         ms_mgau_model_t * msg,
245                         mdef_t * mdef, float32 ** feat)
246 {
247     int32 gid;
248     int32 s;
249     int32 topn;
250     int32 best;
251     gauden_t *g;
252     senone_t *sen;
253     interp_t *interp;
254 
255     topn = ms_mgau_topn(msg);
256     g = ms_mgau_gauden(msg);
257     sen = ms_mgau_senone(msg);
258     interp = ms_mgau_interp(msg);
259 
260     /*
261      * Evaluate gaussian density codebooks and senone scores for input codeword.
262      * Evaluate only active codebooks and senones.
263      */
264 
265     if (interp) {
266         for (s = 0; s < mdef->n_ci_sen; s++)
267             ascr->sen_active[s] = 1;
268     }
269 
270     /* Flag all active mixture-gaussian codebooks */
271 
272     for (gid = 0; gid < g->n_mgau; gid++)
273         msg->mgau_active[gid] = 0;
274 
275     for (s = 0; s < ascr->n_sen; s++) {
276         if (ascr->sen_active[s]) {
277             msg->mgau_active[sen->mgau[s]] = 1;
278         }
279     }
280 
281     /* Compute topn gaussian density values (for active codebooks) */
282     for (gid = 0; gid < g->n_mgau; gid++) {
283         if (msg->mgau_active[gid])
284             gauden_dist(g, gid, topn, feat, msg->dist[gid]);
285     }
286 
287     if (interp) {
288         for (s = 0; s < ascr->n_sen; s++) {
289             if (ascr->sen_active[s]) {
290                 if (s >= mdef->n_ci_sen) {
291                     interp_cd_ci(interp, ascr->senscr, s,
292                                  mdef->cd2cisen[s]);
293                 }
294             }
295         }
296     }
297 
298     best = (int32) 0x80000000;
299     for (s = 0; s < ascr->n_sen; s++) {
300         if (ascr->sen_active[s]) {
301             ascr->senscr[s] =
302                 senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
303             if (best < ascr->senscr[s])
304                 best = ascr->senscr[s];
305         }
306     }
307 
308 
309     /* Normalize senone scores (interpolation above can only lower best score) */
310     for (s = 0; s < ascr->n_sen; s++) {
311         if (ascr->sen_active[s])
312             ascr->senscr[s] -= best;
313     }
314 
315     return best;
316 }
317