1 /* -*- c-basic-offset: 4 -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37 /************************************************
38 * CMU ARPA Speech Project
39 *
40 * Copyright (c) 2000 Carnegie Mellon University.
41 * ALL RIGHTS RESERVED.
42 ************************************************
43 *
44 * HISTORY
45 * $Log$
46 * Revision 1.27 2006/02/23 05:44:59 arthchan2003
47 * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH.
48 * 1, Added temp_init_vithistory, this will choose to initialize the correct viterbi history given the mode.
49 * 2, Moved most of the code in kb_setmllr to adaptor.c
50 *
51 * Revision 1.26.4.10 2006/01/16 18:22:21 arthchan2003
52 * Removed fcloses in kb_free because it caused seg faults. Valgrind also reported.
53 *
54 * Revision 1.26.4.9 2005/10/17 04:52:02 arthchan2003
55 * Free fast_gmm_t.
56 *
57 * Revision 1.26.4.8 2005/09/26 02:26:08 arthchan2003
58 * Change -s3hmmdir to -hmm
59 *
60 * Revision 1.26.4.7 2005/09/25 19:23:55 arthchan2003
61 * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code.
62 *
63 * Revision 1.26.4.6 2005/09/18 01:21:18 arthchan2003
64 * 1, Add a latticehist_t into kb_t, use a temporary method to allow polymorphism of initialization of vithist_t and latticehist_t. 2, remove the logic kb_set_mllr and put it to adapt_set_mllr
65 *
66 * Revision 1.26.4.5 2005/08/03 18:54:33 dhdfu
67 * Fix the support for multi-stream / semi-continuous models. It is
68 * still kind of a hack, but it now works.
69 *
70 * Revision 1.26.4.4 2005/08/02 21:32:30 arthchan2003
71 * added -s3hmmdir option.
72 *
73 * Revision 1.26.4.3 2005/07/20 21:19:52 arthchan2003
74 * Added options such that finite state grammar option is now accepted.
75 *
76 * Revision 1.26.4.2 2005/07/18 19:08:55 arthchan2003
77 * Fixed Copy right statement.
78 *
79 * Revision 1.26.4.1 2005/07/03 23:00:58 arthchan2003
80 * Free stat_t, histprune_t and srch_t correctly.
81 *
82 * Revision 1.26 2005/06/21 23:21:58 arthchan2003
83 * Log. This is a big refactoring for kb.c and it is worthwhile to give
84 * words on why and how things were done. There were generally a problem
85 * that the kb structure itself is too flat. That makes it has to
86 * maintained many structure that could be maintained by smaller
87 * structures. For example, the count of A and the array of A should
88 * well be put into the same structure to increase readability and
89 * modularity. One can explain why histprune_t, pl_t, stat_t and
90 * adapt_am_t were introduced with that line of reasoning.
91 *
 * In srch_t, polymorphism of implementation is also one important
 * element in separating all graph-related members from kb_t into srch_t.
 * One could probably implement the polymorphism as an interface of kb,
 * but that is not trivial given the semantic meaning of kb. That is
 * probably why srch_t is introduced as the gateway of search interfaces.
97 *
 * Another phenomenon one could see in the code was bad interaction
 * between modules. This is quite serious in two areas: logging and
 * checking. The current policy is that, unless something requires
 * cross-checking two structures, it is done internally inside a
 * module's initialization.
103 *
104 * Finally, kb_setlm is now removed and is replaced by ld_set_lm (by
105 * users) or srch_set_lm (by developers). I think this is quite
106 * reasonable.
107 *
108 * Revision 1.14 2005/06/19 19:41:23 archan
109 * Sphinx3 to s3.generic: Added multiple regression class for single stream MLLR. Enabled MLLR for livepretend and decode.
110 *
111 * Revision 1.13 2005/06/10 03:01:50 archan
112 * Fixed file_open.
113 *
114 * Revision 1.12 2005/05/26 00:46:59 archan
115 * Added functionalities that such that <sil> will not be inserted at the end of the utterance.
116 *
117 * Revision 1.11 2005/05/04 05:15:25 archan
118 * reverted the last change, seems to be not working because of compilation issue. Try not to deal with it now.
119 *
120 * Revision 1.10 2005/05/04 04:46:04 archan
121 * Move srch.c and srch.h to search. More and more this type of refactoring will be done in future
122 *
123 * Revision 1.9 2005/04/25 23:53:35 archan
124 * 1, Some minor modification of vithist_t, vithist_rescore can now support optional LM rescoring, vithist also has its own reporting routine. A new argument -lmrescore is also added in decode and livepretend. This can switch on and off the rescoring procedure. 2, I am reaching the final difficulty of mode 5 implementation. That is, to implement an algorithm which dynamically decide which tree copies should be entered. However, stuffs like score propagation in the leave nodes and non-leaves nodes are already done. 3, As briefly mentioned in 2, implementation of rescoring , which used to happened at leave nodes are now separated. The current implementation is not the most clever one. Wish I have time to change it before check-in to the canonical.
125 *
126 * Revision 1.8 2005/04/21 23:50:26 archan
127 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
128 *
129 * Revision 1.7 2005/04/20 03:36:18 archan
130 * Remove setlm from kb entirely, refactor it to search implementations, do the corresponding change for the changes in ascr and pl
131 *
132 * Revision 1.6 2005/03/30 01:22:47 archan
133 * Fixed mistakes in last updates. Add
134 *
135 *
136 * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
137 * Moved kb_*() routines into kb.c to make them independent of
138 * main() during compilation
139 *
140 * 29-Feb-2000 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
141 * Modified to allow runtime choice between 3-state and 5-state HMM
142 * topologies (instead of compile-time selection).
143 *
144 * 13-Aug-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
145 * Added -maxwpf.
146 *
147 * 10-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
148 * Started.
149 */
150
151
152 #include "kb.h"
153 #include "srch.h"
154 #include "mllr.h"
155
156
157 #define REPORT_KB 1
158
159
160 /* 20050321 Duplicated function. can also be io.c. Clean it up later. */
161 FILE *
file_open(const char * filepath)162 file_open(const char *filepath)
163 {
164 FILE *fp;
165 fp = NULL;
166 if (filepath) {
167 #ifdef WIN32
168 if ((fp = fopen(filepath, "wt")) == NULL)
169 #else
170 if ((fp = fopen(filepath, "w")) == NULL)
171 #endif
172 E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n",
173 filepath);
174 }
175 return fp;
176 }
177
178 void
kb_init(kb_t * kb,cmd_ln_t * config)179 kb_init(kb_t * kb, cmd_ln_t *config)
180 {
181 kbcore_t *kbcore;
182 mdef_t *mdef;
183 dict_t *dict;
184 dict2pid_t *d2p;
185 int32 cisencnt;
186
187 /* STRUCTURE: Initialize the kb structure to zero, just in case */
188 memset(kb, 0, sizeof(*kb));
189 kb->kbcore = kbcore_init(config);
190 if (kb->kbcore == NULL)
191 E_FATAL("Initialization of kb failed\n");
192
193 kbcore = kb->kbcore;
194 mdef = kbcore_mdef(kbcore);
195 dict = kbcore_dict(kbcore);
196 d2p = kbcore_dict2pid(kbcore);
197
198 /* STRUCTURE INITIALIZATION: Initialize the beam data structure */
199 if (cmd_ln_exists_r(config, "-ptranskip")) {
200 kb->beam = beam_init(cmd_ln_float64_r(config, "-beam"),
201 cmd_ln_float64_r(config, "-pbeam"),
202 cmd_ln_float64_r(config, "-wbeam"),
203 cmd_ln_float64_r(config, "-wend_beam"),
204 cmd_ln_int32_r(config, "-ptranskip"), mdef_n_ciphone(mdef),
205 kbcore->logmath
206 );
207
208 /* REPORT : Report the parameters in the beam data structure */
209 if (REPORT_KB)
210 beam_report(kb->beam);
211 }
212
213
214 /* STRUCTURE INITIALIZATION: Initialize the fast GMM computation data structure */
215 if (cmd_ln_exists_r(config, "-ci_pbeam")) {
216 kb->fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"),
217 cmd_ln_int32_r(config, "-cond_ds"),
218 cmd_ln_int32_r(config, "-dist_ds"),
219 cmd_ln_int32_r(config, "-gs4gs"),
220 cmd_ln_int32_r(config, "-svq4svq"),
221 cmd_ln_float64_r(config, "-subvqbeam"),
222 cmd_ln_float64_r(config, "-ci_pbeam"),
223 cmd_ln_float64_r(config, "-tighten_factor"),
224 cmd_ln_int32_r(config, "-maxcdsenpf"),
225 mdef->n_ci_sen,
226 kbcore->logmath);
227
228 /* REPORT : Report the parameters in the fast_gmm_t data struture */
229 if (REPORT_KB)
230 fast_gmm_report(kb->fastgmm);
231 }
232
233 /* STRUCTURE INITIALIZATION: Initialize the phoneme lookahead data structure */
234 if (cmd_ln_exists_r(config, "-pl_beam")) {
235 kb->pl = pl_init(cmd_ln_int32_r(config, "-pheurtype"),
236 cmd_ln_int32_r(config, "-pl_beam"), mdef_n_ciphone(mdef),
237 kbcore->logmath
238 );
239
240 /* REPORT : Report the parameters in the pl_t data struture */
241 if (REPORT_KB)
242 pl_report(kb->pl);
243 }
244
245 /* STRUCTURE INITIALIZATION: Initialize the acoustic score data structure */
246 {
247 int32 pl_window = 1;
248
249 if (cmd_ln_exists_r(config, "-pl_window"))
250 pl_window = cmd_ln_int32_r(config, "-pl_window");
251
252 for (cisencnt = 0; cisencnt == mdef->cd2cisen[cisencnt]; cisencnt++) ;
253 kb->ascr = ascr_init(kbcore_n_mgau(kbcore),
254 kb->kbcore->dict2pid->n_comstate,
255 mdef_n_sseq(mdef),
256 dict2pid_n_comsseq(d2p),
257 pl_window, cisencnt);
258
259 if (REPORT_KB)
260 ascr_report(kb->ascr);
261 }
262
263 /* Initialize the front end if -adcin is specified */
264 if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) {
265 if ((kb->fe = fe_init_auto_r(config)) == NULL) {
266 E_FATAL("fe_init_auto_r() failed\n");
267 }
268 }
269 /* STRUCTURE INITIALIZATION : The feature vector */
270 if ((kb->feat =
271 feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL)
272 E_FATAL("feat_array_alloc() failed\n");
273
274 /* STRUCTURE INITIALIZATION : The statistics for the search */
275 kb->stat = stat_init();
276
277 /* STRUCTURE INITIALIZATION : The adaptation routines of the search */
278 kb->adapt_am = adapt_am_init();
279
280 if (cmd_ln_str_r(config, "-mllr")) {
281 kb_setmllr(cmd_ln_str_r(config, "-mllr"), cmd_ln_str_r(config, "-cb2mllr"), kb);
282 }
283
284 /* CHECK: make sure when (-cond_ds) is specified, a Gaussian map is also specified */
285 if (cmd_ln_int32_r(config, "-cond_ds") > 0 && kb->kbcore->gs == NULL)
286 E_FATAL
287 ("Conditional Down Sampling require the use of Gaussian Selection map\n");
288
289 /* MEMORY ALLOCATION : Word best score and exit */
290 /* Open hypseg file if specified */
291 kb->matchsegfp = kb->matchfp = NULL;
292 kb->matchsegfp = file_open(cmd_ln_str_r(config, "-hypseg"));
293 kb->matchfp = file_open(cmd_ln_str_r(config, "-hyp"));
294
295 if (cmd_ln_exists_r(config, "-hmmdump"))
296 kb->hmmdumpfp = cmd_ln_int32_r(config, "-hmmdump") ? stderr : NULL;
297
298 /* STRUCTURE INITIALIZATION : The search data structure, done only
299 after kb is initialized kb is acted as a clipboard. */
300 if (cmd_ln_exists_r(config, "-op_mode")) {
301 /* -op_mode, if set (i.e. not -1), takes precedence over -mode. */
302 if (cmd_ln_int32_r(config, "-op_mode") != -1)
303 kb->op_mode = cmd_ln_int32_r(config, "-op_mode");
304 else
305 kb->op_mode = srch_mode_str_to_index(cmd_ln_str_r(config, "-mode"));
306 E_INFO("SEARCH MODE INDEX %d\n", kb->op_mode);
307 if ((kb->srch = (srch_t *) srch_init(kb, kb->op_mode)) == NULL) {
308 E_FATAL("Search initialization failed. Forced exit\n");
309 }
310 if (REPORT_KB) {
311 srch_report(kb->srch);
312 }
313 }
314 }
315
316 void
kb_set_uttid(const char * _uttid,const char * _uttfile,kb_t * _kb)317 kb_set_uttid(const char *_uttid, const char *_uttfile, kb_t * _kb)
318 {
319 assert(_kb != NULL);
320 assert(_uttid != NULL);
321
322 ckd_free(_kb->uttid);
323 _kb->uttid = NULL;
324 _kb->uttid = ckd_salloc(_uttid);
325
326 ckd_free(_kb->uttfile);
327 _kb->uttfile = NULL;
328 if (_uttfile)
329 _kb->uttfile = ckd_salloc(_uttfile);
330 }
331
332 void
kb_setmllr(const char * mllrname,const char * cb2mllrname,kb_t * kb)333 kb_setmllr(const char *mllrname, const char *cb2mllrname,
334 /** < In: The filename of the MLLR class map */
335 kb_t * kb)
336 {
337 /* int32 veclen;*/
338
339 kbcore_t *kbc;
340
341 E_INFO("Using MLLR matrix %s\n", mllrname);
342 kbc = kb->kbcore;
343
344 if (strcmp(kb->adapt_am->prevmllrfn, mllrname) != 0) { /* If there is a change of mllr file name */
345
346 if (kbc->mgau)
347 adapt_set_mllr(kb->adapt_am, kbc->mgau, mllrname, cb2mllrname,
348 kbc->mdef, kbc->config);
349 else if (kbc->ms_mgau)
350 model_set_mllr(kbc->ms_mgau, mllrname, cb2mllrname, kbc->fcb,
351 kbc->mdef, kbc->config);
352 else
353 E_FATAL("Panic, kb has not Gaussian\n");
354
355 /* allocate memory for the prevmllrfn if it is too short */
356 if (strlen(mllrname) * sizeof(char) > 1024)
357 kb->adapt_am->prevmllrfn =
358 (char *) ckd_calloc(strlen(mllrname), sizeof(char));
359
360 strcpy(kb->adapt_am->prevmllrfn, mllrname);
361 }
362 else {
363 /* No need to change anything for now */
364 }
365 }
366
367 /* RAH 4.15.01 Lots of memory is allocated, but never freed, this function will clean up.
368 * First pass will get the low hanging fruit.*/
369 void
kb_free(kb_t * kb)370 kb_free(kb_t * kb)
371 {
372 if (kb->srch) {
373 srch_uninit(kb->srch);
374 /** Add search free code */
375 }
376
377 if (kb->stat) {
378 stat_free((void *) kb->stat);
379 }
380
381 if (kb->ascr)
382 ascr_free((void *) kb->ascr);
383
384 if (kb->fastgmm)
385 fast_gmm_free((void *) kb->fastgmm);
386
387 if (kb->beam)
388 beam_free((void *) kb->beam);
389
390
391 if (kb->pl)
392 pl_free((void *) kb->pl);
393
394 if (kb->kbcore != NULL)
395 kbcore_free(kb->kbcore);
396
397 /* This is awkward, currently, there are two routines to control MLLRs and I don't have time
398 to unify them yet. TBD */
399 if (kb->adapt_am->regA && kb->adapt_am->regB)
400 mllr_free_regmat(kb->adapt_am->regA, kb->adapt_am->regB);
401 if (kb->adapt_am)
402 adapt_am_free(kb->adapt_am);
403
404 if (kb->feat) {
405 ckd_free((void *) kb->feat[0][0]);
406 ckd_free_2d((void **) kb->feat);
407 }
408
409
410 if (kb->uttid) {
411 ckd_free(kb->uttid);
412 }
413
414 if (kb->uttfile) {
415 ckd_free(kb->uttfile);
416 }
417
418
419 #if 0 /* valgrind reports this one. */
420 if (kb->matchsegfp)
421 fclose(kb->matchsegfp);
422 if (kb->matchfp)
423 fclose(kb->matchfp);
424 #endif
425 }
426