1 /* -*- c-basic-offset: 4 -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /************************************************
38  * CMU ARPA Speech Project
39  *
40  * Copyright (c) 2000 Carnegie Mellon University.
41  * ALL RIGHTS RESERVED.
42  ************************************************
43  *
44  * HISTORY
45  * $Log$
46  * Revision 1.27  2006/02/23  05:44:59  arthchan2003
47  * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH.
48  * 1, Added temp_init_vithistory, this will choose to initialize the correct viterbi history given the mode.
49  * 2, Moved most of the code in kb_setmllr to adaptor.c
50  *
51  * Revision 1.26.4.10  2006/01/16 18:22:21  arthchan2003
52  * Removed fcloses in kb_free because it caused seg faults. Valgrind also reported.
53  *
54  * Revision 1.26.4.9  2005/10/17 04:52:02  arthchan2003
55  * Free fast_gmm_t.
56  *
57  * Revision 1.26.4.8  2005/09/26 02:26:08  arthchan2003
58  * Change -s3hmmdir to -hmm
59  *
60  * Revision 1.26.4.7  2005/09/25 19:23:55  arthchan2003
61  * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code.
62  *
63  * Revision 1.26.4.6  2005/09/18 01:21:18  arthchan2003
64  * 1, Add a latticehist_t into kb_t, use a temporary method to allow polymorphism of initialization of vithist_t and latticehist_t. 2, remove the logic kb_set_mllr and put it to adapt_set_mllr
65  *
66  * Revision 1.26.4.5  2005/08/03 18:54:33  dhdfu
67  * Fix the support for multi-stream / semi-continuous models.  It is
68  * still kind of a hack, but it now works.
69  *
70  * Revision 1.26.4.4  2005/08/02 21:32:30  arthchan2003
71  * added -s3hmmdir option.
72  *
73  * Revision 1.26.4.3  2005/07/20 21:19:52  arthchan2003
74  * Added options such that finite state grammar option is now accepted.
75  *
76  * Revision 1.26.4.2  2005/07/18 19:08:55  arthchan2003
77  * Fixed Copy right statement.
78  *
79  * Revision 1.26.4.1  2005/07/03 23:00:58  arthchan2003
80  * Free stat_t, histprune_t and srch_t correctly.
81  *
82  * Revision 1.26  2005/06/21 23:21:58  arthchan2003
 * Log. This is a big refactoring of kb.c and it is worthwhile to say a
 * few words on why and how things were done.  The general problem was
 * that the kb structure itself was too flat.  That forced it to
 * maintain many members that could be maintained by smaller
 * structures.  For example, the count of A and the array of A should
 * be put into the same structure to increase readability and
 * modularity. That line of reasoning explains why histprune_t, pl_t,
 * stat_t and adapt_am_t were introduced.
91  *
 * In srch_t, polymorphism of implementation is also one important
 * element in separating all graph-related members from kb_t into srch_t.
 * One could probably implement the polymorphism as an interface of kb,
 * but that is not trivial given the semantic meaning of kb.  That is
 * probably why srch_t was introduced as the gateway to the search interfaces.
97  *
 * Another phenomenon one could see in the code was bad interaction
 * between modules. This was quite serious in two areas: logging and
 * checking. The current policy is that unless something requires
 * cross-checking two structures, it is done internally inside the
 * module's initialization.
103  *
104  * Finally, kb_setlm is now removed and is replaced by ld_set_lm (by
105  * users) or srch_set_lm (by developers). I think this is quite
106  * reasonable.
107  *
108  * Revision 1.14  2005/06/19 19:41:23  archan
109  * Sphinx3 to s3.generic: Added multiple regression class for single stream MLLR. Enabled MLLR for livepretend and decode.
110  *
111  * Revision 1.13  2005/06/10 03:01:50  archan
112  * Fixed file_open.
113  *
114  * Revision 1.12  2005/05/26 00:46:59  archan
115  * Added functionalities that such that <sil> will not be inserted at the end of the utterance.
116  *
117  * Revision 1.11  2005/05/04 05:15:25  archan
118  * reverted the last change, seems to be not working because of compilation issue. Try not to deal with it now.
119  *
120  * Revision 1.10  2005/05/04 04:46:04  archan
121  * Move srch.c and srch.h to search. More and more this type of refactoring will be done in future
122  *
123  * Revision 1.9  2005/04/25 23:53:35  archan
124  * 1, Some minor modification of vithist_t, vithist_rescore can now support optional LM rescoring, vithist also has its own reporting routine. A new argument -lmrescore is also added in decode and livepretend.  This can switch on and off the rescoring procedure. 2, I am reaching the final difficulty of mode 5 implementation.  That is, to implement an algorithm which dynamically decide which tree copies should be entered.  However, stuffs like score propagation in the leave nodes and non-leaves nodes are already done. 3, As briefly mentioned in 2, implementation of rescoring , which used to happened at leave nodes are now separated. The current implementation is not the most clever one. Wish I have time to change it before check-in to the canonical.
125  *
126  * Revision 1.8  2005/04/21 23:50:26  archan
127  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
128  *
129  * Revision 1.7  2005/04/20 03:36:18  archan
130  * Remove setlm from kb entirely, refactor it to search implementations, do the corresponding change for the changes in ascr and pl
131  *
132  * Revision 1.6  2005/03/30 01:22:47  archan
133  * Fixed mistakes in last updates. Add
134  *
135  *
136  * 30-Dec-2000	Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
137  *		Moved kb_*() routines into kb.c to make them independent of
138  *		main() during compilation
139  *
140  * 29-Feb-2000	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
141  * 		Modified to allow runtime choice between 3-state and 5-state HMM
142  * 		topologies (instead of compile-time selection).
143  *
144  * 13-Aug-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
145  * 		Added -maxwpf.
146  *
147  * 10-May-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
148  * 		Started.
149  */
150 
151 
152 #include "kb.h"
153 #include "srch.h"
154 #include "mllr.h"
155 
156 
157 #define REPORT_KB 1
158 
159 
/*
 * Open an output file for writing.
 *
 * filepath: path of the file to create, or NULL.
 *
 * Returns the stream from fopen(), or NULL when filepath is NULL or the
 * file could not be opened.  An fopen() failure is logged with E_ERROR
 * and is not otherwise treated as fatal here.
 *
 * 20050321: noted as a duplicated function that could also live in
 * io.c; to be cleaned up later.
 */
FILE *
file_open(const char *filepath)
{
    FILE *stream;

    /* No path given: nothing to open. */
    if (filepath == NULL)
        return NULL;

#ifdef WIN32
    stream = fopen(filepath, "wt");
#else
    stream = fopen(filepath, "w");
#endif

    if (stream == NULL)
        E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n",
                filepath);

    return stream;
}
177 
178 void
kb_init(kb_t * kb,cmd_ln_t * config)179 kb_init(kb_t * kb, cmd_ln_t *config)
180 {
181     kbcore_t *kbcore;
182     mdef_t *mdef;
183     dict_t *dict;
184     dict2pid_t *d2p;
185     int32 cisencnt;
186 
187     /* STRUCTURE: Initialize the kb structure to zero, just in case */
188     memset(kb, 0, sizeof(*kb));
189     kb->kbcore = kbcore_init(config);
190     if (kb->kbcore == NULL)
191         E_FATAL("Initialization of kb failed\n");
192 
193     kbcore = kb->kbcore;
194     mdef = kbcore_mdef(kbcore);
195     dict = kbcore_dict(kbcore);
196     d2p = kbcore_dict2pid(kbcore);
197 
198     /* STRUCTURE INITIALIZATION: Initialize the beam data structure */
199     if (cmd_ln_exists_r(config, "-ptranskip")) {
200         kb->beam = beam_init(cmd_ln_float64_r(config, "-beam"),
201                              cmd_ln_float64_r(config, "-pbeam"),
202                              cmd_ln_float64_r(config, "-wbeam"),
203                              cmd_ln_float64_r(config, "-wend_beam"),
204                              cmd_ln_int32_r(config, "-ptranskip"), mdef_n_ciphone(mdef),
205                              kbcore->logmath
206             );
207 
208         /* REPORT : Report the parameters in the beam data structure */
209         if (REPORT_KB)
210                 beam_report(kb->beam);
211     }
212 
213 
214     /* STRUCTURE INITIALIZATION: Initialize the fast GMM computation data structure */
215     if (cmd_ln_exists_r(config, "-ci_pbeam")) {
216         kb->fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"),
217                                     cmd_ln_int32_r(config, "-cond_ds"),
218                                     cmd_ln_int32_r(config, "-dist_ds"),
219                                     cmd_ln_int32_r(config, "-gs4gs"),
220                                     cmd_ln_int32_r(config, "-svq4svq"),
221                                     cmd_ln_float64_r(config, "-subvqbeam"),
222                                     cmd_ln_float64_r(config, "-ci_pbeam"),
223                                     cmd_ln_float64_r(config, "-tighten_factor"),
224                                     cmd_ln_int32_r(config, "-maxcdsenpf"),
225                                     mdef->n_ci_sen,
226                                     kbcore->logmath);
227 
228         /* REPORT : Report the parameters in the fast_gmm_t data struture */
229         if (REPORT_KB)
230             fast_gmm_report(kb->fastgmm);
231     }
232 
233     /* STRUCTURE INITIALIZATION: Initialize the phoneme lookahead data structure */
234     if (cmd_ln_exists_r(config, "-pl_beam")) {
235         kb->pl = pl_init(cmd_ln_int32_r(config, "-pheurtype"),
236                          cmd_ln_int32_r(config, "-pl_beam"), mdef_n_ciphone(mdef),
237                          kbcore->logmath
238             );
239 
240         /* REPORT : Report the parameters in the pl_t data struture */
241         if (REPORT_KB)
242             pl_report(kb->pl);
243     }
244 
245     /* STRUCTURE INITIALIZATION: Initialize the acoustic score data structure */
246     {
247         int32 pl_window = 1;
248 
249         if (cmd_ln_exists_r(config, "-pl_window"))
250             pl_window = cmd_ln_int32_r(config, "-pl_window");
251 
252         for (cisencnt = 0; cisencnt == mdef->cd2cisen[cisencnt]; cisencnt++) ;
253         kb->ascr = ascr_init(kbcore_n_mgau(kbcore),
254                              kb->kbcore->dict2pid->n_comstate,
255                              mdef_n_sseq(mdef),
256                              dict2pid_n_comsseq(d2p),
257                              pl_window, cisencnt);
258 
259         if (REPORT_KB)
260             ascr_report(kb->ascr);
261     }
262 
263     /* Initialize the front end if -adcin is specified */
264     if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) {
265         if ((kb->fe = fe_init_auto_r(config)) == NULL) {
266             E_FATAL("fe_init_auto_r() failed\n");
267         }
268     }
269     /* STRUCTURE INITIALIZATION : The feature vector */
270     if ((kb->feat =
271          feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL)
272         E_FATAL("feat_array_alloc() failed\n");
273 
274     /* STRUCTURE INITIALIZATION : The statistics for the search */
275     kb->stat = stat_init();
276 
277     /* STRUCTURE INITIALIZATION : The adaptation routines of the search */
278     kb->adapt_am = adapt_am_init();
279 
280     if (cmd_ln_str_r(config, "-mllr")) {
281         kb_setmllr(cmd_ln_str_r(config, "-mllr"), cmd_ln_str_r(config, "-cb2mllr"), kb);
282     }
283 
284     /* CHECK: make sure when (-cond_ds) is specified, a Gaussian map is also specified */
285     if (cmd_ln_int32_r(config, "-cond_ds") > 0 && kb->kbcore->gs == NULL)
286         E_FATAL
287             ("Conditional Down Sampling require the use of Gaussian Selection map\n");
288 
289     /* MEMORY ALLOCATION : Word best score and exit */
290     /* Open hypseg file if specified */
291     kb->matchsegfp = kb->matchfp = NULL;
292     kb->matchsegfp = file_open(cmd_ln_str_r(config, "-hypseg"));
293     kb->matchfp = file_open(cmd_ln_str_r(config, "-hyp"));
294 
295     if (cmd_ln_exists_r(config, "-hmmdump"))
296         kb->hmmdumpfp = cmd_ln_int32_r(config, "-hmmdump") ? stderr : NULL;
297 
298     /* STRUCTURE INITIALIZATION : The search data structure, done only
299        after kb is initialized kb is acted as a clipboard. */
300     if (cmd_ln_exists_r(config, "-op_mode")) {
301         /* -op_mode, if set (i.e. not -1), takes precedence over -mode. */
302         if (cmd_ln_int32_r(config, "-op_mode") != -1)
303             kb->op_mode = cmd_ln_int32_r(config, "-op_mode");
304         else
305             kb->op_mode = srch_mode_str_to_index(cmd_ln_str_r(config, "-mode"));
306         E_INFO("SEARCH MODE INDEX %d\n", kb->op_mode);
307         if ((kb->srch = (srch_t *) srch_init(kb, kb->op_mode)) == NULL) {
308             E_FATAL("Search initialization failed. Forced exit\n");
309         }
310         if (REPORT_KB) {
311             srch_report(kb->srch);
312         }
313     }
314 }
315 
316 void
kb_set_uttid(const char * _uttid,const char * _uttfile,kb_t * _kb)317 kb_set_uttid(const char *_uttid, const char *_uttfile, kb_t * _kb)
318 {
319     assert(_kb != NULL);
320     assert(_uttid != NULL);
321 
322     ckd_free(_kb->uttid);
323     _kb->uttid = NULL;
324     _kb->uttid = ckd_salloc(_uttid);
325 
326     ckd_free(_kb->uttfile);
327     _kb->uttfile = NULL;
328     if (_uttfile)
329         _kb->uttfile = ckd_salloc(_uttfile);
330 }
331 
332 void
kb_setmllr(const char * mllrname,const char * cb2mllrname,kb_t * kb)333 kb_setmllr(const char *mllrname, const char *cb2mllrname,
334                                    /** < In: The filename of the MLLR class map */
335            kb_t * kb)
336 {
337 /*  int32 veclen;*/
338 
339     kbcore_t *kbc;
340 
341     E_INFO("Using MLLR matrix %s\n", mllrname);
342     kbc = kb->kbcore;
343 
344     if (strcmp(kb->adapt_am->prevmllrfn, mllrname) != 0) {      /* If there is a change of mllr file name */
345 
346         if (kbc->mgau)
347             adapt_set_mllr(kb->adapt_am, kbc->mgau, mllrname, cb2mllrname,
348                            kbc->mdef, kbc->config);
349         else if (kbc->ms_mgau)
350             model_set_mllr(kbc->ms_mgau, mllrname, cb2mllrname, kbc->fcb,
351                            kbc->mdef, kbc->config);
352         else
353             E_FATAL("Panic, kb has not Gaussian\n");
354 
355         /* allocate memory for the prevmllrfn if it is too short */
356         if (strlen(mllrname) * sizeof(char) > 1024)
357             kb->adapt_am->prevmllrfn =
358                 (char *) ckd_calloc(strlen(mllrname), sizeof(char));
359 
360         strcpy(kb->adapt_am->prevmllrfn, mllrname);
361     }
362     else {
363         /* No need to change anything for now */
364     }
365 }
366 
367 /* RAH 4.15.01 Lots of memory is allocated, but never freed, this function will clean up.
368  * First pass will get the low hanging fruit.*/
369 void
kb_free(kb_t * kb)370 kb_free(kb_t * kb)
371 {
372     if (kb->srch) {
373         srch_uninit(kb->srch);
374     /** Add search free code */
375     }
376 
377     if (kb->stat) {
378         stat_free((void *) kb->stat);
379     }
380 
381     if (kb->ascr)
382         ascr_free((void *) kb->ascr);
383 
384     if (kb->fastgmm)
385         fast_gmm_free((void *) kb->fastgmm);
386 
387     if (kb->beam)
388         beam_free((void *) kb->beam);
389 
390 
391     if (kb->pl)
392         pl_free((void *) kb->pl);
393 
394     if (kb->kbcore != NULL)
395         kbcore_free(kb->kbcore);
396 
397     /* This is awkward, currently, there are two routines to control MLLRs and I don't have time
398        to unify them yet. TBD */
399     if (kb->adapt_am->regA && kb->adapt_am->regB)
400         mllr_free_regmat(kb->adapt_am->regA, kb->adapt_am->regB);
401     if (kb->adapt_am)
402         adapt_am_free(kb->adapt_am);
403 
404     if (kb->feat) {
405         ckd_free((void *) kb->feat[0][0]);
406         ckd_free_2d((void **) kb->feat);
407     }
408 
409 
410     if (kb->uttid) {
411         ckd_free(kb->uttid);
412     }
413 
414     if (kb->uttfile) {
415         ckd_free(kb->uttfile);
416     }
417 
418 
419 #if 0                           /* valgrind reports this one. */
420     if (kb->matchsegfp)
421         fclose(kb->matchsegfp);
422     if (kb->matchfp)
423         fclose(kb->matchfp);
424 #endif
425 }
426