1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
 * kbcore.h -- Structures for maintaining the main models.
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1999 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log$
49  * Revision 1.2  2006/04/06  14:03:02  dhdfu
50  * Prevent confusion among future generations by calling this s2_semi_mgau instead of sc_vq
51  *
52  * Revision 1.1  2006/04/05 20:27:30  dhdfu
53  * A Great Reorganzation of header files and executables
54  *
55  * Revision 1.12  2006/02/23 05:54:58  arthchan2003
56  * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH
57  * 1, Added linksilences.  This allows silences to be linked correctly in mode FLAT, TREE.
58  * 2, Added s3_am_init: an all-in-one initalization routine SCHMM and CDHMM.
59  *
60  * Revision 1.11.4.5  2006/01/16 18:25:16  arthchan2003
61  * Sphinx 3.x tree decoders assume silences are unlinked (set them to BAD_S3WID) before used. Whereas the flat lexicon decode doesn't have this assumption.  The changes in versions this branch also significantly changed behavior of the decoder. Thus the function LinkSilences is introduced to change back the behavior if necessary.
62  *
63  * Revision 1.11.4.4  2005/09/18 01:29:37  arthchan2003
64  * 1, .s3cont. mode is supported.  When it is specified by -senmgau, it will invoke the MS version of GMM computation even for CDHMM. Not supposed to be documented for users. 2, Remove unlinkSilences and put it inside search-specific initialization.  Apparently, remove it entirely will screw up the current test of mode 4 and 5.  add it back will screw up mode 3.  That's why I used temp solution.
65  *
66  * Revision 1.11.4.3  2005/08/03 18:54:32  dhdfu
67  * Fix the support for multi-stream / semi-continuous models.  It is
68  * still kind of a hack, but it now works.
69  *
70  * Revision 1.11.4.2  2005/08/02 21:33:47  arthchan2003
71  * Factored the code of initializing one hmm into s3_am_init. That is to say initialization of mdef, mgau, var, mixw and tmat could all be found one function.
72  *
73  * Revision 1.11.4.1  2005/07/20 21:19:52  arthchan2003
74  * Added options such that finite state grammar option is now accepted.
75  *
76  * Revision 1.11  2005/06/21 23:28:48  arthchan2003
77  * Log. Please also see comments of kb.[ch].  Major changes you could see
78  * is that the lmset interface is now used rather than several interfaces
79  * for reading lm. Other than that, you could say most changes are
80  * harmless internal interfaces changes.
81  *
82  * Revision 1.5  2005/06/18 03:22:29  archan
83  * Add lmset_init. A wrapper function of various LM initialization and initialize an lmset It is now used in decode, livepretend, dag and astar.
84  *
85  * Revision 1.4  2005/04/20 03:38:43  archan
86  * Do the corresponding code changes for the lm code.
87  *
88  * Revision 1.3  2005/03/30 01:22:47  archan
89  * Fixed mistakes in last updates. Add
90  *
91  *
92  * 11-Feb-2000	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
93  * 		Removed svqpp stuff.  It doesn't work too well anyway.
94  *
95  * 06-Dec-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
96  * 		Added kb_t.svqpp_t and related handling.
97  *
98  * 30-Apr-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
99  * 		Created.
100  */
101 
102 
103 #ifndef _S3_KBCORE_H_
104 #define _S3_KBCORE_H_
105 
106 #include <s3types.h>
107 #include <cmd_ln.h>
108 #include <logmath.h>
109 #include "feat.h"
110 #include "cont_mgau.h"
111 #include "ms_mgau.h"
112 #include "s2_semi_mgau.h"
113 #include "mdef.h"
114 #include "dict.h"
115 #include "dict2pid.h"
116 #include "fillpen.h"
117 #include "lm.h"
118 #include "tmat.h"
119 #include "subvq.h"
120 #include "gs.h"
121 
122 
123 #ifdef __cplusplus
124 extern "C" {
125 #endif
126 #if 0
127 } /* Fool Emacs into not indenting things. */
128 #endif
129 
130 /** \file kbcore.h
131  * \brief kb core structures, the structure that stores parameters for s3.X search
132  */
133 
134 typedef struct {
135     cmd_ln_t *config; /**< The command-line or configuration object */
136     feat_t *fcb; /**< feature end structure */
137     mdef_t *mdef; /**< Model definition  */
138     dict_t *dict; /**< Dictionary structure */
139     dict2pid_t *dict2pid; /**< Conversion of dictionary to Phoneme ID */
140 
141     lmset_t *lmset; /**< LM Set. ARCHAN, since sphinx 3.6, it is used whenever an lm is allocated.
142                        This unified the internal data structure. */
143 
144     /*Specified either one of them when using kbcore.h.  It is not yet very nice now. */
145     mgau_model_t *mgau; /**< Acoustic Model for single stream */
146     ms_mgau_model_t *ms_mgau; /**< Acoustic Model for multipel stream */
147     s2_semi_mgau_t *s2_mgau; /**< Acoustic model for Sphinx2 semi-continuous */
148 
149     fillpen_t *fillpen; /**< Filler penalty */
150     subvq_t *svq; /**< SVQ */
151     gs_t *gs; /**< Gaussian Selector */
152     tmat_t *tmat; /**< Transition Matrix. */
153 
154     int32 maxNewHeurScore; /**< Temporary variables for phoneme lookahead. This stores the heuristic score */
155     int32 lastfrm; /**, Temporary variables, should be removed */
156 
157     s3lmwid32_t startwid;
158     s3lmwid32_t finishwid;
159     logmath_t *logmath;
160 } kbcore_t;
161 
162 
163 /**
164    Create a new kbcore
165 */
166 S3DECODER_EXPORT
167 kbcore_t *New_kbcore(cmd_ln_t *config);
168 
169 /**
170    Initialize just the acoustic model for kbcore, taking parameters
171    from the global command-line module.
172 */
173 S3DECODER_EXPORT
174 void s3_am_init(kbcore_t *kbc);
175 
176 
177 /**
178  * Initialize one or more of all the major models:  pronunciation dictionary, acoustic models,
179  * language models.  Parameters are taken from the command line (see cmdln_macro.h)
180  */
181 kbcore_t *kbcore_init(cmd_ln_t *config);
182 
183 /** free the kbcore */
184 S3DECODER_EXPORT
185 void kbcore_free (kbcore_t *kbcore  /**< The kbcore structure */
186     );
187 
188 /**
189    Sphinx 3.x tree decoders assume silences are unlinked (set them
190    to BAD_S3WID) before used. Whereas the flat lexicon decoder
191    doesn't have such assumption.  These two functions change this
192    behavior.  Called in mode 3, 4 and 5 to make sure different code
193    works.  FIXME: This is dumb.
194 */
195 void unlinksilences(lm_t* l, kbcore_t *kbc, dict_t *d);
196 
197 void linksilences(lm_t* l, kbcore_t *kbc, dict_t *d);
198 
/**
 * Access macros; not meant for arbitrary use.
 * Each one takes a pointer `k` to the structure and expands to an
 * expression on one of its members.
 */

/* Configuration, front end and lexicon. */
#define kbcore_config(k)        ((k)->config)
#define kbcore_fcb(k)           ((k)->fcb)
#define kbcore_mdef(k)          ((k)->mdef)
#define kbcore_dict(k)          ((k)->dict)
#define kbcore_dict2pid(k)      ((k)->dict2pid)

/* Current LM of the LM set, or NULL when no LM set is attached. */
#define kbcore_lm(k)            ((k)->lmset ? (k)->lmset->cur_lm : NULL)
#define kbcore_fillpen(k)       ((k)->fillpen)

/*
 * NOTE(review): kbcore_t declares no `dict2lmwid` member, so this macro
 * cannot expand validly for a kbcore_t.  It looks like a leftover and is a
 * candidate for removal -- confirm that nothing relies on it.
 */
#define kbcore_dict2lmwid(k,w)  ((k)->dict2lmwid[w])

/* Acoustic models and related structures. */
#define kbcore_mgau(k)          ((k)->mgau)
#define kbcore_ms_mgau(k)       ((k)->ms_mgau)
#define kbcore_s2_mgau(k)       ((k)->s2_mgau)
#define kbcore_svq(k)           ((k)->svq)
#define kbcore_gs(k)            ((k)->gs)
#define kbcore_tmat(k)          ((k)->tmat)
#define kbcore_lmset(k)         ((k)->lmset)

/*
 * Count taken from whichever acoustic model is present, tried in the order
 * mgau, s2_mgau, ms_mgau: mgau_n_mgau(mgau), s2_mgau->CdWdPDFMod, or
 * ms_mgau->s->n_sen.  When both mgau and s2_mgau are NULL, ms_mgau is
 * dereferenced without a check.
 */
#define kbcore_n_mgau(k) \
    ((k)->mgau ? mgau_n_mgau((k)->mgau) \
     : ((k)->s2_mgau ? (k)->s2_mgau->CdWdPDFMod \
        : (k)->ms_mgau->s->n_sen))

#define kbcore_logmath(k)       ((k)->logmath)
221 
222 
223 
224 #if 0
225 { /* Stop indent from complaining */
226 #endif
227 #ifdef __cplusplus
228 }
229 #endif
230 
231 #endif
232