1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* ==================================================================== 3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 4 * reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * This work was supported in part by funding from the Defense Advanced 19 * Research Projects Agency and the National Science Foundation of the 20 * United States of America, and the CMU Sphinx Speech Consortium. 21 * 22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * 34 * ==================================================================== 35 * 36 */ 37 /* 38 * kbcore.h -- Structures for maintain the main models. 39 * 40 * ********************************************** 41 * CMU ARPA Speech Project 42 * 43 * Copyright (c) 1999 Carnegie Mellon University. 44 * ALL RIGHTS RESERVED. 45 * ********************************************** 46 * 47 * HISTORY 48 * $Log$ 49 * Revision 1.2 2006/04/06 14:03:02 dhdfu 50 * Prevent confusion among future generations by calling this s2_semi_mgau instead of sc_vq 51 * 52 * Revision 1.1 2006/04/05 20:27:30 dhdfu 53 * A Great Reorganzation of header files and executables 54 * 55 * Revision 1.12 2006/02/23 05:54:58 arthchan2003 56 * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH 57 * 1, Added linksilences. This allows silences to be linked correctly in mode FLAT, TREE. 58 * 2, Added s3_am_init: an all-in-one initalization routine SCHMM and CDHMM. 59 * 60 * Revision 1.11.4.5 2006/01/16 18:25:16 arthchan2003 61 * Sphinx 3.x tree decoders assume silences are unlinked (set them to BAD_S3WID) before used. Whereas the flat lexicon decode doesn't have this assumption. The changes in versions this branch also significantly changed behavior of the decoder. Thus the function LinkSilences is introduced to change back the behavior if necessary. 62 * 63 * Revision 1.11.4.4 2005/09/18 01:29:37 arthchan2003 64 * 1, .s3cont. mode is supported. When it is specified by -senmgau, it will invoke the MS version of GMM computation even for CDHMM. Not supposed to be documented for users. 2, Remove unlinkSilences and put it inside search-specific initialization. Apparently, remove it entirely will screw up the current test of mode 4 and 5. add it back will screw up mode 3. That's why I used temp solution. 65 * 66 * Revision 1.11.4.3 2005/08/03 18:54:32 dhdfu 67 * Fix the support for multi-stream / semi-continuous models. It is 68 * still kind of a hack, but it now works. 69 * 70 * Revision 1.11.4.2 2005/08/02 21:33:47 arthchan2003 71 * Factored the code of initializing one hmm into s3_am_init. That is to say initialization of mdef, mgau, var, mixw and tmat could all be found one function. 72 * 73 * Revision 1.11.4.1 2005/07/20 21:19:52 arthchan2003 74 * Added options such that finite state grammar option is now accepted. 75 * 76 * Revision 1.11 2005/06/21 23:28:48 arthchan2003 77 * Log. Please also see comments of kb.[ch]. Major changes you could see 78 * is that the lmset interface is now used rather than several interfaces 79 * for reading lm. Other than that, you could say most changes are 80 * harmless internal interfaces changes. 81 * 82 * Revision 1.5 2005/06/18 03:22:29 archan 83 * Add lmset_init. A wrapper function of various LM initialization and initialize an lmset It is now used in decode, livepretend, dag and astar. 84 * 85 * Revision 1.4 2005/04/20 03:38:43 archan 86 * Do the corresponding code changes for the lm code. 87 * 88 * Revision 1.3 2005/03/30 01:22:47 archan 89 * Fixed mistakes in last updates. Add 90 * 91 * 92 * 11-Feb-2000 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 93 * Removed svqpp stuff. It doesn't work too well anyway. 94 * 95 * 06-Dec-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 96 * Added kb_t.svqpp_t and related handling. 97 * 98 * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 99 * Created. 100 */ 101 102 103 #ifndef _S3_KBCORE_H_ 104 #define _S3_KBCORE_H_ 105 106 #include <s3types.h> 107 #include <cmd_ln.h> 108 #include <logmath.h> 109 #include "feat.h" 110 #include "cont_mgau.h" 111 #include "ms_mgau.h" 112 #include "s2_semi_mgau.h" 113 #include "mdef.h" 114 #include "dict.h" 115 #include "dict2pid.h" 116 #include "fillpen.h" 117 #include "lm.h" 118 #include "tmat.h" 119 #include "subvq.h" 120 #include "gs.h" 121 122 123 #ifdef __cplusplus 124 extern "C" { 125 #endif 126 #if 0 127 } /* Fool Emacs into not indenting things. */ 128 #endif 129 130 /** \file kbcore.h 131 * \brief kb core structures, the structure that stores parameters for s3.X search 132 */ 133 134 typedef struct { 135 cmd_ln_t *config; /**< The command-line or configuration object */ 136 feat_t *fcb; /**< feature end structure */ 137 mdef_t *mdef; /**< Model definition */ 138 dict_t *dict; /**< Dictionary structure */ 139 dict2pid_t *dict2pid; /**< Conversion of dictionary to Phoneme ID */ 140 141 lmset_t *lmset; /**< LM Set. ARCHAN, since sphinx 3.6, it is used whenever an lm is allocated. 142 This unified the internal data structure. */ 143 144 /*Specified either one of them when using kbcore.h. It is not yet very nice now. */ 145 mgau_model_t *mgau; /**< Acoustic Model for single stream */ 146 ms_mgau_model_t *ms_mgau; /**< Acoustic Model for multipel stream */ 147 s2_semi_mgau_t *s2_mgau; /**< Acoustic model for Sphinx2 semi-continuous */ 148 149 fillpen_t *fillpen; /**< Filler penalty */ 150 subvq_t *svq; /**< SVQ */ 151 gs_t *gs; /**< Gaussian Selector */ 152 tmat_t *tmat; /**< Transition Matrix. */ 153 154 int32 maxNewHeurScore; /**< Temporary variables for phoneme lookahead. This stores the heuristic score */ 155 int32 lastfrm; /**, Temporary variables, should be removed */ 156 157 s3lmwid32_t startwid; 158 s3lmwid32_t finishwid; 159 logmath_t *logmath; 160 } kbcore_t; 161 162 163 /** 164 Create a new kbcore 165 */ 166 S3DECODER_EXPORT 167 kbcore_t *New_kbcore(cmd_ln_t *config); 168 169 /** 170 Initialize just the acoustic model for kbcore, taking parameters 171 from the global command-line module. 172 */ 173 S3DECODER_EXPORT 174 void s3_am_init(kbcore_t *kbc); 175 176 177 /** 178 * Initialize one or more of all the major models: pronunciation dictionary, acoustic models, 179 * language models. Parameters are taken from the command line (see cmdln_macro.h) 180 */ 181 kbcore_t *kbcore_init(cmd_ln_t *config); 182 183 /** free the kbcore */ 184 S3DECODER_EXPORT 185 void kbcore_free (kbcore_t *kbcore /**< The kbcore structure */ 186 ); 187 188 /** 189 Sphinx 3.x tree decoders assume silences are unlinked (set them 190 to BAD_S3WID) before used. Whereas the flat lexicon decoder 191 doesn't have such assumption. These two functions change this 192 behavior. Called in mode 3, 4 and 5 to make sure different code 193 works. FIXME: This is dumb. 194 */ 195 void unlinksilences(lm_t* l, kbcore_t *kbc, dict_t *d); 196 197 void linksilences(lm_t* l, kbcore_t *kbc, dict_t *d); 198 199 /** Access macros; not meant for arbitrary use */ 200 #define kbcore_config(k) ((k)->config) 201 #define kbcore_fcb(k) ((k)->fcb) 202 #define kbcore_mdef(k) ((k)->mdef) 203 #define kbcore_dict(k) ((k)->dict) 204 #define kbcore_dict2pid(k) ((k)->dict2pid) 205 #define kbcore_lm(k) ((k)->lmset ? (k)->lmset->cur_lm : NULL) 206 #define kbcore_fillpen(k) ((k)->fillpen) 207 #define kbcore_dict2lmwid(k,w) ((k)->dict2lmwid[w]) 208 #define kbcore_mgau(k) ((k)->mgau) 209 #define kbcore_ms_mgau(k) ((k)->ms_mgau) 210 #define kbcore_s2_mgau(k) ((k)->s2_mgau) 211 #define kbcore_svq(k) ((k)->svq) 212 #define kbcore_gs(k) ((k)->gs) 213 #define kbcore_tmat(k) ((k)->tmat) 214 #define kbcore_lmset(k) ((k)->lmset) 215 /*#define kbcore_n_mgau(k) ((k)->mgau ? mgau_n_mgau((k)->mgau) : (k)->ms_mgau->s->n_sen) 216 */ 217 #define kbcore_n_mgau(k) ((k)->mgau ? mgau_n_mgau((k)->mgau) \ 218 : ((k)->s2_mgau ? (k)->s2_mgau->CdWdPDFMod \ 219 : (k)->ms_mgau->s->n_sen)) 220 #define kbcore_logmath(k) ((k)->logmath) 221 222 223 224 #if 0 225 { /* Stop indent from complaining */ 226 #endif 227 #ifdef __cplusplus 228 } 229 #endif 230 231 #endif 232