1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* ==================================================================== 3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 4 * reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * 19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 * ==================================================================== 32 * 33 */ 34 /* 35 * fsg_psubtree.h -- Phone-level FSG subtree representing all transitions 36 * out of a single FSG state. 37 * (Note: Currently, it is actually a flat lexicon representation 38 * 39 * ********************************************** 40 * CMU ARPA Speech Project 41 * 42 * Copyright (c) 2004 Carnegie Mellon University. 43 * ALL RIGHTS RESERVED. 44 * ********************************************** 45 * 46 * HISTORY 47 * 48 * $Log$ 49 * Revision 1.1 2006/04/05 20:27:30 dhdfu 50 * A Great Reorganzation of header files and executables 51 * 52 * Revision 1.2 2006/02/23 05:10:18 arthchan2003 53 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3 54 * 55 * Revision 1.1.2.5 2005/07/24 01:34:54 arthchan2003 56 * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID 57 * 58 * Revision 1.1.2.4 2005/07/20 21:18:30 arthchan2003 59 * FSG can now be read, srch_fsg_init can now be initialized, psubtree can be built. Sounds like it is time to plug in other function pointers. 60 * 61 * Revision 1.1.2.3 2005/07/17 05:44:32 arthchan2003 62 * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet. 63 * 64 * Revision 1.1.2.2 2005/07/13 18:39:47 arthchan2003 65 * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro. There are 8 minor HACKs where functions need to be removed temporarily. Also, there are three major hacks. 1, there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables. This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement. But I haven't figure it out yet. 66 * 67 * Revision 1.1.2.1 2005/06/27 05:26:29 arthchan2003 68 * Sphinx 2 fsg mainpulation routines. Compiled with faked functions. Currently fended off from users. 69 * 70 * Revision 1.1 2004/07/16 00:57:12 egouvea 71 * Added Ravi's implementation of FSG support. 72 * 73 * Revision 1.3 2004/06/25 14:49:08 rkm 74 * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state 75 * 76 * Revision 1.2 2004/05/27 14:22:57 rkm 77 * FSG cross-word triphones completed (but for single-phone words) 78 * 79 * Revision 1.1.1.1 2004/03/01 14:30:31 rkm 80 * 81 * 82 * Revision 1.2 2004/02/27 15:05:21 rkm 83 * *** empty log message *** 84 * 85 * Revision 1.1 2004/02/23 15:53:45 rkm 86 * Renamed from fst to fsg 87 * 88 * Revision 1.4 2004/02/23 15:09:50 rkm 89 * *** empty log message *** 90 * 91 * Revision 1.3 2004/02/19 21:16:54 rkm 92 * Added fsg_search.{c,h} 93 * 94 * Revision 1.2 2004/02/18 15:02:34 rkm 95 * Added fsg_lextree.{c,h} 96 * 97 * Revision 1.1 2004/02/17 21:11:49 rkm 98 * *** empty log message *** 99 * 100 * 101 * 09-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 102 * Started. 103 */ 104 105 106 #ifndef __S2_FSG_PSUBTREE_H__ 107 #define __S2_FSG_PSUBTREE_H__ 108 109 110 #include <stdio.h> 111 112 #include <cmd_ln.h> 113 #include <logmath.h> 114 115 #include "s3types.h" 116 #include "word_fsg.h" 117 #include "fsg.h" 118 #include "hmm.h" 119 #include "dict.h" 120 #include "mdef.h" 121 122 123 #ifdef __cplusplus 124 extern "C" { 125 #endif 126 #if 0 127 /* Fool Emacs. */ 128 } 129 #endif 130 131 /* 132 * **HACK-ALERT**!! Compile-time constant determining the size of the 133 * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.) 134 * But it makes memory allocation simpler and more efficient. 135 */ 136 #define FSG_PNODE_CTXT_BVSZ 2 137 138 typedef struct { 139 uint32 bv[FSG_PNODE_CTXT_BVSZ]; 140 } fsg_pnode_ctxt_t; 141 142 143 /** 144 * \struct fsg_pnode_s 145 * \brief an fsg node. 146 * All transitions (words) out of any given FSG state represented are by a 147 * phonetic prefix lextree (except for epsilon or null transitions; they 148 * are not part of the lextree). Lextree leaf nodes represent individual 149 * FSG transitions, so no sharing is allowed at the leaf nodes. The FSG 150 * transition probs are distributed along the lextree: the prob at a node 151 * is the max of the probs of all leaf nodes (and, hence, FSG transitions) 152 * reachable from that node. 153 * 154 * To conserve memory, the underlying HMMs with state-level information are 155 * allocated only as needed. Root and leaf nodes must also account for all 156 * the possible phonetic contexts, with an independent HMM for each distinct 157 * context. 158 */ 159 typedef struct fsg_pnode_s { 160 /** 161 * If this is not a leaf node, the first successor (child) node. Otherwise 162 * the parent FSG transition for which this is the leaf node (for figuring 163 * the FSG destination state, and word emitted by the transition). A node 164 * may have several children. The succ ptr gives just the first; the rest 165 * are linked via the sibling ptr below. 166 */ 167 union { 168 struct fsg_pnode_s *succ; 169 word_fsglink_t *fsglink; 170 } next; 171 172 /* 173 * For simplicity of memory management (i.e., freeing the pnodes), all 174 * pnodes allocated for all transitions out of a state are maintained in a 175 * linear linked list through the alloc_next pointer. 176 */ 177 struct fsg_pnode_s *alloc_next; 178 179 /* 180 * The next node that is also a child of the parent of this node; NULL if 181 * none. 182 */ 183 struct fsg_pnode_s *sibling; 184 185 /* 186 * The transition (log) probability to be incurred upon transitioning to 187 * this node. (Transition probabilities are really associated with the 188 * transitions. But a lextree node has exactly one incoming transition. 189 * Hence, the prob can be associated with the node.) 190 * This is a logs2(prob) value, and includes the language weight. 191 */ 192 int32 logs2prob; 193 194 /* 195 * The root and leaf positions associated with any transition have to deal 196 * with multiple phonetic contexts. However, different contexts may result 197 * in the same SSID (senone-seq ID), and can share a single pnode with that 198 * SSID. But the pnode should track the set of context CI phones that share 199 * it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For 200 * simplicity of implementation, its size is a compile-time constant for 201 * now.) Single phone words would need a 2-D array of context, but that's 202 * too expensive. For now, they simply use SIL as right context, so only 203 * the left context is properly modelled. 204 * (For word-internal phones, this field is unused, of course.) 205 */ 206 fsg_pnode_ctxt_t ctxt; 207 208 uint8 ci_ext; /* This node's CIphone as viewed externally (context) */ 209 uint8 ppos; /* Phoneme position in pronunciation */ 210 uint8 leaf; /* Whether this is a leaf node */ 211 212 /* HMM-state-level stuff here */ 213 hmm_t hmm; 214 } fsg_pnode_t; 215 216 /* Access macros */ 217 #define fsg_pnode_leaf(p) ((p)->leaf) 218 #define fsg_pnode_logs2prob(p) ((p)->logs2prob) 219 #define fsg_pnode_succ(p) ((p)->next.succ) 220 #define fsg_pnode_fsglink(p) ((p)->next.fsglink) 221 #define fsg_pnode_sibling(p) ((p)->sibling) 222 #define fsg_pnode_hmmptr(p) (&((p)->hmm)) 223 #define fsg_pnode_ci_ext(p) ((p)->ci_ext) 224 #define fsg_pnode_ppos(p) ((p)->ppos) 225 #define fsg_pnode_leaf(p) ((p)->leaf) 226 #define fsg_pnode_ctxt(p) ((p)->ctxt) 227 228 #define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f))) 229 230 231 /** 232 * Build the phone lextree for all transitions out of state from_state. 233 * Return the root node of this tree. 234 * Also, return a linear linked list of all allocated fsg_pnode_t nodes in 235 * *alloc_head (for memory management purposes). 236 */ 237 fsg_pnode_t *fsg_psubtree_init (hmm_context_t *ctx, 238 word_fsg_t *fsg, /**< A word fsg */ 239 int32 from_state, /**< from which state to initalize*/ 240 fsg_pnode_t **alloc_head, 241 cmd_ln_t *config, 242 logmath_t *logmath 243 ); 244 245 246 /** 247 * Free the given lextree. alloc_head: head of linear list of allocated 248 * nodes updated by fsg_psubtree_init(). 249 */ 250 void fsg_psubtree_free (fsg_pnode_t *alloc_head); 251 252 253 /* 254 * Dump the list of nodes in the given lextree to the given file. alloc_head: 255 * head of linear list of allocated nodes updated by fsg_psubtree_init(). 256 */ 257 void fsg_psubtree_dump (fsg_pnode_t *alloc_head, FILE *fp, 258 dict_t *dict, mdef_t *mdef 259 ); 260 261 262 /* 263 * Attempt to transition into the given node with the given attributes. 264 * If the node is already active in the given frame with a score better 265 * than the incoming score, nothing is done. Otherwise the transition is 266 * successful. 267 * Return value: TRUE if the node was newly activated for the given frame, 268 * FALSE if it was already activated for that frame (whether the incoming 269 * transition was successful or not). 270 */ 271 int fsg_psubtree_pnode_enter (fsg_pnode_t *pnode, 272 int32 score, 273 int32 frame, 274 int32 bpidx); 275 276 277 /* 278 * Mark the given pnode as inactive (for search). 279 */ 280 void fsg_psubtree_pnode_deactivate (fsg_pnode_t *pnode); 281 282 283 /* Set all flags on in the given context bitvector */ 284 void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt); 285 286 /* 287 * Subtract bitvector sub from bitvector src (src updated with the result). 288 * Return 0 if result is all 0, non-zero otherwise. 289 */ 290 uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); 291 292 #ifdef __cplusplus 293 } 294 #endif 295 296 297 #endif 298