1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* ==================================================================== 3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 4 * reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * 19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 * ==================================================================== 32 * 33 */ 34 35 /* 36 * fsg_model.h -- Word-level finite state graph 37 * 38 * ********************************************** 39 * CMU ARPA Speech Project 40 * 41 * Copyright (c) 2003 Carnegie Mellon University. 42 * ALL RIGHTS RESERVED. 43 * ********************************************** 44 */ 45 46 47 #ifndef __FSG_MODEL_H__ 48 #define __FSG_MODEL_H__ 49 50 /* System headers. */ 51 #include <stdio.h> 52 #include <string.h> 53 54 /* SphinxBase headers. */ 55 #include <sphinxbase/prim_type.h> 56 #include <sphinxbase/glist.h> 57 #include <sphinxbase/logmath.h> 58 #include <sphinxbase/bitvec.h> 59 #include <sphinxbase/hash_table.h> 60 #include <sphinxbase/listelem_alloc.h> 61 #include <sphinxbase/sphinxbase_export.h> 62 63 /* 64 * A single transition in the FSG. 65 */ 66 typedef struct fsg_link_s { 67 int32 from_state; 68 int32 to_state; 69 int32 logs2prob; /**< log(transition probability)*lw */ 70 int32 wid; /**< Word-ID; <0 if epsilon or null transition */ 71 } fsg_link_t; 72 73 /* Access macros */ 74 #define fsg_link_from_state(l) ((l)->from_state) 75 #define fsg_link_to_state(l) ((l)->to_state) 76 #define fsg_link_wid(l) ((l)->wid) 77 #define fsg_link_logs2prob(l) ((l)->logs2prob) 78 79 /** 80 * Adjacency list (opaque) for a state in an FSG. 81 */ 82 typedef struct trans_list_s trans_list_t; 83 84 /** 85 * Word level FSG definition. 86 * States are simply integers 0..n_state-1. 87 * A transition emits a word and has a given probability of being taken. 88 * There can also be null or epsilon transitions, with no associated emitted 89 * word. 90 */ 91 typedef struct fsg_model_s { 92 int refcount; /**< Reference count. */ 93 char *name; /**< A unique string identifier for this FSG */ 94 int32 n_word; /**< Number of unique words in this FSG */ 95 int32 n_word_alloc; /**< Number of words allocated in vocab */ 96 char **vocab; /**< Vocabulary for this FSG. */ 97 bitvec_t *silwords; /**< Indicates which words are silence/fillers. */ 98 bitvec_t *altwords; /**< Indicates which words are pronunciation alternates. */ 99 logmath_t *lmath; /**< Pointer to log math computation object. */ 100 int32 n_state; /**< number of states in FSG */ 101 int32 start_state; /**< Must be in the range [0..n_state-1] */ 102 int32 final_state; /**< Must be in the range [0..n_state-1] */ 103 float32 lw; /**< Language weight that's been applied to transition 104 logprobs */ 105 trans_list_t *trans; /**< Transitions out of each state, if any. */ 106 listelem_alloc_t *link_alloc; /**< Allocator for FSG links. */ 107 } fsg_model_t; 108 109 /* Access macros */ 110 #define fsg_model_name(f) ((f)->name) 111 #define fsg_model_n_state(f) ((f)->n_state) 112 #define fsg_model_start_state(f) ((f)->start_state) 113 #define fsg_model_final_state(f) ((f)->final_state) 114 #define fsg_model_log(f,p) logmath_log((f)->lmath, p) 115 #define fsg_model_lw(f) ((f)->lw) 116 #define fsg_model_n_word(f) ((f)->n_word) 117 #define fsg_model_word_str(f,wid) (wid == -1 ? "(NULL)" : (f)->vocab[wid]) 118 119 /** 120 * Iterator over arcs. 121 */ 122 typedef struct fsg_arciter_s fsg_arciter_t; 123 124 /** 125 * Have silence transitions been added? 126 */ 127 #define fsg_model_has_sil(f) ((f)->silwords != NULL) 128 129 /** 130 * Have alternate word transitions been added? 131 */ 132 #define fsg_model_has_alt(f) ((f)->altwords != NULL) 133 134 #define fsg_model_is_filler(f,wid) \ 135 (fsg_model_has_sil(f) ? bitvec_is_set((f)->silwords, wid) : FALSE) 136 #define fsg_model_is_alt(f,wid) \ 137 (fsg_model_has_alt(f) ? bitvec_is_set((f)->altwords, wid) : FALSE) 138 139 /** 140 * Create a new FSG. 141 */ 142 SPHINXBASE_EXPORT 143 fsg_model_t *fsg_model_init(char const *name, logmath_t *lmath, 144 float32 lw, int32 n_state); 145 146 /** 147 * Read a word FSG from the given file and return a pointer to the structure 148 * created. Return NULL if any error occurred. 149 * 150 * File format: 151 * 152 * <pre> 153 * Any number of comment lines; ignored 154 * FSG_BEGIN [<fsgname>] 155 * N <#states> 156 * S <start-state ID> 157 * F <final-state ID> 158 * T <from-state> <to-state> <prob> [<word-string>] 159 * T ... 160 * ... (any number of state transitions) 161 * FSG_END 162 * Any number of comment lines; ignored 163 * </pre> 164 * 165 * The FSG spec begins with the line containing the keyword FSG_BEGIN. 166 * It has an optional fsg name string. If not present, the FSG has the empty 167 * string as its name. 168 * 169 * Following the FSG_BEGIN declaration is the number of states, the start 170 * state, and the final state, each on a separate line. States are numbered 171 * in the range [0 .. <numberofstate>-1]. 172 * 173 * These are followed by all the state transitions, each on a separate line, 174 * and terminated by the FSG_END line. A state transition has the given 175 * probability of being taken, and emits the given word. The word emission 176 * is optional; if word-string omitted, it is an epsilon or null transition. 177 * 178 * Comments can also be embedded within the FSG body proper (i.e. between 179 * FSG_BEGIN and FSG_END): any line with a # character in col 1 is treated 180 * as a comment line. 181 * 182 * Return value: a new fsg_model_t structure if the file is successfully 183 * read, NULL otherwise. 184 */ 185 SPHINXBASE_EXPORT 186 fsg_model_t *fsg_model_readfile(const char *file, logmath_t *lmath, float32 lw); 187 188 /** 189 * Like fsg_model_readfile(), but from an already open stream. 190 */ 191 SPHINXBASE_EXPORT 192 fsg_model_t *fsg_model_read(FILE *fp, logmath_t *lmath, float32 lw); 193 194 /** 195 * Retain ownership of an FSG. 196 * 197 * @return Pointer to retained FSG. 198 */ 199 SPHINXBASE_EXPORT 200 fsg_model_t *fsg_model_retain(fsg_model_t *fsg); 201 202 /** 203 * Free the given word FSG. 204 * 205 * @return new reference count (0 if freed completely) 206 */ 207 SPHINXBASE_EXPORT 208 int fsg_model_free(fsg_model_t *fsg); 209 210 /** 211 * Add a word to the FSG vocabulary. 212 * 213 * @return Word ID for this new word. 214 */ 215 SPHINXBASE_EXPORT 216 int fsg_model_word_add(fsg_model_t *fsg, char const *word); 217 218 /** 219 * Look up a word in the FSG vocabulary. 220 * 221 * @return Word ID for this word 222 */ 223 SPHINXBASE_EXPORT 224 int fsg_model_word_id(fsg_model_t *fsg, char const *word); 225 226 /** 227 * Add the given transition to the FSG transition matrix. 228 * 229 * Duplicates (i.e., two transitions between the same states, with the 230 * same word label) are flagged and only the highest prob retained. 231 */ 232 SPHINXBASE_EXPORT 233 void fsg_model_trans_add(fsg_model_t * fsg, 234 int32 from, int32 to, int32 logp, int32 wid); 235 236 /** 237 * Add a null transition between the given states. 238 * 239 * There can be at most one null transition between the given states; 240 * duplicates are flagged and only the best prob retained. Transition 241 * probs must be <= 1 (i.e., logprob <= 0). 242 * 243 * @return 1 if a new transition was added, 0 if the prob of an existing 244 * transition was upgraded; -1 if nothing was changed. 245 */ 246 SPHINXBASE_EXPORT 247 int32 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, int32 logp); 248 249 /** 250 * Add a "tag" transition between the given states. 251 * 252 * A "tag" transition is a null transition with a non-null word ID, 253 * which corresponds to a semantic tag or other symbol to be output 254 * when this transition is taken. 255 * 256 * As above, there can be at most one null or tag transition between 257 * the given states; duplicates are flagged and only the best prob 258 * retained. Transition probs must be <= 1 (i.e., logprob <= 0). 259 * 260 * @return 1 if a new transition was added, 0 if the prob of an existing 261 * transition was upgraded; -1 if nothing was changed. 262 */ 263 SPHINXBASE_EXPORT 264 int32 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, 265 int32 logp, int32 wid); 266 267 /** 268 * Obtain transitive closure of null transitions in the given FSG. 269 * 270 * @param nulls List of null transitions, or NULL to find them automatically. 271 * @return Updated list of null transitions. 272 */ 273 SPHINXBASE_EXPORT 274 glist_t fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls); 275 276 /** 277 * Get the list of transitions (if any) from state i to j. 278 */ 279 SPHINXBASE_EXPORT 280 glist_t fsg_model_trans(fsg_model_t *fsg, int32 i, int32 j); 281 282 /** 283 * Get an iterator over the outgoing transitions from state i. 284 */ 285 SPHINXBASE_EXPORT 286 fsg_arciter_t *fsg_model_arcs(fsg_model_t *fsg, int32 i); 287 288 /** 289 * Get the current arc from the arc iterator. 290 */ 291 SPHINXBASE_EXPORT 292 fsg_link_t *fsg_arciter_get(fsg_arciter_t *itor); 293 294 /** 295 * Move the arc iterator forward. 296 */ 297 SPHINXBASE_EXPORT 298 fsg_arciter_t *fsg_arciter_next(fsg_arciter_t *itor); 299 300 /** 301 * Free the arc iterator (early termination) 302 */ 303 SPHINXBASE_EXPORT 304 void fsg_arciter_free(fsg_arciter_t *itor); 305 /** 306 * Get the null transition (if any) from state i to j. 307 */ 308 SPHINXBASE_EXPORT 309 fsg_link_t *fsg_model_null_trans(fsg_model_t *fsg, int32 i, int32 j); 310 311 /** 312 * Add silence word transitions to each state in given FSG. 313 * 314 * @param state state to add a self-loop to, or -1 for all states. 315 * @param silprob probability of silence transition. 316 */ 317 SPHINXBASE_EXPORT 318 int fsg_model_add_silence(fsg_model_t * fsg, char const *silword, 319 int state, float32 silprob); 320 321 /** 322 * Add alternate pronunciation transitions for a word in given FSG. 323 */ 324 SPHINXBASE_EXPORT 325 int fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, 326 char const *altword); 327 328 /** 329 * Write FSG to a file. 330 */ 331 SPHINXBASE_EXPORT 332 void fsg_model_write(fsg_model_t *fsg, FILE *fp); 333 334 /** 335 * Write FSG to a file. 336 */ 337 SPHINXBASE_EXPORT 338 void fsg_model_writefile(fsg_model_t *fsg, char const *file); 339 340 /** 341 * Write FSG to a file in AT&T FSM format. 342 */ 343 SPHINXBASE_EXPORT 344 void fsg_model_write_fsm(fsg_model_t *fsg, FILE *fp); 345 346 /** 347 * Write FSG to a file in AT&T FSM format. 348 */ 349 SPHINXBASE_EXPORT 350 void fsg_model_writefile_fsm(fsg_model_t *fsg, char const *file); 351 352 /** 353 * Write FSG symbol table to a file (for AT&T FSM) 354 */ 355 SPHINXBASE_EXPORT 356 void fsg_model_write_symtab(fsg_model_t *fsg, FILE *file); 357 358 /** 359 * Write FSG symbol table to a file (for AT&T FSM) 360 */ 361 SPHINXBASE_EXPORT 362 void fsg_model_writefile_symtab(fsg_model_t *fsg, char const *file); 363 364 #endif /* __FSG_MODEL_H__ */ 365