1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* ==================================================================== 3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 4 * reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * This work was supported in part by funding from the Defense Advanced 19 * Research Projects Agency and the National Science Foundation of the 20 * United States of America, and the CMU Sphinx Speech Consortium. 21 * 22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * 34 * ==================================================================== 35 * 36 */ 37 38 /* 39 * mdef.h -- HMM model definition: base (CI) phones and triphones 40 * 41 * ********************************************** 42 * CMU ARPA Speech Project 43 * 44 * Copyright (c) 1999 Carnegie Mellon University. 45 * ALL RIGHTS RESERVED. 46 * ********************************************** 47 */ 48 49 50 #ifndef __MDEF_H__ 51 #define __MDEF_H__ 52 53 54 /* System headers. */ 55 #include <stdio.h> 56 57 /* SphinxBase headers. */ 58 #include <sphinxbase/hash_table.h> 59 60 #ifdef __cplusplus 61 extern "C" { 62 #endif 63 64 /** \file mdef.h 65 * \brief Model definition 66 */ 67 68 /** \enum word_posn_t 69 * \brief Union of different type of word position 70 */ 71 72 typedef enum { 73 WORD_POSN_INTERNAL = 0, /**< Internal phone of word */ 74 WORD_POSN_BEGIN = 1, /**< Beginning phone of word */ 75 WORD_POSN_END = 2, /**< Ending phone of word */ 76 WORD_POSN_SINGLE = 3, /**< Single phone word (i.e. begin & end) */ 77 WORD_POSN_UNDEFINED = 4 /**< Undefined value, used for initial conditions, etc */ 78 } word_posn_t; 79 #define N_WORD_POSN 4 /**< total # of word positions (excluding undefined) */ 80 #define WPOS_NAME "ibesu" /**< Printable code for each word position above */ 81 #define S3_SILENCE_CIPHONE "SIL" /**< Hard-coded silence CI phone name */ 82 83 /** 84 \struct ciphone_t 85 \brief CI phone information 86 */ 87 typedef struct { 88 char *name; /**< The name of the CI phone */ 89 int32 filler; /**< Whether a filler phone; if so, can be substituted by 90 silence phone in left or right context position */ 91 } ciphone_t; 92 93 /** 94 * \struct phone_t 95 * \brief Triphone information, including base phones as a subset. For the latter, lc, rc and wpos are non-existent. 96 */ 97 typedef struct { 98 int32 ssid; /**< State sequence (or senone sequence) ID, considering the 99 n_emit_state senone-ids are a unit. The senone sequences 100 themselves are in a separate table */ 101 int32 tmat; /**< Transition matrix id */ 102 int16 ci, lc, rc; /**< Base, left, right context ciphones */ 103 word_posn_t wpos; /**< Word position */ 104 105 } phone_t; 106 107 /** 108 * \struct ph_rc_t 109 * \brief Structures needed for mapping <ci,lc,rc,wpos> into pid. (See mdef_t.wpos_ci_lclist below.) (lc = left context; rc = right context.) 110 * NOTE: Both ph_rc_t and ph_lc_t FOR INTERNAL USE ONLY. 111 */ 112 typedef struct ph_rc_s { 113 int16 rc; /**< Specific rc for a parent <wpos,ci,lc> */ 114 int32 pid; /**< Triphone id for above rc instance */ 115 struct ph_rc_s *next; /**< Next rc entry for same parent <wpos,ci,lc> */ 116 } ph_rc_t; 117 118 /** 119 * \struct ph_lc_t 120 * \brief Structures for storing the left context. 121 */ 122 123 typedef struct ph_lc_s { 124 int16 lc; /**< Specific lc for a parent <wpos,ci> */ 125 ph_rc_t *rclist; /**< rc list for above lc instance */ 126 struct ph_lc_s *next; /**< Next lc entry for same parent <wpos,ci> */ 127 } ph_lc_t; 128 129 130 /** The main model definition structure */ 131 /** 132 \struct mdef_t 133 \brief strcture for storing the model definition. 134 */ 135 typedef struct { 136 int32 n_ciphone; /**< number basephones actually present */ 137 int32 n_phone; /**< number basephones + number triphones actually present */ 138 int32 n_emit_state; /**< number emitting states per phone */ 139 int32 n_ci_sen; /**< number CI senones; these are the first */ 140 int32 n_sen; /**< number senones (CI+CD) */ 141 int32 n_tmat; /**< number transition matrices */ 142 143 hash_table_t *ciphone_ht; /**< Hash table for mapping ciphone strings to ids */ 144 ciphone_t *ciphone; /**< CI-phone information for all ciphones */ 145 phone_t *phone; /**< Information for all ciphones and triphones */ 146 uint16 **sseq; /**< Unique state (or senone) sequences in this model, shared 147 among all phones/triphones */ 148 int32 n_sseq; /**< No. of unique senone sequences in this model */ 149 150 int16 *cd2cisen; /**< Parent CI-senone id for each senone; the first 151 n_ci_sen are identity mappings; the CD-senones are 152 contiguous for each parent CI-phone */ 153 int16 *sen2cimap; /**< Parent CI-phone for each senone (CI or CD) */ 154 155 int16 sil; /**< SILENCE_CIPHONE id */ 156 157 ph_lc_t ***wpos_ci_lclist; /**< wpos_ci_lclist[wpos][ci] = list of lc for <wpos,ci>. 158 wpos_ci_lclist[wpos][ci][lc].rclist = list of rc for 159 <wpos,ci,lc>. Only entries for the known triphones 160 are created to conserve space. 161 (NOTE: FOR INTERNAL USE ONLY.) */ 162 } mdef_t; 163 164 /** Access macros; not meant for arbitrary use */ 165 #define mdef_is_fillerphone(m,p) ((m)->ciphone[p].filler) 166 #define mdef_n_ciphone(m) ((m)->n_ciphone) 167 #define mdef_n_phone(m) ((m)->n_phone) 168 #define mdef_n_sseq(m) ((m)->n_sseq) 169 #define mdef_n_emit_state(m) ((m)->n_emit_state) 170 #define mdef_n_sen(m) ((m)->n_sen) 171 #define mdef_n_tmat(m) ((m)->n_tmat) 172 #define mdef_pid2ssid(m,p) ((m)->phone[p].ssid) 173 #define mdef_pid2tmatid(m,p) ((m)->phone[p].tmat) 174 #define mdef_silphone(m) ((m)->sil) 175 #define mdef_sen2cimap(m) ((m)->sen2cimap) 176 #define mdef_sseq2sen(m,ss,pos) ((m)->sseq[ss][pos]) 177 #define mdef_pid2ci(m,p) ((m)->phone[p].ci) 178 #define mdef_cd2cisen(m) ((m)->cd2cisen) 179 180 /** 181 * Initialize the phone structure from the given model definition file. 182 * It should be treated as a READ-ONLY structure. 183 * @return pointer to the phone structure created. 184 */ 185 mdef_t *mdef_init (char *mdeffile, /**< In: Model definition file */ 186 int breport /**< In: whether to report the progress or not */ 187 ); 188 189 190 /** 191 Get the ciphone id given a string name 192 @return ciphone id for the given ciphone string name 193 */ 194 int mdef_ciphone_id(mdef_t *m, /**< In: Model structure being queried */ 195 char *ciphone /**< In: ciphone for which id wanted */ 196 ); 197 198 /** 199 Get the phone string given the ci phone id. 200 @return: READ-ONLY ciphone string name for the given ciphone id 201 */ 202 const char *mdef_ciphone_str(mdef_t *m, /**< In: Model structure being queried */ 203 int ci /**< In: ciphone id for which name wanted */ 204 ); 205 206 /** 207 Decide whether the phone is ci phone. 208 @return 1 if given triphone argument is a ciphone, 0 if not, -1 if error 209 */ 210 int mdef_is_ciphone (mdef_t *m, /**< In: Model structure being queried */ 211 int p /**< In: triphone id being queried */ 212 ); 213 214 /** 215 Decide whether the senone is a senone for a ci phone, or a ci senone 216 @return 1 if a given senone is a ci senone 217 */ 218 int mdef_is_cisenone(mdef_t *m, /**< In: Model structure being queried */ 219 int s /**< In: senone id being queried */ 220 ); 221 222 /** 223 Decide the phone id given the left, right and base phones. 224 @return: phone id for the given constituents if found, else BAD_S3PID 225 */ 226 int mdef_phone_id (mdef_t *m, /**< In: Model structure being queried */ 227 int b, /**< In: base ciphone id */ 228 int l, /**< In: left context ciphone id */ 229 int r, /**< In: right context ciphone id */ 230 word_posn_t pos /**< In: Word position */ 231 ); 232 233 /** 234 * Create a phone string for the given phone (base or triphone) id in the given buf. 235 * @return 0 if successful, -1 if error. 236 */ 237 int mdef_phone_str(mdef_t *m, /**< In: Model structure being queried */ 238 int pid, /**< In: phone id being queried */ 239 char *buf /**< Out: On return, buf has the string */ 240 ); 241 242 /** 243 * Compare the underlying HMMs for two given phones (i.e., compare the two transition 244 * matrix IDs and the individual state(senone) IDs). 245 * @return 0 iff the HMMs are identical, -1 otherwise. 246 */ 247 int mdef_hmm_cmp (mdef_t *m, /**< In: Model being queried */ 248 int p1, /**< In: One of the two triphones being compared */ 249 int p2 /**< In: One of the two triphones being compared */ 250 ); 251 252 /** Report the model definition's parameters */ 253 void mdef_report(mdef_t *m /**< In: model definition structure */ 254 ); 255 256 /** RAH, For freeing memory */ 257 void mdef_free_recursive_lc (ph_lc_t *lc /**< In: A list of left context */ 258 ); 259 void mdef_free_recursive_rc (ph_rc_t *rc /**< In: A list of right context */ 260 ); 261 262 /** Free an mdef_t */ 263 void mdef_free (mdef_t *mdef /**< In : The model definition*/ 264 ); 265 266 267 #ifdef __cplusplus 268 } 269 #endif 270 271 #endif 272