1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /*
39  * mdef.h -- HMM model definition: base (CI) phones and triphones
40  *
41  * **********************************************
42  * CMU ARPA Speech Project
43  *
44  * Copyright (c) 1999 Carnegie Mellon University.
45  * ALL RIGHTS RESERVED.
46  * **********************************************
47  */
48 
49 
50 #ifndef __MDEF_H__
51 #define __MDEF_H__
52 
53 
54 /* System headers. */
55 #include <stdio.h>
56 
57 /* SphinxBase headers. */
58 #include <sphinxbase/hash_table.h>
59 
60 #ifdef __cplusplus
61 extern "C" {
62 #endif
63 
64 /** \file mdef.h
65  * \brief Model definition
66  */
67 
/** \enum word_posn_t
 * \brief Enumeration of the possible positions of a phone within a word.
 *
 * The numeric values are explicit because they are used as indices: the
 * comment on WPOS_NAME describes one printable character per value, and
 * mdef_t.wpos_ci_lclist is documented as indexed by word position first.
 */
typedef enum {
    WORD_POSN_INTERNAL = 0,     /**< Internal phone of word */
    WORD_POSN_BEGIN = 1,        /**< Beginning phone of word */
    WORD_POSN_END = 2,          /**< Ending phone of word */
    WORD_POSN_SINGLE = 3,       /**< Single phone word (i.e. begin & end) */
    WORD_POSN_UNDEFINED = 4     /**< Undefined value, used for initial conditions, etc */
} word_posn_t;
#define N_WORD_POSN 4   /**< Total # of real word positions (WORD_POSN_UNDEFINED is excluded) */
#define WPOS_NAME "ibesu"   /**< Printable code for each word position, in word_posn_t order:
                                 i(nternal), b(egin), e(nd), s(ingle), u(ndefined) */
#define S3_SILENCE_CIPHONE "SIL"    /**< Hard-coded silence CI phone name */
82 
83 /**
84    \struct ciphone_t
85    \brief CI phone information
86 */
87 typedef struct {
88     char *name;                 /**< The name of the CI phone */
89     int32 filler;		/**< Whether a filler phone; if so, can be substituted by
90 				   silence phone in left or right context position */
91 } ciphone_t;
92 
93 /**
94  * \struct phone_t
95  * \brief Triphone information, including base phones as a subset.  For the latter, lc, rc and wpos are non-existent.
96  */
97 typedef struct {
98     int32 ssid;			/**< State sequence (or senone sequence) ID, considering the
99 				   n_emit_state senone-ids are a unit.  The senone sequences
100 				   themselves are in a separate table */
101     int32 tmat;			/**< Transition matrix id */
102     int16 ci, lc, rc;		/**< Base, left, right context ciphones */
103     word_posn_t wpos;		/**< Word position */
104 
105 } phone_t;
106 
107 /**
108  * \struct ph_rc_t
109  * \brief Structures needed for mapping <ci,lc,rc,wpos> into pid.  (See mdef_t.wpos_ci_lclist below.)  (lc = left context; rc = right context.)
110  * NOTE: Both ph_rc_t and ph_lc_t FOR INTERNAL USE ONLY.
111  */
112 typedef struct ph_rc_s {
113     int16 rc;			/**< Specific rc for a parent <wpos,ci,lc> */
114     int32 pid;			/**< Triphone id for above rc instance */
115     struct ph_rc_s *next;	/**< Next rc entry for same parent <wpos,ci,lc> */
116 } ph_rc_t;
117 
118 /**
119  * \struct ph_lc_t
120  * \brief Structures for storing the left context.
121  */
122 
123 typedef struct ph_lc_s {
124     int16 lc;			/**< Specific lc for a parent <wpos,ci> */
125     ph_rc_t *rclist;		/**< rc list for above lc instance */
126     struct ph_lc_s *next;	/**< Next lc entry for same parent <wpos,ci> */
127 } ph_lc_t;
128 
129 
130 /** The main model definition structure */
131 /**
132    \struct mdef_t
133    \brief strcture for storing the model definition.
134 */
135 typedef struct {
136     int32 n_ciphone;		/**< number basephones actually present */
137     int32 n_phone;		/**< number basephones + number triphones actually present */
138     int32 n_emit_state;		/**< number emitting states per phone */
139     int32 n_ci_sen;		/**< number CI senones; these are the first */
140     int32 n_sen;		/**< number senones (CI+CD) */
141     int32 n_tmat;		/**< number transition matrices */
142 
143     hash_table_t *ciphone_ht;	/**< Hash table for mapping ciphone strings to ids */
144     ciphone_t *ciphone;		/**< CI-phone information for all ciphones */
145     phone_t *phone;		/**< Information for all ciphones and triphones */
146     uint16 **sseq;		/**< Unique state (or senone) sequences in this model, shared
147                                    among all phones/triphones */
148     int32 n_sseq;		/**< No. of unique senone sequences in this model */
149 
150     int16 *cd2cisen;		/**< Parent CI-senone id for each senone; the first
151 				   n_ci_sen are identity mappings; the CD-senones are
152 				   contiguous for each parent CI-phone */
153     int16 *sen2cimap;		/**< Parent CI-phone for each senone (CI or CD) */
154 
155     int16 sil;			/**< SILENCE_CIPHONE id */
156 
157     ph_lc_t ***wpos_ci_lclist;	/**< wpos_ci_lclist[wpos][ci] = list of lc for <wpos,ci>.
158                                    wpos_ci_lclist[wpos][ci][lc].rclist = list of rc for
159                                    <wpos,ci,lc>.  Only entries for the known triphones
160                                    are created to conserve space.
161                                    (NOTE: FOR INTERNAL USE ONLY.) */
162 } mdef_t;
163 
/**
 * Access macros; not meant for arbitrary use.
 *
 * m is a pointer to an mdef_t.  p is an index into m->phone (into
 * m->ciphone for mdef_is_fillerphone); ss is an index into m->sseq and pos
 * an index within that sequence.  No bounds checking is performed.  Every
 * macro argument is parenthesized in the expansion and evaluated exactly
 * once.
 */
#define mdef_is_fillerphone(m,p)	((m)->ciphone[(p)].filler)
#define mdef_n_ciphone(m)		((m)->n_ciphone)
#define mdef_n_phone(m)			((m)->n_phone)
#define mdef_n_sseq(m)			((m)->n_sseq)
#define mdef_n_emit_state(m)		((m)->n_emit_state)
#define mdef_n_sen(m)			((m)->n_sen)
#define mdef_n_tmat(m)			((m)->n_tmat)
#define mdef_pid2ssid(m,p)		((m)->phone[(p)].ssid)
#define mdef_pid2tmatid(m,p)		((m)->phone[(p)].tmat)
#define mdef_silphone(m)		((m)->sil)
#define mdef_sen2cimap(m)		((m)->sen2cimap)
#define mdef_sseq2sen(m,ss,pos)		((m)->sseq[(ss)][(pos)])
#define mdef_pid2ci(m,p)		((m)->phone[(p)].ci)
#define mdef_cd2cisen(m)		((m)->cd2cisen)
179 
180 /**
181  * Initialize the phone structure from the given model definition file.
182  * It should be treated as a READ-ONLY structure.
183  * @return pointer to the phone structure created.
184  */
185 mdef_t *mdef_init (char *mdeffile, /**< In: Model definition file */
186 		   int breport     /**< In: whether to report the progress or not */
187     );
188 
189 
190 /**
191     Get the ciphone id given a string name
192     @return ciphone id for the given ciphone string name
193 */
194 int mdef_ciphone_id(mdef_t *m,		/**< In: Model structure being queried */
195                     char *ciphone	/**< In: ciphone for which id wanted */
196     );
197 
198 /**
199     Get the phone string given the ci phone id.
200     @return: READ-ONLY ciphone string name for the given ciphone id
201 */
202 const char *mdef_ciphone_str(mdef_t *m,	/**< In: Model structure being queried */
203                              int ci	/**< In: ciphone id for which name wanted */
204     );
205 
206 /**
207     Decide whether the phone is ci phone.
208     @return 1 if given triphone argument is a ciphone, 0 if not, -1 if error
209 */
210 int mdef_is_ciphone (mdef_t *m,		/**< In: Model structure being queried */
211                      int p		/**< In: triphone id being queried */
212     );
213 
214 /**
215    Decide whether the senone is a senone for a ci phone, or a ci senone
216    @return 1 if a given senone is a ci senone
217 */
218 int mdef_is_cisenone(mdef_t *m,               /**< In: Model structure being queried */
219                      int s		        /**< In: senone id being queried */
220     );
221 
222 /**
223     Decide the phone id given the left, right and base phones.
224     @return: phone id for the given constituents if found, else BAD_S3PID
225 */
226 int mdef_phone_id (mdef_t *m,		/**< In: Model structure being queried */
227                    int b,		/**< In: base ciphone id */
228                    int l,		/**< In: left context ciphone id */
229                    int r,		/**< In: right context ciphone id */
230                    word_posn_t pos	/**< In: Word position */
231     );
232 
233 /**
234  * Create a phone string for the given phone (base or triphone) id in the given buf.
235  * @return 0 if successful, -1 if error.
236  */
237 int mdef_phone_str(mdef_t *m,		/**< In: Model structure being queried */
238                    int pid,		/**< In: phone id being queried */
239                    char *buf		/**< Out: On return, buf has the string */
240     );
241 
242 /**
243  * Compare the underlying HMMs for two given phones (i.e., compare the two transition
244  * matrix IDs and the individual state(senone) IDs).
245  * @return 0 iff the HMMs are identical, -1 otherwise.
246  */
247 int mdef_hmm_cmp (mdef_t *m,	/**< In: Model being queried */
248                   int p1, 	/**< In: One of the two triphones being compared */
249                   int p2	/**< In: One of the two triphones being compared */
250     );
251 
252 /** Report the model definition's parameters */
253 void mdef_report(mdef_t *m /**<  In: model definition structure */
254     );
255 
256 /** RAH, For freeing memory */
257 void mdef_free_recursive_lc (ph_lc_t *lc /**< In: A list of left context */
258     );
259 void mdef_free_recursive_rc (ph_rc_t *rc /**< In: A list of right context */
260     );
261 
262 /** Free an mdef_t */
263 void mdef_free (mdef_t *mdef /**< In : The model definition*/
264     );
265 
266 
267 #ifdef __cplusplus
268 }
269 #endif
270 
271 #endif
272