1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 /*
35  * fsg_psubtree.h -- Phone-level FSG subtree representing all transitions
36  * out of a single FSG state.
37  * (Note: Currently, it is actually a flat lexicon representation
38  *
39  * **********************************************
40  * CMU ARPA Speech Project
41  *
42  * Copyright (c) 2004 Carnegie Mellon University.
43  * ALL RIGHTS RESERVED.
44  * **********************************************
45  *
46  * HISTORY
47  *
48  * $Log$
49  * Revision 1.1  2006/04/05  20:27:30  dhdfu
50  * A Great Reorganzation of header files and executables
51  *
52  * Revision 1.2  2006/02/23 05:10:18  arthchan2003
53  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3
54  *
55  * Revision 1.1.2.5  2005/07/24 01:34:54  arthchan2003
56  * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID
57  *
58  * Revision 1.1.2.4  2005/07/20 21:18:30  arthchan2003
59  * FSG can now be read, srch_fsg_init can now be initialized, psubtree can be built. Sounds like it is time to plug in other function pointers.
60  *
61  * Revision 1.1.2.3  2005/07/17 05:44:32  arthchan2003
62  * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet.
63  *
64  * Revision 1.1.2.2  2005/07/13 18:39:47  arthchan2003
65  * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro.  There are 8 minor HACKs where functions need to be removed temporarily.  Also, there are three major hacks. 1,  there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables.  This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement.  But I haven't figure it out yet.
66  *
67  * Revision 1.1.2.1  2005/06/27 05:26:29  arthchan2003
68  * Sphinx 2 fsg mainpulation routines.  Compiled with faked functions.  Currently fended off from users.
69  *
70  * Revision 1.1  2004/07/16 00:57:12  egouvea
71  * Added Ravi's implementation of FSG support.
72  *
73  * Revision 1.3  2004/06/25 14:49:08  rkm
74  * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state
75  *
76  * Revision 1.2  2004/05/27 14:22:57  rkm
77  * FSG cross-word triphones completed (but for single-phone words)
78  *
79  * Revision 1.1.1.1  2004/03/01 14:30:31  rkm
80  *
81  *
82  * Revision 1.2  2004/02/27 15:05:21  rkm
83  * *** empty log message ***
84  *
85  * Revision 1.1  2004/02/23 15:53:45  rkm
86  * Renamed from fst to fsg
87  *
88  * Revision 1.4  2004/02/23 15:09:50  rkm
89  * *** empty log message ***
90  *
91  * Revision 1.3  2004/02/19 21:16:54  rkm
92  * Added fsg_search.{c,h}
93  *
94  * Revision 1.2  2004/02/18 15:02:34  rkm
95  * Added fsg_lextree.{c,h}
96  *
97  * Revision 1.1  2004/02/17 21:11:49  rkm
98  * *** empty log message ***
99  *
100  *
101  * 09-Feb-2004	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
102  * 		Started.
103  */
104 
105 
106 #ifndef __S2_FSG_PSUBTREE_H__
107 #define __S2_FSG_PSUBTREE_H__
108 
109 
110 #include <stdio.h>
111 
112 #include <cmd_ln.h>
113 #include <logmath.h>
114 
115 #include "s3types.h"
116 #include "word_fsg.h"
117 #include "fsg.h"
118 #include "hmm.h"
119 #include "dict.h"
120 #include "mdef.h"
121 
122 
123 #ifdef __cplusplus
124 extern "C" {
125 #endif
126 #if 0
127 /* Fool Emacs. */
128 }
129 #endif
130 
131 /*
132  * **HACK-ALERT**!!  Compile-time constant determining the size of the
133  * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv.  (See below.)
134  * But it makes memory allocation simpler and more efficient.
135  */
136 #define FSG_PNODE_CTXT_BVSZ	2
137 
138 typedef struct {
139     uint32 bv[FSG_PNODE_CTXT_BVSZ];
140 } fsg_pnode_ctxt_t;
141 
142 
143 /**
144  * \struct fsg_pnode_s
145  * \brief an fsg node.
146  * All transitions (words) out of any given FSG state represented are by a
147  * phonetic prefix lextree (except for epsilon or null transitions; they
148  * are not part of the lextree).  Lextree leaf nodes represent individual
149  * FSG transitions, so no sharing is allowed at the leaf nodes.  The FSG
150  * transition probs are distributed along the lextree: the prob at a node
151  * is the max of the probs of all leaf nodes (and, hence, FSG transitions)
152  * reachable from that node.
153  *
154  * To conserve memory, the underlying HMMs with state-level information are
155  * allocated only as needed.  Root and leaf nodes must also account for all
156  * the possible phonetic contexts, with an independent HMM for each distinct
157  * context.
158  */
159 typedef struct fsg_pnode_s {
160     /**
161      * If this is not a leaf node, the first successor (child) node.  Otherwise
162      * the parent FSG transition for which this is the leaf node (for figuring
163      * the FSG destination state, and word emitted by the transition).  A node
164      * may have several children.  The succ ptr gives just the first; the rest
165      * are linked via the sibling ptr below.
166      */
167     union {
168         struct fsg_pnode_s *succ;
169         word_fsglink_t *fsglink;
170     } next;
171 
172     /*
173      * For simplicity of memory management (i.e., freeing the pnodes), all
174      * pnodes allocated for all transitions out of a state are maintained in a
175      * linear linked list through the alloc_next pointer.
176      */
177     struct fsg_pnode_s *alloc_next;
178 
179     /*
180      * The next node that is also a child of the parent of this node; NULL if
181      * none.
182      */
183     struct fsg_pnode_s *sibling;
184 
185     /*
186      * The transition (log) probability to be incurred upon transitioning to
187      * this node.  (Transition probabilities are really associated with the
188      * transitions.  But a lextree node has exactly one incoming transition.
189      * Hence, the prob can be associated with the node.)
190      * This is a logs2(prob) value, and includes the language weight.
191      */
192     int32 logs2prob;
193 
194     /*
195      * The root and leaf positions associated with any transition have to deal
196      * with multiple phonetic contexts.  However, different contexts may result
197      * in the same SSID (senone-seq ID), and can share a single pnode with that
198      * SSID.  But the pnode should track the set of context CI phones that share
199      * it.  Hence the fsg_pnode_ctxt_t bit-vector set-representation.  (For
200      * simplicity of implementation, its size is a compile-time constant for
201      * now.)  Single phone words would need a 2-D array of context, but that's
202      * too expensive.  For now, they simply use SIL as right context, so only
203      * the left context is properly modelled.
204      * (For word-internal phones, this field is unused, of course.)
205      */
206     fsg_pnode_ctxt_t ctxt;
207 
208     uint8 ci_ext;		/* This node's CIphone as viewed externally (context) */
209     uint8 ppos;		/* Phoneme position in pronunciation */
210     uint8 leaf;		/* Whether this is a leaf node */
211 
212     /* HMM-state-level stuff here */
213     hmm_t hmm;
214 } fsg_pnode_t;
215 
216 /* Access macros */
217 #define fsg_pnode_leaf(p)	((p)->leaf)
218 #define fsg_pnode_logs2prob(p)	((p)->logs2prob)
219 #define fsg_pnode_succ(p)	((p)->next.succ)
220 #define fsg_pnode_fsglink(p)	((p)->next.fsglink)
221 #define fsg_pnode_sibling(p)	((p)->sibling)
222 #define fsg_pnode_hmmptr(p)	(&((p)->hmm))
223 #define fsg_pnode_ci_ext(p)	((p)->ci_ext)
224 #define fsg_pnode_ppos(p)	((p)->ppos)
225 #define fsg_pnode_leaf(p)	((p)->leaf)
226 #define fsg_pnode_ctxt(p)	((p)->ctxt)
227 
228 #define fsg_pnode_add_ctxt(p,c)	((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f)))
229 
230 
231 /**
232  * Build the phone lextree for all transitions out of state from_state.
233  * Return the root node of this tree.
234  * Also, return a linear linked list of all allocated fsg_pnode_t nodes in
235  * *alloc_head (for memory management purposes).
236  */
237 fsg_pnode_t *fsg_psubtree_init (hmm_context_t *ctx,
238                                 word_fsg_t *fsg, /**< A word fsg */
239 				int32 from_state, /**< from which state to initalize*/
240 				fsg_pnode_t **alloc_head,
241                                 cmd_ln_t *config,
242 				logmath_t *logmath
243     );
244 
245 
246 /**
247  * Free the given lextree.  alloc_head: head of linear list of allocated
248  * nodes updated by fsg_psubtree_init().
249  */
250 void fsg_psubtree_free (fsg_pnode_t *alloc_head);
251 
252 
253 /*
254  * Dump the list of nodes in the given lextree to the given file.  alloc_head:
255  * head of linear list of allocated nodes updated by fsg_psubtree_init().
256  */
257 void fsg_psubtree_dump (fsg_pnode_t *alloc_head, FILE *fp,
258 			dict_t *dict, mdef_t *mdef
259     );
260 
261 
262 /*
263  * Attempt to transition into the given node with the given attributes.
264  * If the node is already active in the given frame with a score better
265  * than the incoming score, nothing is done.  Otherwise the transition is
266  * successful.
267  * Return value: TRUE if the node was newly activated for the given frame,
268  * FALSE if it was already activated for that frame (whether the incoming
269  * transition was successful or not).
270  */
271 int fsg_psubtree_pnode_enter (fsg_pnode_t *pnode,
272                               int32 score,
273                               int32 frame,
274                               int32 bpidx);
275 
276 
277 /*
278  * Mark the given pnode as inactive (for search).
279  */
280 void fsg_psubtree_pnode_deactivate (fsg_pnode_t *pnode);
281 
282 
283 /* Set all flags on in the given context bitvector */
284 void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt);
285 
286 /*
287  * Subtract bitvector sub from bitvector src (src updated with the result).
288  * Return 0 if result is all 0, non-zero otherwise.
289  */
290 uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
291 
292 #ifdef __cplusplus
293 }
294 #endif
295 
296 
297 #endif
298