1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* srch.h
39  * HISTORY
40  * $Log$
41  * Revision 1.1  2006/04/05  20:27:30  dhdfu
42  * A Great Reorganzation of header files and executables
43  *
44  * Revision 1.2  2006/02/23 15:26:10  arthchan2003
45  * Merged from SPHINX3_5_2_RCI_IRII:
46  *
47  * Summary of changes. Detail could be seen in the comments from the
48  * branches.
49  *
50  *  After 6 months, we have two more searches using interface
51  * provided by srch.c. That included an adapted version of Sphinx 2's FSG
52  * search.  Also, the original version of flat-lexicon decoding search.
53  *
54  * Second stage search operation is still not properly put in the srch_t
55  * structure.  We should create function hooks that allow developer to
56  * put the code more properly than now.
57  *
58  * The interface of srch.c is still not very completed. Things we should
59  * support include switching of AM and MLLR.  They are currently
60  * commented.
61  *
62  * Mode 5, the word-dependent tree copies are now fended off from the
63  * users.
64  *
65  * Mode 2, the FSG search are opened.  It is not very well tested so the
66  * user will be warned about its nature.
67  *
68  * Revision 1.1.4.15  2006/01/16 20:01:20  arthchan2003
69  * Added Commented code in srch.[ch] for second-stage rescoring. Not used for now.
70  *
71  * Revision 1.1.4.14  2005/11/17 06:36:36  arthchan2003
72  * There are several important changes. 1, acoustic score scale has changed back to put it the search structure.  This fixed a bug introduced pre-2005 code branching where only the scaling factor of the last frame. 2, Added a fmt argument of matchseg_write , implemented segmentation output for s2 and ctm file format. matchseg_write also now shared across the flat and tree decoder now. 3, Added Rong's read_seg_hyp_line.
73  *
74  * Revision 1.1.4.13  2005/09/25 19:23:55  arthchan2003
75  * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code.
76  *
77  * Revision 1.1.4.12  2005/09/18 01:44:12  arthchan2003
78  * Very boldly, started to support flat lexicon decoding (mode 3) in srch.c.  Add log_hypseg. Mode 3 is implemented as srch-one-frame implementation. Scaling doesn't work at this point.
79  *
80  * Revision 1.1.4.11  2005/09/11 23:07:28  arthchan2003
81  * srch.c now support lattice rescoring by rereading the generated lattice in a file. When it is operated, silence cannot be unlinked from the dictionary.  This is a hack and its reflected in the code of dag, kbcore and srch. code
82  *
83  * Revision 1.1.4.10  2005/08/02 21:37:28  arthchan2003
84  * 1, Used s3_cd_gmm_compute_sen instead of approx_cd_gmm_compute_sen in mode 2, 4 and 5.  This will suppose to make s3.0 to be able to read SCHMM and use them as well. 2, Change srch_gmm_compute_lv2 to accept a two-dimensional array (no_stream*no_coeff) instead of a one dimensional array (no_coeff).
85  *
86  * Revision 1.1.4.9  2005/07/24 19:35:59  arthchan2003
87  * Added GAUDEN_EVAL_WINDOW in srch.h. Assuming this is property of a search.
88  *
89  * Revision 1.1.4.8  2005/07/24 01:39:26  arthchan2003
90  * Added srch_on_srch_frame_lv[12] in the search abstraction routine.  This will allow implementation just provide the search for one frame without supplying all function pointer in the standard abstraction.
91  *
92  * Revision 1.1.4.7  2005/07/22 03:41:05  arthchan2003
93  * 1, (Incomplete) Add function pointers for flat foward search. Notice implementation is not yet filled in. 2, adding log_hypstr and log_hyp_detailed.  It is sphinx 3.0 version of matchwrite.  Add it to possible code merge.
94  *
95  * Revision 1.1.4.6  2005/07/17 05:54:55  arthchan2003
96  * replace vithist_dag_write_header with dag_write_header
97  *
98  * Revision 1.1.4.5  2005/07/13 18:46:39  arthchan2003
99  * Re-included srch_fsg.h
100  *
101  * Revision 1.1.4.4  2005/07/07 02:37:39  arthchan2003
102  * 1, Changed names of srchmode* functions to srch_mode*, 2, complete srch_mode_index_to_str, 3, Remove srch_rescoring and ask implementation to call these "rescoring functions" themselves.  The reason is rescoring is not as universal as I would think in the general search. I think search implementer should be the one who decide whether rescoring is one part of their search algorithms
103  *
104  * Revision 1.1.4.3  2005/07/04 07:18:49  arthchan2003
105  * Disabled support of FSG. Added comments for srch_utt_begin and srch_utt_end.
106  *
107  * Revision 1.1.4.2  2005/07/03 23:04:55  arthchan2003
108  * 1, Added srchmode_str_to_index, 2, called the deallocation routine of the search implementation layer in srch_uninit
109  *
110  * Revision 1.1.4.1  2005/06/28 07:03:01  arthchan2003
111  * Added read_fsg operation as one method. Currently, it is still not clear how it should iteract with lm
112  *
113  * Revision 1.1  2005/06/22 02:24:42  arthchan2003
114  * Log. A search interface implementation are checked in. I will call
115  * srch_t to be search abstraction or search mechanism from now on.  The
116  * major reason of separating with the search implementation routine
117  * (srch_*.[ch]) is that search is something that people could come up
118  * with thousands of ways to implement.
119  *
120  * Such a design shows a certain sense of defiance of conventional ways
121  * of designing speech recognition. Namely, **always** using generic
122  * graph as the grandfather ancester of every search lattice.  This could
123  * 1) break a lot of legacy optimization code. 2) could be slow depends
124  * on the implementation.
125  *
126  * The current design only specify the operations that are supposed to be
127  * generic in every search (or atomic search operations (ASOs)).
128  * Ideally, users only need to implement the interface to make the code
129  * work for another search.
130  *
131  * From this point of view, the current check-in still have some
132  * fundamental flaws.  For example, the communication mechanism between
133  * different atomic search operations are not clearly defined. Scores are
134  * now computed and put into structures of ascr. (ascr has no clear
135  * interface to outside world). This is something we need to improve.
136  *
137  * Revision 1.18  2005/06/16 04:59:10  archan
138  * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale.
139  *
140  * Revision 1.17  2005/06/10 03:40:57  archan
141  * 1, Fixed doxygen documentation of srch.h, 2, eliminate srch.h C-style functions. 3, Start to fend off the users for using mode 5.  We are ready to merge the code.
142  *
143  * Revision 1.16  2005/06/10 03:01:50  archan
144  * Fixed file_open.
145  *
146  * Revision 1.15  2005/06/09 21:03:33  archan
147  * Update srch.h and srch_debug.c such that include files doesn't depend on explicitly specified directory name.  Rather it would be taken care by -I option in Makefile.am
148  *
149  * Revision 1.14  2005/05/11 06:10:38  archan
150  * Code for lattice and back track pointer table dumping is now wrapped in reg_result_dump.  The function is shared across mode 4 and mode 5.  Possibly later for mode 3 and mode 6 as well.
151  *
152  * Revision 1.13  2005/05/11 00:18:45  archan
153  * Add comments on srch.h and srch_time_switch_tree.h and srch_debug.h on how things work. A very detail comment is added in srch.h to describe how generally srch_t is interacting with other parts of the code.
154  *
155  * Revision 1.12  2005/05/04 05:15:25  archan
156  * reverted the last change, seems to be not working because of compilation issue. Try not to deal with it now.
157  *
158  * Revision 1.1  2005/05/04 04:46:04  archan
159  * Move srch.c and srch.h to search. More and more this type of refactoring will be done in future
160  *
161  * Revision 1.10  2005/05/03 04:09:09  archan
162  * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore.  This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame.  The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century.  But well, after all, everything needs a start.  I will then really get the results from the search and see how it looks.
163  *
164  * Revision 1.9  2005/04/25 19:22:47  archan
165  * Refactor out the code of rescoring from lexical tree. Potentially we want to turn off the rescoring if we need.
166  *
167  * Revision 1.8  2005/04/22 04:22:36  archan
168  * Add gmm_wrap, this will share code across op_mode 4 and op_mode 5. Also it also separate active senone selection into a different process.  I hope this is the final step before making the WST search works.  At the current stage, the code of mode-5 looks very much alike mode-4.  This is intended because in Prototype 4, tail sharing will be used to reduce memory.
169  *
170  * Revision 1.7  2005/04/21 23:50:26  archan
171  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
172  *
173  * Revision 1.6  2005/04/20 03:42:55  archan
174  * srch.c now is the only of the master search driver. When there is any change in the **interaction** of different blocks, srch.c should be changed first.  Then the search implenetation, such as srch_time_switch_tree.c
175  *
176  * Revision 1.5  2005/03/30 01:22:47  archan
177  * Fixed mistakes in last updates. Add
178  *
179  *
180  * 17-Mar-2005 A. Chan (archan@cs.cmu.edu) at Carnegie Mellon University
181  * 1            Started. This replaced utt.c starting from Sphinx 3.6.
182  */
183 
184 #include <stdio.h>
185 
186 #include <s3types.h>
187 #include <glist.h>
188 #include "dag.h"
189 #include "lm.h"
190 #include "ascr.h"
191 #include "adaptor.h"
192 #include "stat.h"
193 #include "fast_algo_struct.h"
194 #include "kbcore.h"
195 #include "kb.h"
196 
197 
198 /* Mode 1 */
199 #include "srch_allphone.h"
200 
201 /* Mode 2 */
202 #include "srch_fsg.h"
203 
204 /* Mode 3 */
205 #include "srch_flat_fwd.h"
206 
207 /* Mode 4 */
208 #include "srch_time_switch_tree.h"
209 
210 /* Mode 5 */
211 #include "srch_word_switch_tree.h"
212 
213 /* Mode 1368*/
214 #include "srch_do_nothing.h"
215 
216 /* Mode 1369*/
217 #include "srch_debug.h"
218 
219 
220 #include "srch_output.h"
221 
222 #ifndef _SRCH_H_
223 #define _SRCH_H_
224 
225 
226 #ifdef __cplusplus
227 extern "C" {
228 #endif
229 #if 0
230 /* Fool Emacs. */
231 }
232 #endif
233 
234 #define SRCH_SUCCESS 0
235 #define SRCH_FAILURE 1
236 
237 /**
238    The many operation modes of srch.c.
239 
240    Mode 1-4 are working modes.  Not all of them are functioning as at
241    but we have plans to migrate to that.
242 
243    Just for the sake of it. mode 6-10 are assigned to 5 working
244    programmers of CMU Sphinx. We don't expect too much from them. So
245    let's see how it goes.
246 
247    Mode 88 is also assigned but this is just for the sake of it.
248  */
249 
250 #define OPERATION_ALIGN         0 /**< (Reserved but not used) force
251 				     alignment mode. It is
252 				     questionable whether this belongs
253 				     in the decoder at all since it's
254 				     mostly used in training.
255 				  */
256 #define OPERATION_ALLPHONE      1 /**< Phoneme recognition.
257 				    */
258 #define OPERATION_GRAPH         2 /**< FSG mode, adapated from sphinx 2
259 				     specify -fsg in the current decode
260 				     interface should work.
261 				   */
262 #define OPERATION_FLATFWD       3 /**< Flat-lexicon decoding mode
263 				     In CMU, it is well-used as a
264 				     research tool for acoustic and
265 				     language modeling research. Its
266 				     shell, decode_anytopo is still
267 				     one of the standard executables.
268 				   */
269 #define OPERATION_TST_DECODE    4 /**< Tree-lexicon decoding with
270 				     time-switching tree copies
271 				     the current default.  Or what I
272 				     called the "magic wheel" search.
273 				     It is a practical and useful
274 				     search techniques.
275 				   */
276 #define OPERATION_WST_DECODE    5 /**< (Reserved but not used)
277 				     Tree-lexicon decoding with
278 				     word-switching tree copies.  Work
279 				     for bigram with trigram
280 				     rescoring.  Not well developed
281 				     enough to be deployed.
282 				   */
283 #define OPERATION_EVANDRO_MODE  6 /**< (Reserved but not used) Dr.
284 				     Evandro Gouvea is a very defensive
285 				     programmer. He is very clever and
286 				     has strong mathematical
287 				     background.  I would guess his
288 				     search code would be very solid.
289 				   */
290 #define OPERATION_DAVID_MODE    7 /**< (Reserved but not used) It is
291 				     not an illusion that David
292 				     Huggins-Daines is productive.
293 				     The reason he hasn't touched
294 				     the search part yet is because
295 				     he hasn't read Richard Bellman's
296 				     "Dynamic Programming" and Arthur
297 				     Chan's C++ implementation of
298 				     arec.  He will definitely show
299 				     his super-fast speed in search
300 				     implementation as well.
301 				   */
302 #define OPERATION_ARTHUR_MODE   8 /**< (Reserved but not used) Arthur
303 				     Chan is the guy who think of this
304 				     search mode thing. He wrote mode
305 				     5. Just like plan 9.  It is quite
306 				     a failure.  Though, this guy is
307 				     not that easy-going, he must have
308 				     think of something this time.
309 				   */
310 #define OPERATION_YITAO_MODE    9 /**< (Reserved but not used) Yitao Sun
311 				     works mainly on grammar, CFG, FSG
312 				     and stuffs.  Likely his search will
313 				     also reflect his research direction.
314 				   */
315 #define OPERATION_RAVI_MODE    10 /**< (Reserved but not used) Mosur
316 				     Ravishankar works out both s2 and
317 				     s3 He is also the original author
318 				     of mode 2, 3 and 4. (In practice,
319 				     Arthur Chan's mode 5 really
320 				     couldn't match mode 4 at all.)
321 				     He always has a lot of
322 				     interesting ideas in his mind. Likely
323 				     it might think of interesting search
324 				     similar to mode 4.
325 				   */
326 #define OPERATION_STEVE_MODE   88 /**< (Reserved but not used) Steven
327 				     Lee is the guy who work out the
328 				     a-star search for the MIT
329 				     segmental recognizer.  He yielded
330 				     to Arthur Chan and become one of
331 				     the developers of CMU Sphinx.
332 				     But his heart is still with MIT,
333 				     MIT SLS.  Likely he will still
334 				     be happy that 88 is a very lucky
335 				     number in Chinese.
336 				  */
337 #define OPERATION_DO_NOTHING 1368 /**< Do nothing mode. as it means
338 				     it does nothing. It is used as
339 				     one of the function test of the
340 				     search operator.
341 				  */
342 
343 #define OPERATION_DEBUG      1369 /**< Debug mode, it dumps the
344 				     internal function call in srch.c
345 				     layer.  ARCHAN 20050329: 1369 has
346 				     no meaning at all, I just love to
347 				     call it in that way.
348 				  */
349 
350 #define GRAPH_STRUCT_FLAT 0
351 #define GRAPH_STRUCT_TST 1
352 #define GRAPH_STRUCT_WST 2
353 #define GRAPH_STRUCT_GENGRAPH 3
354 #define GRAPH_STRUCT_PHMM 4
355 
356 #define GMM_STRUCT_CDHMM 0
357 #define GMM_STRUCT_SCHMM 1
358 
359 
360 #define GAUDEN_EVAL_WINDOW 8 /*Moving window length when frames are
361 			       considered as blocks, currently used in
362 			       3.0 family of tools. */
363 
364 #define DFLT_UTT_SIZE 5000 /**< The default number of frames of an utterance */
365 #define DFLT_NUM_SEGS 200  /**< The default number of segments of an utterance */
366 
367 
368 
369 /* \struct grp_str_t
370  */
371 typedef struct {
372     void *graph_struct; /**< The graph structure */
373     int32 graph_type;   /**< The graph type */
374 }grp_str_t;
375 
376 
377 
378 /**
379     \file srch.h
380     \brief search abstraction.
381 
382     Written at 20050510 by ARCHAN.
383 
384     Mechanism/implementation separation
385 
386     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
387 
388     Starting from Sphinx 3.6, the implementation of the search has
389     conceptually separated into two layers.  The top layer srch.c
390     defines the top level atomic function defintion (implemented as
391     function pointers) that every search-like operations will share.
392 
393     To understand what it means, consider two seemingly different
394     search-like operations, alignment and decoding.  Both of them
395     required 1) initialization, 2) propropagte 1 frame. 3) termniation.
396     Each of these operation can well be defined as one single
397     interface.
398 
399     Some people call this type of implementation as "C-class" which I
400     think is very appropiate.  Because in general, C++, Objective-C and
401     D actually used the same mechanism to implement the concept
402     "class".  Obviously, the actual implementation was hidden in the
403     compiler in those cases so that's why so many people are confused.
404 
405     Why polymorphism is important?
406 
407     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
408 
409     Before sphinx 3.6, maintainers (that includes me) tend to think
410     that different operations would require different treatment.  The
411     consequence of that is different opertations tends to have
412     different implementations, styles.  Code tends to be duplicated if
413     programmers think in this way.  One big problem is the fact that we
414     had so called s3 slow and s3 fast and there were 3000 lines
415     duplication.
416 
417     Code duplication also tends compound. i.e. if code was duplicated
418     twice, it will be duplicated four times.  That reduces
419     productivitiy of the team.
420 
421     Other reasons why srch.c is written
422 
423     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
424 
425     At the time when we implemented this routine, we actually didn't
426     know whether mode 5 (word tree copies) will work or not.
427     Therefore, letting the code of mode 4 (lucky wheel search) and mode
428     5 to coexist will be the best.
429 
430 
431     "Embedding" of multi-stage search into the dynamic programming
432 
433     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
434 
435     Another interesting part (Interesting in the sense that I would
436     write a paper on this.) of the search is that we (I will say mainly
437     me because this is not new at all.) realize that approximiate
438     search (i.e. something like phoneme-lookahead), detail search
439     (i.e. something like hard-core tree and fsm search) and high level
440     rescoring using knowledge source (e.g. using LM to rescore N-best)
441     could all be incorporated into the first pass search and
442     potentially make the code more verstille.
443 
444     They key concept on how to do this is here: at one frame, one can
445     carry out the above three operations and make the search faster
446     because of 1) heuristics value obtained in the approximate search
447     2) early incorporation of high-level knowledge in the rescoring stage.
448 
449     The naming of function interfaces shows this tendency. lv1 means the
450     approximate first-stage search. lv2 means the detail second-stage
451     search. rescoring means the use of high-level information in
452     rescoring during the search.
453 
454     The scope of what srch.c defines
455 
456     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
457 
458     Written at 20050510
459 
460     At the time I wrote this, we only implement mode 4 (Ravi's
461     implementation in Sphinx 3.2, I usually call it "lucky wheel"
462     implementation.  (See my description in srch_time_switch_tree.h),
463     mode 5 (My implementation using tree copies.)  and mode 1369 (A
464     debug mode of the search mechanism where only a prompt will be
465     displayed and show that a function is called. Why 1369? coz I am a
466     nerd. :-))
467 
468     Though only three modes of the search were written. I found it to
469     be utmost important to share my imagination of what we can actually
470     do using this interface of the search. In theory, the slow search
471     (pre-defined as mode 3) and the fast searches (pre-defined as mode
472     4 and mode 5) can be incorporated using this interface. s3align and
473     s3allphone (pre-defined as mode 0 and 1) in s3.0 family can also be
474     implemented in this way.
475 
476     Applications such as keyword spotting and speaker verifcation. They
477     are not defined at this point but they are possible using the
478     architecture
479 
480     What limits this architecture is that it might not be able to
481     implement a wide class of segmental models.  I will speculate if a
482     recursive formulat can be derived for a particular type of model
483     (segmental or non-segmental, HMM or non-HMM).  The current
484     interface could be overloaded and allow implementation of them.
485     There are definitely some types of segmental model are very hard to
486     implement using this structure; e.g. MIT segmental model (pre-1999)
487     where segmentation was generated by low-level information first and
488     search were performed on segments.
489 
490 
491     A general guide-line on how to write a search implementation
492 
493     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
494 
495     1, The batch mode search operation and the on-line mode operation
496     can always share code. If the implementation is duplicated, it
497     shows a bad omen.
498 
499     2, Knowledge source (such as LM and finite state grammer) always
500     change the graph structure. That is why srch_{set,add,delete}_lm
501     was defined, if we have multiple types of knowledge
502     sources. Interfaces should be added in srch.c and the search
503     operation (e.g. search/srch_time_switch_tree.c) provides the actualy
504     implemenation
505 
506     3, GMM computation and search can be easily separated into two
507     routines. Therefore, instead of wrap it up in individual searches,
508     a new set of function called gmmwrap.c is used which conform the
509     interface in srch.c.
510 
511     4, Mechanism such as phoneme lookahead and generally fast match are
512     all similar in the sense that. They look forward a couple of frames
513     and compute a heuristic score. The windows controlling mechanism
514     should be coded in srch.c . The actual heuristic should be coded
515     somewhere else.
516 
517     5, grh contains the actual data structure that stores the graph. We
518     didn't use WFST as a common structure because we found that it has
519     certain fact we don't know at the time we implemented.
520 
521     6, Always get back to the debug mode (mode 1369) if you want to
522     debug the search mechanism
523 
524     7, (V. Imp.) When in doubt, ask Arthur Chan. If he is not in CMU
525     any more, brainstorm how to get him back.  He is a nice guy and has
526     no problem in talking.
527 
528 */
529 typedef struct srch_funcs_s {
530     /*
531       Function pointers that perform the operations.  Every mode will
532       set these pointers at the beginning of the search.
533     */
534 
535     /** Initialization of the search, coz the graph type can be different */
536     int (*init)(kb_t *kb, /**< Pointer of kb_t which srch_init wants to copy from */
537                 void* srch_struct /**< a pointer of srch_t */
538         );
539 
540     /**< Un-Initialize of the search. */
541     int (*uninit)(
542         void* srch_struct /**< a pointer of srch_t */
543         );
544     /**< Begin search for one utterance */
545     int (*utt_begin)(
546         void* srch_struct /**< a pointer of srch_t */
547         );
548 
549     /** End search for one utterance */
550     int (*utt_end)(
551         void* srch_struct /**< a pointer of srch_t */
552         );
553     /** Actual decoding operation */
554     int (*decode)(
555         void* srch_struct /**< a pointer of srch_t */
556         );
557 
558     /** Set LM operation.  */
559     int (*set_lm)(
560         void* srch_struct, /**< a pointer of srch_t */
561         const char *lmname /**< The LM name */
562         );
563 
564     /** Add LM operation */
565     int (*add_lm)(void* srch_struct, /**< a pointer of srch_t */
566                   lm_t* lm,          /**< A new lm */
567                   const char *lmname /**< The LM name */
568         );
569 
570     /** Delete LM operation */
571     int (*delete_lm)(void* srch_struct,  /**< a pointer of srch_t */
572                      const char *lmname  /**< The LM name */
573         );
574 
575     /** Read FSG operation*/
576 #if 0
577     word_fsg_t* (*read_fsgfile)(void* srch_struct, /**< a pointer of srch_t */
578                                 const char* fsgname /** The fsg file name*/
579 
580         );
581 #endif
582     /* The 4 operations that require switching during the approximate search process */
583     /**< lv1 stands for approximate search. Currently not used. */
584 
585     /** Compute Approximate GMM */
586     int (*gmm_compute_lv1)(void* srch_struct,  /**< a pointer of srch_t */
587                            float32 *feat,      /**< The feature vector */
588                            int32 frmno_lp1,    /**< The frame for the cache */
589                            int32 frmno_lp2     /**< The frame for the windows */
590         );
591 
592 
593     /* The level 1 search functions are not yet fully used. Not all of them are defined nowWhen fast
594        match is needed. We will need them more.
595     */
596     int (*one_srch_frame_lv1)(void* srch_struct /**< a pointer of srch_t */
597         );
598 
599     int (*hmm_compute_lv1)(void* srch_struct);
600     int (*eval_beams_lv1)(void* srch_struct);
601     int (*propagate_graph_ph_lv1)(void* srch_struct);
602     int (*propagate_graph_wd_lv1)(void* srch_struct);
603 
604     /* The 4 operations that require switching during the detail search process */
605     /** lv2 stands for detail search. */
606 
607 
608     /** Compute detail (CD) GMM scores or lv2*/
609     int (*gmm_compute_lv2)(void* srch_struct,  /**< a pointer of srch_t */
610                            float32 **feat,      /**< A feature vector */
611                            int32 time          /**< The frame we want to compute detail score */
612         );
613 
614 
615     /** A short-cut function that allows implementer could just
616         implement searching for one frame without implement the
617         following 4 fuctions.
618     */
619     int (*one_srch_frame_lv2)(void* srch_struct /**< a pointer of srch_t */
620         );
621 
622 
623     /** Compute detail (CD) HMM scores or lv2*/
624     int (*hmm_compute_lv2)(void* srch_struct,  /**< a pointer of srch_t */
625                            int32 frmno         /**< The frame we want to compute detail score */
626         );
627 
628     /** Compute the beams*/
629     int (*eval_beams_lv2)(void* srch_struct     /**< a pointer of srch_t */
630         );
631 
632     /** Propagate the graph in phone level */
633     int (*propagate_graph_ph_lv2)(void* srch_struct, /**< a pointer of srch_t */
634                                   int32 frmno        /**< The frame no. */
635         );
636 
637     /** Propagate the graph in word level */
638     int (*propagate_graph_wd_lv2)(void* srch_struct,  /**< a pointer of srch_t */
639                                   int32 frmno       /**< The frame no. */
640         );
641 
642     /** Rescoring srch */
643     int (*rescoring) (void* srch_struct,  /**< a pointer of srch_t */
644                       int32 frmno         /**< The frame no. */
645         );
646 
647     int (*frame_windup) (void * srch_struct, int32 frmno);
648     int (*compute_heuristic) (void * srch_struct, int32 win_efv);
649     int (*shift_one_cache_frame) (void *srch_struct,int32 win_efv);
650     int (*select_active_gmm) (void *srch_struct);
651 
652 
653     /**
654         Second stage functions. They provide a generalized interface for
655         different modes to generate output
656     */
657     /**
658        Generation of hypothesis (*.hyp). Notice, displaying hypothesis is taken care by srch.c itself.
659     */
660     glist_t (*gen_hyp) (void * srch_struct /**< a pointer of srch_t */
661         );
662 
663     /**
664        Generation of directed acyclic graph (*.lat.gz). Notice , dumping
665        the dag will be taken care by srch.c. There is mode specific
666        optimization.
667        @return a dag which represent the word graphs.
668     */
669     dag_t* (*gen_dag) (void* srch_struct, /**< a pointer of srch_t */
670                        glist_t hyp
671         );
672 
673     /**
674        Dump vithist
675     */
676     int (*dump_vithist)(void * srch_struct /**< a pointer of srch_t */
677         );
678 
679     /**
680        Interface of best path search.
681     */
682     glist_t (*bestpath_impl)(void *srch_struct, /**< a pointer of srch_t */
683                              dag_t *dag
684         );
685 
686     /**
687        Interface for sphinx3 dag dumping function
688     */
689     int (*dag_dump) (void * srch_struct,
690                      dag_t *dag
691         );
692 
693     /**
694        Interface of N-best search.
695     */
696     glist_t (*nbest_impl)(void *srch_struct, /**< a pointer of srch_t */
697                           dag_t *dag
698         );
699 
700     /** Empty "guard" element which does nothing. */
701     void *nothing;
702 } srch_funcs_t;
703 
704 typedef struct srch_s {
705     /**
706      * Methods specific to a particular search mode.
707      **/
708     srch_funcs_t *funcs;
709 
710     grp_str_t* grh;     /**< Pointer to search specific structures */
711     int op_mode;        /**< The operation mode */
712     stat_t *stat;       /**< Pointer to the statistics structure */
713     char *uttid;        /**< Reference to UttID */
714     char *uttfile;      /**< Reference to Utterance filename (or NULL) */
715 
716     /*
717       These variables control the logistic of a search operation.  The
718       are global to all different search modes.
719     */
720     int32 cache_win;    /**< The windows lengths of the cache for approximate search */
721     int32 cache_win_strt;    /**< The start index of the window near the end of a block */
722 
723     int32 senscale;     /**< TEMPORARY VARIABLE: Senone scale */
724 
725     int32 *ascale;   /**< Same as senscale but it records the senscale for the whole sentence.
726                         The default size is 3000 frames.
727                      */
728     int32 ascale_sz;       /**< Size of the ascr_scale*/
729     int32 num_frm;        /**< Number of frames processed */
730 
731     int32 *segsz;   /**< Size of segments for each call */
732     int32 segsz_sz;      /**< Size of segments size*/
733 
734     int32 num_segs;     /**< In one search, (i.e. between one srch_utt_begin and one srch_utt_end)
735                            The number of segments the search has decoded.
736                            That also means the number of times srch_utt_decode_blk is called.
737                         */
738 
739 
740     /*
741        Auxillary Structures for the search.
742     */
743     int32 exit_id;		/**< Lattice ID for exit word (-1 if
744                                  * utterance is not complete) */
745     dag_t *dag;			/**< Word graph representation of completed utterance. */
746 
747     /* ARCHAN: Various pruning beams, put them together such that it looks more logical. */
748     ascr_t *ascr;		  /**< Pointer to Senone and composite senone scores for one frame */
749     beam_t *beam;		  /**< Pointer to Structure that wraps up parameters related to beam pruning */
750     fast_gmm_t *fastgmm;    /**< Pointer to Structure that wraps up parameters for fast GMM computation */
751     pl_t *pl;              /**< Pointer to Structure that wraps up parameter for phoneme look-ahead */
752     adapt_am_t * adapt_am; /** Pointer to AM adaptation structure */
753     kbcore_t *kbc;      /**< Pointer to the kbcore */
754 
755 
756     FILE *matchfp;          /**< Copy of File handle for the match file */
757     FILE *matchsegfp;       /**< Copy of File handle for the match segmentation file */
758 
759     FILE *hmmdumpfp;        /**< Copy of File handle for dumping hmms for debugging */
760 
761     /* FIXME, duplicated with fwd_dbg_t */
762     int32 hmm_dump_sf;	/**< Start frame for HMMs to be dumped for debugging */
763     int32 hmm_dump_ef;	/**< End frame for HMMs to be dumped for debugging */
764 }srch_t;
765 
766 /**
767     Translate mode string (as passed in the -mode argument) to number.
768     Names chosen for familiarity with Sphinx2/PocketSphinx.
769 
770     The currently valid operation modes are
771 
772     Strings        Operation Mode(Mode Idx)
773     ---------------------------------------
774     allphone       OPERATION_ALLPHONE(1)
775 
776     fsg            OPERATION_GRAPH(2)
777 
778     fwdflat        OPERATION_FLATFWD(3)
779 
780     fwdtree        OPERATION_TST_DECODE(4)
781 
782     @return the mode index if it is valid, -1 if it is not.
783 */
784 
785 int32 srch_mode_str_to_index(const char* mode_str);
786 
787 /** Translate mode string to mode index. User need to supply an initialized
788     @return a mode string;
789     @see srchmode_str_to_index
790 */
791 
792 char* srch_mode_index_to_str(int32 index);
793 
794 
795 /* The following are C-style method for srch structure.  In theory,
796    users could used both C-style and function pointer style to access
797    functionalities of the code. However, we recommend developers to use
798    the C-style functions because 1) it won't scare people that match, 2)
799    it is more consistent with other modules in sphinx 3.
800 */
801 
802 /** Initialize the search routine, this will specify the type of search
803     drivers and initialized all resouces.  It will do the following things
804 
805     1, Set all function pointers depends on the mode number.
806 
807     2, Initialize parameters which the default search abstraction
808     mechanism required. This currently include, cache_win,
809     cache_win_start.  They control how much delay of the first level
810     and second level search. Also senscale, a parameter which store
811     the largest gmm score for each frame.
812 
813     3, Initialize search-specific data structure using srch_init
814     method of the search class.  That is s->srch_init will be called.
815 
816     @return an initialized srch_t with operation mode op_mode.
817 */
818 
819 srch_t* srch_init(kb_t *kb, /**< In: knowledge base */
820 		  int32 op_mode /**< In: operation mode of the search */
821     );
822 
823 /** Report the search routine */
824 /** using file name of the model definition and directory name to initialize */
825 
826 /** Report the search structure
827  */
828 void srch_report(srch_t* srch /**< In: a search structure */
829     );
830 
831 /**
832  *  Begin decoding of speech for one utterance.
833  *
834  * @return SRCH_SUCCESS if succeed, SRCH_FAILURE if failed.
835  * @see srch_utt_end
836  */
837 
838 int32 srch_utt_begin(srch_t* srch /**< In: a search structure */
839     );
840 
841 /**
842    Decode one block of speech and provide the implementation of the default search abstraction
843 */
844 S3DECODER_EXPORT
845 int32 srch_utt_decode_blk(srch_t* srch, /**< In: a search structure */
846 			  float ***block_feat,  /**< In: a pointer of a two dimensional array */
847 			  int32 block_nfeatvec, /**< In: Number of feature vector */
848 			  int32 *curfrm  /**< In/Out: a pointer of the current frame index*/
849     );
850 
851 /**
852    End decoding of speech for one utterance.
853 */
854 int32 srch_utt_end(srch_t* srch /**< In: a search structure */
855     );
856 
857 /** Wrap up the search routine*/
858 int32 srch_uninit(srch_t* srch /**< In: a search structure */
859     );
860 
861 /**
862  * Get a recognition backtrace, as a list of srch_hyp_t structures.
863  * You must free this with glist_free() after using it. */
864 glist_t srch_get_hyp(srch_t *srch /**< In: a search structure */
865     );
866 
867 /**
868  * Get the complete set of possible hypotheses, as a word graph (DAG).
869  * The dag_t returned by this function remains owned by srch_t.  Do
870  * NOT attempt to free it.
871  */
872 dag_t *srch_get_dag(srch_t *srch);
873 
874 
875 /** Dump recognition result */
876 void reg_result_dump (srch_t* s, /**< A search structure */
877 		      int32 id  /**< Utterance ID */
878     );
879 /**
880    Dump the best senone score for each frame
881 */
882 void write_bstsenscr(FILE *fp, /**< A file pointer */
883 		     int32 numframe, /**< Number of frame in one recognition */
884 		     int32* scale    /**< Scales */
885     );
886 
887 
888 /** using file name of the LM or defined lmctlfn mechanism */
889 S3DECODER_EXPORT
890 int32 srch_set_lm(srch_t* srch,  /**< A search structure */
891 		  const char *lmname /**< LM fie name */
892     );
893 
894 /** delete lm */
895 int32 srch_delete_lm(srch_t* srch,  /**< A search structure */
896 		  const char *lmname /**< LM fie name */
897     );
898 
899 #if 0 /*Tentative: but not yet implemented */
900 int32 srch_set_am(void);
901 
902 /** add new am */
903 int32 srch_add_am(void);
904 
905 /** delete am */
906 int32 srch_delete_am(void);
907 
908 /** add new lm */
909 int32 srch_add_lm(void);
910 
911 
912 /** using file name of the regression class mapping or a directory name to initialize  */
913 int32 srch_set_mllr(void);
914 
915 /** add new mllr */
916 int32 srch_add_mllr(void);
917 
918 /** delete mllr */
919 int32 srch_delete_mllr(void);
920 
921 /** using file name of interpolation file to initialize it */
922 int32 srch_set_lamdafn(void);
923 
924 /** add new mllr */
925 int32 srch_add_lamdafn(void);
926 
927 /** delete mllr */
928 int32 srch_delete_lamdafn(void);
929 
930 /** add new words into the dictionary */
931 int32 srch_add_words_to_dict(void);
932 
933 #endif /* End not implemented */
934 
935 #ifdef __cplusplus
936 }
937 #endif
938 
939 
940 #endif /*_SRCH_H_ */
941