1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* ==================================================================== 3 * Copyright (c) 1999-2014 Carnegie Mellon University. All rights 4 * reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * This work was supported in part by funding from the Defense Advanced 19 * Research Projects Agency and the National Science Foundation of the 20 * United States of America, and the CMU Sphinx Speech Consortium. 21 * 22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * 34 * ==================================================================== 35 * 36 */ 37 38 #ifndef _S3_DICT2PID_H_ 39 #define _S3_DICT2PID_H_ 40 41 /* System headers. */ 42 #include <stdio.h> 43 44 /* SphinxBase headers. */ 45 #include <sphinxbase/logmath.h> 46 #include <sphinxbase/bitvec.h> 47 48 /* Local headers. */ 49 #include "s3types.h" 50 #include "bin_mdef.h" 51 #include "dict.h" 52 53 /** \file dict2pid.h 54 * \brief Building triphones for a dictionary. 55 * 56 * This is one of the more complicated parts of a cross-word 57 * triphone model decoder. The first and last phones of each word 58 * get their left and right contexts, respectively, from other 59 * words. For single-phone words, both its contexts are from other 60 * words, simultaneously. As these words are not known beforehand, 61 * life gets complicated. 62 */ 63 64 #ifdef __cplusplus 65 extern "C" { 66 #endif 67 68 /** 69 * \struct xwdssid_t 70 * \brief cross word triphone model structure 71 */ 72 73 typedef struct { 74 s3ssid_t *ssid; /**< Senone Sequence ID list for all context ciphones */ 75 s3cipid_t *cimap; /**< Index into ssid[] above for each ci phone */ 76 int32 n_ssid; /**< #Unique ssid in above, compressed ssid list */ 77 } xwdssid_t; 78 79 /** 80 \struct dict2pid_t 81 \brief Building composite triphone (as well as word internal triphones) with the dictionary. 82 */ 83 84 typedef struct { 85 int refcount; 86 87 bin_mdef_t *mdef; /**< Model definition, used to generate 88 internal ssids on the fly. */ 89 dict_t *dict; /**< Dictionary this table refers to. */ 90 91 /*Notice the order of the arguments */ 92 /* FIXME: This is crying out for compression - in Mandarin we have 93 * 180 context independent phones, which makes this an 11MB 94 * array. */ 95 s3ssid_t ***ldiph_lc; /**< For multi-phone words, [base][rc][lc] -> ssid; filled out for 96 word-initial base x rc combinations in current vocabulary */ 97 98 99 xwdssid_t **rssid; /**< Right context state sequence id table 100 First dimension: base phone, 101 Second dimension: left context. 102 */ 103 104 105 s3ssid_t ***lrdiph_rc; /**< For single-phone words, [base][lc][rc] -> ssid; filled out for 106 single-phone base x lc combinations in current vocabulary */ 107 108 xwdssid_t **lrssid; /**< Left-Right context state sequence id table 109 First dimension: base phone, 110 Second dimension: left context. 111 */ 112 } dict2pid_t; 113 114 /** Access macros; not designed for arbitrary use */ 115 #define dict2pid_rssid(d,ci,lc) (&(d)->rssid[ci][lc]) 116 #define dict2pid_ldiph_lc(d,b,r,l) ((d)->ldiph_lc[b][r][l]) 117 #define dict2pid_lrdiph_rc(d,b,l,r) ((d)->lrdiph_rc[b][l][r]) 118 119 /** 120 * Build the dict2pid structure for the given model/dictionary 121 */ 122 dict2pid_t *dict2pid_build(bin_mdef_t *mdef, /**< A model definition*/ 123 dict_t *dict /**< An initialized dictionary */ 124 ); 125 126 /** 127 * Retain a pointer to dict2pid 128 */ 129 dict2pid_t *dict2pid_retain(dict2pid_t *d2p); 130 131 /** 132 * Free the memory dict2pid structure 133 */ 134 int dict2pid_free(dict2pid_t *d2p /**< In: the d2p */ 135 ); 136 137 /** 138 * Return the senone sequence ID for the given word position. 139 */ 140 s3ssid_t dict2pid_internal(dict2pid_t *d2p, 141 int32 wid, 142 int pos); 143 144 /** 145 * Add a word to the dict2pid structure (after adding it to dict). 146 */ 147 int dict2pid_add_word(dict2pid_t *d2p, 148 int32 wid); 149 150 /** 151 * For debugging 152 */ 153 void dict2pid_dump(FILE *fp, /**< In: a file pointer */ 154 dict2pid_t *d2p /**< In: a dict2pid_t structure */ 155 ); 156 157 /** Report a dict2pid data structure */ 158 void dict2pid_report(dict2pid_t *d2p /**< In: a dict2pid_t structure */ 159 ); 160 161 /** 162 * Get number of rc 163 */ 164 int32 get_rc_nssid(dict2pid_t *d2p, /**< In: a dict2pid */ 165 s3wid_t w /**< In: a wid */ 166 ); 167 168 /** 169 * Get RC map 170 */ 171 s3cipid_t* dict2pid_get_rcmap(dict2pid_t *d2p, /**< In: a dict2pid */ 172 s3wid_t w /**< In: a wid */ 173 ); 174 175 #ifdef __cplusplus 176 } 177 #endif 178 179 180 #endif 181