1 /* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37 /*
38 * dict2pid.c -- Triphones for dictionary
39 *
40 * **********************************************
41 * CMU ARPA Speech Project
42 *
43 * Copyright (c) 1999 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
46 *
47 * HISTORY
48 * $Log$
49 * Revision 1.7 2006/02/22 21:05:16 arthchan2003
50 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH:
51 *
 * 1, Added logic to handle both composite and non-composite left
 * triphones. The composite left triphone logic (the original one) is
 * tested thoroughly. The non-composite triphone (or full triphone) logic
 * is found to have bugs. The latter is fended off from the users at the
 * library level.
57 *
58 * 2, Fixed dox-doc.
59 *
60 * Revision 1.6.4.5 2005/11/17 06:13:49 arthchan2003
61 * Use compressed right context in expansion in triphones.
62 *
63 * Revision 1.6.4.4 2005/10/17 04:48:45 arthchan2003
64 * Free resource correctly in dict2pid.
65 *
66 * Revision 1.6.4.3 2005/10/07 19:03:38 arthchan2003
67 * Added xwdssid_t structure. Also added compression routines.
68 *
69 * Revision 1.6.4.2 2005/09/25 19:13:31 arthchan2003
70 * Added optional full triphone expansion support when building context phone mapping.
71 *
72 * Revision 1.6.4.1 2005/07/17 05:21:28 arthchan2003
73 * Add panic signal to the code, also commentted ldiph_comsseq.
74 *
75 * Revision 1.6 2005/06/21 21:03:49 arthchan2003
76 * 1, Introduced a reporting routine. 2, Fixed doyxgen documentation, 3, Added keyword.
77 *
78 * Revision 1.4 2005/04/21 23:50:26 archan
79 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
80 *
81 * Revision 1.3 2005/03/30 01:22:46 archan
82 * Fixed mistakes in last updates. Add
83 *
84 *
85 * 14-Sep-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
86 * Added dict2pid_comsseq2sen_active().
87 *
88 * 04-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
89 * Started.
90 */
91
92
93 #include <string.h>
94
95 #include "dict2pid.h"
96 #include "logs3.h"
97
98
99 /** \file dict2pid.c
100 * \brief Implementation of dict2pid
101 *
 * A general remark: note that "comsseq" sometimes means *compressed*
 * sequence. That should be understood as a different thing from a
 * *composite* sequence in the source code.
105 */
106
107 /**
108 * Build a glist of triphone senone-sequence IDs (ssids) derivable from [b][r] at the word
109 * begin position. If no triphone found in mdef, include the ssid for basephone b.
110 * Return the generated glist.
111 */
112 static glist_t
ldiph_comsseq(mdef_t * mdef,int32 b,int32 r)113 ldiph_comsseq(mdef_t * mdef, /**< a model definition*/
114 int32 b, /**< base phone */
115 int32 r /**< right context */
116 )
117 {
118 int32 l, p, ssid;
119 glist_t g;
120
121 g = NULL;
122 for (l = 0; l < mdef_n_ciphone(mdef); l++) {
123 p = mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
124 (s3cipid_t) r, WORD_POSN_BEGIN);
125
126 if (IS_S3PID(p)) {
127 gnode_t *gn;
128 ssid = mdef_pid2ssid(mdef, p);
129 for (gn = g; gn; gn = gnode_next(gn))
130 if (gnode_int32(gn) == ssid)
131 break;
132 if (gn == NULL)
133 g = glist_add_int32(g, ssid);
134 }
135 }
136 if (!g)
137 g = glist_add_int32(g, mdef_pid2ssid(mdef, b));
138
139 return g;
140 }
141
142
143 /**
144 * Build a glist of triphone senone-sequence IDs (ssids) derivable from [r][b] at the word
145 * end position. If no triphone found in mdef, include the ssid for basephone b.
146 * Return the generated glist.
147 */
148 static glist_t
rdiph_comsseq(mdef_t * mdef,int32 b,int32 l)149 rdiph_comsseq(mdef_t * mdef, int32 b, int32 l)
150 {
151 int32 r, p, ssid;
152 glist_t g;
153
154 g = NULL;
155 for (r = 0; r < mdef_n_ciphone(mdef); r++) {
156 p = mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
157 (s3cipid_t) r, WORD_POSN_END);
158
159 if (IS_S3PID(p)) {
160 gnode_t *gn;
161 ssid = mdef_pid2ssid(mdef, p);
162 for (gn = g; gn; gn = gnode_next(gn))
163 if (gnode_int32(gn) == ssid)
164 break;
165 if (gn == NULL)
166 g = glist_add_int32(g, ssid);
167 }
168 }
169 if (!g)
170 g = glist_add_int32(g, mdef_pid2ssid(mdef, b));
171
172 return g;
173 }
174
175
176 /**
177 * Build a glist of triphone senone-sequence IDs (ssids) derivable from [b] as a single
178 * phone word. If no triphone found in mdef, include the ssid for basephone b.
179 * Return the generated glist.
180 */
181 static glist_t
single_comsseq(mdef_t * mdef,int32 b)182 single_comsseq(mdef_t * mdef, int32 b)
183 {
184 int32 l, r, p, ssid;
185 glist_t g;
186
187 g = NULL;
188 for (l = 0; l < mdef_n_ciphone(mdef); l++) {
189 for (r = 0; r < mdef_n_ciphone(mdef); r++) {
190 p = mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
191 (s3cipid_t) r, WORD_POSN_SINGLE);
192
193 if (IS_S3PID(p)) {
194 gnode_t *gn;
195 ssid = mdef_pid2ssid(mdef, p);
196 for (gn = g; gn; gn = gnode_next(gn))
197 if (gnode_int32(gn) == ssid)
198 break;
199 if (gn == NULL)
200 g = glist_add_int32(g, ssid);
201 }
202 }
203 }
204 if (!g)
205 g = glist_add_int32(g, mdef_pid2ssid(mdef, b));
206
207 return g;
208 }
209
210
211 /**
212 * Build a glist of triphone senone-sequence IDs (ssids) derivable from [b] as a single
213 * phone word, with a given left context l. If no triphone found in mdef, include the ssid
214 * for basephone b. Return the generated glist.
215 */
216 static glist_t
single_lc_comsseq(mdef_t * mdef,int32 b,int32 l)217 single_lc_comsseq(mdef_t * mdef, int32 b, int32 l)
218 {
219 int32 r, p, ssid;
220 glist_t g;
221
222 g = NULL;
223 for (r = 0; r < mdef_n_ciphone(mdef); r++) {
224 p = mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) l,
225 (s3cipid_t) r, WORD_POSN_SINGLE);
226
227 if (IS_S3PID(p)) {
228 gnode_t *gn;
229 ssid = mdef_pid2ssid(mdef, p);
230 for (gn = g; gn; gn = gnode_next(gn))
231 if (gnode_int32(gn) == ssid)
232 break;
233 if (gn == NULL)
234 g = glist_add_int32(g, ssid);
235 }
236 }
237 if (!g)
238 g = glist_add_int32(g, mdef_pid2ssid(mdef, b));
239
240 return g;
241 }
242
#if 0
/* Compiled out to keep the compiler happy (presumably because nothing in
 * this file calls it).  Please keep it in sync with single_lc_comsseq. */
/**
 * Collect the distinct senone-sequence IDs (ssids) of every single-phone-word
 * triphone with base phone b and the given right context r, trying each CI
 * phone as the left context.  If mdef defines no such triphone at all, the
 * list holds only the ssid obtained for b itself.
 *
 * @return the generated glist of unique ssids.
 */

static glist_t
single_rc_comsseq(mdef_t * mdef, int32 b, int32 r)
{
    glist_t ssids = NULL;
    int32 lc;

    for (lc = 0; lc < mdef_n_ciphone(mdef); lc++) {
        gnode_t *node;
        int32 pid, ssid;

        pid = mdef_phone_id(mdef, (s3cipid_t) b, (s3cipid_t) lc,
                            (s3cipid_t) r, WORD_POSN_SINGLE);
        if (!IS_S3PID(pid))
            continue;           /* no triphone for this left context */

        /* Append the ssid only if it is not already on the list. */
        ssid = mdef_pid2ssid(mdef, pid);
        node = ssids;
        while (node != NULL && gnode_int32(node) != ssid)
            node = gnode_next(node);
        if (node == NULL)
            ssids = glist_add_int32(ssids, ssid);
    }

    /* Nothing matched: fall back to the base phone. */
    if (ssids == NULL)
        ssids = glist_add_int32(ssids, mdef_pid2ssid(mdef, b));

    return ssids;
}
#endif
279
280
281 /**
282 * Convert the glist of ssids to a composite sseq id. Return the composite ID.
283 */
284 static s3ssid_t
ssidlist2comsseq(glist_t g,mdef_t * mdef,dict2pid_t * dict2pid,hash_table_t * hs,hash_table_t * hp)285 ssidlist2comsseq(glist_t g, mdef_t * mdef, dict2pid_t * dict2pid,
286 hash_table_t * hs, /* For composite states */
287 hash_table_t * hp) /* For composite senone seq */
288 {
289 int32 i, j, n, s, ssid;
290 s3senid_t **sen;
291 s3senid_t *comsenid;
292 gnode_t *gn;
293
294 n = glist_count(g);
295 if (n <= 0)
296 E_FATAL("Panic: length(ssidlist)= %d\n", n);
297
298 /* Space for list of senones for each state, derived from the given glist */
299 sen =
300 (s3senid_t **) ckd_calloc(mdef_n_emit_state(mdef),
301 sizeof(s3senid_t *));
302 for (i = 0; i < mdef_n_emit_state(mdef); i++) {
303 sen[i] = (s3senid_t *) ckd_calloc(n + 1, sizeof(s3senid_t));
304 sen[i][0] = BAD_S3SENID; /* Sentinel */
305 }
306 /* Space for composite senone ID for each state position */
307 comsenid =
308 (s3senid_t *) ckd_calloc(mdef_n_emit_state(mdef),
309 sizeof(s3senid_t));
310
311 /* Expand g into an array of arrays of unique senone IDs, one for
312 * each state in the model. */
313 for (gn = g; gn; gn = gnode_next(gn)) {
314 ssid = gnode_int32(gn);
315
316 /* Expand ssid into individual states (senones); insert in sen[][] if not present */
317 for (i = 0; i < mdef_n_emit_state(mdef); i++) {
318 s = mdef->sseq[ssid][i];
319
320 for (j = 0; (IS_S3SENID(sen[i][j])) && (sen[i][j] != s); j++);
321 if (NOT_S3SENID(sen[i][j])) {
322 sen[i][j] = s;
323 sen[i][j + 1] = BAD_S3SENID;
324 }
325 }
326 }
327
328 /* Convert senones list for each state position into composite state */
329 for (i = 0; i < mdef_n_emit_state(mdef); i++) {
330 /* Count number of unique senones for this state. */
331 for (j = 0; IS_S3SENID(sen[i][j]); j++);
332 assert(j > 0);
333
334 /* Map set of senones to composite senone ID. */
335 j = (long)hash_table_enter_bkey(hs, (char *) (sen[i]), j * sizeof(s3senid_t),
336 (void *)(long)dict2pid->n_comstate);
337 /* Did this set of senones already exist? */
338 if (j == dict2pid->n_comstate)
339 dict2pid->n_comstate++; /* if not, it's a new composite senone */
340 else
341 ckd_free((void *) sen[i]);
342
343 /* Composite senone ID for this state. */
344 comsenid[i] = j;
345 }
346 ckd_free(sen);
347
348 /* Map sequence of composite senids (one per state) to composite sseq ID */
349 j = (long) hash_table_enter_bkey(hp, (char *) comsenid,
350 mdef->n_emit_state * sizeof(s3senid_t),
351 (void *)(long)dict2pid->n_comsseq);
352 /* Did it already exist? */
353 if (j == dict2pid->n_comsseq) {
354 /* if not, it's a new composite senone sequence. */
355 dict2pid->n_comsseq++;
356 if (dict2pid->n_comsseq >= MAX_S3SENID)
357 E_FATAL
358 ("#Composite sseq limit(%d) reached; increase MAX_S3SENID\n",
359 dict2pid->n_comsseq);
360 }
361 else
362 ckd_free((void *) comsenid);
363
364 return ((s3ssid_t) j);
365 }
366
367 void
compress_table(s3ssid_t * uncomp_tab,s3ssid_t * com_tab,s3cipid_t * ci_map,int32 n_ci)368 compress_table(s3ssid_t * uncomp_tab, s3ssid_t * com_tab,
369 s3cipid_t * ci_map, int32 n_ci)
370 {
371 int32 found;
372 int32 r;
373 int32 tmp_r;
374
375 for (r = 0; r < n_ci; r++) {
376 com_tab[r] = BAD_S3SSID;
377 ci_map[r] = BAD_S3CIPID;
378 }
379 /** Compress this map */
380 for (r = 0; r < n_ci; r++) {
381
382 found = 0;
383 for (tmp_r = 0; tmp_r < r && com_tab[tmp_r] != BAD_S3SSID; tmp_r++) { /* If it appears before, just filled in cimap; */
384 if (uncomp_tab[r] == com_tab[tmp_r]) {
385 found = 1;
386 ci_map[r] = tmp_r;
387 break;
388 }
389 }
390
391 if (found == 0) {
392 com_tab[tmp_r] = uncomp_tab[r];
393 ci_map[r] = tmp_r;
394 }
395 }
396 }
397
398
399 static void
compress_right_context_tree(mdef_t * mdef,dict2pid_t * d2p)400 compress_right_context_tree(mdef_t * mdef, dict2pid_t * d2p)
401 {
402 int32 n_ci;
403 int32 b, l, r;
404 int32 *rmap;
405 s3ssid_t *tmpssid;
406 s3cipid_t *tmpcimap;
407
408 n_ci = mdef->n_ciphone;
409
410 tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t));
411 tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t));
412
413 assert(d2p->rdiph_rc);
414 d2p->rssid =
415 (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *));
416
417 for (b = 0; b < n_ci; b++) {
418
419 d2p->rssid[b] =
420 (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t));
421
422 for (l = 0; l < n_ci; l++) {
423
424 rmap = d2p->rdiph_rc[b][l];
425
426 compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone);
427
428 for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID;
429 r++);
430
431 if (tmpssid[0] != BAD_S3SSID) {
432 d2p->rssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t));
433 memcpy(d2p->rssid[b][l].ssid, tmpssid,
434 r * sizeof(s3ssid_t));
435 d2p->rssid[b][l].cimap =
436 ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t));
437 memcpy(d2p->rssid[b][l].cimap, tmpcimap,
438 (mdef->n_ciphone) * sizeof(s3cipid_t));
439 d2p->rssid[b][l].n_ssid = r;
440 }
441 else {
442 d2p->rssid[b][l].ssid = NULL;
443 d2p->rssid[b][l].cimap = NULL;
444 d2p->rssid[b][l].n_ssid = 0;
445 }
446
447 }
448 }
449
450 /* Try to compress lrdiph_rc into lrdiph_rc_compressed */
451 ckd_free(tmpssid);
452 ckd_free(tmpcimap);
453
454
455 }
456
457 static void
compress_left_right_context_tree(mdef_t * mdef,dict2pid_t * d2p)458 compress_left_right_context_tree(mdef_t * mdef, dict2pid_t * d2p)
459 {
460 int32 n_ci;
461 int32 b, l, r;
462 int32 *rmap;
463 s3ssid_t *tmpssid;
464 s3cipid_t *tmpcimap;
465
466 n_ci = mdef->n_ciphone;
467
468 tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t));
469 tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t));
470
471 assert(d2p->lrdiph_rc);
472
473 d2p->lrssid =
474 (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *));
475
476 for (b = 0; b < n_ci; b++) {
477
478 d2p->lrssid[b] =
479 (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t));
480
481 for (l = 0; l < n_ci; l++) {
482 rmap = d2p->lrdiph_rc[b][l];
483
484 compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone);
485
486 for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID;
487 r++);
488
489 if (tmpssid[0] != BAD_S3SSID) {
490 d2p->lrssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t));
491 memcpy(d2p->lrssid[b][l].ssid, tmpssid,
492 r * sizeof(s3ssid_t));
493 d2p->lrssid[b][l].cimap =
494 ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t));
495 memcpy(d2p->lrssid[b][l].cimap, tmpcimap,
496 (mdef->n_ciphone) * sizeof(s3cipid_t));
497 d2p->lrssid[b][l].n_ssid = r;
498 }
499 else {
500 d2p->lrssid[b][l].ssid = NULL;
501 d2p->lrssid[b][l].cimap = NULL;
502 d2p->lrssid[b][l].n_ssid = 0;
503 }
504 }
505 }
506
507 /* Try to compress lrdiph_rc into lrdiph_rc_compressed */
508 ckd_free(tmpssid);
509 ckd_free(tmpcimap);
510
511
512 }
513
514 /**
515 ARCHAN, A duplicate of get_rc_npid in ctxt_table.h. I doubt whether it is correct
516 because the compressed map has not been checked.
517 */
518 int32
get_rc_nssid(dict2pid_t * d2p,s3wid_t w,dict_t * dict)519 get_rc_nssid(dict2pid_t * d2p, s3wid_t w, dict_t * dict)
520 {
521 int32 pronlen;
522 s3cipid_t b, lc;
523
524 pronlen = dict->word[w].pronlen;
525 b = dict->word[w].ciphone[pronlen - 1];
526
527 if (pronlen == 1) {
528 /* Is this true ?
529 No known left context. But all cimaps (for any l) are identical; pick one
530 */
531 /*E_INFO("Single phone word\n"); */
532 return (d2p->lrssid[b][0].n_ssid);
533 }
534 else {
535 /* E_INFO("Multiple phone word\n"); */
536 lc = dict->word[w].ciphone[pronlen - 2];
537 return (d2p->rssid[b][lc].n_ssid);
538 }
539
540 }
541
542 s3cipid_t *
dict2pid_get_rcmap(dict2pid_t * d2p,s3wid_t w,dict_t * dict)543 dict2pid_get_rcmap(dict2pid_t * d2p, s3wid_t w, dict_t * dict)
544 {
545 int32 pronlen;
546 s3cipid_t b, lc;
547
548 pronlen = dict->word[w].pronlen;
549 b = dict->word[w].ciphone[pronlen - 1];
550
551 if (pronlen == 1) {
552 /* Is this true ?
553 No known left context. But all cimaps (for any l) are identical; pick one
554 */
555 /*E_INFO("Single phone word\n"); */
556 return (d2p->lrssid[b][0].cimap);
557 }
558 else {
559 /* E_INFO("Multiple phone word\n"); */
560 lc = dict->word[w].ciphone[pronlen - 2];
561 return (d2p->rssid[b][lc].cimap);
562 }
563
564 }
565
566
567
568
569 static void
free_compress_map(xwdssid_t ** tree,int32 n_ci)570 free_compress_map(xwdssid_t ** tree, int32 n_ci)
571 {
572 int32 b, l;
573 for (b = 0; b < n_ci; b++) {
574 for (l = 0; l < n_ci; l++) {
575 ckd_free(tree[b][l].ssid);
576 ckd_free(tree[b][l].cimap);
577 }
578 ckd_free(tree[b]);
579 }
580 ckd_free(tree);
581 }
582
583
584 /* RAH 4.16.01 This code has several leaks that must be fixed */
585 dict2pid_t *
dict2pid_build(mdef_t * mdef,dict_t * dict,int32 is_composite,logmath_t * logmath)586 dict2pid_build(mdef_t * mdef, dict_t * dict, int32 is_composite, logmath_t *logmath)
587 {
588 dict2pid_t *dict2pid;
589 s3ssid_t *internal, **ldiph, **rdiph, *single;
590 int32 pronlen;
591 hash_table_t *hs, *hp;
592 glist_t g;
593 gnode_t *gn;
594 s3senid_t *sen;
595 hash_entry_t *he;
596 int32 *cslen;
597 int32 i, j, b, l, r, w, n, p;
598
599 E_INFO("Building PID tables for dictionary\n");
600 assert(mdef);
601 assert(dict);
602
603
604 dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t));
605
606 dict2pid->n_dictsize = dict_size(dict);
607 dict2pid->internal =
608 (s3ssid_t **) ckd_calloc(dict_size(dict), sizeof(s3ssid_t *));
609 dict2pid->ldiph_lc =
610 (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
611 mdef->n_ciphone, sizeof(s3ssid_t));
612 dict2pid->rdiph_rc =
613 (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
614 mdef->n_ciphone, sizeof(s3ssid_t));
615 dict2pid->is_composite = is_composite;
616
617 dict2pid->n_ci = mdef->n_ciphone;
618 if (dict2pid->is_composite) {
619 dict2pid->single_lc = (s3ssid_t **) ckd_calloc_2d(mdef->n_ciphone,
620 mdef->n_ciphone,
621 sizeof
622 (s3ssid_t));
623 dict2pid->lrdiph_rc = NULL;
624 dict2pid->rssid = NULL;
625 dict2pid->lrssid = NULL;
626
627 }
628 else {
629
630 dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone,
631 mdef->n_ciphone,
632 mdef->n_ciphone,
633 sizeof
634 (s3ssid_t));
635 dict2pid->single_lc = NULL;
636
637
638 }
639
640 dict2pid->comstate = NULL;
641 dict2pid->comsseq = NULL;
642 dict2pid->comwt = NULL;
643
644 dict2pid->n_comstate = 0;
645 dict2pid->n_comsseq = 0;
646 dict2pid->is_composite = is_composite;
647
648 hs = hash_table_new(mdef->n_ciphone * mdef->n_ciphone * mdef->n_emit_state,
649 HASH_CASE_YES);
650 hp = hash_table_new(mdef->n_ciphone * mdef->n_ciphone, HASH_CASE_YES);
651
652 for (w = 0, n = 0; w < dict_size(dict); w++) {
653 pronlen = dict_pronlen(dict, w);
654 if (pronlen < 0)
655 E_FATAL("Pronunciation-length(%s)= %d\n",
656 dict_wordstr(dict, w), pronlen);
657 n += pronlen;
658 }
659
660 internal = (s3ssid_t *) ckd_calloc(n, sizeof(s3ssid_t));
661
662
663 /* Temporary */
664 ldiph =
665 (s3ssid_t **) ckd_calloc_2d(mdef->n_ciphone, mdef->n_ciphone,
666 sizeof(s3ssid_t));
667 rdiph =
668 (s3ssid_t **) ckd_calloc_2d(mdef->n_ciphone, mdef->n_ciphone,
669 sizeof(s3ssid_t));
670 single = (s3ssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(s3ssid_t));
671 for (b = 0; b < mdef->n_ciphone; b++) {
672 for (l = 0; l < mdef->n_ciphone; l++) {
673 for (r = 0; r < mdef->n_ciphone; r++) {
674 dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
675 dict2pid->rdiph_rc[b][l][r] = BAD_S3SSID;
676 }
677
678 if (dict2pid->is_composite) {
679 assert(dict2pid->single_lc);
680 dict2pid->single_lc[b][l] = BAD_S3SSID;
681 }
682
683 ldiph[b][l] = BAD_S3SSID;
684 rdiph[b][l] = BAD_S3SSID;
685 }
686 single[b] = BAD_S3SSID;
687 }
688
689 for (w = 0; w < dict_size(dict); w++) {
690 dict2pid->internal[w] = internal;
691 pronlen = dict_pronlen(dict, w);
692
693 if (pronlen >= 2) {
694
695 /** This segments of code take care of the initialization of
696 internal[0] and ldiph[b][r][l]
697 */
698
699 /* Find or create a composite senone sequence for b(?,r) */
700 b = dict_pron(dict, w, 0);
701 r = dict_pron(dict, w, 1);
702 if (NOT_S3SSID(ldiph[b][r])) {
703
704 if (dict2pid->is_composite) {
705 /* Get all ssids for b(?,r) */
706 g = ldiph_comsseq(mdef, b, r);
707 /* Build a composite sseq from those ssids */
708 ldiph[b][r] =
709 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
710 glist_free(g);
711 }
712
713 /* Record all possible ssids for b(?,r) */
714 for (l = 0; l < mdef_n_ciphone(mdef); l++) {
715 p = mdef_phone_id_nearest(mdef, (s3cipid_t) b,
716 (s3cipid_t) l, (s3cipid_t) r,
717 WORD_POSN_BEGIN);
718 dict2pid->ldiph_lc[b][r][l] = mdef_pid2ssid(mdef, p);
719 }
720 }
721
722 /* And ... only use it if we are not doing full triphones. (?!) */
723 if (dict2pid->is_composite)
724 internal[0] = ldiph[b][r];
725 else
726 internal[0] = BAD_S3SSID;
727
728 /* Now find ssids for all the word internal triphones and
729 * place them in internal[i]. */
730 for (i = 1; i < pronlen - 1; i++) {
731 l = b;
732 b = r;
733 r = dict_pron(dict, w, i + 1);
734
735 p = mdef_phone_id_nearest(mdef, (s3cipid_t) b,
736 (s3cipid_t) l, (s3cipid_t) r,
737 WORD_POSN_INTERNAL);
738 internal[i] = mdef_pid2ssid(mdef, p);
739 }
740
741 /** This part will take care of the initialization of
742 internal[pronlen-1] and rdiph[b][l][r]. Notice that this
743 is symmetric to the first part of the code.
744 */
745
746 l = b;
747 b = r;
748 if (NOT_S3SSID(rdiph[b][l])) {
749 if (dict2pid->is_composite) {
750 g = rdiph_comsseq(mdef, b, l);
751 rdiph[b][l] =
752 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
753 glist_free(g);
754 }
755
756 for (r = 0; r < mdef_n_ciphone(mdef); r++) {
757 p = mdef_phone_id_nearest(mdef, (s3cipid_t) b,
758 (s3cipid_t) l, (s3cipid_t) r,
759 WORD_POSN_BEGIN);
760 dict2pid->rdiph_rc[b][l][r] = mdef_pid2ssid(mdef, p);
761 }
762 }
763
764 if (dict2pid->is_composite)
765 internal[pronlen - 1] = rdiph[b][l];
766 else
767 internal[pronlen - 1] = BAD_S3SSID;
768
769 }
770 else if (pronlen == 1) {
771
772 b = dict_pron(dict, w, 0);
773
774 if (dict2pid->is_composite) {
775 assert(dict2pid->single_lc);
776
777 /* Find or build composite senone sequence for b(?,?) */
778 if (NOT_S3SSID(single[b])) {
779
780 g = single_comsseq(mdef, b);
781 single[b] =
782 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
783 glist_free(g);
784
785 /* Record all possible *composite* ssids for b(?,?) */
786 for (l = 0; l < mdef_n_ciphone(mdef); l++) {
787 g = single_lc_comsseq(mdef, b, l);
788 dict2pid->single_lc[b][l] =
789 ssidlist2comsseq(g, mdef, dict2pid, hs, hp);
790 glist_free(g);
791 }
792 }
793 internal[0] = single[b];
794 }
795 else {
796 /* Don't compress but build table directly */
797 if (NOT_S3SSID(single[b])) {
798 for (l = 0; l < mdef_n_ciphone(mdef); l++) {
799 for (r = 0; r < mdef_n_ciphone(mdef); r++) {
800 p = mdef_phone_id_nearest(mdef, (s3cipid_t) b,
801 (s3cipid_t) l,
802 (s3cipid_t) r,
803 WORD_POSN_SINGLE);
804 dict2pid->lrdiph_rc[b][l][r] =
805 mdef_pid2ssid(mdef, p);
806 }
807 }
808 }
809 internal[pronlen - 1] = BAD_S3SSID;
810 }
811
812 }
813 else {
814 E_FATAL("panic: pronlen=0, what's going on?\n");
815 }
816
817 if (!dict2pid->is_composite) {
818 /* E_INFO("internal[0] %d, internal[pronlen-1] %d\n", internal[0],internal[pronlen-1]); */
819 assert(internal[0] == BAD_S3SSID
820 && internal[pronlen - 1] == BAD_S3SSID);
821 }
822
823 internal += pronlen;
824 }
825
826 ckd_free_2d((void **) ldiph);
827 ckd_free_2d((void **) rdiph);
828 ckd_free((void *) single);
829
830 if (dict2pid->is_composite) {
831 /* Count the length of each composite state (i.e. how many
832 * actual senones it maps to). */
833 /* n_comstate will have been set through calls to ssidlist2comsseq(). */
834 cslen = (int32 *) ckd_calloc(dict2pid->n_comstate, sizeof(int32));
835 /* as will the entries of hs. */
836 g = hash_table_tolist(hs, &n);
837 assert(n == dict2pid->n_comstate);
838 n = 0;
839 /* Iterate over entries of hs to figure out how much to allocate. */
840 for (gn = g; gn; gn = gnode_next(gn)) {
841 he = (hash_entry_t *) gnode_ptr(gn);
842 /* Key is a set of actual senone IDs. */
843 sen = (s3senid_t *) hash_entry_key(he);
844 for (i = 0; IS_S3SENID(sen[i]); i++);
845
846 /* Value is the composite state ID. */
847 cslen[(long)hash_entry_val(he)] = i + 1; /* +1 for terminating sentinel */
848
849 n += (i + 1);
850 }
851 /* Allocate the composite state to senone list table. */
852 dict2pid->comstate =
853 (s3senid_t **) ckd_calloc(dict2pid->n_comstate,
854 sizeof(s3senid_t *));
855 sen = (s3senid_t *) ckd_calloc(n, sizeof(s3senid_t));
856 for (i = 0; i < dict2pid->n_comstate; i++) {
857 dict2pid->comstate[i] = sen;
858 sen += cslen[i];
859 }
860
861 /* Build the composite state to senone list table from hs. */
862 for (gn = g; gn; gn = gnode_next(gn)) {
863 he = (hash_entry_t *) gnode_ptr(gn);
864 sen = (s3senid_t *) hash_entry_key(he);
865 i = (long)hash_entry_val(he);
866
867 for (j = 0; j < cslen[i]; j++)
868 dict2pid->comstate[i][j] = sen[j];
869 assert(sen[j - 1] == BAD_S3SENID);
870
871 ckd_free((void *) sen);
872 sen = NULL;
873 }
874 ckd_free(cslen);
875 glist_free(g);
876
877 /* Allocate space for composite sseq table */
878 /* n_comsseq will have been set through calls to ssidlist2comsseq(). */
879 dict2pid->comsseq =
880 (s3senid_t **) ckd_calloc(dict2pid->n_comsseq,
881 sizeof(s3senid_t *));
882
883 for (i = 0; i < dict2pid->n_comsseq; i++) {
884 dict2pid->comsseq[i] = NULL;
885 }
886
887 /* as will the entries of hp. */
888 g = hash_table_tolist(hp, &n);
889 assert(n == dict2pid->n_comsseq);
890
891 /* Build composite sseq table by iterating over hp. */
892 for (gn = g; gn; gn = gnode_next(gn)) {
893 he = (hash_entry_t *) gnode_ptr(gn);
894 /* Value: composite ssid */
895 i = (long)hash_entry_val(he);
896 /* Key: array of composite state IDs. */
897 dict2pid->comsseq[i] = (s3senid_t *) hash_entry_key(he);
898 }
899 glist_free(g);
900
901 /* Weight for each composite state. */
902 /* These are weighted inversely to the number of normal
903 * senones which make them up. I'm guessing that the
904 * reasoning behind this is that the more different senones
905 * combined into a single composite score, the less relevant
906 * that score will be. */
907 dict2pid->comwt =
908 (int32 *) ckd_calloc(dict2pid->n_comstate, sizeof(int32));
909 for (i = 0; i < dict2pid->n_comstate; i++) {
910 sen = dict2pid->comstate[i];
911
912 for (j = 0; IS_S3SENID(sen[j]); j++);
913 #if 0
914 /* if comstate i has N states, its weight= (1/N^2) (Major Hack!!) */
915 dict2pid->comwt[i] = -(logs3(logmath, (float64) j) << 1);
916 #else
917 /* if comstate i has N states, its weight= 1/N */
918 dict2pid->comwt[i] = -logs3(logmath, (float64) j);
919 #endif
920 }
921 }
922
923 if (!(dict2pid->is_composite)) {
924 assert(dict2pid->comstate == NULL);
925 assert(dict2pid->comsseq == NULL);
926 assert(dict2pid->comwt == NULL);
927 assert(dict2pid->single_lc == NULL);
928 assert(dict2pid->n_comstate == 0);
929 assert(dict2pid->n_comsseq == 0);
930
931 /* Try to compress rdiph_rc into rdiph_rc_compressed
932 This should be moved to a function.
933 */
934
935 compress_right_context_tree(mdef, dict2pid);
936 compress_left_right_context_tree(mdef, dict2pid);
937
938 }
939 else {
940 assert(dict2pid->rssid == NULL);
941 assert(dict2pid->lrssid == NULL);
942 }
943
944 hash_table_free(hs);
945 hash_table_free(hp);
946
947 return dict2pid;
948 }
949
950 void
dict2pid_free(dict2pid_t * d2p)951 dict2pid_free(dict2pid_t * d2p)
952 {
953 int32 i;
954
955 if (d2p) {
956 if (d2p->comwt)
957 ckd_free((void *) d2p->comwt);
958 if (d2p->comsseq) {
959
960 for (i = 0; i < d2p->n_comsseq; i++) {
961 if (d2p->comsseq[i] != NULL) {
962 ckd_free((void *) d2p->comsseq[i]);
963 }
964 }
965 ckd_free((void *) d2p->comsseq);
966 }
967
968 if (d2p->comstate) {
969 ckd_free((void **) d2p->comstate[0]);
970 ckd_free((void **) d2p->comstate);
971 }
972
973 if (d2p->single_lc)
974 ckd_free_2d((void *) d2p->single_lc);
975
976 if (d2p->ldiph_lc)
977 ckd_free_3d((void ***) d2p->ldiph_lc);
978
979
980 if (d2p->rdiph_rc)
981 ckd_free_3d((void ***) d2p->rdiph_rc);
982
983 if (d2p->lrdiph_rc)
984 ckd_free_3d((void ***) d2p->lrdiph_rc);
985
986 if (d2p->internal) {
987 ckd_free((void *) d2p->internal[0]);
988 ckd_free((void **) d2p->internal);
989 }
990
991 if (d2p->rssid)
992 free_compress_map(d2p->rssid, d2p->n_ci);
993
994 if (d2p->lrssid)
995 free_compress_map(d2p->lrssid, d2p->n_ci);
996
997 ckd_free(d2p);
998 }
999
1000 }
1001
1002
1003
1004 void
dict2pid_report(dict2pid_t * d2p)1005 dict2pid_report(dict2pid_t * d2p)
1006 {
1007 E_INFO_NOFN("Initialization of dict2pid_t, report:\n");
1008 if (d2p->is_composite) {
1009 E_INFO_NOFN("Dict2pid is in composite triphone mode\n");
1010 E_INFO_NOFN("%d composite states; %d composite sseq\n",
1011 d2p->n_comstate, d2p->n_comsseq);
1012 }
1013 else {
1014 E_INFO_NOFN("Dict2pid is in normal triphone mode\n");
1015 }
1016 E_INFO_NOFN("\n");
1017
1018
1019 }
1020
1021 /**
1022 * Populate composite senone score array.
1023 *
1024 * The composite senone score is the maximum of its component senones'
1025 * scores, scaled down by the number of component senones.
1026 */
1027 void
dict2pid_comsenscr(dict2pid_t * d2p,int32 * senscr,int32 * comsenscr)1028 dict2pid_comsenscr(dict2pid_t * d2p, int32 * senscr, int32 * comsenscr)
1029 {
1030 int32 i, j;
1031 int32 best;
1032 s3senid_t *comstate, k;
1033
1034 for (i = 0; i < d2p->n_comstate; i++) {
1035 comstate = d2p->comstate[i];
1036
1037 best = senscr[comstate[0]];
1038 for (j = 1;; j++) {
1039 k = comstate[j];
1040 if (NOT_S3SENID(k))
1041 break;
1042 if (best < senscr[k])
1043 best = senscr[k];
1044 }
1045
1046 comsenscr[i] = best + d2p->comwt[i];
1047 }
1048 }
1049
1050 /**
1051 * Mark senones active based on a set of active composite senones.
1052 */
1053 void
dict2pid_comsseq2sen_active(dict2pid_t * d2p,mdef_t * mdef,uint8 * comssid,uint8 * sen)1054 dict2pid_comsseq2sen_active(dict2pid_t * d2p, mdef_t * mdef,
1055 uint8 * comssid, uint8 * sen)
1056 {
1057 int32 ss, cs, i, j;
1058 s3senid_t *csp, *sp; /* Composite state pointer */
1059
1060 for (ss = 0; ss < d2p->n_comsseq; ss++) {
1061 if (comssid[ss]) {
1062 csp = d2p->comsseq[ss];
1063
1064 for (i = 0; i < mdef_n_emit_state(mdef); i++) {
1065 cs = csp[i];
1066 sp = d2p->comstate[cs];
1067
1068 for (j = 0; IS_S3SENID(sp[j]); j++)
1069 sen[sp[j]] = 1;
1070 }
1071 }
1072 }
1073 }
1074
1075
1076 void
dict2pid_dump(FILE * fp,dict2pid_t * d2p,mdef_t * mdef,dict_t * dict)1077 dict2pid_dump(FILE * fp, dict2pid_t * d2p, mdef_t * mdef, dict_t * dict)
1078 {
1079 int32 w, p, pronlen;
1080 int32 i, j, b, l, r;
1081
1082 fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n");
1083 for (w = 0; w < dict_size(dict); w++) {
1084 fprintf(fp, "%30s ", dict_wordstr(dict, w));
1085
1086 pronlen = dict_pronlen(dict, w);
1087 for (p = 0; p < pronlen; p++)
1088 fprintf(fp, " %5d", d2p->internal[w][p]);
1089 fprintf(fp, "\n");
1090 }
1091 fprintf(fp, "#\n");
1092
1093 fprintf(fp, "# LDIPH_LC (b r l ssid)\n");
1094 for (b = 0; b < mdef_n_ciphone(mdef); b++) {
1095 for (r = 0; r < mdef_n_ciphone(mdef); r++) {
1096 for (l = 0; l < mdef_n_ciphone(mdef); l++) {
1097 if (IS_S3SSID(d2p->ldiph_lc[b][r][l]))
1098 fprintf(fp, "%6s %6s %6s %5d\n", mdef_ciphone_str(mdef, (s3cipid_t) b), mdef_ciphone_str(mdef, (s3cipid_t) r), mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]); /* RAH, ldiph_lc is returning an int32, %d expects an int16 */
1099 }
1100 }
1101 }
1102 fprintf(fp, "#\n");
1103
1104 fprintf(fp, "# SINGLE_LC (b l comssid)\n");
1105 for (b = 0; b < mdef_n_ciphone(mdef); b++) {
1106 for (l = 0; l < mdef_n_ciphone(mdef); l++) {
1107 if (IS_S3SSID(d2p->single_lc[b][l]))
1108 fprintf(fp, "%6s %6s %5d\n", mdef_ciphone_str(mdef, (s3cipid_t) b), mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->single_lc[b][l]); /* RAH, single_lc is returning an int32, %d expects an int16 */
1109 }
1110 }
1111 fprintf(fp, "#\n");
1112
1113 fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq);
1114 for (i = 0; i < mdef->n_sseq; i++) {
1115 fprintf(fp, "%5d ", i);
1116 for (j = 0; j < mdef_n_emit_state(mdef); j++)
1117 fprintf(fp, " %5d", mdef->sseq[i][j]);
1118 fprintf(fp, "\n");
1119 }
1120 fprintf(fp, "#\n");
1121
1122 fprintf(fp, "# COMSSEQ %d (comstate comstate ...)\n", d2p->n_comsseq);
1123 for (i = 0; i < d2p->n_comsseq; i++) {
1124 fprintf(fp, "%5d ", i);
1125 for (j = 0; j < mdef_n_emit_state(mdef); j++)
1126 fprintf(fp, " %5d", d2p->comsseq[i][j]);
1127 fprintf(fp, "\n");
1128 }
1129 fprintf(fp, "#\n");
1130
1131 fprintf(fp, "# COMSTATE %d (senid senid ...)\n", d2p->n_comstate);
1132 for (i = 0; i < d2p->n_comstate; i++) {
1133 fprintf(fp, "%5d ", i);
1134 for (j = 0; IS_S3SENID(d2p->comstate[i][j]); j++)
1135 fprintf(fp, " %5d", d2p->comstate[i][j]);
1136 fprintf(fp, "\n");
1137 }
1138 fprintf(fp, "#\n");
1139 fprintf(fp, "# END\n");
1140
1141 fflush(fp);
1142 }
1143
1144
#if 0
/*
 * Disabled debugging fragment; it is never compiled.
 *
 * It prints, one line each: the uncompressed row rmap[], the compressed
 * row tmpssid[], the index map tmpcimap[], the stored
 * dict2pid->rssid[b][l].ssid[] and, when that entry is non-empty, its
 * cimap[].
 *
 * This is not a complete definition: rmap, tmpssid, tmpcimap, dict2pid,
 * mdef, b, l and r are not declared here.  NOTE(review): it presumably
 * belongs inside the (b, l) loop of the right-context compression code --
 * confirm before reviving it.
 */
    for (r = 0; r < mdef->n_ciphone; r++) {
        printf("%d ", rmap[r]);
    }

    printf("\n");
    fflush(stdout);

    for (r = 0; r < mdef->n_ciphone; r++) {
        printf("%d ", tmpssid[r]);
    }

    printf("\n");
    fflush(stdout);
    for (r = 0; r < mdef->n_ciphone; r++) {
        printf("%d ", tmpcimap[r]);
    }

    printf("\n");
    fflush(stdout);

    for (r = 0; r < dict2pid->rssid[b][l].n_ssid; r++) {
        printf("%d ", dict2pid->rssid[b][l].ssid[r]);
    }

    printf("\n");
    fflush(stdout);

    if (dict2pid->rssid[b][l].n_ssid > 0) {
        for (r = 0; r < mdef->n_ciphone; r++) {
            printf("%d ", dict2pid->rssid[b][l].cimap[r]);
        }
    }
    printf("\n");

    fflush(stdout);
#endif
1182