1 /* ====================================================================
2  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * This work was supported in part by funding from the Defense Advanced
18  * Research Projects Agency and the National Science Foundation of the
19  * United States of America, and the CMU Sphinx Speech Consortium.
20  *
21  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * ====================================================================
34  *
35  */
36 /*
37  * senone.c -- Mixture density weights associated with each tied state.
38  *
39  * **********************************************
40  * CMU ARPA Speech Project
41  *
42  * Copyright (c) 1996 Carnegie Mellon University.
43  * ALL RIGHTS RESERVED.
44  * **********************************************
45  *
46  * HISTORY
47  *
48  * $Log$
49  * Revision 1.6  2006/02/22  17:27:39  arthchan2003
50  * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, NOT doing truncation in the multi-stream GMM computation \n. 2, Added .s3cont. to be the alias of the old multi-stream GMM computation routine \n. 3, Added license \n.  4, Fixed dox-doc. \n
51  *
52  * Revision 1.5.4.5  2006/01/17 22:57:06  arthchan2003
53  * Add directive TRUNCATE_LOGPDF in ms_senone.c
54  *
55  * Revision 1.5.4.4  2006/01/16 19:47:05  arthchan2003
56  * Removed the truncation of senone probability code.
57  *
58  * Revision 1.5.4.3  2005/08/03 18:53:43  dhdfu
59  * Add memory deallocation functions.  Also move all the initialization
60  * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
61  * from decode_anytopo and friends.
62  *
63  * Revision 1.5.4.2  2005/08/02 21:06:33  arthchan2003
64  * Change options such that .s3cont. works as well.
65  *
66  * Revision 1.5.4.1  2005/07/20 19:39:01  arthchan2003
67  * Added licences in ms_* series of code.
68  *
69  * Revision 1.5  2005/06/21 18:57:31  arthchan2003
70  * 1, Fixed doxygen documentation. 2, Added $ keyword.
71  *
72  * Revision 1.1.1.1  2005/03/24 15:24:00  archan
73  * I found Evandro's suggestion is quite right after yelling at him 2 days later. So I decide to check this in again without any binaries. (I have done make distcheck. ) . Again, this is a candidate for s3.6 and I believe I need to work out 4-5 intermediate steps before I can complete the first prototype.  That's why I keep local copies.
74  *
75  * Revision 1.4  2004/12/05 12:01:31  arthchan2003
76  * 1, move libutil/libutil.h to s3types.h, seems to me not very nice to have it in every files. 2, Remove warning messages of main_align.c 3, Remove warning messages in chgCase.c
77  *
78  * Revision 1.3  2004/11/13 21:25:19  arthchan2003
79  * commit of 1, absolute CI-GMMS , 2, fast CI senone computation using svq, 3, Decrease the number of static variables, 4, fixing the random generator problem of vector_vqgen, 5, move all unused files to NOTUSED
80  *
81  * Revision 1.2  2004/08/09 01:02:33  arthchan2003
82  * check in the windows setup for align
83  *
84  * Revision 1.1  2004/08/09 00:17:11  arthchan2003
85  * Incorporating s3.0 align, at this point, there are still some small problems in align but they don't hurt. For example, the score doesn't match with s3.0 and the output will have problem if files are piped to /dev/null/. I think we can go for it.
86  *
87  * Revision 1.1  2003/02/14 14:40:34  cbq
88  * Compiles.  Analysis is probably hosed.
89  *
90  * Revision 1.1  2000/04/24 09:39:41  lenzo
91  * s3 import.
92  *
93  *
94  * 06-Mar-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
95  * 		Added handling of .semi. and .cont. special cases for senone-mgau
96  * 		mapping.
97  *
98  * 20-Dec-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
99  * 		Changed senone_mixw_read and senone_mgau_map_read to use the new
100  *		libio/bio_fread functions.
101  *
102  * 20-Jan-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
103  * 		Modified senone_eval to accommodate both normal and transposed
104  *		senone organization.
105  *
106  * 13-Dec-95	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
107  * 		Added implementation of senone_mgau_map_read.
108  * 		Added senone_eval_all() optimized for the semicontinuous case.
109  *
110  * 12-Nov-95	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
111  * 		Created.
112  */
113 
114 
115 #include "ms_senone.h"
116 #include "logs3.h"
117 
118 #include "bio.h"
119 #include <string.h>
120 #include <assert.h>
121 
122 
/* Expected "version" header value of a mixture-weight file; a mismatch only triggers a warning. */
#define MIXW_PARAM_VERSION	"1.0"
/* Expected "version" header value of a senone-to-codebook map file; a mismatch only triggers a warning. */
#define SPDEF_PARAM_VERSION	"1.2"
125 
126 
127 static int32
128 senone_mgau_map_read(senone_t * s, const char *file_name)
129 {
130     FILE *fp;
131     int32 byteswap, chksum_present, n_gauden_present;
132     uint32 chksum;
133     int32 i;
134     char eofchk;
135     char **argname, **argval;
136     float32 v;
137 
138     E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
139 
140     if ((fp = fopen(file_name, "rb")) == NULL)
141         E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);
142 
143     /* Read header, including argument-value info and 32-bit byteorder magic */
144     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
145         E_FATAL("bio_readhdr(%s) failed\n", file_name);
146 
147     /* Parse argument-value list */
148     chksum_present = 0;
149     n_gauden_present = 0;
150     for (i = 0; argname[i]; i++) {
151         if (strcmp(argname[i], "version") == 0) {
152             if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
153                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
154                        file_name, argval[i], SPDEF_PARAM_VERSION);
155             }
156 
157             /* HACK!! Convert version# to float32 and take appropriate action */
158             if (sscanf(argval[i], "%f", &v) != 1)
159                 E_FATAL("%s: Bad version no. string: %s\n", file_name,
160                         argval[i]);
161 
162             n_gauden_present = (v > 1.1) ? 1 : 0;
163         }
164         else if (strcmp(argname[i], "chksum0") == 0) {
165             chksum_present = 1; /* Ignore the associated value */
166         }
167     }
168     bio_hdrarg_free(argname, argval);
169     argname = argval = NULL;
170 
171     chksum = 0;
172 
173     /* Read #gauden (if version matches) */
174     if (n_gauden_present) {
175         if (bio_fread
176             (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
177             E_FATAL("fread(%s) (#gauden) failed\n", file_name);
178     }
179 
180     /* Read 1d array data */
181     if (bio_fread_1d
182         ((void **) (&s->mgau), sizeof(s3mgauid_t), &(s->n_sen), fp,
183          byteswap, &chksum) < 0) {
184         E_FATAL("bio_fread_1d(%s) failed\n", file_name);
185     }
186 
187     /* Infer n_gauden if not present in this version */
188     if (!n_gauden_present) {
189         s->n_gauden = 1;
190         for (i = 0; i < s->n_sen; i++)
191             if (s->mgau[i] >= s->n_gauden)
192                 s->n_gauden = s->mgau[i] + 1;
193     }
194 
195     if (chksum_present)
196         bio_verify_chksum(fp, byteswap, chksum);
197 
198     if (fread(&eofchk, 1, 1, fp) == 1)
199         E_FATAL("More data than expected in %s\n", file_name);
200 
201     fclose(fp);
202 
203     E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
204            s->n_gauden);
205 
206     return 1;
207 }
208 
209 
210 static int32
211 senone_mixw_read(senone_t * s, const char *file_name)
212 {
213     char eofchk;
214     FILE *fp;
215     int32 byteswap, chksum_present;
216     uint32 chksum;
217     float32 *pdf;
218     int32 i, f, c, p, n_err;
219     char **argname, **argval;
220 
221     E_INFO("Reading senone mixture weights: %s\n", file_name);
222 
223     if ((fp = fopen(file_name, "rb")) == NULL)
224         E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);
225 
226     /* Read header, including argument-value info and 32-bit byteorder magic */
227     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
228         E_FATAL("bio_readhdr(%s) failed\n", file_name);
229 
230     /* Parse argument-value list */
231     chksum_present = 0;
232     for (i = 0; argname[i]; i++) {
233         if (strcmp(argname[i], "version") == 0) {
234             if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
235                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
236                        file_name, argval[i], MIXW_PARAM_VERSION);
237         }
238         else if (strcmp(argname[i], "chksum0") == 0) {
239             chksum_present = 1; /* Ignore the associated value */
240         }
241     }
242     bio_hdrarg_free(argname, argval);
243     argname = argval = NULL;
244 
245     chksum = 0;
246 
247     /* Read #senones, #features, #codewords, arraysize */
248     if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
249          1)
250         ||
251         (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
252          != 1)
253         || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
254             != 1)
255         || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
256         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
257     }
258     if (i != s->n_sen * s->n_feat * s->n_cw) {
259         E_FATAL
260             ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
261              file_name, i, s->n_sen, s->n_feat, s->n_cw);
262     }
263 
264     /*
265      * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
266      * All PDF values will be truncated (in the LSB positions) by these many bits.
267      */
268     if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
269         E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
270 
271     p = logs3(s->logmath, s->mixwfloor);
272 
273 #if TRUNCATE_LOGPDF
274     for (s->shift = 0, p = -p; p >= 256; s->shift++, p >>= 1);
275     E_INFO("Truncating senone logs3(pdf) values by %d bits, to 8 bits\n",
276            s->shift);
277 #endif
278 
279     /*
280      * Allocate memory for senone PDF data.  Organize normally or transposed depending on
281      * s->n_gauden.
282      */
283     if (s->n_gauden > 1) {
284         s->pdf =
285             (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
286                                           sizeof(senprob_t));
287     }
288     else {
289         s->pdf =
290             (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
291                                           sizeof(senprob_t));
292     }
293 
294     /* Temporary structure to read in floats */
295     pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
296 
297     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
298     n_err = 0;
299     for (i = 0; i < s->n_sen; i++) {
300         for (f = 0; f < s->n_feat; f++) {
301             if (bio_fread
302                 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
303                  &chksum)
304                 != s->n_cw) {
305                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
306             }
307 
308             /* Normalize and floor */
309             if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
310                 n_err++;
311             vector_floor(pdf, s->n_cw, s->mixwfloor);
312             vector_sum_norm(pdf, s->n_cw);
313 
314             /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
315             for (c = 0; c < s->n_cw; c++) {
316                 p = -(logs3(s->logmath, pdf[c]));
317 
318 #if TRUNCATE_LOGPDF
319                 p += (1 << (s->shift - 1)) - 1; /* Rounding before truncation */
320 
321                 if (s->n_gauden > 1)
322                     s->pdf[i][f][c] =
323                         (p < (255 << s->shift)) ? (p >> s->shift) : 255;
324                 else
325                     s->pdf[f][c][i] =
326                         (p < (255 << s->shift)) ? (p >> s->shift) : 255;
327 
328 #else
329                 if (s->n_gauden > 1)
330                     s->pdf[i][f][c] = p;
331                 else
332                     s->pdf[f][c][i] = p;
333 #endif
334             }
335         }
336     }
337     if (n_err > 0)
338         E_ERROR("Weight normalization failed for %d senones\n", n_err);
339 
340     ckd_free(pdf);
341 
342     if (chksum_present)
343         bio_verify_chksum(fp, byteswap, chksum);
344 
345     if (fread(&eofchk, 1, 1, fp) == 1)
346         E_FATAL("More data than expected in %s\n", file_name);
347 
348     fclose(fp);
349 
350     E_INFO
351         ("Read mixture weights for %d senones: %d features x %d codewords\n",
352          s->n_sen, s->n_feat, s->n_cw);
353 
354     return 1;
355 }
356 
357 
358 senone_t *
359 senone_init(const char *mixwfile, const char *sen2mgau_map_file, float32 mixwfloor, logmath_t *logmath)
360 {
361     senone_t *s;
362     int32 n = 0, i;
363 
364     assert(sen2mgau_map_file);
365 
366     s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
367     s->logmath = logmath;
368     s->mixwfloor = mixwfloor;
369 
370 
371     if (strcmp(sen2mgau_map_file, ".semi.") == 0)
372         s->n_gauden = 1;
373     else if (strcmp(sen2mgau_map_file, ".cont.") == 0
374              || strcmp(sen2mgau_map_file, ".s3cont.") == 0)
375         s->n_gauden = 2;        /* HACK!! Dummy value >1 for the moment; fixed below */
376     else {
377         senone_mgau_map_read(s, sen2mgau_map_file);
378         n = s->n_sen;
379     }
380 
381     senone_mixw_read(s, mixwfile);
382 
383     if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
384         /* All-to-1 senones-codebook mapping */
385         s->mgau = (s3mgauid_t *) ckd_calloc(s->n_sen, sizeof(s3mgauid_t));
386     }
387     else if (strcmp(sen2mgau_map_file, ".cont.") == 0
388              || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
389         /* 1-to-1 senone-codebook mapping */
390         if (s->n_sen <= 1)
391             E_FATAL("#senone=%d; must be >1\n", s->n_sen);
392 
393         s->mgau = (s3mgauid_t *) ckd_calloc(s->n_sen, sizeof(s3mgauid_t));
394         for (i = 0; i < s->n_sen; i++)
395             s->mgau[i] = i;
396 
397         s->n_gauden = s->n_sen;
398     }
399     else {
400         if (s->n_sen != n)
401             E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
402                     n, sen2mgau_map_file, s->n_sen, mixwfile);
403     }
404 
405     s->featscr = NULL;
406     return s;
407 }
408 
409 void
410 senone_free(senone_t * s)
411 {
412     if (s == NULL)
413         return;
414     if (s->pdf)
415         ckd_free_3d((void *) s->pdf);
416     if (s->mgau)
417         ckd_free(s->mgau);
418     if (s->featscr)
419         ckd_free(s->featscr);
420     ckd_free(s);
421 }
422 
423 
424 /*
425  * Compute senone score for one senone.
426  * NOTE:  Remember that senone PDF tables contain SCALED logs3 values.
427  * NOTE:  Remember also that PDF data may be transposed or not depending on s->n_gauden.
428  */
429 int32
430 senone_eval(senone_t * s, s3senid_t id, gauden_dist_t ** dist, int32 n_top)
431 {
432     int32 scr;                  /* total senone score */
433     int32 fscr;                 /* senone score for one feature */
434     int32 fwscr;                /* senone score for one feature, one codeword */
435     int32 f, t;
436     gauden_dist_t *fdist;
437 
438     assert((id >= 0) && (id < s->n_sen));
439     assert((n_top > 0) && (n_top <= s->n_cw));
440 
441     scr = 0;
442 
443     for (f = 0; f < s->n_feat; f++) {
444         fdist = dist[f];
445 
446         /* Top codeword for feature f */
447 #if TRUNCATE_LOGPDF
448         fscr = (s->n_gauden > 1) ? fdist[0].dist - (s->pdf[id][f][fdist[0].id] << s->shift) :   /* untransposed */
449             fdist[0].dist - (s->pdf[f][fdist[0].id][id] << s->shift);   /* transposed */
450 #else
451 
452         fscr = (s->n_gauden > 1) ? fdist[0].dist - (s->pdf[id][f][fdist[0].id]) :       /* untransposed */
453             fdist[0].dist - (s->pdf[f][fdist[0].id][id]);       /* transposed */
454 #endif
455 
456         /* Remaining of n_top codewords for feature f */
457         for (t = 1; t < n_top; t++) {
458 #if TRUNCATE_LOGPDF
459             fwscr = (s->n_gauden > 1) ?
460                 fdist[t].dist - (s->pdf[id][f][fdist[t].id] << s->shift) :
461                 fdist[t].dist - (s->pdf[f][fdist[t].id][id] << s->shift);
462 #else
463 
464             fwscr = (s->n_gauden > 1) ?
465                 fdist[t].dist - (s->pdf[id][f][fdist[t].id]) :
466                 fdist[t].dist - (s->pdf[f][fdist[t].id][id]);
467 #endif
468 
469             fscr = logmath_add(s->logmath, fscr, fwscr);
470 
471         }
472 
473         scr += fscr;
474 
475     }
476 
477     return scr;
478 }
479 
480 
481 /*
482  * Optimized for special case of all senones sharing one codebook (perhaps many features).
483  * In particular, the PDF tables are transposed in memory.
484  */
485 void
486 senone_eval_all(senone_t * s, gauden_dist_t ** dist, int32 n_top,
487                 int32 * senscr)
488 {
489     int32 i, f, k, cwdist, scr;
490 
491     senprob_t *pdf;
492     int32 *featscr = NULL;
493     featscr = s->featscr;
494 
495     assert(s->n_gauden == 1);
496     assert((n_top > 0) && (n_top <= s->n_cw));
497 
498     if ((s->n_feat > 1) && (!featscr))
499         featscr = (int32 *) ckd_calloc(s->n_sen, sizeof(int32));
500 
501     /* Feature 0 */
502     /* Top-N codeword 0 */
503     cwdist = dist[0][0].dist;
504     pdf = s->pdf[0][dist[0][0].id];
505 
506 #if TRUNCATE_LOGPDF
507     for (i = 0; i < s->n_sen; i++)
508         senscr[i] = cwdist - (pdf[i] << s->shift);
509 #else
510     for (i = 0; i < s->n_sen; i++)
511         senscr[i] = cwdist - (pdf[i]);
512 
513 #endif
514 
515     /* Remaining top-N codewords */
516     for (k = 1; k < n_top; k++) {
517         cwdist = dist[0][k].dist;
518         pdf = s->pdf[0][dist[0][k].id];
519 
520         for (i = 0; i < s->n_sen; i++) {
521 #if TRUNCATE_LOGPDF
522             scr = cwdist - (pdf[i] << s->shift);
523 #else
524             scr = cwdist - (pdf[i]);
525 #endif
526             senscr[i] = logmath_add(s->logmath, senscr[i], scr);
527         }
528     }
529 
530     /* Remaining features */
531     for (f = 1; f < s->n_feat; f++) {
532         /* Top-N codeword 0 */
533         cwdist = dist[f][0].dist;
534         pdf = s->pdf[f][dist[f][0].id];
535 
536 #if TRUNCATE_LOGPDF
537         for (i = 0; i < s->n_sen; i++)
538             featscr[i] = cwdist - (pdf[i] << s->shift);
539 #else
540         for (i = 0; i < s->n_sen; i++)
541             featscr[i] = cwdist - (pdf[i]);
542 #endif
543 
544         /* Remaining top-N codewords */
545         for (k = 1; k < n_top; k++) {
546             cwdist = dist[f][k].dist;
547             pdf = s->pdf[f][dist[f][k].id];
548 
549             for (i = 0; i < s->n_sen; i++) {
550 #if TRUNCATE_LOGPDF
551                 scr = cwdist - (pdf[i] << s->shift);
552 #else
553                 scr = cwdist - (pdf[i]);
554 #endif
555                 featscr[i] = logmath_add(s->logmath, featscr[i], scr);
556             }
557         }
558 
559         for (i = 0; i < s->n_sen; i++)
560             senscr[i] += featscr[i];
561     }
562 }
563