1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <string.h>
40 #include <stdio.h>
41 #include <assert.h>
42 
43 /* SphinxBase headers. */
44 #include <sphinxbase/bio.h>
45 
46 /* Local headers. */
47 #include "ms_senone.h"
48 
49 #define MIXW_PARAM_VERSION	"1.0"
50 #define SPDEF_PARAM_VERSION	"1.2"
51 
52 static int32
senone_mgau_map_read(senone_t * s,char const * file_name)53 senone_mgau_map_read(senone_t * s, char const *file_name)
54 {
55     FILE *fp;
56     int32 byteswap, chksum_present, n_gauden_present;
57     uint32 chksum;
58     int32 i;
59     char eofchk;
60     char **argname, **argval;
61     void *ptr;
62     float32 v;
63 
64     E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
65 
66     if ((fp = fopen(file_name, "rb")) == NULL)
67         E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name);
68 
69     /* Read header, including argument-value info and 32-bit byteorder magic */
70     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
71         E_FATAL("Failed to read header from file '%s'\n", file_name);
72 
73     /* Parse argument-value list */
74     chksum_present = 0;
75     n_gauden_present = 0;
76     for (i = 0; argname[i]; i++) {
77         if (strcmp(argname[i], "version") == 0) {
78             if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
79                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
80                        file_name, argval[i], SPDEF_PARAM_VERSION);
81             }
82 
83             /* HACK!! Convert version# to float32 and take appropriate action */
84             if (sscanf(argval[i], "%f", &v) != 1)
85                 E_FATAL("%s: Bad version no. string: %s\n", file_name,
86                         argval[i]);
87 
88             n_gauden_present = (v > 1.1) ? 1 : 0;
89         }
90         else if (strcmp(argname[i], "chksum0") == 0) {
91             chksum_present = 1; /* Ignore the associated value */
92         }
93     }
94     bio_hdrarg_free(argname, argval);
95     argname = argval = NULL;
96 
97     chksum = 0;
98 
99     /* Read #gauden (if version matches) */
100     if (n_gauden_present) {
101         E_INFO("Reading number of codebooks from %s\n", file_name);
102         if (bio_fread
103             (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
104             E_FATAL("fread(%s) (#gauden) failed\n", file_name);
105     }
106 
107     /* Read 1d array data */
108     if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp,
109 		     byteswap, &chksum) < 0) {
110         E_FATAL("bio_fread_1d(%s) failed\n", file_name);
111     }
112     s->mgau = ptr;
113     E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden);
114 
115     /* Infer n_gauden if not present in this version */
116     if (!n_gauden_present) {
117         s->n_gauden = 1;
118         for (i = 0; i < s->n_sen; i++)
119             if (s->mgau[i] >= s->n_gauden)
120                 s->n_gauden = s->mgau[i] + 1;
121     }
122 
123     if (chksum_present)
124         bio_verify_chksum(fp, byteswap, chksum);
125 
126     if (fread(&eofchk, 1, 1, fp) == 1)
127         E_FATAL("More data than expected in %s: %d\n", file_name, eofchk);
128 
129     fclose(fp);
130 
131     E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
132            s->n_gauden);
133 
134     return 1;
135 }
136 
137 
138 static int32
senone_mixw_read(senone_t * s,char const * file_name,logmath_t * lmath)139 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
140 {
141     char eofchk;
142     FILE *fp;
143     int32 byteswap, chksum_present;
144     uint32 chksum;
145     float32 *pdf;
146     int32 i, f, c, p, n_err;
147     char **argname, **argval;
148 
149     E_INFO("Reading senone mixture weights: %s\n", file_name);
150 
151     if ((fp = fopen(file_name, "rb")) == NULL)
152         E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name);
153 
154     /* Read header, including argument-value info and 32-bit byteorder magic */
155     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
156         E_FATAL("Failed to read header from file '%s'\n", file_name);
157 
158     /* Parse argument-value list */
159     chksum_present = 0;
160     for (i = 0; argname[i]; i++) {
161         if (strcmp(argname[i], "version") == 0) {
162             if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
163                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
164                        file_name, argval[i], MIXW_PARAM_VERSION);
165         }
166         else if (strcmp(argname[i], "chksum0") == 0) {
167             chksum_present = 1; /* Ignore the associated value */
168         }
169     }
170     bio_hdrarg_free(argname, argval);
171     argname = argval = NULL;
172 
173     chksum = 0;
174 
175     /* Read #senones, #features, #codewords, arraysize */
176     if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
177          1)
178         ||
179         (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
180          != 1)
181         || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
182             != 1)
183         || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
184         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
185     }
186     if (i != s->n_sen * s->n_feat * s->n_cw) {
187         E_FATAL
188             ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
189              file_name, i, s->n_sen, s->n_feat, s->n_cw);
190     }
191 
192     /*
193      * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
194      * All PDF values will be truncated (in the LSB positions) by these many bits.
195      */
196     if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
197         E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
198 
199     /* Use a fixed shift for compatibility with everything else. */
200     E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);
201 
202     /*
203      * Allocate memory for senone PDF data.  Organize normally or transposed depending on
204      * s->n_gauden.
205      */
206     if (s->n_gauden > 1) {
207 	E_INFO("Not transposing mixture weights in memory\n");
208         s->pdf =
209             (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
210                                           sizeof(senprob_t));
211     }
212     else {
213 	E_INFO("Transposing mixture weights in memory\n");
214         s->pdf =
215             (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
216                                           sizeof(senprob_t));
217     }
218 
219     /* Temporary structure to read in floats */
220     pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
221 
222     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
223     n_err = 0;
224     for (i = 0; i < s->n_sen; i++) {
225         for (f = 0; f < s->n_feat; f++) {
226             if (bio_fread
227                 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
228                  &chksum)
229                 != s->n_cw) {
230                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
231             }
232 
233             /* Normalize and floor */
234             if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
235                 n_err++;
236             vector_floor(pdf, s->n_cw, s->mixwfloor);
237             vector_sum_norm(pdf, s->n_cw);
238 
239             /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
240             for (c = 0; c < s->n_cw; c++) {
241                 p = -(logmath_log(lmath, pdf[c]));
242                 p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */
243 
244                 if (s->n_gauden > 1)
245                     s->pdf[i][f][c] =
246                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
247                 else
248                     s->pdf[f][c][i] =
249                         (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
250             }
251         }
252     }
253     if (n_err > 0)
254         E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
255 
256     ckd_free(pdf);
257 
258     if (chksum_present)
259         bio_verify_chksum(fp, byteswap, chksum);
260 
261     if (fread(&eofchk, 1, 1, fp) == 1)
262         E_FATAL("More data than expected in %s\n", file_name);
263 
264     fclose(fp);
265 
266     E_INFO
267         ("Read mixture weights for %d senones: %d features x %d codewords\n",
268          s->n_sen, s->n_feat, s->n_cw);
269 
270     return 1;
271 }
272 
273 
274 senone_t *
senone_init(gauden_t * g,char const * mixwfile,char const * sen2mgau_map_file,float32 mixwfloor,logmath_t * lmath,bin_mdef_t * mdef)275 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file,
276 	    float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef)
277 {
278     senone_t *s;
279     int32 n = 0, i;
280 
281     s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
282     s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
283     s->mixwfloor = mixwfloor;
284 
285     s->n_gauden = g->n_mgau;
286     if (sen2mgau_map_file) {
287 	if (!(strcmp(sen2mgau_map_file, ".semi.") == 0
288 	      || strcmp(sen2mgau_map_file, ".ptm.") == 0
289 	      || strcmp(sen2mgau_map_file, ".cont.") == 0)) {
290 	    senone_mgau_map_read(s, sen2mgau_map_file);
291 	    n = s->n_sen;
292 	}
293     }
294     else {
295 	if (s->n_gauden == 1)
296 	    sen2mgau_map_file = ".semi.";
297 	else if (s->n_gauden == bin_mdef_n_ciphone(mdef))
298 	    sen2mgau_map_file = ".ptm.";
299 	else
300 	    sen2mgau_map_file = ".cont.";
301     }
302 
303     senone_mixw_read(s, mixwfile, lmath);
304 
305     if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
306         /* All-to-1 senones-codebook mapping */
307 	E_INFO("Mapping all senones to one codebook\n");
308         s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
309     }
310     else if (strcmp(sen2mgau_map_file, ".ptm.") == 0) {
311         /* All-to-ciphone-id senones-codebook mapping */
312 	E_INFO("Mapping senones to context-independent phone codebooks\n");
313         s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
314         for (i = 0; i < s->n_sen; i++)
315 	    s->mgau[i] = bin_mdef_sen2cimap(mdef, i);
316     }
317     else if (strcmp(sen2mgau_map_file, ".cont.") == 0
318              || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
319         /* 1-to-1 senone-codebook mapping */
320 	E_INFO("Mapping senones to individual codebooks\n");
321         if (s->n_sen <= 1)
322             E_FATAL("#senone=%d; must be >1\n", s->n_sen);
323 
324         s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
325         for (i = 0; i < s->n_sen; i++)
326             s->mgau[i] = i;
327 	/* Not sure why this is here, it probably does nothing. */
328         s->n_gauden = s->n_sen;
329     }
330     else {
331         if (s->n_sen != n)
332             E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
333                     n, sen2mgau_map_file, s->n_sen, mixwfile);
334     }
335 
336     s->featscr = NULL;
337     return s;
338 }
339 
340 void
senone_free(senone_t * s)341 senone_free(senone_t * s)
342 {
343     if (s == NULL)
344         return;
345     if (s->pdf)
346         ckd_free_3d((void *) s->pdf);
347     if (s->mgau)
348         ckd_free(s->mgau);
349     if (s->featscr)
350         ckd_free(s->featscr);
351     logmath_free(s->lmath);
352     ckd_free(s);
353 }
354 
355 
356 /*
357  * Compute senone score for one senone.
358  * NOTE:  Remember that senone PDF tables contain SCALED, NEGATED logs3 values.
359  * NOTE:  Remember also that PDF data may be transposed or not depending on s->n_gauden.
360  */
361 int32
senone_eval(senone_t * s,int id,gauden_dist_t ** dist,int32 n_top)362 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top)
363 {
364     int32 scr;                  /* total senone score */
365     int32 fden;                 /* Gaussian density */
366     int32 fscr;                 /* senone score for one feature */
367     int32 fwscr;                /* senone score for one feature, one codeword */
368     int32 f, t;
369     gauden_dist_t *fdist;
370 
371     assert((id >= 0) && (id < s->n_sen));
372     assert((n_top > 0) && (n_top <= s->n_cw));
373 
374     scr = 0;
375 
376     for (f = 0; f < s->n_feat; f++) {
377         int top;
378         fdist = dist[f];
379 
380         /* Top codeword for feature f */
381 	top = fden = ((int32)fdist[0].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
382         fscr = (s->n_gauden > 1)
383 	    ? (fden + -s->pdf[id][f][fdist[0].id])  /* untransposed */
384 	    : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */
385         E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
386                     id, f, -(fscr - fden), -(fden-top), -(fscr-top)));
387         /* Remaining of n_top codewords for feature f */
388         for (t = 1; t < n_top; t++) {
389 	    fden = ((int32)fdist[t].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
390             fwscr = (s->n_gauden > 1) ?
391                 (fden + -s->pdf[id][f][fdist[t].id]) :
392                 (fden + -s->pdf[f][fdist[t].id][id]);
393             fscr = logmath_add(s->lmath, fscr, fwscr);
394             E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
395                         id, f, -(fwscr - fden), -(fden-top), -(fscr-top)));
396         }
397 	/* Senone scores are also scaled, negated logs3 values.  Hence
398 	 * we have to negate the stuff we calculated above. */
399         scr -= fscr;
400     }
401     /* Downscale scores. */
402     scr /= s->aw;
403 
404     /* Avoid overflowing int16 */
405     if (scr > 32767)
406       scr = 32767;
407     if (scr < -32768)
408       scr = -32768;
409     return scr;
410 }
411