1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38 /* System headers. */
39 #include <string.h>
40 #include <stdio.h>
41 #include <assert.h>
42
43 /* SphinxBase headers. */
44 #include <sphinxbase/bio.h>
45
46 /* Local headers. */
47 #include "ms_senone.h"
48
49 #define MIXW_PARAM_VERSION "1.0"
50 #define SPDEF_PARAM_VERSION "1.2"
51
52 static int32
senone_mgau_map_read(senone_t * s,char const * file_name)53 senone_mgau_map_read(senone_t * s, char const *file_name)
54 {
55 FILE *fp;
56 int32 byteswap, chksum_present, n_gauden_present;
57 uint32 chksum;
58 int32 i;
59 char eofchk;
60 char **argname, **argval;
61 void *ptr;
62 float32 v;
63
64 E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
65
66 if ((fp = fopen(file_name, "rb")) == NULL)
67 E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name);
68
69 /* Read header, including argument-value info and 32-bit byteorder magic */
70 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
71 E_FATAL("Failed to read header from file '%s'\n", file_name);
72
73 /* Parse argument-value list */
74 chksum_present = 0;
75 n_gauden_present = 0;
76 for (i = 0; argname[i]; i++) {
77 if (strcmp(argname[i], "version") == 0) {
78 if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
79 E_WARN("Version mismatch(%s): %s, expecting %s\n",
80 file_name, argval[i], SPDEF_PARAM_VERSION);
81 }
82
83 /* HACK!! Convert version# to float32 and take appropriate action */
84 if (sscanf(argval[i], "%f", &v) != 1)
85 E_FATAL("%s: Bad version no. string: %s\n", file_name,
86 argval[i]);
87
88 n_gauden_present = (v > 1.1) ? 1 : 0;
89 }
90 else if (strcmp(argname[i], "chksum0") == 0) {
91 chksum_present = 1; /* Ignore the associated value */
92 }
93 }
94 bio_hdrarg_free(argname, argval);
95 argname = argval = NULL;
96
97 chksum = 0;
98
99 /* Read #gauden (if version matches) */
100 if (n_gauden_present) {
101 E_INFO("Reading number of codebooks from %s\n", file_name);
102 if (bio_fread
103 (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
104 E_FATAL("fread(%s) (#gauden) failed\n", file_name);
105 }
106
107 /* Read 1d array data */
108 if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp,
109 byteswap, &chksum) < 0) {
110 E_FATAL("bio_fread_1d(%s) failed\n", file_name);
111 }
112 s->mgau = ptr;
113 E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden);
114
115 /* Infer n_gauden if not present in this version */
116 if (!n_gauden_present) {
117 s->n_gauden = 1;
118 for (i = 0; i < s->n_sen; i++)
119 if (s->mgau[i] >= s->n_gauden)
120 s->n_gauden = s->mgau[i] + 1;
121 }
122
123 if (chksum_present)
124 bio_verify_chksum(fp, byteswap, chksum);
125
126 if (fread(&eofchk, 1, 1, fp) == 1)
127 E_FATAL("More data than expected in %s: %d\n", file_name, eofchk);
128
129 fclose(fp);
130
131 E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
132 s->n_gauden);
133
134 return 1;
135 }
136
137
138 static int32
senone_mixw_read(senone_t * s,char const * file_name,logmath_t * lmath)139 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
140 {
141 char eofchk;
142 FILE *fp;
143 int32 byteswap, chksum_present;
144 uint32 chksum;
145 float32 *pdf;
146 int32 i, f, c, p, n_err;
147 char **argname, **argval;
148
149 E_INFO("Reading senone mixture weights: %s\n", file_name);
150
151 if ((fp = fopen(file_name, "rb")) == NULL)
152 E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name);
153
154 /* Read header, including argument-value info and 32-bit byteorder magic */
155 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
156 E_FATAL("Failed to read header from file '%s'\n", file_name);
157
158 /* Parse argument-value list */
159 chksum_present = 0;
160 for (i = 0; argname[i]; i++) {
161 if (strcmp(argname[i], "version") == 0) {
162 if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
163 E_WARN("Version mismatch(%s): %s, expecting %s\n",
164 file_name, argval[i], MIXW_PARAM_VERSION);
165 }
166 else if (strcmp(argname[i], "chksum0") == 0) {
167 chksum_present = 1; /* Ignore the associated value */
168 }
169 }
170 bio_hdrarg_free(argname, argval);
171 argname = argval = NULL;
172
173 chksum = 0;
174
175 /* Read #senones, #features, #codewords, arraysize */
176 if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
177 1)
178 ||
179 (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
180 != 1)
181 || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
182 != 1)
183 || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
184 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
185 }
186 if (i != s->n_sen * s->n_feat * s->n_cw) {
187 E_FATAL
188 ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
189 file_name, i, s->n_sen, s->n_feat, s->n_cw);
190 }
191
192 /*
193 * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
194 * All PDF values will be truncated (in the LSB positions) by these many bits.
195 */
196 if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
197 E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
198
199 /* Use a fixed shift for compatibility with everything else. */
200 E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);
201
202 /*
203 * Allocate memory for senone PDF data. Organize normally or transposed depending on
204 * s->n_gauden.
205 */
206 if (s->n_gauden > 1) {
207 E_INFO("Not transposing mixture weights in memory\n");
208 s->pdf =
209 (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
210 sizeof(senprob_t));
211 }
212 else {
213 E_INFO("Transposing mixture weights in memory\n");
214 s->pdf =
215 (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
216 sizeof(senprob_t));
217 }
218
219 /* Temporary structure to read in floats */
220 pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
221
222 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
223 n_err = 0;
224 for (i = 0; i < s->n_sen; i++) {
225 for (f = 0; f < s->n_feat; f++) {
226 if (bio_fread
227 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
228 &chksum)
229 != s->n_cw) {
230 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
231 }
232
233 /* Normalize and floor */
234 if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
235 n_err++;
236 vector_floor(pdf, s->n_cw, s->mixwfloor);
237 vector_sum_norm(pdf, s->n_cw);
238
239 /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
240 for (c = 0; c < s->n_cw; c++) {
241 p = -(logmath_log(lmath, pdf[c]));
242 p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */
243
244 if (s->n_gauden > 1)
245 s->pdf[i][f][c] =
246 (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
247 else
248 s->pdf[f][c][i] =
249 (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
250 }
251 }
252 }
253 if (n_err > 0)
254 E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
255
256 ckd_free(pdf);
257
258 if (chksum_present)
259 bio_verify_chksum(fp, byteswap, chksum);
260
261 if (fread(&eofchk, 1, 1, fp) == 1)
262 E_FATAL("More data than expected in %s\n", file_name);
263
264 fclose(fp);
265
266 E_INFO
267 ("Read mixture weights for %d senones: %d features x %d codewords\n",
268 s->n_sen, s->n_feat, s->n_cw);
269
270 return 1;
271 }
272
273
274 senone_t *
senone_init(gauden_t * g,char const * mixwfile,char const * sen2mgau_map_file,float32 mixwfloor,logmath_t * lmath,bin_mdef_t * mdef)275 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file,
276 float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef)
277 {
278 senone_t *s;
279 int32 n = 0, i;
280
281 s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
282 s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
283 s->mixwfloor = mixwfloor;
284
285 s->n_gauden = g->n_mgau;
286 if (sen2mgau_map_file) {
287 if (!(strcmp(sen2mgau_map_file, ".semi.") == 0
288 || strcmp(sen2mgau_map_file, ".ptm.") == 0
289 || strcmp(sen2mgau_map_file, ".cont.") == 0)) {
290 senone_mgau_map_read(s, sen2mgau_map_file);
291 n = s->n_sen;
292 }
293 }
294 else {
295 if (s->n_gauden == 1)
296 sen2mgau_map_file = ".semi.";
297 else if (s->n_gauden == bin_mdef_n_ciphone(mdef))
298 sen2mgau_map_file = ".ptm.";
299 else
300 sen2mgau_map_file = ".cont.";
301 }
302
303 senone_mixw_read(s, mixwfile, lmath);
304
305 if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
306 /* All-to-1 senones-codebook mapping */
307 E_INFO("Mapping all senones to one codebook\n");
308 s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
309 }
310 else if (strcmp(sen2mgau_map_file, ".ptm.") == 0) {
311 /* All-to-ciphone-id senones-codebook mapping */
312 E_INFO("Mapping senones to context-independent phone codebooks\n");
313 s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
314 for (i = 0; i < s->n_sen; i++)
315 s->mgau[i] = bin_mdef_sen2cimap(mdef, i);
316 }
317 else if (strcmp(sen2mgau_map_file, ".cont.") == 0
318 || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
319 /* 1-to-1 senone-codebook mapping */
320 E_INFO("Mapping senones to individual codebooks\n");
321 if (s->n_sen <= 1)
322 E_FATAL("#senone=%d; must be >1\n", s->n_sen);
323
324 s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
325 for (i = 0; i < s->n_sen; i++)
326 s->mgau[i] = i;
327 /* Not sure why this is here, it probably does nothing. */
328 s->n_gauden = s->n_sen;
329 }
330 else {
331 if (s->n_sen != n)
332 E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
333 n, sen2mgau_map_file, s->n_sen, mixwfile);
334 }
335
336 s->featscr = NULL;
337 return s;
338 }
339
340 void
senone_free(senone_t * s)341 senone_free(senone_t * s)
342 {
343 if (s == NULL)
344 return;
345 if (s->pdf)
346 ckd_free_3d((void *) s->pdf);
347 if (s->mgau)
348 ckd_free(s->mgau);
349 if (s->featscr)
350 ckd_free(s->featscr);
351 logmath_free(s->lmath);
352 ckd_free(s);
353 }
354
355
356 /*
357 * Compute senone score for one senone.
358 * NOTE: Remember that senone PDF tables contain SCALED, NEGATED logs3 values.
359 * NOTE: Remember also that PDF data may be transposed or not depending on s->n_gauden.
360 */
361 int32
senone_eval(senone_t * s,int id,gauden_dist_t ** dist,int32 n_top)362 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top)
363 {
364 int32 scr; /* total senone score */
365 int32 fden; /* Gaussian density */
366 int32 fscr; /* senone score for one feature */
367 int32 fwscr; /* senone score for one feature, one codeword */
368 int32 f, t;
369 gauden_dist_t *fdist;
370
371 assert((id >= 0) && (id < s->n_sen));
372 assert((n_top > 0) && (n_top <= s->n_cw));
373
374 scr = 0;
375
376 for (f = 0; f < s->n_feat; f++) {
377 int top;
378 fdist = dist[f];
379
380 /* Top codeword for feature f */
381 top = fden = ((int32)fdist[0].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
382 fscr = (s->n_gauden > 1)
383 ? (fden + -s->pdf[id][f][fdist[0].id]) /* untransposed */
384 : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */
385 E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
386 id, f, -(fscr - fden), -(fden-top), -(fscr-top)));
387 /* Remaining of n_top codewords for feature f */
388 for (t = 1; t < n_top; t++) {
389 fden = ((int32)fdist[t].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
390 fwscr = (s->n_gauden > 1) ?
391 (fden + -s->pdf[id][f][fdist[t].id]) :
392 (fden + -s->pdf[f][fdist[t].id][id]);
393 fscr = logmath_add(s->lmath, fscr, fwscr);
394 E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
395 id, f, -(fwscr - fden), -(fden-top), -(fscr-top)));
396 }
397 /* Senone scores are also scaled, negated logs3 values. Hence
398 * we have to negate the stuff we calculated above. */
399 scr -= fscr;
400 }
401 /* Downscale scores. */
402 scr /= s->aw;
403
404 /* Avoid overflowing int16 */
405 if (scr > 32767)
406 scr = 32767;
407 if (scr < -32768)
408 scr = -32768;
409 return scr;
410 }
411