1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2010 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers */
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <limits.h>
44 #include <math.h>
45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
48 #endif
49 
50 /* SphinxBase headers */
51 #include <sphinx_config.h>
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/fixpoint.h>
54 #include <sphinxbase/ckd_alloc.h>
55 #include <sphinxbase/bio.h>
56 #include <sphinxbase/err.h>
57 #include <sphinxbase/prim_type.h>
58 
59 /* Local headers */
60 #include "tied_mgau_common.h"
61 #include "ptm_mgau.h"
62 
63 static ps_mgaufuncs_t ptm_mgau_funcs = {
64     "ptm",
65     ptm_mgau_frame_eval,      /* frame_eval */
66     ptm_mgau_mllr_transform,  /* transform */
67     ptm_mgau_free             /* free */
68 };
69 
70 #define COMPUTE_GMM_MAP(_idx)                           \
71     diff[_idx] = obs[_idx] - mean[_idx];                \
72     sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]);     \
73     compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
74 #define COMPUTE_GMM_REDUCE(_idx)                \
75     d = GMMSUB(d, compl[_idx]);
76 
77 static void
78 insertion_sort_topn(ptm_topn_t *topn, int i, int32 d)
79 {
80     ptm_topn_t vtmp;
81     int j;
82 
83     topn[i].score = d;
84     if (i == 0)
85         return;
86     vtmp = topn[i];
87     for (j = i - 1; j >= 0 && d > topn[j].score; j--) {
88         topn[j + 1] = topn[j];
89     }
90     topn[j + 1] = vtmp;
91 }
92 
93 static int
94 eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
95 {
96     ptm_topn_t *topn;
97     int i, ceplen;
98 
99     topn = s->f->topn[cb][feat];
100     ceplen = s->g->featlen[feat];
101 
102     for (i = 0; i < s->max_topn; i++) {
103         mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
104         mfcc_t *var, d;
105         mfcc_t *obs;
106         int32 cw, j;
107 
108         cw = topn[i].cw;
109         mean = s->g->mean[cb][feat][0] + cw * ceplen;
110         var = s->g->var[cb][feat][0] + cw * ceplen;
111         d = s->g->det[cb][feat][cw];
112         obs = z;
113         for (j = 0; j < ceplen % 4; ++j) {
114             diff[0] = *obs++ - *mean++;
115             sqdiff[0] = MFCCMUL(diff[0], diff[0]);
116             compl[0] = MFCCMUL(sqdiff[0], *var);
117             d = GMMSUB(d, compl[0]);
118             ++var;
119         }
120         /* We could vectorize this but it's unlikely to make much
121          * difference as the outer loop here isn't very big. */
122         for (;j < ceplen; j += 4) {
123             COMPUTE_GMM_MAP(0);
124             COMPUTE_GMM_MAP(1);
125             COMPUTE_GMM_MAP(2);
126             COMPUTE_GMM_MAP(3);
127             COMPUTE_GMM_REDUCE(0);
128             COMPUTE_GMM_REDUCE(1);
129             COMPUTE_GMM_REDUCE(2);
130             COMPUTE_GMM_REDUCE(3);
131             var += 4;
132             obs += 4;
133             mean += 4;
134         }
135         insertion_sort_topn(topn, i, (int32)d);
136     }
137 
138     return topn[0].score;
139 }
140 
141 /* This looks bad, but it actually isn't.  Less than 1% of eval_cb's
142  * time is spent doing this. */
143 static void
144 insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best,
145                   int cw, int32 intd)
146 {
147     for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur)
148         memcpy(*cur + 1, *cur, sizeof(**cur));
149     ++*cur;
150     (*cur)->cw = cw;
151     (*cur)->score = intd;
152 }
153 
154 static int
155 eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
156 {
157     ptm_topn_t *worst, *best, *topn;
158     mfcc_t *mean;
159     mfcc_t *var, *det, *detP, *detE;
160     int32 i, ceplen;
161 
162     best = topn = s->f->topn[cb][feat];
163     worst = topn + (s->max_topn - 1);
164     mean = s->g->mean[cb][feat][0];
165     var = s->g->var[cb][feat][0];
166     det = s->g->det[cb][feat];
167     detE = det + s->g->n_density;
168     ceplen = s->g->featlen[feat];
169 
170     for (detP = det; detP < detE; ++detP) {
171         mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
172         mfcc_t d, thresh;
173         mfcc_t *obs;
174         ptm_topn_t *cur;
175         int32 cw, j;
176 
177         d = *detP;
178         thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */
179         obs = z;
180         cw = (int)(detP - det);
181 
182         /* Unroll the loop starting with the first dimension(s).  In
183          * theory this might be a bit faster if this Gaussian gets
184          * "knocked out" by C0. In practice not. */
185         for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
186             diff[0] = *obs++ - *mean++;
187             sqdiff[0] = MFCCMUL(diff[0], diff[0]);
188             compl[0] = MFCCMUL(sqdiff[0], *var++);
189             d = GMMSUB(d, compl[0]);
190         }
191         /* Now do 4 dimensions at a time.  You'd think that GCC would
192          * vectorize this?  Apparently not.  And it's right, because
193          * that won't make this any faster, at least on x86-64. */
194         for (; j < ceplen && d >= thresh; j += 4) {
195             COMPUTE_GMM_MAP(0);
196             COMPUTE_GMM_MAP(1);
197             COMPUTE_GMM_MAP(2);
198             COMPUTE_GMM_MAP(3);
199             COMPUTE_GMM_REDUCE(0);
200             COMPUTE_GMM_REDUCE(1);
201             COMPUTE_GMM_REDUCE(2);
202             COMPUTE_GMM_REDUCE(3);
203             var += 4;
204             obs += 4;
205             mean += 4;
206         }
207         if (j < ceplen) {
208             /* terminated early, so not in topn */
209             mean += (ceplen - j);
210             var += (ceplen - j);
211             continue;
212         }
213         if (d < thresh)
214             continue;
215         for (i = 0; i < s->max_topn; i++) {
216             /* already there, so don't need to insert */
217             if (topn[i].cw == cw)
218                 break;
219         }
220         if (i < s->max_topn)
221             continue;       /* already there.  Don't insert */
222         insertion_sort_cb(&cur, worst, best, cw, (int32)d);
223     }
224 
225     return best->score;
226 }
227 
228 /**
229  * Compute top-N densities for active codebooks (and prune)
230  */
231 static int
232 ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame)
233 {
234     int i, j;
235 
236     /* First evaluate top-N from previous frame. */
237     for (i = 0; i < s->g->n_mgau; ++i)
238         for (j = 0; j < s->g->n_feat; ++j)
239             eval_topn(s, i, j, z[j]);
240 
241     /* If frame downsampling is in effect, possibly do nothing else. */
242     if (frame % s->ds_ratio)
243         return 0;
244 
245     /* Evaluate remaining codebooks. */
246     for (i = 0; i < s->g->n_mgau; ++i) {
247         if (bitvec_is_clear(s->f->mgau_active, i))
248             continue;
249         for (j = 0; j < s->g->n_feat; ++j) {
250             eval_cb(s, i, j, z[j]);
251         }
252     }
253     return 0;
254 }
255 
256 /**
257  * Normalize densities to produce "posterior probabilities",
258  * i.e. things with a reasonable dynamic range, then scale and
259  * clamp them to the acceptable range.  This is actually done
260  * solely to ensure that we can use fast_logmath_add().  Note that
261  * unless we share the same normalizer across all codebooks for
262  * each feature stream we get defective scores (that's why these
263  * loops are inside out - doing it per-feature should give us
264  * greater precision). */
265 static int
266 ptm_mgau_codebook_norm(ptm_mgau_t *s, mfcc_t **z, int frame)
267 {
268     int i, j;
269 
270     for (j = 0; j < s->g->n_feat; ++j) {
271         int32 norm = WORST_SCORE;
272         for (i = 0; i < s->g->n_mgau; ++i) {
273             if (bitvec_is_clear(s->f->mgau_active, i))
274                 continue;
275             if (norm < s->f->topn[i][j][0].score >> SENSCR_SHIFT)
276                 norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT;
277         }
278         assert(norm != WORST_SCORE);
279         for (i = 0; i < s->g->n_mgau; ++i) {
280             int32 k;
281             if (bitvec_is_clear(s->f->mgau_active, i))
282                 continue;
283             for (k = 0; k < s->max_topn; ++k) {
284                 s->f->topn[i][j][k].score >>= SENSCR_SHIFT;
285                 s->f->topn[i][j][k].score -= norm;
286                 s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score;
287                 if (s->f->topn[i][j][k].score > MAX_NEG_ASCR)
288                     s->f->topn[i][j][k].score = MAX_NEG_ASCR;
289             }
290         }
291     }
292 
293     return 0;
294 }
295 
296 static int
297 ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active,
298                         int32 n_senone_active, int compallsen)
299 {
300     int i, lastsen;
301 
302     if (compallsen) {
303         bitvec_set_all(s->f->mgau_active, s->g->n_mgau);
304         return 0;
305     }
306     bitvec_clear_all(s->f->mgau_active, s->g->n_mgau);
307     for (lastsen = i = 0; i < n_senone_active; ++i) {
308         int sen = senone_active[i] + lastsen;
309         int cb = s->sen2cb[sen];
310         bitvec_set(s->f->mgau_active, cb);
311         lastsen = sen;
312     }
313     E_DEBUG(1, ("Active codebooks:"));
314     for (i = 0; i < s->g->n_mgau; ++i) {
315         if (bitvec_is_clear(s->f->mgau_active, i))
316             continue;
317         E_DEBUGCONT(1, (" %d", i));
318     }
319     E_DEBUGCONT(1, ("\n"));
320     return 0;
321 }
322 
323 /**
324  * Compute senone scores from top-N densities for active codebooks.
325  */
326 static int
327 ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores,
328                      uint8 *senone_active, int32 n_senone_active,
329                      int compall)
330 {
331     int i, lastsen, bestscore;
332 
333     memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
334     /* FIXME: This is the non-cache-efficient way to do this.  We want
335      * to evaluate one codeword at a time but this requires us to have
336      * a reverse codebook to senone mapping, which we don't have
337      * (yet), since different codebooks have different top-N
338      * codewords. */
339     if (compall)
340         n_senone_active = s->n_sen;
341     bestscore = 0x7fffffff;
342     for (lastsen = i = 0; i < n_senone_active; ++i) {
343         int sen, f, cb;
344         int ascore;
345 
346         if (compall)
347             sen = i;
348         else
349             sen = senone_active[i] + lastsen;
350         lastsen = sen;
351         cb = s->sen2cb[sen];
352 
353         if (bitvec_is_clear(s->f->mgau_active, cb)) {
354             int j;
355             /* Because senone_active is deltas we can't really "knock
356              * out" senones from pruned codebooks, and in any case,
357              * it wouldn't make any difference to the search code,
358              * which doesn't expect senone_active to change. */
359             for (f = 0; f < s->g->n_feat; ++f) {
360                 for (j = 0; j < s->max_topn; ++j) {
361                     s->f->topn[cb][f][j].score = MAX_NEG_ASCR;
362                 }
363             }
364         }
365         /* For each feature, log-sum codeword scores + mixw to get
366          * feature density, then sum (multiply) to get ascore */
367         ascore = 0;
368         for (f = 0; f < s->g->n_feat; ++f) {
369             ptm_topn_t *topn;
370             int j, fden = 0;
371             topn = s->f->topn[cb][f];
372             for (j = 0; j < s->max_topn; ++j) {
373                 int mixw;
374                 /* Find mixture weight for this codeword. */
375                 if (s->mixw_cb) {
376                     int dcw = s->mixw[f][topn[j].cw][sen/2];
377                     dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
378                     mixw = s->mixw_cb[dcw];
379                 }
380                 else {
381                     mixw = s->mixw[f][topn[j].cw][sen];
382                 }
383                 if (j == 0)
384                     fden = mixw + topn[j].score;
385                 else
386                     fden = fast_logmath_add(s->lmath_8b, fden,
387                                        mixw + topn[j].score);
388                 E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n",
389                             sen, f, mixw, topn[j].score, fden));
390             }
391             ascore += fden;
392         }
393         if (ascore < bestscore) bestscore = ascore;
394         senone_scores[sen] = ascore;
395     }
396     /* Normalize the scores again (finishing the job we started above
397      * in ptm_mgau_codebook_eval...) */
398     for (i = 0; i < s->n_sen; ++i) {
399         senone_scores[i] -= bestscore;
400     }
401 
402     return 0;
403 }
404 
405 /**
406  * Compute senone scores for the active senones.
407  */
408 int32
409 ptm_mgau_frame_eval(ps_mgau_t *ps,
410                     int16 *senone_scores,
411                     uint8 *senone_active,
412                     int32 n_senone_active,
413                     mfcc_t ** featbuf, int32 frame,
414                     int32 compallsen)
415 {
416     ptm_mgau_t *s = (ptm_mgau_t *)ps;
417     int fast_eval_idx;
418 
419     /* Find the appropriate frame in the rotating history buffer
420      * corresponding to the requested input frame.  No bounds checking
421      * is done here, which just means you'll get semi-random crap if
422      * you request a frame in the future or one that's too far in the
423      * past.  Since the history buffer is just used for fast match
424      * that might not be fatal. */
425     fast_eval_idx = frame % s->n_fast_hist;
426     s->f = s->hist + fast_eval_idx;
427     /* Compute the top-N codewords for every codebook, unless this
428      * is a past frame, in which case we already have them (we
429      * hope!) */
430     if (frame >= ps_mgau_base(ps)->frame_idx) {
431         ptm_fast_eval_t *lastf;
432         /* Get the previous frame's top-N information (on the
433          * first frame of the input this is just all WORST_DIST,
434          * no harm in that) */
435         if (fast_eval_idx == 0)
436             lastf = s->hist + s->n_fast_hist - 1;
437         else
438             lastf = s->hist + fast_eval_idx - 1;
439         /* Copy in initial top-N info */
440         memcpy(s->f->topn[0][0], lastf->topn[0][0],
441                s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t));
442         /* Generate initial active codebook list (this might not be
443          * necessary) */
444         ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
445         /* Now evaluate top-N, prune, and evaluate remaining codebooks. */
446         ptm_mgau_codebook_eval(s, featbuf, frame);
447         ptm_mgau_codebook_norm(s, featbuf, frame);
448     }
449     /* Evaluate intersection of active senones and active codebooks. */
450     ptm_mgau_senone_eval(s, senone_scores, senone_active,
451                          n_senone_active, compallsen);
452 
453     return 0;
454 }
455 
456 static int32
457 read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file)
458 {
459     FILE *fp;
460     char line[1000];
461     int32 i, n, r, c;
462     int32 do_swap, do_mmap;
463     size_t offset;
464     int n_clust = 0;
465     int n_feat = s->g->n_feat;
466     int n_density = s->g->n_density;
467     int n_sen = bin_mdef_n_sen(mdef);
468     int n_bits = 8;
469 
470     s->n_sen = n_sen; /* FIXME: Should have been done earlier */
471     do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
472 
473     if ((fp = fopen(file, "rb")) == NULL)
474         return -1;
475 
476     E_INFO("Loading senones from dump file %s\n", file);
477     /* Read title size, title */
478     if (fread(&n, sizeof(int32), 1, fp) != 1) {
479         E_ERROR_SYSTEM("Failed to read title size from %s", file);
480         goto error_out;
481     }
482     /* This is extremely bogus */
483     do_swap = 0;
484     if (n < 1 || n > 999) {
485         SWAP_INT32(&n);
486         if (n < 1 || n > 999) {
487             E_ERROR("Title length %x in dump file %s out of range\n", n, file);
488             goto error_out;
489         }
490         do_swap = 1;
491     }
492     if (fread(line, sizeof(char), n, fp) != n) {
493         E_ERROR_SYSTEM("Cannot read title");
494         goto error_out;
495     }
496     if (line[n - 1] != '\0') {
497         E_ERROR("Bad title in dump file\n");
498         goto error_out;
499     }
500     E_INFO("%s\n", line);
501 
502     /* Read header size, header */
503     if (fread(&n, sizeof(n), 1, fp) != 1) {
504         E_ERROR_SYSTEM("Failed to read header size from %s", file);
505         goto error_out;
506     }
507     if (do_swap) SWAP_INT32(&n);
508     if (fread(line, sizeof(char), n, fp) != n) {
509         E_ERROR_SYSTEM("Cannot read header");
510         goto error_out;
511     }
512     if (line[n - 1] != '\0') {
513         E_ERROR("Bad header in dump file\n");
514         goto error_out;
515     }
516 
517     /* Read other header strings until string length = 0 */
518     for (;;) {
519         if (fread(&n, sizeof(n), 1, fp) != 1) {
520             E_ERROR_SYSTEM("Failed to read header string size from %s", file);
521             goto error_out;
522         }
523         if (do_swap) SWAP_INT32(&n);
524         if (n == 0)
525             break;
526         if (fread(line, sizeof(char), n, fp) != n) {
527             E_ERROR_SYSTEM("Cannot read header");
528             goto error_out;
529         }
530         /* Look for a cluster count, if present */
531         if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
532             n_feat = atoi(line + strlen("feature_count "));
533         }
534         if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
535             n_density = atoi(line + strlen("mixture_count "));
536         }
537         if (!strncmp(line, "model_count ", strlen("model_count "))) {
538             n_sen = atoi(line + strlen("model_count "));
539         }
540         if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
541             n_clust = atoi(line + strlen("cluster_count "));
542         }
543         if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
544             n_bits = atoi(line + strlen("cluster_bits "));
545         }
546     }
547 
548     /* Defaults for #rows, #columns in mixw array. */
549     c = n_sen;
550     r = n_density;
551     if (n_clust == 0) {
552         /* Older mixw files have them here, and they might be padded. */
553         if (fread(&r, sizeof(r), 1, fp) != 1) {
554             E_ERROR_SYSTEM("Cannot read #rows");
555             goto error_out;
556         }
557         if (do_swap) SWAP_INT32(&r);
558         if (fread(&c, sizeof(c), 1, fp) != 1) {
559             E_ERROR_SYSTEM("Cannot read #columns");
560             goto error_out;
561         }
562         if (do_swap) SWAP_INT32(&c);
563         E_INFO("Rows: %d, Columns: %d\n", r, c);
564     }
565 
566     if (n_feat != s->g->n_feat) {
567         E_ERROR("Number of feature streams mismatch: %d != %d\n",
568                 n_feat, s->g->n_feat);
569         goto error_out;
570     }
571     if (n_density != s->g->n_density) {
572         E_ERROR("Number of densities mismatch: %d != %d\n",
573                 n_density, s->g->n_density);
574         goto error_out;
575     }
576     if (n_sen != s->n_sen) {
577         E_ERROR("Number of senones mismatch: %d != %d\n",
578                 n_sen, s->n_sen);
579         goto error_out;
580     }
581 
582     if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
583         E_ERROR("Cluster count must be 0, 15, or 16\n");
584         goto error_out;
585     }
586     if (n_clust == 15)
587         ++n_clust;
588 
589     if (!((n_bits == 8) || (n_bits == 4))) {
590         E_ERROR("Cluster count must be 4 or 8\n");
591         goto error_out;
592     }
593 
594     if (do_mmap) {
595             E_INFO("Using memory-mapped I/O for senones\n");
596     }
597     offset = ftell(fp);
598 
599     /* Allocate memory for pdfs (or memory map them) */
600     if (do_mmap) {
601         s->sendump_mmap = mmio_file_read(file);
602         /* Get cluster codebook if any. */
603         if (n_clust) {
604             s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
605             offset += n_clust;
606         }
607     }
608     else {
609         /* Get cluster codebook if any. */
610         if (n_clust) {
611             s->mixw_cb = ckd_calloc(1, n_clust);
612             if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
613                 E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
614                 goto error_out;
615             }
616         }
617     }
618 
619     /* Set up pointers, or read, or whatever */
620     if (s->sendump_mmap) {
621         s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
622         for (n = 0; n < n_feat; n++) {
623             int step = c;
624             if (n_bits == 4)
625                 step = (step + 1) / 2;
626             for (i = 0; i < r; i++) {
627                 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
628                 offset += step;
629             }
630         }
631     }
632     else {
633         s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
634         /* Read pdf values and ids */
635         for (n = 0; n < n_feat; n++) {
636             int step = c;
637             if (n_bits == 4)
638                 step = (step + 1) / 2;
639             for (i = 0; i < r; i++) {
640                 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
641                     != (size_t) step) {
642                     E_ERROR("Failed to read %d bytes from sendump\n", step);
643                     goto error_out;
644                 }
645             }
646         }
647     }
648 
649     fclose(fp);
650     return 0;
651 error_out:
652     fclose(fp);
653     return -1;
654 }
655 
656 static int32
657 read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin)
658 {
659     char **argname, **argval;
660     char eofchk;
661     FILE *fp;
662     int32 byteswap, chksum_present;
663     uint32 chksum;
664     float32 *pdf;
665     int32 i, f, c, n;
666     int32 n_sen;
667     int32 n_feat;
668     int32 n_comp;
669     int32 n_err;
670 
671     E_INFO("Reading mixture weights file '%s'\n", file_name);
672 
673     if ((fp = fopen(file_name, "rb")) == NULL)
674         E_FATAL_SYSTEM("Failed to open mixture file '%s' for reading", file_name);
675 
676     /* Read header, including argument-value info and 32-bit byteorder magic */
677     if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
678         E_FATAL("Failed to read header from '%s'\n", file_name);
679 
680     /* Parse argument-value list */
681     chksum_present = 0;
682     for (i = 0; argname[i]; i++) {
683         if (strcmp(argname[i], "version") == 0) {
684             if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
685                 E_WARN("Version mismatch(%s): %s, expecting %s\n",
686                        file_name, argval[i], MGAU_MIXW_VERSION);
687         }
688         else if (strcmp(argname[i], "chksum0") == 0) {
689             chksum_present = 1; /* Ignore the associated value */
690         }
691     }
692     bio_hdrarg_free(argname, argval);
693     argname = argval = NULL;
694 
695     chksum = 0;
696 
697     /* Read #senones, #features, #codewords, arraysize */
698     if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
699         || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
700             1)
701         || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
702             1)
703         || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
704         E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
705     }
706     if (n_feat != s->g->n_feat)
707         E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
708     if (n != n_sen * n_feat * n_comp) {
709         E_FATAL
710             ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
711              file_name, i, n_sen, n_feat, n_comp);
712     }
713 
714     /* n_sen = number of mixture weights per codeword, which is
715      * fixed at the number of senones since we have only one codebook.
716      */
717     s->n_sen = n_sen;
718 
719     /* Quantized mixture weight arrays. */
720     s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density,
721                             n_sen, sizeof(***s->mixw));
722 
723     /* Temporary structure to read in floats before conversion to (int32) logs3 */
724     pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
725 
726     /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
727     n_err = 0;
728     for (i = 0; i < n_sen; i++) {
729         for (f = 0; f < n_feat; f++) {
730             if (bio_fread((void *) pdf, sizeof(float32),
731                           n_comp, fp, byteswap, &chksum) != n_comp) {
732                 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
733             }
734 
735             /* Normalize and floor */
736             if (vector_sum_norm(pdf, n_comp) <= 0.0)
737                 n_err++;
738             vector_floor(pdf, n_comp, SmoothMin);
739             vector_sum_norm(pdf, n_comp);
740 
741             /* Convert to LOG, quantize, and transpose */
742             for (c = 0; c < n_comp; c++) {
743                 int32 qscr;
744 
745                 qscr = -logmath_log(s->lmath_8b, pdf[c]);
746                 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
747                     qscr = MAX_NEG_MIXW;
748                 s->mixw[f][c][i] = qscr;
749             }
750         }
751     }
752     if (n_err > 0)
753         E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
754 
755     ckd_free(pdf);
756 
757     if (chksum_present)
758         bio_verify_chksum(fp, byteswap, chksum);
759 
760     if (fread(&eofchk, 1, 1, fp) == 1)
761         E_FATAL("More data than expected in %s\n", file_name);
762 
763     fclose(fp);
764 
765     E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
766     return n_sen;
767 }
768 
769 ps_mgau_t *
770 ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef)
771 {
772     ptm_mgau_t *s;
773     ps_mgau_t *ps;
774     char const *sendump_path;
775     int i;
776 
777     s = ckd_calloc(1, sizeof(*s));
778     s->config = acmod->config;
779 
780     s->lmath = logmath_retain(acmod->lmath);
781     /* Log-add table. */
782     s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
783     if (s->lmath_8b == NULL)
784         goto error_out;
785     /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
786     if (logmath_get_width(s->lmath_8b) != 1) {
787         E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
788                 logmath_get_base(s->lmath_8b));
789         goto error_out;
790     }
791 
792     /* Read means and variances. */
793     if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"),
794                             cmd_ln_str_r(s->config, "-var"),
795                             cmd_ln_float32_r(s->config, "-varfloor"),
796                             s->lmath)) == NULL)
797         goto error_out;
798     /* We only support 256 codebooks or less (like 640k or 2GB, this
799      * should be enough for anyone) */
800     if (s->g->n_mgau > 256) {
801         E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau);
802         goto error_out;
803     }
804     if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) {
805         E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef));
806         goto error_out;
807     }
808     /* Verify n_feat and veclen, against acmod. */
809     if (s->g->n_feat != feat_dimension1(acmod->fcb)) {
810         E_ERROR("Number of streams does not match: %d != %d\n",
811                 s->g->n_feat, feat_dimension1(acmod->fcb));
812         goto error_out;
813     }
814     for (i = 0; i < s->g->n_feat; ++i) {
815         if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
816             E_ERROR("Dimension of stream %d does not match: %d != %d\n",
817                     s->g->featlen[i], feat_dimension2(acmod->fcb, i));
818             goto error_out;
819         }
820     }
821     /* Read mixture weights. */
822     if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
823         if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
824             goto error_out;
825         }
826     }
827     else {
828         if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
829                       cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
830             goto error_out;
831         }
832     }
833     s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
834     s->max_topn = cmd_ln_int32_r(s->config, "-topn");
835     E_INFO("Maximum top-N: %d\n", s->max_topn);
836 
837     /* Assume mapping of senones to their base phones, though this
838      * will become more flexible in the future. */
839     s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb));
840     for (i = 0; i < s->n_sen; ++i)
841         s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i);
842 
843     /* Allocate fast-match history buffers.  We need enough for the
844      * phoneme lookahead window, plus the current frame, plus one for
845      * good measure? (FIXME: I don't remember why) */
846     s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
847     s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist));
848     /* s->f will be a rotating pointer into s->hist. */
849     s->f = s->hist;
850     for (i = 0; i < s->n_fast_hist; ++i) {
851         int j, k, m;
852         /* Top-N codewords for every codebook and feature. */
853         s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat,
854                                         s->max_topn, sizeof(ptm_topn_t));
855         /* Initialize them to sane (yet arbitrary) defaults. */
856         for (j = 0; j < s->g->n_mgau; ++j) {
857             for (k = 0; k < s->g->n_feat; ++k) {
858                 for (m = 0; m < s->max_topn; ++m) {
859                     s->hist[i].topn[j][k][m].cw = m;
860                     s->hist[i].topn[j][k][m].score = WORST_DIST;
861                 }
862             }
863         }
864         /* Active codebook mapping (just codebook, not features,
865            at least not yet) */
866         s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau);
867         /* Start with them all on, prune them later. */
868         bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau);
869     }
870 
871     ps = (ps_mgau_t *)s;
872     ps->vt = &ptm_mgau_funcs;
873     return ps;
874 error_out:
875     ptm_mgau_free(ps_mgau_base(s));
876     return NULL;
877 }
878 
879 int
880 ptm_mgau_mllr_transform(ps_mgau_t *ps,
881                             ps_mllr_t *mllr)
882 {
883     ptm_mgau_t *s = (ptm_mgau_t *)ps;
884     return gauden_mllr_transform(s->g, mllr, s->config);
885 }
886 
887 void
888 ptm_mgau_free(ps_mgau_t *ps)
889 {
890     int i;
891     ptm_mgau_t *s = (ptm_mgau_t *)ps;
892 
893     logmath_free(s->lmath);
894     logmath_free(s->lmath_8b);
895     if (s->sendump_mmap) {
896         ckd_free_2d(s->mixw);
897         mmio_file_unmap(s->sendump_mmap);
898     }
899     else {
900         ckd_free_3d(s->mixw);
901     }
902     ckd_free(s->sen2cb);
903 
904     for (i = 0; i < s->n_fast_hist; i++) {
905 	ckd_free_3d(s->hist[i].topn);
906 	bitvec_free(s->hist[i].mgau_active);
907     }
908     ckd_free(s->hist);
909 
910     gauden_free(s->g);
911     ckd_free(s);
912 }
913