1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* ==================================================================== 3 * Copyright (c) 1999-2010 Carnegie Mellon University. All rights 4 * reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * This work was supported in part by funding from the Defense Advanced 19 * Research Projects Agency and the National Science Foundation of the 20 * United States of America, and the CMU Sphinx Speech Consortium. 21 * 22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * 34 * ==================================================================== 35 * 36 */ 37 38 /* System headers */ 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <assert.h> 43 #include <limits.h> 44 #include <math.h> 45 #if defined(__ADSPBLACKFIN__) 46 #elif !defined(_WIN32_WCE) 47 #include <sys/types.h> 48 #endif 49 50 /* SphinxBase headers */ 51 #include <sphinx_config.h> 52 #include <sphinxbase/cmd_ln.h> 53 #include <sphinxbase/fixpoint.h> 54 #include <sphinxbase/ckd_alloc.h> 55 #include <sphinxbase/bio.h> 56 #include <sphinxbase/err.h> 57 #include <sphinxbase/prim_type.h> 58 59 /* Local headers */ 60 #include "tied_mgau_common.h" 61 #include "ptm_mgau.h" 62 63 static ps_mgaufuncs_t ptm_mgau_funcs = { 64 "ptm", 65 ptm_mgau_frame_eval, /* frame_eval */ 66 ptm_mgau_mllr_transform, /* transform */ 67 ptm_mgau_free /* free */ 68 }; 69 70 #define COMPUTE_GMM_MAP(_idx) \ 71 diff[_idx] = obs[_idx] - mean[_idx]; \ 72 sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \ 73 compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]); 74 #define COMPUTE_GMM_REDUCE(_idx) \ 75 d = GMMSUB(d, compl[_idx]); 76 77 static void 78 insertion_sort_topn(ptm_topn_t *topn, int i, int32 d) 79 { 80 ptm_topn_t vtmp; 81 int j; 82 83 topn[i].score = d; 84 if (i == 0) 85 return; 86 vtmp = topn[i]; 87 for (j = i - 1; j >= 0 && d > topn[j].score; j--) { 88 topn[j + 1] = topn[j]; 89 } 90 topn[j + 1] = vtmp; 91 } 92 93 static int 94 eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z) 95 { 96 ptm_topn_t *topn; 97 int i, ceplen; 98 99 topn = s->f->topn[cb][feat]; 100 ceplen = s->g->featlen[feat]; 101 102 for (i = 0; i < s->max_topn; i++) { 103 mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */ 104 mfcc_t *var, d; 105 mfcc_t *obs; 106 int32 cw, j; 107 108 cw = topn[i].cw; 109 mean = s->g->mean[cb][feat][0] + cw * ceplen; 110 var = s->g->var[cb][feat][0] + cw * ceplen; 111 d = s->g->det[cb][feat][cw]; 112 obs = z; 113 for (j = 0; j < ceplen % 4; ++j) { 114 diff[0] = *obs++ - *mean++; 115 sqdiff[0] = MFCCMUL(diff[0], diff[0]); 116 compl[0] = MFCCMUL(sqdiff[0], *var); 117 d = GMMSUB(d, compl[0]); 118 ++var; 119 } 120 /* We could vectorize this but it's unlikely to make much 121 * difference as the outer loop here isn't very big. */ 122 for (;j < ceplen; j += 4) { 123 COMPUTE_GMM_MAP(0); 124 COMPUTE_GMM_MAP(1); 125 COMPUTE_GMM_MAP(2); 126 COMPUTE_GMM_MAP(3); 127 COMPUTE_GMM_REDUCE(0); 128 COMPUTE_GMM_REDUCE(1); 129 COMPUTE_GMM_REDUCE(2); 130 COMPUTE_GMM_REDUCE(3); 131 var += 4; 132 obs += 4; 133 mean += 4; 134 } 135 insertion_sort_topn(topn, i, (int32)d); 136 } 137 138 return topn[0].score; 139 } 140 141 /* This looks bad, but it actually isn't. Less than 1% of eval_cb's 142 * time is spent doing this. */ 143 static void 144 insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best, 145 int cw, int32 intd) 146 { 147 for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur) 148 memcpy(*cur + 1, *cur, sizeof(**cur)); 149 ++*cur; 150 (*cur)->cw = cw; 151 (*cur)->score = intd; 152 } 153 154 static int 155 eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z) 156 { 157 ptm_topn_t *worst, *best, *topn; 158 mfcc_t *mean; 159 mfcc_t *var, *det, *detP, *detE; 160 int32 i, ceplen; 161 162 best = topn = s->f->topn[cb][feat]; 163 worst = topn + (s->max_topn - 1); 164 mean = s->g->mean[cb][feat][0]; 165 var = s->g->var[cb][feat][0]; 166 det = s->g->det[cb][feat]; 167 detE = det + s->g->n_density; 168 ceplen = s->g->featlen[feat]; 169 170 for (detP = det; detP < detE; ++detP) { 171 mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */ 172 mfcc_t d, thresh; 173 mfcc_t *obs; 174 ptm_topn_t *cur; 175 int32 cw, j; 176 177 d = *detP; 178 thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */ 179 obs = z; 180 cw = (int)(detP - det); 181 182 /* Unroll the loop starting with the first dimension(s). In 183 * theory this might be a bit faster if this Gaussian gets 184 * "knocked out" by C0. In practice not. */ 185 for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) { 186 diff[0] = *obs++ - *mean++; 187 sqdiff[0] = MFCCMUL(diff[0], diff[0]); 188 compl[0] = MFCCMUL(sqdiff[0], *var++); 189 d = GMMSUB(d, compl[0]); 190 } 191 /* Now do 4 dimensions at a time. You'd think that GCC would 192 * vectorize this? Apparently not. And it's right, because 193 * that won't make this any faster, at least on x86-64. */ 194 for (; j < ceplen && d >= thresh; j += 4) { 195 COMPUTE_GMM_MAP(0); 196 COMPUTE_GMM_MAP(1); 197 COMPUTE_GMM_MAP(2); 198 COMPUTE_GMM_MAP(3); 199 COMPUTE_GMM_REDUCE(0); 200 COMPUTE_GMM_REDUCE(1); 201 COMPUTE_GMM_REDUCE(2); 202 COMPUTE_GMM_REDUCE(3); 203 var += 4; 204 obs += 4; 205 mean += 4; 206 } 207 if (j < ceplen) { 208 /* terminated early, so not in topn */ 209 mean += (ceplen - j); 210 var += (ceplen - j); 211 continue; 212 } 213 if (d < thresh) 214 continue; 215 for (i = 0; i < s->max_topn; i++) { 216 /* already there, so don't need to insert */ 217 if (topn[i].cw == cw) 218 break; 219 } 220 if (i < s->max_topn) 221 continue; /* already there. Don't insert */ 222 insertion_sort_cb(&cur, worst, best, cw, (int32)d); 223 } 224 225 return best->score; 226 } 227 228 /** 229 * Compute top-N densities for active codebooks (and prune) 230 */ 231 static int 232 ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame) 233 { 234 int i, j; 235 236 /* First evaluate top-N from previous frame. */ 237 for (i = 0; i < s->g->n_mgau; ++i) 238 for (j = 0; j < s->g->n_feat; ++j) 239 eval_topn(s, i, j, z[j]); 240 241 /* If frame downsampling is in effect, possibly do nothing else. */ 242 if (frame % s->ds_ratio) 243 return 0; 244 245 /* Evaluate remaining codebooks. */ 246 for (i = 0; i < s->g->n_mgau; ++i) { 247 if (bitvec_is_clear(s->f->mgau_active, i)) 248 continue; 249 for (j = 0; j < s->g->n_feat; ++j) { 250 eval_cb(s, i, j, z[j]); 251 } 252 } 253 return 0; 254 } 255 256 /** 257 * Normalize densities to produce "posterior probabilities", 258 * i.e. things with a reasonable dynamic range, then scale and 259 * clamp them to the acceptable range. This is actually done 260 * solely to ensure that we can use fast_logmath_add(). Note that 261 * unless we share the same normalizer across all codebooks for 262 * each feature stream we get defective scores (that's why these 263 * loops are inside out - doing it per-feature should give us 264 * greater precision). */ 265 static int 266 ptm_mgau_codebook_norm(ptm_mgau_t *s, mfcc_t **z, int frame) 267 { 268 int i, j; 269 270 for (j = 0; j < s->g->n_feat; ++j) { 271 int32 norm = WORST_SCORE; 272 for (i = 0; i < s->g->n_mgau; ++i) { 273 if (bitvec_is_clear(s->f->mgau_active, i)) 274 continue; 275 if (norm < s->f->topn[i][j][0].score >> SENSCR_SHIFT) 276 norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT; 277 } 278 assert(norm != WORST_SCORE); 279 for (i = 0; i < s->g->n_mgau; ++i) { 280 int32 k; 281 if (bitvec_is_clear(s->f->mgau_active, i)) 282 continue; 283 for (k = 0; k < s->max_topn; ++k) { 284 s->f->topn[i][j][k].score >>= SENSCR_SHIFT; 285 s->f->topn[i][j][k].score -= norm; 286 s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score; 287 if (s->f->topn[i][j][k].score > MAX_NEG_ASCR) 288 s->f->topn[i][j][k].score = MAX_NEG_ASCR; 289 } 290 } 291 } 292 293 return 0; 294 } 295 296 static int 297 ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active, 298 int32 n_senone_active, int compallsen) 299 { 300 int i, lastsen; 301 302 if (compallsen) { 303 bitvec_set_all(s->f->mgau_active, s->g->n_mgau); 304 return 0; 305 } 306 bitvec_clear_all(s->f->mgau_active, s->g->n_mgau); 307 for (lastsen = i = 0; i < n_senone_active; ++i) { 308 int sen = senone_active[i] + lastsen; 309 int cb = s->sen2cb[sen]; 310 bitvec_set(s->f->mgau_active, cb); 311 lastsen = sen; 312 } 313 E_DEBUG(1, ("Active codebooks:")); 314 for (i = 0; i < s->g->n_mgau; ++i) { 315 if (bitvec_is_clear(s->f->mgau_active, i)) 316 continue; 317 E_DEBUGCONT(1, (" %d", i)); 318 } 319 E_DEBUGCONT(1, ("\n")); 320 return 0; 321 } 322 323 /** 324 * Compute senone scores from top-N densities for active codebooks. 325 */ 326 static int 327 ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores, 328 uint8 *senone_active, int32 n_senone_active, 329 int compall) 330 { 331 int i, lastsen, bestscore; 332 333 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores)); 334 /* FIXME: This is the non-cache-efficient way to do this. We want 335 * to evaluate one codeword at a time but this requires us to have 336 * a reverse codebook to senone mapping, which we don't have 337 * (yet), since different codebooks have different top-N 338 * codewords. */ 339 if (compall) 340 n_senone_active = s->n_sen; 341 bestscore = 0x7fffffff; 342 for (lastsen = i = 0; i < n_senone_active; ++i) { 343 int sen, f, cb; 344 int ascore; 345 346 if (compall) 347 sen = i; 348 else 349 sen = senone_active[i] + lastsen; 350 lastsen = sen; 351 cb = s->sen2cb[sen]; 352 353 if (bitvec_is_clear(s->f->mgau_active, cb)) { 354 int j; 355 /* Because senone_active is deltas we can't really "knock 356 * out" senones from pruned codebooks, and in any case, 357 * it wouldn't make any difference to the search code, 358 * which doesn't expect senone_active to change. */ 359 for (f = 0; f < s->g->n_feat; ++f) { 360 for (j = 0; j < s->max_topn; ++j) { 361 s->f->topn[cb][f][j].score = MAX_NEG_ASCR; 362 } 363 } 364 } 365 /* For each feature, log-sum codeword scores + mixw to get 366 * feature density, then sum (multiply) to get ascore */ 367 ascore = 0; 368 for (f = 0; f < s->g->n_feat; ++f) { 369 ptm_topn_t *topn; 370 int j, fden = 0; 371 topn = s->f->topn[cb][f]; 372 for (j = 0; j < s->max_topn; ++j) { 373 int mixw; 374 /* Find mixture weight for this codeword. */ 375 if (s->mixw_cb) { 376 int dcw = s->mixw[f][topn[j].cw][sen/2]; 377 dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f; 378 mixw = s->mixw_cb[dcw]; 379 } 380 else { 381 mixw = s->mixw[f][topn[j].cw][sen]; 382 } 383 if (j == 0) 384 fden = mixw + topn[j].score; 385 else 386 fden = fast_logmath_add(s->lmath_8b, fden, 387 mixw + topn[j].score); 388 E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n", 389 sen, f, mixw, topn[j].score, fden)); 390 } 391 ascore += fden; 392 } 393 if (ascore < bestscore) bestscore = ascore; 394 senone_scores[sen] = ascore; 395 } 396 /* Normalize the scores again (finishing the job we started above 397 * in ptm_mgau_codebook_eval...) */ 398 for (i = 0; i < s->n_sen; ++i) { 399 senone_scores[i] -= bestscore; 400 } 401 402 return 0; 403 } 404 405 /** 406 * Compute senone scores for the active senones. 407 */ 408 int32 409 ptm_mgau_frame_eval(ps_mgau_t *ps, 410 int16 *senone_scores, 411 uint8 *senone_active, 412 int32 n_senone_active, 413 mfcc_t ** featbuf, int32 frame, 414 int32 compallsen) 415 { 416 ptm_mgau_t *s = (ptm_mgau_t *)ps; 417 int fast_eval_idx; 418 419 /* Find the appropriate frame in the rotating history buffer 420 * corresponding to the requested input frame. No bounds checking 421 * is done here, which just means you'll get semi-random crap if 422 * you request a frame in the future or one that's too far in the 423 * past. Since the history buffer is just used for fast match 424 * that might not be fatal. */ 425 fast_eval_idx = frame % s->n_fast_hist; 426 s->f = s->hist + fast_eval_idx; 427 /* Compute the top-N codewords for every codebook, unless this 428 * is a past frame, in which case we already have them (we 429 * hope!) */ 430 if (frame >= ps_mgau_base(ps)->frame_idx) { 431 ptm_fast_eval_t *lastf; 432 /* Get the previous frame's top-N information (on the 433 * first frame of the input this is just all WORST_DIST, 434 * no harm in that) */ 435 if (fast_eval_idx == 0) 436 lastf = s->hist + s->n_fast_hist - 1; 437 else 438 lastf = s->hist + fast_eval_idx - 1; 439 /* Copy in initial top-N info */ 440 memcpy(s->f->topn[0][0], lastf->topn[0][0], 441 s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t)); 442 /* Generate initial active codebook list (this might not be 443 * necessary) */ 444 ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen); 445 /* Now evaluate top-N, prune, and evaluate remaining codebooks. */ 446 ptm_mgau_codebook_eval(s, featbuf, frame); 447 ptm_mgau_codebook_norm(s, featbuf, frame); 448 } 449 /* Evaluate intersection of active senones and active codebooks. */ 450 ptm_mgau_senone_eval(s, senone_scores, senone_active, 451 n_senone_active, compallsen); 452 453 return 0; 454 } 455 456 static int32 457 read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file) 458 { 459 FILE *fp; 460 char line[1000]; 461 int32 i, n, r, c; 462 int32 do_swap, do_mmap; 463 size_t offset; 464 int n_clust = 0; 465 int n_feat = s->g->n_feat; 466 int n_density = s->g->n_density; 467 int n_sen = bin_mdef_n_sen(mdef); 468 int n_bits = 8; 469 470 s->n_sen = n_sen; /* FIXME: Should have been done earlier */ 471 do_mmap = cmd_ln_boolean_r(s->config, "-mmap"); 472 473 if ((fp = fopen(file, "rb")) == NULL) 474 return -1; 475 476 E_INFO("Loading senones from dump file %s\n", file); 477 /* Read title size, title */ 478 if (fread(&n, sizeof(int32), 1, fp) != 1) { 479 E_ERROR_SYSTEM("Failed to read title size from %s", file); 480 goto error_out; 481 } 482 /* This is extremely bogus */ 483 do_swap = 0; 484 if (n < 1 || n > 999) { 485 SWAP_INT32(&n); 486 if (n < 1 || n > 999) { 487 E_ERROR("Title length %x in dump file %s out of range\n", n, file); 488 goto error_out; 489 } 490 do_swap = 1; 491 } 492 if (fread(line, sizeof(char), n, fp) != n) { 493 E_ERROR_SYSTEM("Cannot read title"); 494 goto error_out; 495 } 496 if (line[n - 1] != '\0') { 497 E_ERROR("Bad title in dump file\n"); 498 goto error_out; 499 } 500 E_INFO("%s\n", line); 501 502 /* Read header size, header */ 503 if (fread(&n, sizeof(n), 1, fp) != 1) { 504 E_ERROR_SYSTEM("Failed to read header size from %s", file); 505 goto error_out; 506 } 507 if (do_swap) SWAP_INT32(&n); 508 if (fread(line, sizeof(char), n, fp) != n) { 509 E_ERROR_SYSTEM("Cannot read header"); 510 goto error_out; 511 } 512 if (line[n - 1] != '\0') { 513 E_ERROR("Bad header in dump file\n"); 514 goto error_out; 515 } 516 517 /* Read other header strings until string length = 0 */ 518 for (;;) { 519 if (fread(&n, sizeof(n), 1, fp) != 1) { 520 E_ERROR_SYSTEM("Failed to read header string size from %s", file); 521 goto error_out; 522 } 523 if (do_swap) SWAP_INT32(&n); 524 if (n == 0) 525 break; 526 if (fread(line, sizeof(char), n, fp) != n) { 527 E_ERROR_SYSTEM("Cannot read header"); 528 goto error_out; 529 } 530 /* Look for a cluster count, if present */ 531 if (!strncmp(line, "feature_count ", strlen("feature_count "))) { 532 n_feat = atoi(line + strlen("feature_count ")); 533 } 534 if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) { 535 n_density = atoi(line + strlen("mixture_count ")); 536 } 537 if (!strncmp(line, "model_count ", strlen("model_count "))) { 538 n_sen = atoi(line + strlen("model_count ")); 539 } 540 if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) { 541 n_clust = atoi(line + strlen("cluster_count ")); 542 } 543 if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) { 544 n_bits = atoi(line + strlen("cluster_bits ")); 545 } 546 } 547 548 /* Defaults for #rows, #columns in mixw array. */ 549 c = n_sen; 550 r = n_density; 551 if (n_clust == 0) { 552 /* Older mixw files have them here, and they might be padded. */ 553 if (fread(&r, sizeof(r), 1, fp) != 1) { 554 E_ERROR_SYSTEM("Cannot read #rows"); 555 goto error_out; 556 } 557 if (do_swap) SWAP_INT32(&r); 558 if (fread(&c, sizeof(c), 1, fp) != 1) { 559 E_ERROR_SYSTEM("Cannot read #columns"); 560 goto error_out; 561 } 562 if (do_swap) SWAP_INT32(&c); 563 E_INFO("Rows: %d, Columns: %d\n", r, c); 564 } 565 566 if (n_feat != s->g->n_feat) { 567 E_ERROR("Number of feature streams mismatch: %d != %d\n", 568 n_feat, s->g->n_feat); 569 goto error_out; 570 } 571 if (n_density != s->g->n_density) { 572 E_ERROR("Number of densities mismatch: %d != %d\n", 573 n_density, s->g->n_density); 574 goto error_out; 575 } 576 if (n_sen != s->n_sen) { 577 E_ERROR("Number of senones mismatch: %d != %d\n", 578 n_sen, s->n_sen); 579 goto error_out; 580 } 581 582 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) { 583 E_ERROR("Cluster count must be 0, 15, or 16\n"); 584 goto error_out; 585 } 586 if (n_clust == 15) 587 ++n_clust; 588 589 if (!((n_bits == 8) || (n_bits == 4))) { 590 E_ERROR("Cluster count must be 4 or 8\n"); 591 goto error_out; 592 } 593 594 if (do_mmap) { 595 E_INFO("Using memory-mapped I/O for senones\n"); 596 } 597 offset = ftell(fp); 598 599 /* Allocate memory for pdfs (or memory map them) */ 600 if (do_mmap) { 601 s->sendump_mmap = mmio_file_read(file); 602 /* Get cluster codebook if any. */ 603 if (n_clust) { 604 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; 605 offset += n_clust; 606 } 607 } 608 else { 609 /* Get cluster codebook if any. */ 610 if (n_clust) { 611 s->mixw_cb = ckd_calloc(1, n_clust); 612 if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) { 613 E_ERROR("Failed to read %d bytes from sendump\n", n_clust); 614 goto error_out; 615 } 616 } 617 } 618 619 /* Set up pointers, or read, or whatever */ 620 if (s->sendump_mmap) { 621 s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw)); 622 for (n = 0; n < n_feat; n++) { 623 int step = c; 624 if (n_bits == 4) 625 step = (step + 1) / 2; 626 for (i = 0; i < r; i++) { 627 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; 628 offset += step; 629 } 630 } 631 } 632 else { 633 s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw)); 634 /* Read pdf values and ids */ 635 for (n = 0; n < n_feat; n++) { 636 int step = c; 637 if (n_bits == 4) 638 step = (step + 1) / 2; 639 for (i = 0; i < r; i++) { 640 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp) 641 != (size_t) step) { 642 E_ERROR("Failed to read %d bytes from sendump\n", step); 643 goto error_out; 644 } 645 } 646 } 647 } 648 649 fclose(fp); 650 return 0; 651 error_out: 652 fclose(fp); 653 return -1; 654 } 655 656 static int32 657 read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin) 658 { 659 char **argname, **argval; 660 char eofchk; 661 FILE *fp; 662 int32 byteswap, chksum_present; 663 uint32 chksum; 664 float32 *pdf; 665 int32 i, f, c, n; 666 int32 n_sen; 667 int32 n_feat; 668 int32 n_comp; 669 int32 n_err; 670 671 E_INFO("Reading mixture weights file '%s'\n", file_name); 672 673 if ((fp = fopen(file_name, "rb")) == NULL) 674 E_FATAL_SYSTEM("Failed to open mixture file '%s' for reading", file_name); 675 676 /* Read header, including argument-value info and 32-bit byteorder magic */ 677 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 678 E_FATAL("Failed to read header from '%s'\n", file_name); 679 680 /* Parse argument-value list */ 681 chksum_present = 0; 682 for (i = 0; argname[i]; i++) { 683 if (strcmp(argname[i], "version") == 0) { 684 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0) 685 E_WARN("Version mismatch(%s): %s, expecting %s\n", 686 file_name, argval[i], MGAU_MIXW_VERSION); 687 } 688 else if (strcmp(argname[i], "chksum0") == 0) { 689 chksum_present = 1; /* Ignore the associated value */ 690 } 691 } 692 bio_hdrarg_free(argname, argval); 693 argname = argval = NULL; 694 695 chksum = 0; 696 697 /* Read #senones, #features, #codewords, arraysize */ 698 if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1) 699 || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 700 1) 701 || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) != 702 1) 703 || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { 704 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); 705 } 706 if (n_feat != s->g->n_feat) 707 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat); 708 if (n != n_sen * n_feat * n_comp) { 709 E_FATAL 710 ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n", 711 file_name, i, n_sen, n_feat, n_comp); 712 } 713 714 /* n_sen = number of mixture weights per codeword, which is 715 * fixed at the number of senones since we have only one codebook. 716 */ 717 s->n_sen = n_sen; 718 719 /* Quantized mixture weight arrays. */ 720 s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density, 721 n_sen, sizeof(***s->mixw)); 722 723 /* Temporary structure to read in floats before conversion to (int32) logs3 */ 724 pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32)); 725 726 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ 727 n_err = 0; 728 for (i = 0; i < n_sen; i++) { 729 for (f = 0; f < n_feat; f++) { 730 if (bio_fread((void *) pdf, sizeof(float32), 731 n_comp, fp, byteswap, &chksum) != n_comp) { 732 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); 733 } 734 735 /* Normalize and floor */ 736 if (vector_sum_norm(pdf, n_comp) <= 0.0) 737 n_err++; 738 vector_floor(pdf, n_comp, SmoothMin); 739 vector_sum_norm(pdf, n_comp); 740 741 /* Convert to LOG, quantize, and transpose */ 742 for (c = 0; c < n_comp; c++) { 743 int32 qscr; 744 745 qscr = -logmath_log(s->lmath_8b, pdf[c]); 746 if ((qscr > MAX_NEG_MIXW) || (qscr < 0)) 747 qscr = MAX_NEG_MIXW; 748 s->mixw[f][c][i] = qscr; 749 } 750 } 751 } 752 if (n_err > 0) 753 E_WARN("Weight normalization failed for %d mixture weights components\n", n_err); 754 755 ckd_free(pdf); 756 757 if (chksum_present) 758 bio_verify_chksum(fp, byteswap, chksum); 759 760 if (fread(&eofchk, 1, 1, fp) == 1) 761 E_FATAL("More data than expected in %s\n", file_name); 762 763 fclose(fp); 764 765 E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp); 766 return n_sen; 767 } 768 769 ps_mgau_t * 770 ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef) 771 { 772 ptm_mgau_t *s; 773 ps_mgau_t *ps; 774 char const *sendump_path; 775 int i; 776 777 s = ckd_calloc(1, sizeof(*s)); 778 s->config = acmod->config; 779 780 s->lmath = logmath_retain(acmod->lmath); 781 /* Log-add table. */ 782 s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE); 783 if (s->lmath_8b == NULL) 784 goto error_out; 785 /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */ 786 if (logmath_get_width(s->lmath_8b) != 1) { 787 E_ERROR("Log base %f is too small to represent add table in 8 bits\n", 788 logmath_get_base(s->lmath_8b)); 789 goto error_out; 790 } 791 792 /* Read means and variances. */ 793 if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"), 794 cmd_ln_str_r(s->config, "-var"), 795 cmd_ln_float32_r(s->config, "-varfloor"), 796 s->lmath)) == NULL) 797 goto error_out; 798 /* We only support 256 codebooks or less (like 640k or 2GB, this 799 * should be enough for anyone) */ 800 if (s->g->n_mgau > 256) { 801 E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau); 802 goto error_out; 803 } 804 if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) { 805 E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef)); 806 goto error_out; 807 } 808 /* Verify n_feat and veclen, against acmod. */ 809 if (s->g->n_feat != feat_dimension1(acmod->fcb)) { 810 E_ERROR("Number of streams does not match: %d != %d\n", 811 s->g->n_feat, feat_dimension1(acmod->fcb)); 812 goto error_out; 813 } 814 for (i = 0; i < s->g->n_feat; ++i) { 815 if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) { 816 E_ERROR("Dimension of stream %d does not match: %d != %d\n", 817 s->g->featlen[i], feat_dimension2(acmod->fcb, i)); 818 goto error_out; 819 } 820 } 821 /* Read mixture weights. */ 822 if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) { 823 if (read_sendump(s, acmod->mdef, sendump_path) < 0) { 824 goto error_out; 825 } 826 } 827 else { 828 if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"), 829 cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) { 830 goto error_out; 831 } 832 } 833 s->ds_ratio = cmd_ln_int32_r(s->config, "-ds"); 834 s->max_topn = cmd_ln_int32_r(s->config, "-topn"); 835 E_INFO("Maximum top-N: %d\n", s->max_topn); 836 837 /* Assume mapping of senones to their base phones, though this 838 * will become more flexible in the future. */ 839 s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb)); 840 for (i = 0; i < s->n_sen; ++i) 841 s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i); 842 843 /* Allocate fast-match history buffers. We need enough for the 844 * phoneme lookahead window, plus the current frame, plus one for 845 * good measure? (FIXME: I don't remember why) */ 846 s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2; 847 s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist)); 848 /* s->f will be a rotating pointer into s->hist. */ 849 s->f = s->hist; 850 for (i = 0; i < s->n_fast_hist; ++i) { 851 int j, k, m; 852 /* Top-N codewords for every codebook and feature. */ 853 s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat, 854 s->max_topn, sizeof(ptm_topn_t)); 855 /* Initialize them to sane (yet arbitrary) defaults. */ 856 for (j = 0; j < s->g->n_mgau; ++j) { 857 for (k = 0; k < s->g->n_feat; ++k) { 858 for (m = 0; m < s->max_topn; ++m) { 859 s->hist[i].topn[j][k][m].cw = m; 860 s->hist[i].topn[j][k][m].score = WORST_DIST; 861 } 862 } 863 } 864 /* Active codebook mapping (just codebook, not features, 865 at least not yet) */ 866 s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau); 867 /* Start with them all on, prune them later. */ 868 bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau); 869 } 870 871 ps = (ps_mgau_t *)s; 872 ps->vt = &ptm_mgau_funcs; 873 return ps; 874 error_out: 875 ptm_mgau_free(ps_mgau_base(s)); 876 return NULL; 877 } 878 879 int 880 ptm_mgau_mllr_transform(ps_mgau_t *ps, 881 ps_mllr_t *mllr) 882 { 883 ptm_mgau_t *s = (ptm_mgau_t *)ps; 884 return gauden_mllr_transform(s->g, mllr, s->config); 885 } 886 887 void 888 ptm_mgau_free(ps_mgau_t *ps) 889 { 890 int i; 891 ptm_mgau_t *s = (ptm_mgau_t *)ps; 892 893 logmath_free(s->lmath); 894 logmath_free(s->lmath_8b); 895 if (s->sendump_mmap) { 896 ckd_free_2d(s->mixw); 897 mmio_file_unmap(s->sendump_mmap); 898 } 899 else { 900 ckd_free_3d(s->mixw); 901 } 902 ckd_free(s->sen2cb); 903 904 for (i = 0; i < s->n_fast_hist; i++) { 905 ckd_free_3d(s->hist[i].topn); 906 bitvec_free(s->hist[i].mgau_active); 907 } 908 ckd_free(s->hist); 909 910 gauden_free(s->g); 911 ckd_free(s); 912 } 913