1 /* ==================================================================== 2 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 3 * reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * 17 * This work was supported in part by funding from the Defense Advanced 18 * Research Projects Agency and the National Science Foundation of the 19 * United States of America, and the CMU Sphinx Speech Consortium. 20 * 21 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 22 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 25 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * ==================================================================== 34 * 35 */ 36 /* 37 * senone.c -- Mixture density weights associated with each tied state. 38 * 39 * ********************************************** 40 * CMU ARPA Speech Project 41 * 42 * Copyright (c) 1996 Carnegie Mellon University. 43 * ALL RIGHTS RESERVED. 44 * ********************************************** 45 * 46 * HISTORY 47 * 48 * $Log$ 49 * Revision 1.6 2006/02/22 17:27:39 arthchan2003 50 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, NOT doing truncation in the multi-stream GMM computation \n. 2, Added .s3cont. to be the alias of the old multi-stream GMM computation routine \n. 3, Added license \n. 4, Fixed dox-doc. \n 51 * 52 * Revision 1.5.4.5 2006/01/17 22:57:06 arthchan2003 53 * Add directive TRUNCATE_LOGPDF in ms_senone.c 54 * 55 * Revision 1.5.4.4 2006/01/16 19:47:05 arthchan2003 56 * Removed the truncation of senone probability code. 57 * 58 * Revision 1.5.4.3 2005/08/03 18:53:43 dhdfu 59 * Add memory deallocation functions. Also move all the initialization 60 * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it 61 * from decode_anytopo and friends. 62 * 63 * Revision 1.5.4.2 2005/08/02 21:06:33 arthchan2003 64 * Change options such that .s3cont. works as well. 65 * 66 * Revision 1.5.4.1 2005/07/20 19:39:01 arthchan2003 67 * Added licences in ms_* series of code. 68 * 69 * Revision 1.5 2005/06/21 18:57:31 arthchan2003 70 * 1, Fixed doxygen documentation. 2, Added $ keyword. 71 * 72 * Revision 1.1.1.1 2005/03/24 15:24:00 archan 73 * I found Evandro's suggestion is quite right after yelling at him 2 days later. So I decide to check this in again without any binaries. (I have done make distcheck. ) . Again, this is a candidate for s3.6 and I believe I need to work out 4-5 intermediate steps before I can complete the first prototype. That's why I keep local copies. 74 * 75 * Revision 1.4 2004/12/05 12:01:31 arthchan2003 76 * 1, move libutil/libutil.h to s3types.h, seems to me not very nice to have it in every files. 2, Remove warning messages of main_align.c 3, Remove warning messages in chgCase.c 77 * 78 * Revision 1.3 2004/11/13 21:25:19 arthchan2003 79 * commit of 1, absolute CI-GMMS , 2, fast CI senone computation using svq, 3, Decrease the number of static variables, 4, fixing the random generator problem of vector_vqgen, 5, move all unused files to NOTUSED 80 * 81 * Revision 1.2 2004/08/09 01:02:33 arthchan2003 82 * check in the windows setup for align 83 * 84 * Revision 1.1 2004/08/09 00:17:11 arthchan2003 85 * Incorporating s3.0 align, at this point, there are still some small problems in align but they don't hurt. For example, the score doesn't match with s3.0 and the output will have problem if files are piped to /dev/null/. I think we can go for it. 86 * 87 * Revision 1.1 2003/02/14 14:40:34 cbq 88 * Compiles. Analysis is probably hosed. 89 * 90 * Revision 1.1 2000/04/24 09:39:41 lenzo 91 * s3 import. 92 * 93 * 94 * 06-Mar-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 95 * Added handling of .semi. and .cont. special cases for senone-mgau 96 * mapping. 97 * 98 * 20-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 99 * Changed senone_mixw_read and senone_mgau_map_read to use the new 100 * libio/bio_fread functions. 101 * 102 * 20-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 103 * Modified senone_eval to accommodate both normal and transposed 104 * senone organization. 105 * 106 * 13-Dec-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 107 * Added implementation of senone_mgau_map_read. 108 * Added senone_eval_all() optimized for the semicontinuous case. 109 * 110 * 12-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. 111 * Created. 112 */ 113 114 115 #include "ms_senone.h" 116 #include "logs3.h" 117 118 #include "bio.h" 119 #include <string.h> 120 #include <assert.h> 121 122 123 #define MIXW_PARAM_VERSION "1.0" 124 #define SPDEF_PARAM_VERSION "1.2" 125 126 127 static int32 128 senone_mgau_map_read(senone_t * s, const char *file_name) 129 { 130 FILE *fp; 131 int32 byteswap, chksum_present, n_gauden_present; 132 uint32 chksum; 133 int32 i; 134 char eofchk; 135 char **argname, **argval; 136 float32 v; 137 138 E_INFO("Reading senone gauden-codebook map file: %s\n", file_name); 139 140 if ((fp = fopen(file_name, "rb")) == NULL) 141 E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name); 142 143 /* Read header, including argument-value info and 32-bit byteorder magic */ 144 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 145 E_FATAL("bio_readhdr(%s) failed\n", file_name); 146 147 /* Parse argument-value list */ 148 chksum_present = 0; 149 n_gauden_present = 0; 150 for (i = 0; argname[i]; i++) { 151 if (strcmp(argname[i], "version") == 0) { 152 if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) { 153 E_WARN("Version mismatch(%s): %s, expecting %s\n", 154 file_name, argval[i], SPDEF_PARAM_VERSION); 155 } 156 157 /* HACK!! Convert version# to float32 and take appropriate action */ 158 if (sscanf(argval[i], "%f", &v) != 1) 159 E_FATAL("%s: Bad version no. string: %s\n", file_name, 160 argval[i]); 161 162 n_gauden_present = (v > 1.1) ? 1 : 0; 163 } 164 else if (strcmp(argname[i], "chksum0") == 0) { 165 chksum_present = 1; /* Ignore the associated value */ 166 } 167 } 168 bio_hdrarg_free(argname, argval); 169 argname = argval = NULL; 170 171 chksum = 0; 172 173 /* Read #gauden (if version matches) */ 174 if (n_gauden_present) { 175 if (bio_fread 176 (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1) 177 E_FATAL("fread(%s) (#gauden) failed\n", file_name); 178 } 179 180 /* Read 1d array data */ 181 if (bio_fread_1d 182 ((void **) (&s->mgau), sizeof(s3mgauid_t), &(s->n_sen), fp, 183 byteswap, &chksum) < 0) { 184 E_FATAL("bio_fread_1d(%s) failed\n", file_name); 185 } 186 187 /* Infer n_gauden if not present in this version */ 188 if (!n_gauden_present) { 189 s->n_gauden = 1; 190 for (i = 0; i < s->n_sen; i++) 191 if (s->mgau[i] >= s->n_gauden) 192 s->n_gauden = s->mgau[i] + 1; 193 } 194 195 if (chksum_present) 196 bio_verify_chksum(fp, byteswap, chksum); 197 198 if (fread(&eofchk, 1, 1, fp) == 1) 199 E_FATAL("More data than expected in %s\n", file_name); 200 201 fclose(fp); 202 203 E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen, 204 s->n_gauden); 205 206 return 1; 207 } 208 209 210 static int32 211 senone_mixw_read(senone_t * s, const char *file_name) 212 { 213 char eofchk; 214 FILE *fp; 215 int32 byteswap, chksum_present; 216 uint32 chksum; 217 float32 *pdf; 218 int32 i, f, c, p, n_err; 219 char **argname, **argval; 220 221 E_INFO("Reading senone mixture weights: %s\n", file_name); 222 223 if ((fp = fopen(file_name, "rb")) == NULL) 224 E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name); 225 226 /* Read header, including argument-value info and 32-bit byteorder magic */ 227 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) 228 E_FATAL("bio_readhdr(%s) failed\n", file_name); 229 230 /* Parse argument-value list */ 231 chksum_present = 0; 232 for (i = 0; argname[i]; i++) { 233 if (strcmp(argname[i], "version") == 0) { 234 if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0) 235 E_WARN("Version mismatch(%s): %s, expecting %s\n", 236 file_name, argval[i], MIXW_PARAM_VERSION); 237 } 238 else if (strcmp(argname[i], "chksum0") == 0) { 239 chksum_present = 1; /* Ignore the associated value */ 240 } 241 } 242 bio_hdrarg_free(argname, argval); 243 argname = argval = NULL; 244 245 chksum = 0; 246 247 /* Read #senones, #features, #codewords, arraysize */ 248 if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 249 1) 250 || 251 (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum) 252 != 1) 253 || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum) 254 != 1) 255 || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { 256 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); 257 } 258 if (i != s->n_sen * s->n_feat * s->n_cw) { 259 E_FATAL 260 ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n", 261 file_name, i, s->n_sen, s->n_feat, s->n_cw); 262 } 263 264 /* 265 * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits. 266 * All PDF values will be truncated (in the LSB positions) by these many bits. 267 */ 268 if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0)) 269 E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor); 270 271 p = logs3(s->logmath, s->mixwfloor); 272 273 #if TRUNCATE_LOGPDF 274 for (s->shift = 0, p = -p; p >= 256; s->shift++, p >>= 1); 275 E_INFO("Truncating senone logs3(pdf) values by %d bits, to 8 bits\n", 276 s->shift); 277 #endif 278 279 /* 280 * Allocate memory for senone PDF data. Organize normally or transposed depending on 281 * s->n_gauden. 282 */ 283 if (s->n_gauden > 1) { 284 s->pdf = 285 (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw, 286 sizeof(senprob_t)); 287 } 288 else { 289 s->pdf = 290 (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen, 291 sizeof(senprob_t)); 292 } 293 294 /* Temporary structure to read in floats */ 295 pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32)); 296 297 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ 298 n_err = 0; 299 for (i = 0; i < s->n_sen; i++) { 300 for (f = 0; f < s->n_feat; f++) { 301 if (bio_fread 302 ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap, 303 &chksum) 304 != s->n_cw) { 305 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); 306 } 307 308 /* Normalize and floor */ 309 if (vector_sum_norm(pdf, s->n_cw) <= 0.0) 310 n_err++; 311 vector_floor(pdf, s->n_cw, s->mixwfloor); 312 vector_sum_norm(pdf, s->n_cw); 313 314 /* Convert to logs3, truncate to 8 bits, and store in s->pdf */ 315 for (c = 0; c < s->n_cw; c++) { 316 p = -(logs3(s->logmath, pdf[c])); 317 318 #if TRUNCATE_LOGPDF 319 p += (1 << (s->shift - 1)) - 1; /* Rounding before truncation */ 320 321 if (s->n_gauden > 1) 322 s->pdf[i][f][c] = 323 (p < (255 << s->shift)) ? (p >> s->shift) : 255; 324 else 325 s->pdf[f][c][i] = 326 (p < (255 << s->shift)) ? (p >> s->shift) : 255; 327 328 #else 329 if (s->n_gauden > 1) 330 s->pdf[i][f][c] = p; 331 else 332 s->pdf[f][c][i] = p; 333 #endif 334 } 335 } 336 } 337 if (n_err > 0) 338 E_ERROR("Weight normalization failed for %d senones\n", n_err); 339 340 ckd_free(pdf); 341 342 if (chksum_present) 343 bio_verify_chksum(fp, byteswap, chksum); 344 345 if (fread(&eofchk, 1, 1, fp) == 1) 346 E_FATAL("More data than expected in %s\n", file_name); 347 348 fclose(fp); 349 350 E_INFO 351 ("Read mixture weights for %d senones: %d features x %d codewords\n", 352 s->n_sen, s->n_feat, s->n_cw); 353 354 return 1; 355 } 356 357 358 senone_t * 359 senone_init(const char *mixwfile, const char *sen2mgau_map_file, float32 mixwfloor, logmath_t *logmath) 360 { 361 senone_t *s; 362 int32 n = 0, i; 363 364 assert(sen2mgau_map_file); 365 366 s = (senone_t *) ckd_calloc(1, sizeof(senone_t)); 367 s->logmath = logmath; 368 s->mixwfloor = mixwfloor; 369 370 371 if (strcmp(sen2mgau_map_file, ".semi.") == 0) 372 s->n_gauden = 1; 373 else if (strcmp(sen2mgau_map_file, ".cont.") == 0 374 || strcmp(sen2mgau_map_file, ".s3cont.") == 0) 375 s->n_gauden = 2; /* HACK!! Dummy value >1 for the moment; fixed below */ 376 else { 377 senone_mgau_map_read(s, sen2mgau_map_file); 378 n = s->n_sen; 379 } 380 381 senone_mixw_read(s, mixwfile); 382 383 if (strcmp(sen2mgau_map_file, ".semi.") == 0) { 384 /* All-to-1 senones-codebook mapping */ 385 s->mgau = (s3mgauid_t *) ckd_calloc(s->n_sen, sizeof(s3mgauid_t)); 386 } 387 else if (strcmp(sen2mgau_map_file, ".cont.") == 0 388 || strcmp(sen2mgau_map_file, ".s3cont.") == 0) { 389 /* 1-to-1 senone-codebook mapping */ 390 if (s->n_sen <= 1) 391 E_FATAL("#senone=%d; must be >1\n", s->n_sen); 392 393 s->mgau = (s3mgauid_t *) ckd_calloc(s->n_sen, sizeof(s3mgauid_t)); 394 for (i = 0; i < s->n_sen; i++) 395 s->mgau[i] = i; 396 397 s->n_gauden = s->n_sen; 398 } 399 else { 400 if (s->n_sen != n) 401 E_FATAL("#senones inconsistent: %d in %s; %d in %s\n", 402 n, sen2mgau_map_file, s->n_sen, mixwfile); 403 } 404 405 s->featscr = NULL; 406 return s; 407 } 408 409 void 410 senone_free(senone_t * s) 411 { 412 if (s == NULL) 413 return; 414 if (s->pdf) 415 ckd_free_3d((void *) s->pdf); 416 if (s->mgau) 417 ckd_free(s->mgau); 418 if (s->featscr) 419 ckd_free(s->featscr); 420 ckd_free(s); 421 } 422 423 424 /* 425 * Compute senone score for one senone. 426 * NOTE: Remember that senone PDF tables contain SCALED logs3 values. 427 * NOTE: Remember also that PDF data may be transposed or not depending on s->n_gauden. 428 */ 429 int32 430 senone_eval(senone_t * s, s3senid_t id, gauden_dist_t ** dist, int32 n_top) 431 { 432 int32 scr; /* total senone score */ 433 int32 fscr; /* senone score for one feature */ 434 int32 fwscr; /* senone score for one feature, one codeword */ 435 int32 f, t; 436 gauden_dist_t *fdist; 437 438 assert((id >= 0) && (id < s->n_sen)); 439 assert((n_top > 0) && (n_top <= s->n_cw)); 440 441 scr = 0; 442 443 for (f = 0; f < s->n_feat; f++) { 444 fdist = dist[f]; 445 446 /* Top codeword for feature f */ 447 #if TRUNCATE_LOGPDF 448 fscr = (s->n_gauden > 1) ? fdist[0].dist - (s->pdf[id][f][fdist[0].id] << s->shift) : /* untransposed */ 449 fdist[0].dist - (s->pdf[f][fdist[0].id][id] << s->shift); /* transposed */ 450 #else 451 452 fscr = (s->n_gauden > 1) ? fdist[0].dist - (s->pdf[id][f][fdist[0].id]) : /* untransposed */ 453 fdist[0].dist - (s->pdf[f][fdist[0].id][id]); /* transposed */ 454 #endif 455 456 /* Remaining of n_top codewords for feature f */ 457 for (t = 1; t < n_top; t++) { 458 #if TRUNCATE_LOGPDF 459 fwscr = (s->n_gauden > 1) ? 460 fdist[t].dist - (s->pdf[id][f][fdist[t].id] << s->shift) : 461 fdist[t].dist - (s->pdf[f][fdist[t].id][id] << s->shift); 462 #else 463 464 fwscr = (s->n_gauden > 1) ? 465 fdist[t].dist - (s->pdf[id][f][fdist[t].id]) : 466 fdist[t].dist - (s->pdf[f][fdist[t].id][id]); 467 #endif 468 469 fscr = logmath_add(s->logmath, fscr, fwscr); 470 471 } 472 473 scr += fscr; 474 475 } 476 477 return scr; 478 } 479 480 481 /* 482 * Optimized for special case of all senones sharing one codebook (perhaps many features). 483 * In particular, the PDF tables are transposed in memory. 484 */ 485 void 486 senone_eval_all(senone_t * s, gauden_dist_t ** dist, int32 n_top, 487 int32 * senscr) 488 { 489 int32 i, f, k, cwdist, scr; 490 491 senprob_t *pdf; 492 int32 *featscr = NULL; 493 featscr = s->featscr; 494 495 assert(s->n_gauden == 1); 496 assert((n_top > 0) && (n_top <= s->n_cw)); 497 498 if ((s->n_feat > 1) && (!featscr)) 499 featscr = (int32 *) ckd_calloc(s->n_sen, sizeof(int32)); 500 501 /* Feature 0 */ 502 /* Top-N codeword 0 */ 503 cwdist = dist[0][0].dist; 504 pdf = s->pdf[0][dist[0][0].id]; 505 506 #if TRUNCATE_LOGPDF 507 for (i = 0; i < s->n_sen; i++) 508 senscr[i] = cwdist - (pdf[i] << s->shift); 509 #else 510 for (i = 0; i < s->n_sen; i++) 511 senscr[i] = cwdist - (pdf[i]); 512 513 #endif 514 515 /* Remaining top-N codewords */ 516 for (k = 1; k < n_top; k++) { 517 cwdist = dist[0][k].dist; 518 pdf = s->pdf[0][dist[0][k].id]; 519 520 for (i = 0; i < s->n_sen; i++) { 521 #if TRUNCATE_LOGPDF 522 scr = cwdist - (pdf[i] << s->shift); 523 #else 524 scr = cwdist - (pdf[i]); 525 #endif 526 senscr[i] = logmath_add(s->logmath, senscr[i], scr); 527 } 528 } 529 530 /* Remaining features */ 531 for (f = 1; f < s->n_feat; f++) { 532 /* Top-N codeword 0 */ 533 cwdist = dist[f][0].dist; 534 pdf = s->pdf[f][dist[f][0].id]; 535 536 #if TRUNCATE_LOGPDF 537 for (i = 0; i < s->n_sen; i++) 538 featscr[i] = cwdist - (pdf[i] << s->shift); 539 #else 540 for (i = 0; i < s->n_sen; i++) 541 featscr[i] = cwdist - (pdf[i]); 542 #endif 543 544 /* Remaining top-N codewords */ 545 for (k = 1; k < n_top; k++) { 546 cwdist = dist[f][k].dist; 547 pdf = s->pdf[f][dist[f][k].id]; 548 549 for (i = 0; i < s->n_sen; i++) { 550 #if TRUNCATE_LOGPDF 551 scr = cwdist - (pdf[i] << s->shift); 552 #else 553 scr = cwdist - (pdf[i]); 554 #endif 555 featscr[i] = logmath_add(s->logmath, featscr[i], scr); 556 } 557 } 558 559 for (i = 0; i < s->n_sen; i++) 560 senscr[i] += featscr[i]; 561 } 562 } 563