1 /**
2 * @file read_binhmm.c
3 *
4 * <JA>
 * @brief  バイナリ形式の %HMM 定義ファイルを読み込む
 *
 * Julius は独自のバイナリ形式の %HMM 定義ファイルをサポートしています．
 * HTKのアスキー形式の %HMM 定義ファイルからバイナリ形式への変換は，
 * 附属のツール mkbinhmm で行ないます．このバイナリ形式は，HTK の
 * バイナリ形式とは非互換ですので注意して下さい．
11 * </JA>
12 *
13 * <EN>
14 * @brief Read a binary %HMM definition file
15 *
16 * Julius supports a binary format of %HMM definition file.
17 * The tool "mkbinhmm" can convert the ascii format HTK %HMM definition
18 * file to this format. Please note that this binary format is
19 * not compatible with the HTK binary format.
20 * </EN>
21 *
22 * @author Akinobu LEE
23 * @date Wed Feb 16 05:23:59 2005
24 *
25 * $Revision: 1.5 $
26 *
27 */
28 /*
29 * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology
30 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
31 * All rights reserved
32 */
33
34 #include <sent/stddefs.h>
35 #include <sent/htk_param.h>
36 #include <sent/htk_hmm.h>
37
38 #undef DMES /* define to enable debug message */
39
40 static boolean gzfile; ///< TRUE when opened by fopen_readfile
41
42 #define rdn(A,B,C,D) if (rdnfunc(A,B,C,D) == FALSE) return FALSE
43 #define rdn_str(A,B,C) if ((C = rdn_strfunc(A,B)) == NULL) return FALSE
44
45 /**
46 * Binary read function with byte swaping (assume file is BIG ENDIAN)
47 *
48 * @param fp [in] file pointer
49 * @param buf [out] read data
50 * @param unitbyte [in] size of a unit in bytes
51 * @param unitnum [in] number of unit to be read
52 */
53 static boolean
rdnfunc(FILE * fp,void * buf,size_t unitbyte,int unitnum)54 rdnfunc(FILE *fp, void *buf, size_t unitbyte, int unitnum)
55 {
56 size_t tmp;
57
58 if (unitnum == 0) return TRUE;
59
60 if (gzfile) {
61 tmp = myfread(buf, unitbyte, unitnum, fp);
62 } else {
63 tmp = fread(buf, unitbyte, unitnum, fp);
64 }
65 if (tmp < (size_t)unitnum) {
66 jlog("Error: read_binhmm: failed to read %d bytes\n", unitbyte * unitnum);
67 return FALSE;
68 }
69 #ifndef WORDS_BIGENDIAN
70 if (unitbyte != 1) {
71 swap_bytes(buf, unitbyte, unitnum);
72 }
73 #endif
74 return TRUE;
75 }
76
77 static char buf[MAXLINELEN]; ///< Local work are for text handling
78 static char nostr = '\0';
79 /**
80 * Read a string till NULL.
81 *
82 * @param fp [in] file pointer
83 * @param hmm [out] pointer to %HMM definition data to store the values.
84 *
85 * @return pointer to a newly allocated buffer that contains the read string.
86 */
87 static char *
rdn_strfunc(FILE * fp,HTK_HMM_INFO * hmm)88 rdn_strfunc(FILE *fp, HTK_HMM_INFO *hmm)
89 {
90 int c;
91 int len;
92 char *p;
93
94 len = 0;
95 while ((c = gzfile ? myfgetc(fp) : fgetc(fp)) != -1) {
96 if (len >= MAXLINELEN) {
97 jlog("Error: read_binhmm: string len exceeded %d bytes\n", MAXLINELEN);
98 jlog("Error: read_binhmm: please check the value of MAXLINELEN\n");
99 return NULL;
100 }
101 buf[len++] = c;
102 if (c == '\0') break;
103 }
104 if (len == 0) return NULL;
105 if (len == 1) {
106 p = &nostr;
107 } else {
108 p = (char *)mybmalloc2(len, &(hmm->mroot));
109 strcpy(p, buf);
110 }
111 return(p);
112 }
113
114
/* Header strings that identify the file format; rd_header() compares the
   first string of the file against them. */
static char *binhmm_header = BINHMM_HEADER; ///< Header string
static char *binhmm_header_v2 = BINHMM_HEADER_V2; ///< Header string for V2
117
118 /**
119 * Read acoustic analysis configration parameters from header of binary HMM.
120 *
121 * @param fp [in] file pointer
122 * @param para [out] acoustic analysis configration parameters
123 */
124 static boolean
rd_para(FILE * fp,Value * para)125 rd_para(FILE *fp, Value *para)
126 {
127 short version;
128 float dummy;
129
130 /* read version */
131 rdn(fp, &version, sizeof(short), 1);
132
133 if (version > VALUE_VERSION) {
134 jlog("Error: read_binhmm: unknown embedded parameter format version: %d\n", version);
135 return FALSE;
136 }
137 jlog("Stat: rd_para: found embedded acoutic parameter (ver.%d)\n", version);
138
139 /* read parameters */
140 rdn(fp, &(para->smp_period), sizeof(long), 1);
141 rdn(fp, &(para->smp_freq), sizeof(long), 1);
142 rdn(fp, &(para->framesize), sizeof(int), 1);
143 rdn(fp, &(para->frameshift), sizeof(int), 1);
144 rdn(fp, &(para->preEmph), sizeof(float), 1);
145 rdn(fp, &(para->lifter), sizeof(int), 1);
146 rdn(fp, &(para->fbank_num), sizeof(int), 1);
147 rdn(fp, &(para->delWin), sizeof(int), 1);
148 rdn(fp, &(para->accWin), sizeof(int), 1);
149 rdn(fp, &(para->silFloor), sizeof(float), 1);
150 rdn(fp, &(para->escale), sizeof(float), 1);
151 rdn(fp, &(para->hipass), sizeof(int), 1);
152 rdn(fp, &(para->lopass), sizeof(int), 1);
153 rdn(fp, &(para->enormal), sizeof(int), 1);
154 rdn(fp, &(para->raw_e), sizeof(int), 1);
155 if (version == 1) {
156 /* version 1 has ss related parameters, but version 2 and later not */
157 /* skip ss related parameters (ss_alpha and ss_floor) */
158 rdn(fp, &dummy, sizeof(float), 1);
159 rdn(fp, &dummy, sizeof(float), 1);
160 }
161 rdn(fp, &(para->zmeanframe), sizeof(int), 1);
162 if (version >= 3) {
163 rdn(fp, &(para->usepower), sizeof(int), 1);
164 }
165
166 return(TRUE);
167 }
168
169 /**
170 * Read header string of binary HMM file.
171 *
172 * @param fp [in] file pointer
173 * @param hmm [out] pointer to %HMM definition data to store the values.
174 * @param para [out] store embedded acoustic parameters if any (V2)
175 * @param mpdf_macro_ret [out] will be set to TRUE if the file contains mixture pdf macro defined by "~p"
176 *
177 * @return TRUE if a correct header was read, FALSE if header string does not
178 * match the current version.
179 */
180 static boolean
rd_header(FILE * fp,HTK_HMM_INFO * hmm,Value * para,boolean * mpdf_macro_ret)181 rd_header(FILE *fp, HTK_HMM_INFO *hmm, Value *para, boolean *mpdf_macro_ret)
182 {
183 char *p, *q;
184 boolean emp, inv;
185
186 rdn_str(fp, hmm, p);
187 if (strmatch(p, binhmm_header)) {
188 /* version 1 */
189 hmm->variance_inversed = FALSE;
190 } else if (strmatch(p, binhmm_header_v2)) {
191 /* version 2 */
192 emp = inv = FALSE;
193 rdn_str(fp, hmm, q);
194 if (*q != '\0') {
195 while(*q == '_') {
196 q++;
197 switch (*q) {
198 case BINHMM_HEADER_V2_EMBEDPARA:
199 /* read in embedded acoutic condition parameters */
200 emp = TRUE;
201 jlog("Stat: binhmm-header: analysis parameter embedded\n");
202 break;
203 case BINHMM_HEADER_V2_VARINV:
204 inv = TRUE;
205 jlog("Stat: binhmm-header: variance inversed\n");
206 break;
207 case BINHMM_HEADER_V2_MPDFMACRO:
208 *mpdf_macro_ret = TRUE;
209 jlog("Stat: binhmm-header: mixture PDF macro used\n");
210 break;
211 default:
212 jlog("Error: unknown format qualifier in header: \"%c\"\n", *q);
213 return FALSE;
214 }
215 q++;
216 }
217 }
218 if (emp) {
219 para->loaded = 1;
220 if (rd_para(fp, para) == FALSE) {
221 jlog("Error: read_binhmm: failed to read embeded parameter\n");
222 return FALSE;
223 }
224 jlog("Stat: read_binhmm: has acoutic analysis configurations in its header\n");
225 }
226 if (inv) {
227 hmm->variance_inversed = TRUE;
228 jlog("Stat: read_binhmm: has inversed variances\n");
229 } else {
230 hmm->variance_inversed = FALSE;
231 }
232 } else {
233 /* failed to read header */
234 return FALSE;
235 }
236 return TRUE;
237 }
238
239
240
241 /**
242 * Read %HMM option specifications.
243 *
244 * @param fp [in] file pointer
245 * @param opt [out] pointer to the %HMM option structure to hold the read
246 * values.
247 */
248 static boolean
rd_opt(FILE * fp,HTK_HMM_Options * opt)249 rd_opt(FILE *fp, HTK_HMM_Options *opt)
250 {
251 rdn(fp, &(opt->stream_info.num), sizeof(short), 1);
252 rdn(fp, opt->stream_info.vsize, sizeof(short), MAXSTREAMNUM);
253 rdn(fp, &(opt->vec_size), sizeof(short), 1);
254 rdn(fp, &(opt->cov_type), sizeof(short), 1);
255 rdn(fp, &(opt->dur_type), sizeof(short), 1);
256 rdn(fp, &(opt->param_type), sizeof(short), 1);
257
258 return(TRUE);
259 }
260
261 /**
262 * Read %HMM type of mixture tying.
263 *
264 * @param fp [in] file pointer
265 * @param hmm [out] pointer to %HMM definition data to store the values.
266 */
267 static boolean
rd_type(FILE * fp,HTK_HMM_INFO * hmm)268 rd_type(FILE *fp, HTK_HMM_INFO *hmm)
269 {
270 rdn(fp, &(hmm->is_tied_mixture), sizeof(boolean), 1);
271 rdn(fp, &(hmm->maxmixturenum), sizeof(int), 1);
272 return TRUE;
273 }
274
275
276 /* read transition data */
277 static HTK_HMM_Trans **tr_index; ///< Map transition matrix id to its pointer
278 static unsigned int tr_num; ///< Length of above
279
280 /**
281 * @brief Read a sequence of transition matrix data for @a tr_num.
282 *
283 * The transition matrixes are stored into @a hmm, and their pointers
284 * are also stored in @a tr_index for later data mapping operation
285 * from upper structure (state etc.).
286 *
287 * @param fp [in] file pointer
288 * @param hmm [out] %HMM definition structure to hold the read transitions.
289 */
290 static boolean
rd_trans(FILE * fp,HTK_HMM_INFO * hmm)291 rd_trans(FILE *fp, HTK_HMM_INFO *hmm)
292 {
293 HTK_HMM_Trans *t;
294 unsigned int idx;
295 int i;
296 PROB *atmp;
297 char *p;
298
299 rdn(fp, &tr_num, sizeof(unsigned int), 1);
300 tr_index = (HTK_HMM_Trans **)mymalloc(sizeof(HTK_HMM_Trans *) * tr_num);
301
302 hmm->trstart = NULL;
303 hmm->tr_root = NULL;
304 for (idx = 0; idx < tr_num; idx++) {
305 t = (HTK_HMM_Trans *)mybmalloc2(sizeof(HTK_HMM_Trans), &(hmm->mroot));
306 rdn_str(fp, hmm, p);
307 t->name = (*p == '\0') ? NULL : p;
308 rdn(fp, &(t->statenum), sizeof(short), 1);
309 t->a = (PROB **)mybmalloc2(sizeof(PROB *) * t->statenum, &(hmm->mroot));
310 atmp = (PROB *)mybmalloc2(sizeof(PROB) * t->statenum * t->statenum, &(hmm->mroot));
311 for (i=0;i<t->statenum;i++) {
312 t->a[i] = &(atmp[i*t->statenum]);
313 rdn(fp, t->a[i], sizeof(PROB), t->statenum);
314 }
315 trans_add(hmm, t);
316 tr_index[idx] = t;
317 }
318
319 #ifdef DMES
320 jlog("Stat: read_binhmm: %d transition maxtix read\n", tr_num);
321 #endif
322 return TRUE;
323 }
324
325
326 static HTK_HMM_Var **vr_index; ///< Map variance id to its pointer
327 static unsigned int vr_num; ///< Length of above
328
329 /**
330 * @brief Read a sequence of variance vector for @a vr_num.
331 *
332 * The variance vectors are stored into @a hmm, and their pointers
333 * are also stored in @a vr_index for later data mapping operation
334 * from upper structure (density etc.).
335 *
336 * @param fp [in] file pointer
337 * @param hmm [out] %HMM definition structure to hold the read variance.
338 */
339 static boolean
rd_var(FILE * fp,HTK_HMM_INFO * hmm)340 rd_var(FILE *fp, HTK_HMM_INFO *hmm)
341 {
342 HTK_HMM_Var *v;
343 unsigned int idx;
344 char *p;
345
346 rdn(fp, &vr_num, sizeof(unsigned int), 1);
347 vr_index = (HTK_HMM_Var **)mymalloc(sizeof(HTK_HMM_Var *) * vr_num);
348
349 hmm->vrstart = NULL;
350 hmm->vr_root = NULL;
351 for (idx = 0; idx < vr_num; idx++) {
352 v = (HTK_HMM_Var *)mybmalloc2(sizeof(HTK_HMM_Var), &(hmm->mroot));
353 rdn_str(fp, hmm, p);
354 v->name = (*p == '\0') ? NULL : p;
355 rdn(fp, &(v->len), sizeof(short), 1);
356 v->vec = (VECT *)mybmalloc2(sizeof(VECT) * v->len, &(hmm->mroot));
357 rdn(fp, v->vec, sizeof(VECT), v->len);
358 vr_index[idx] = v;
359 var_add(hmm, v);
360 }
361 #ifdef DMES
362 jlog("Stat: read_binhmm: %d variance read\n", vr_num);
363 #endif
364 return TRUE;
365 }
366
367
368 /* read density data */
369 static HTK_HMM_Dens **dens_index; ///< Map density id to its pointer
370 static unsigned int dens_num; ///< Length of above
371
372 /**
373 * @brief Read a sequence of mixture densities for @a dens_num.
374 *
375 * The mixture densities are stored into @a hmm, and their references
376 * to lower structure (variance etc.) are recovered from the id-to-pointer
377 * index. Their pointers are also stored in @a dens_index for
378 * later data mapping operation from upper structure (state etc.).
379 *
380 * @param fp [in] file pointer
381 * @param hmm [out] %HMM definition structure to hold the read densities.
382 */
383 static boolean
rd_dens(FILE * fp,HTK_HMM_INFO * hmm)384 rd_dens(FILE *fp, HTK_HMM_INFO *hmm)
385 {
386 HTK_HMM_Dens *d;
387 unsigned int idx;
388 unsigned int vid;
389 char *p;
390
391 rdn(fp, &dens_num, sizeof(unsigned int), 1);
392 hmm->totalmixnum = dens_num;
393 dens_index = (HTK_HMM_Dens **)mymalloc(sizeof(HTK_HMM_Dens *) * dens_num);
394
395 hmm->dnstart = NULL;
396 hmm->dn_root = NULL;
397 for (idx = 0; idx < dens_num; idx++) {
398 d = (HTK_HMM_Dens *)mybmalloc2(sizeof(HTK_HMM_Dens), &(hmm->mroot));
399 rdn_str(fp, hmm, p);
400 d->name = (*p == '\0') ? NULL : p;
401 rdn(fp, &(d->meanlen), sizeof(short), 1);
402 d->mean = (VECT *)mybmalloc2(sizeof(VECT) * d->meanlen, &(hmm->mroot));
403 rdn(fp, d->mean, sizeof(VECT), d->meanlen);
404 rdn(fp, &vid, sizeof(unsigned int), 1);
405 d->var = vr_index[vid];
406 rdn(fp, &(d->gconst), sizeof(LOGPROB), 1);
407 dens_index[idx] = d;
408 dens_add(hmm, d);
409 }
410 #ifdef DMES
411 jlog("Stat: read_binhmm: %d gaussian densities read\n", dens_num);
412 #endif
413 return TRUE;
414 }
415
416
417 /* read stream weight data */
418 static HTK_HMM_StreamWeight **streamweight_index; ///< Map stream weights id to its pointer
419 static unsigned int streamweight_num; ///< Length of above
420
421 /**
422 * @brief Read a sequence of stream weights for @a streamweight_num.
423 *
424 * The stream weights are stored into @a hmm, and their references
425 * to lower structure (variance etc.) are recovered from the id-to-pointer
426 * index. Their pointers are also stored in @a dens_index for
427 * later data mapping operation from upper structure (state etc.).
428 *
429 * @param fp [in] file pointer
430 * @param hmm [out] %HMM definition structure to hold the read stream weights.
431 */
432 static boolean
rd_streamweight(FILE * fp,HTK_HMM_INFO * hmm)433 rd_streamweight(FILE *fp, HTK_HMM_INFO *hmm)
434 {
435 HTK_HMM_StreamWeight *sw;
436 unsigned int idx;
437 char *p;
438
439 rdn(fp, &streamweight_num, sizeof(unsigned int), 1);
440 streamweight_index = (HTK_HMM_StreamWeight **)mymalloc(sizeof(HTK_HMM_StreamWeight *) * streamweight_num);
441
442 hmm->swstart = NULL;
443 hmm->sw_root = NULL;
444 for (idx = 0; idx < streamweight_num; idx++) {
445 sw = (HTK_HMM_StreamWeight *)mybmalloc2(sizeof(HTK_HMM_StreamWeight), &(hmm->mroot));
446 rdn_str(fp, hmm, p);
447 sw->name = (*p == '\0') ? NULL : p;
448 rdn(fp, &(sw->len), sizeof(short), 1);
449 sw->weight = (VECT *)mybmalloc2(sizeof(VECT) * sw->len, &(hmm->mroot));
450 rdn(fp, sw->weight, sizeof(VECT), sw->len);
451 streamweight_index[idx] = sw;
452 sw_add(hmm, sw);
453 }
454 #ifdef DMES
455 jlog("Stat: read_binhmm: %d stream weights read\n", streamweight_num);
456 #endif
457 return TRUE;
458 }
459
460
461 /* read tmix data */
462 static GCODEBOOK **tm_index; ///< Map codebook id to its pointer
463 static unsigned int tm_num; ///< Length of above
464
465 /**
466 * @brief Read a sequence of mixture codebook for @a tm_num.
467 *
468 * The mixture codebook data are stored into @a hmm, and their references
469 * to lower structure (mixtures etc.) are recovered from the id-to-pointer
470 * index. Their pointers are also stored in @a tm_index for
471 * later data mapping operation from upper structure (state etc.).
472 *
473 * @param fp [in] file pointer
474 * @param hmm [out] %HMM definition structure to hold the read codebooks.
475 */
476 static boolean
rd_tmix(FILE * fp,HTK_HMM_INFO * hmm)477 rd_tmix(FILE *fp, HTK_HMM_INFO *hmm)
478 {
479 GCODEBOOK *tm;
480 unsigned int idx;
481 unsigned int did;
482 int i;
483 char *p;
484
485 rdn(fp, &tm_num, sizeof(unsigned int), 1);
486 hmm->codebooknum = tm_num;
487 tm_index = (GCODEBOOK **)mymalloc(sizeof(GCODEBOOK *) * tm_num);
488 hmm->maxcodebooksize = 0;
489
490 hmm->codebook_root = NULL;
491 for (idx = 0; idx < tm_num; idx++) {
492 tm = (GCODEBOOK *)mybmalloc2(sizeof(GCODEBOOK), &(hmm->mroot));
493 rdn_str(fp, hmm, p);
494 tm->name = (*p == '\0') ? NULL : p;
495 rdn(fp, &(tm->num), sizeof(int), 1);
496 if (hmm->maxcodebooksize < tm->num) hmm->maxcodebooksize = tm->num;
497 tm->d = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * tm->num, &(hmm->mroot));
498 for(i=0;i<tm->num;i++) {
499 rdn(fp, &did, sizeof(unsigned int), 1);
500 if (did >= dens_num) {
501 tm->d[i] = NULL;
502 } else {
503 tm->d[i] = dens_index[did];
504 }
505 }
506 tm->id = idx;
507 tm_index[idx] = tm;
508 codebook_add(hmm, tm);
509 }
510 #ifdef DMES
511 jlog("Stat: read_binhmm: %d tied-mixture codebooks read\n", tm_num);
512 #endif
513 return TRUE;
514 }
515
516
517 /* read mpdf data */
518 static HTK_HMM_PDF **mpdf_index; ///< Map mixture pdf id to its pointer
519 static unsigned int mpdf_num; ///< Length of above
520
521 /**
522 * Read a mixture PDF.
523 *
524 * @param fp [in] file pointer
525 * @param hmm [out] %HMM definition structure to hold the read codebooks.
526 * @param m [out] pointer where to store the input mixture PDF.
527 *
528 * @return TRUE on success, FALSE on error.
529 *
530 */
531 static boolean
rd_pdf_sub(FILE * fp,HTK_HMM_INFO * hmm,HTK_HMM_PDF * m)532 rd_pdf_sub(FILE *fp, HTK_HMM_INFO *hmm, HTK_HMM_PDF *m)
533 {
534 int i;
535 unsigned int did;
536
537 rdn(fp, &(m->mix_num), sizeof(short), 1);
538 if (m->mix_num == -1) {
539 /* tmix */
540 rdn(fp, &did, sizeof(unsigned int), 1);
541 m->b = (HTK_HMM_Dens **)tm_index[did];
542 m->mix_num = (tm_index[did])->num;
543 m->tmix = TRUE;
544 } else {
545 /* mixture */
546 m->b = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * m->mix_num, &(hmm->mroot));
547 for (i=0;i<m->mix_num;i++) {
548 rdn(fp, &did, sizeof(unsigned int), 1);
549 if (did >= dens_num) {
550 m->b[i] = NULL;
551 } else {
552 m->b[i] = dens_index[did];
553 }
554 }
555 m->tmix = FALSE;
556 }
557 m->bweight = (PROB *)mybmalloc2(sizeof(PROB) * m->mix_num, &(hmm->mroot));
558 rdn(fp, m->bweight, sizeof(PROB), m->mix_num);
559
560 return TRUE;
561 }
562
563
564 /**
565 * @brief Read a sequence of mixture pdf for @a mpdf_num.
566 *
567 * The mixture pdfs are stored into @a hmm, and their references
568 * to lower structure (variance etc.) are recovered from the id-to-pointer
569 * index. Their pointers are also stored in @a mpdf_index for
570 * later data mapping operation from upper structure (state etc.).
571 *
572 * @param fp [in] file pointer
573 * @param hmm [out] %HMM definition structure to hold the read data.
574 */
575 static boolean
rd_mpdf(FILE * fp,HTK_HMM_INFO * hmm)576 rd_mpdf(FILE *fp, HTK_HMM_INFO *hmm)
577 {
578 HTK_HMM_PDF *m;
579 unsigned int idx;
580 char *p;
581
582 rdn(fp, &mpdf_num, sizeof(unsigned int), 1);
583 mpdf_index = (HTK_HMM_PDF **)mymalloc(sizeof(HTK_HMM_PDF *) * mpdf_num);
584
585 hmm->pdfstart = NULL;
586 hmm->pdf_root = NULL;
587 for (idx = 0; idx < mpdf_num; idx++) {
588 m = (HTK_HMM_PDF *)mybmalloc2(sizeof(HTK_HMM_PDF), &(hmm->mroot));
589 rdn_str(fp, hmm, p);
590 m->name = (*p == '\0') ? NULL : p;
591 rdn(fp, &(m->stream_id), sizeof(short), 1);
592 if (rd_pdf_sub(fp, hmm, m) == FALSE) return FALSE;
593 mpdf_index[idx] = m;
594 mpdf_add(hmm, m);
595 }
596 #ifdef DMES
597 jlog("Stat: read_binhmm: %d mixture PDFs read\n", mpdf_num);
598 #endif
599 return TRUE;
600 }
601
602
603 /* read state data */
604 static HTK_HMM_State **st_index; ///< Map state id to its pointer
605 static unsigned int st_num; ///< Length of above
606
607 /**
608 * @brief Read a sequence of state data for @a st_num.
609 *
610 * The state data are stored into @a hmm, and their references
611 * to lower structure (mixture, codebook, etc.) are recovered
612 * from the id-to-pointer index. Their pointers are also stored
613 * in @a st_index for later data mapping operation from
614 * upper structure (models etc.).
615 *
616 * @param fp [in] file pointer
617 * @param hmm [out] %HMM definition structure to hold the read states.
618 * @param mpdf_macro [in] TRUE if mixture pdfs are already read separatedly, or FALSE if they are all defined in-line
619 */
620 static boolean
rd_state(FILE * fp,HTK_HMM_INFO * hmm,boolean mpdf_macro)621 rd_state(FILE *fp, HTK_HMM_INFO *hmm, boolean mpdf_macro)
622 {
623 HTK_HMM_State *s;
624 unsigned int idx;
625 unsigned int mid, swid;
626 int m;
627 char *buf;
628
629 rdn(fp, &st_num, sizeof(unsigned int), 1);
630 hmm->totalstatenum = st_num;
631 st_index = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * st_num);
632
633 hmm->ststart = NULL;
634 hmm->st_root = NULL;
635 for (idx = 0; idx < st_num; idx++) {
636 s = (HTK_HMM_State *)mybmalloc2(sizeof(HTK_HMM_State), &(hmm->mroot));
637 rdn_str(fp, hmm, buf);
638 s->name = (*buf == '\0') ? NULL : buf;
639 s->nstream = hmm->opt.stream_info.num;
640 s->pdf = (HTK_HMM_PDF **)mybmalloc2(sizeof(HTK_HMM_PDF *) * s->nstream, &(hmm->mroot));
641 if (mpdf_macro) {
642 /* mpdf are stored separatedly, so read index */
643 for(m=0;m<s->nstream;m++) {
644 rdn(fp, &mid, sizeof(unsigned int), 1);
645 if (mid >= mpdf_num) {
646 s->pdf[m] = NULL;
647 } else {
648 s->pdf[m] = mpdf_index[mid];
649 }
650 }
651 } else {
652 /* mpdf are stored sequencially, so read the content here */
653 for(m=0;m<s->nstream;m++) {
654 s->pdf[m] = (HTK_HMM_PDF *)mybmalloc2(sizeof(HTK_HMM_PDF), &(hmm->mroot));
655 s->pdf[m]->name = NULL;
656 if (rd_pdf_sub(fp, hmm, s->pdf[m]) == FALSE) return FALSE;
657 s->pdf[m]->stream_id = m;
658 mpdf_add(hmm, s->pdf[m]);
659 }
660 }
661 if (hmm->opt.stream_info.num > 1) {
662 /* read steam weight info */
663 rdn(fp, &swid, sizeof(unsigned int), 1);
664 if (swid >= streamweight_num) {
665 s->w = NULL;
666 } else {
667 s->w = streamweight_index[swid];
668 }
669 } else {
670 s->w = NULL;
671 }
672 s->id = idx;
673 st_index[idx] = s;
674 state_add(hmm, s);
675 }
676 #ifdef DMES
677 jlog("Stat: read_binhmm: %d states read\n", st_num);
678 #endif
679 return TRUE;
680 }
681
682 /**
683 * @brief Read a sequence of %HMM models.
684 *
685 * The models are stored into @a hmm. Their references
686 * to lower structures (state, transition, etc.) are stored in schalar
687 * ID, and are recovered from the previously built id-to-pointer index.
688 * when reading the sub structures.
689 *
690 * @param fp [in] file pointer
691 * @param hmm [out] %HMM definition structure to hold the read models.
692 */
693 static boolean
rd_data(FILE * fp,HTK_HMM_INFO * hmm)694 rd_data(FILE *fp, HTK_HMM_INFO *hmm)
695 {
696 HTK_HMM_Data *d;
697 unsigned int md_num;
698 unsigned int sid, tid;
699 unsigned int idx;
700 int i;
701 char *p;
702
703 rdn(fp, &(md_num), sizeof(unsigned int), 1);
704 hmm->totalhmmnum = md_num;
705
706 hmm->start = NULL;
707 hmm->physical_root = NULL;
708 for (idx = 0; idx < md_num; idx++) {
709 d = (HTK_HMM_Data *)mybmalloc2(sizeof(HTK_HMM_Data), &(hmm->mroot));
710 rdn_str(fp, hmm, p);
711 d->name = (*p == '\0') ? NULL : p;
712 rdn(fp, &(d->state_num), sizeof(short), 1);
713 d->s = (HTK_HMM_State **)mybmalloc2(sizeof(HTK_HMM_State *) * d->state_num, &(hmm->mroot));
714 for (i=0;i<d->state_num;i++) {
715 rdn(fp, &sid, sizeof(unsigned int), 1);
716 if (sid > (unsigned int)hmm->totalstatenum) {
717 d->s[i] = NULL;
718 } else {
719 d->s[i] = st_index[sid];
720 }
721 }
722 rdn(fp, &tid, sizeof(unsigned int), 1);
723 d->tr = tr_index[tid];
724 htk_hmmdata_add(hmm, d);
725 }
726 #ifdef DMES
727 jlog("Stat: read_binhmm: %d HMM model definition read\n", md_num);
728 #endif
729 return TRUE;
730 }
731
732
733
734 /**
735 * Top function to read a binary %HMM file from @a fp.
736 *
737 * @param fp [in] file pointer
738 * @param hmm [out] %HMM definition structure to hold the read models.
739 * @param gzfile_p [in] TRUE if the file pointer points to a gzip file
740 * @param para [out] store acoustic parameters if embedded in binhmm (V2)
741 *
742 * @return TRUE on success, FALSE on failure.
743 */
744 boolean
read_binhmm(FILE * fp,HTK_HMM_INFO * hmm,boolean gzfile_p,Value * para)745 read_binhmm(FILE *fp, HTK_HMM_INFO *hmm, boolean gzfile_p, Value *para)
746 {
747 boolean mpdf_macro = FALSE;
748
749 gzfile = gzfile_p;
750
751 /* read header */
752 if (rd_header(fp, hmm, para, &mpdf_macro) == FALSE) {
753 return FALSE;
754 }
755
756 jlog("Stat: read_binhmm: binary format HMM definition\n");
757
758 /* read option data */
759 if (rd_opt(fp, &(hmm->opt)) == FALSE) {
760 jlog("Error: read_binhmm: failed to read HMM options\n");
761 return FALSE;
762 }
763
764 /* read type data */
765 if (rd_type(fp, hmm) == FALSE) {
766 jlog("Error: read_binhmm: failed to read HMM type of mixture tying\n");
767 return FALSE;
768 }
769
770 /* read transition data */
771 if (rd_trans(fp, hmm) == FALSE) {
772 jlog("Error: read_binhmm: failed to read HMM transition data\n");
773 return FALSE;
774 }
775
776 /* read variance data */
777 if (rd_var(fp, hmm) == FALSE) {
778 jlog("Error: read_binhmm: failed to read HMM variance data\n");
779 return FALSE;
780 }
781
782 /* read density data */
783 if (rd_dens(fp, hmm) == FALSE) {
784 jlog("Error: read_binhmm: failed to read HMM density data\n");
785 return FALSE;
786 }
787
788 /* read stream weight data */
789 if (hmm->opt.stream_info.num > 1) {
790 if (rd_streamweight(fp, hmm) == FALSE) {
791 jlog("Error: read_binhmm: failed to read stream weights data\n");
792 return FALSE;
793 }
794 }
795
796 /* read tmix data */
797 if (hmm->is_tied_mixture) {
798 if (rd_tmix(fp, hmm) == FALSE) {
799 jlog("Error: read_binhmm: failed to read HMM tied-mixture codebook data\n");
800 return FALSE;
801 }
802 }
803
804 /* read mixture pdf data */
805 if (mpdf_macro) {
806 if (rd_mpdf(fp, hmm) == FALSE) {
807 jlog("Error: read_binhmm: failed to read mixture PDF data\n");
808 return FALSE;
809 }
810 }
811
812 /* read state data */
813 if (rd_state(fp, hmm, mpdf_macro) == FALSE) {
814 jlog("Error: read_binhmm: failed to read HMM state data\n");
815 return FALSE;
816 }
817
818 /* read model data */
819 if (rd_data(fp, hmm) == FALSE) {
820 jlog("Error: read_binhmm: failed to read HMM data\n");
821 return FALSE;
822 }
823
824 /* free pointer->index work area */
825 if (mpdf_macro) free(mpdf_index);
826 free(tr_index);
827 free(vr_index);
828 if (hmm->opt.stream_info.num > 1) free(streamweight_index);
829 free(dens_index);
830 if (hmm->is_tied_mixture) free(tm_index);
831 free(st_index);
832
833 /* count maximum state num (it is not stored in binhmm... */
834 {
835 HTK_HMM_Data *dtmp;
836 int maxlen = 0;
837 for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
838 if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
839 }
840 hmm->maxstatenum = maxlen;
841 }
842
843 /* compute total number of mixture PDFs */
844 {
845 HTK_HMM_PDF *p;
846 int n = 0;
847 for (p = hmm->pdfstart; p; p = p->next) {
848 n++;
849 }
850 hmm->totalpdfnum = n;
851 }
852
853 /* determine whether this model needs multi-path handling */
854 hmm->need_multipath = htk_hmm_has_several_arc_on_edge(hmm);
855 if (hmm->need_multipath) {
856 jlog("Stat: read_binhmm: this HMM requires multipath handling at decoding\n");
857 } else {
858 jlog("Stat: read_binhmm: this HMM does not need multipath handling\n");
859 }
860
861 if (! hmm->variance_inversed) {
862 /* inverse all variance values for faster computation */
863 htk_hmm_inverse_variances(hmm);
864 hmm->variance_inversed = TRUE;
865 }
866
867 #ifdef ENABLE_MSD
868 /* check if MSD-HMM */
869 htk_hmm_check_msd(hmm);
870 #endif
871
872 return (TRUE);
873 }
874