1 /**
2  * @file   read_binhmm.c
3  *
4  * <JA>
 * @brief  バイナリ形式の %HMM 定義ファイルを読み込む
 *
 * Julius は独自のバイナリ形式の %HMM 定義ファイルをサポートしています．
 * HTKのアスキー形式の %HMM 定義ファイルからバイナリ形式への変換は，
 * 附属のツール mkbinhmm で行ないます．このバイナリ形式は，HTK の
 * バイナリ形式とは非互換ですので注意して下さい．
11  * </JA>
12  *
13  * <EN>
14  * @brief  Read a binary %HMM definition file
15  *
16  * Julius supports a binary format of %HMM definition file.
17  * The tool "mkbinhmm" can convert the ascii format HTK %HMM definition
18  * file to this format.  Please note that this binary format is
19  * not compatible with the HTK binary format.
20  * </EN>
21  *
22  * @author Akinobu LEE
23  * @date   Wed Feb 16 05:23:59 2005
24  *
25  * $Revision: 1.5 $
26  *
27  */
28 /*
29  * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology
30  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
31  * All rights reserved
32  */
33 
34 #include <sent/stddefs.h>
35 #include <sent/htk_param.h>
36 #include <sent/htk_hmm.h>
37 
38 #undef DMES			/* define to enable debug message */
39 
40 static boolean gzfile;	      ///< TRUE when opened by fopen_readfile
41 
42 #define rdn(A,B,C,D) if (rdnfunc(A,B,C,D) == FALSE) return FALSE
43 #define rdn_str(A,B,C) if ((C = rdn_strfunc(A,B)) == NULL) return FALSE
44 
45 /**
46  * Binary read function with byte swaping (assume file is BIG ENDIAN)
47  *
48  * @param fp [in] file pointer
49  * @param buf [out] read data
50  * @param unitbyte [in] size of a unit in bytes
51  * @param unitnum [in] number of unit to be read
52  */
53 static boolean
rdnfunc(FILE * fp,void * buf,size_t unitbyte,int unitnum)54 rdnfunc(FILE *fp, void *buf, size_t unitbyte, int unitnum)
55 {
56   size_t tmp;
57 
58   if (unitnum == 0) return TRUE;
59 
60   if (gzfile) {
61     tmp = myfread(buf, unitbyte, unitnum, fp);
62   } else {
63     tmp = fread(buf, unitbyte, unitnum, fp);
64   }
65   if (tmp < (size_t)unitnum) {
66     jlog("Error: read_binhmm: failed to read %d bytes\n", unitbyte * unitnum);
67     return FALSE;
68   }
69 #ifndef WORDS_BIGENDIAN
70   if (unitbyte != 1) {
71     swap_bytes(buf, unitbyte, unitnum);
72   }
73 #endif
74   return TRUE;
75 }
76 
77 static char buf[MAXLINELEN];	///< Local work are for text handling
78 static char nostr = '\0';
79 /**
80  * Read a string till NULL.
81  *
82  * @param fp [in] file pointer
83  * @param hmm [out] pointer to %HMM definition data to store the values.
84  *
85  * @return pointer to a newly allocated buffer that contains the read string.
86  */
87 static char *
rdn_strfunc(FILE * fp,HTK_HMM_INFO * hmm)88 rdn_strfunc(FILE *fp, HTK_HMM_INFO *hmm)
89 {
90   int c;
91   int len;
92   char *p;
93 
94   len = 0;
95   while ((c = gzfile ? myfgetc(fp) : fgetc(fp)) != -1) {
96     if (len >= MAXLINELEN) {
97       jlog("Error: read_binhmm: string len exceeded %d bytes\n", MAXLINELEN);
98       jlog("Error: read_binhmm: please check the value of MAXLINELEN\n");
99       return NULL;
100     }
101     buf[len++] = c;
102     if (c == '\0') break;
103   }
104   if (len == 0) return NULL;
105   if (len == 1) {
106     p = &nostr;
107   } else {
108     p = (char *)mybmalloc2(len, &(hmm->mroot));
109     strcpy(p, buf);
110   }
111   return(p);
112 }
113 
114 
115 static char *binhmm_header = BINHMM_HEADER; ///< Header string
116 static char *binhmm_header_v2 = BINHMM_HEADER_V2; ///< Header string for V2
117 
118 /**
119  * Read acoustic analysis configration parameters from header of binary HMM.
120  *
121  * @param fp [in] file pointer
122  * @param para [out] acoustic analysis configration parameters
123  */
124 static boolean
rd_para(FILE * fp,Value * para)125 rd_para(FILE *fp, Value *para)
126 {
127   short version;
128   float dummy;
129 
130   /* read version */
131   rdn(fp, &version, sizeof(short), 1);
132 
133   if (version > VALUE_VERSION) {
134     jlog("Error: read_binhmm: unknown embedded parameter format version: %d\n", version);
135     return FALSE;
136   }
137   jlog("Stat: rd_para: found embedded acoutic parameter (ver.%d)\n", version);
138 
139   /* read parameters */
140   rdn(fp, &(para->smp_period), sizeof(long), 1);
141   rdn(fp, &(para->smp_freq), sizeof(long), 1);
142   rdn(fp, &(para->framesize), sizeof(int), 1);
143   rdn(fp, &(para->frameshift), sizeof(int), 1);
144   rdn(fp, &(para->preEmph), sizeof(float), 1);
145   rdn(fp, &(para->lifter), sizeof(int), 1);
146   rdn(fp, &(para->fbank_num), sizeof(int), 1);
147   rdn(fp, &(para->delWin), sizeof(int), 1);
148   rdn(fp, &(para->accWin), sizeof(int), 1);
149   rdn(fp, &(para->silFloor), sizeof(float), 1);
150   rdn(fp, &(para->escale), sizeof(float), 1);
151   rdn(fp, &(para->hipass), sizeof(int), 1);
152   rdn(fp, &(para->lopass), sizeof(int), 1);
153   rdn(fp, &(para->enormal), sizeof(int), 1);
154   rdn(fp, &(para->raw_e), sizeof(int), 1);
155   if (version == 1) {
156     /* version 1 has ss related parameters, but version 2 and later not */
157     /* skip ss related parameters (ss_alpha and ss_floor) */
158     rdn(fp, &dummy, sizeof(float), 1);
159     rdn(fp, &dummy, sizeof(float), 1);
160   }
161   rdn(fp, &(para->zmeanframe), sizeof(int), 1);
162   if (version >= 3) {
163     rdn(fp, &(para->usepower), sizeof(int), 1);
164   }
165 
166   return(TRUE);
167 }
168 
169 /**
170  * Read header string of binary HMM file.
171  *
172  * @param fp [in] file pointer
173  * @param hmm [out] pointer to %HMM definition data to store the values.
174  * @param para [out] store embedded acoustic parameters if any (V2)
175  * @param mpdf_macro_ret [out] will be set to TRUE if the file contains mixture pdf macro defined by "~p"
176  *
177  * @return TRUE if a correct header was read, FALSE if header string does not
178  * match the current version.
179  */
180 static boolean
rd_header(FILE * fp,HTK_HMM_INFO * hmm,Value * para,boolean * mpdf_macro_ret)181 rd_header(FILE *fp, HTK_HMM_INFO *hmm, Value *para, boolean *mpdf_macro_ret)
182 {
183   char *p, *q;
184   boolean emp, inv;
185 
186   rdn_str(fp, hmm, p);
187   if (strmatch(p, binhmm_header)) {
188     /* version 1 */
189     hmm->variance_inversed = FALSE;
190   } else if (strmatch(p, binhmm_header_v2)) {
191     /* version 2 */
192     emp = inv = FALSE;
193     rdn_str(fp, hmm, q);
194     if (*q != '\0') {
195       while(*q == '_') {
196 	q++;
197 	switch (*q) {
198 	case BINHMM_HEADER_V2_EMBEDPARA:
199 	  /* read in embedded acoutic condition parameters */
200 	  emp = TRUE;
201 	  jlog("Stat: binhmm-header: analysis parameter embedded\n");
202 	  break;
203 	case BINHMM_HEADER_V2_VARINV:
204 	  inv = TRUE;
205 	  jlog("Stat: binhmm-header: variance inversed\n");
206 	  break;
207 	case BINHMM_HEADER_V2_MPDFMACRO:
208 	  *mpdf_macro_ret = TRUE;
209 	  jlog("Stat: binhmm-header: mixture PDF macro used\n");
210 	  break;
211 	default:
212 	  jlog("Error: unknown format qualifier in header: \"%c\"\n", *q);
213 	  return FALSE;
214 	}
215 	q++;
216       }
217     }
218     if (emp) {
219       para->loaded = 1;
220       if (rd_para(fp, para) == FALSE) {
221 	jlog("Error: read_binhmm: failed to read embeded parameter\n");
222 	return FALSE;
223       }
224       jlog("Stat: read_binhmm: has acoutic analysis configurations in its header\n");
225     }
226     if (inv) {
227       hmm->variance_inversed = TRUE;
228       jlog("Stat: read_binhmm: has inversed variances\n");
229     } else {
230       hmm->variance_inversed = FALSE;
231     }
232   } else {
233     /* failed to read header */
234     return FALSE;
235   }
236   return TRUE;
237 }
238 
239 
240 
241 /**
242  * Read %HMM option specifications.
243  *
244  * @param fp [in] file pointer
245  * @param opt [out] pointer to the %HMM option structure to hold the read
246  * values.
247  */
248 static boolean
rd_opt(FILE * fp,HTK_HMM_Options * opt)249 rd_opt(FILE *fp, HTK_HMM_Options *opt)
250 {
251   rdn(fp, &(opt->stream_info.num), sizeof(short), 1);
252   rdn(fp, opt->stream_info.vsize, sizeof(short), MAXSTREAMNUM);
253   rdn(fp, &(opt->vec_size), sizeof(short), 1);
254   rdn(fp, &(opt->cov_type), sizeof(short), 1);
255   rdn(fp, &(opt->dur_type), sizeof(short), 1);
256   rdn(fp, &(opt->param_type), sizeof(short), 1);
257 
258   return(TRUE);
259 }
260 
261 /**
262  * Read %HMM type of mixture tying.
263  *
264  * @param fp [in] file pointer
265  * @param hmm [out] pointer to %HMM definition data to store the values.
266  */
267 static boolean
rd_type(FILE * fp,HTK_HMM_INFO * hmm)268 rd_type(FILE *fp, HTK_HMM_INFO *hmm)
269 {
270   rdn(fp, &(hmm->is_tied_mixture), sizeof(boolean), 1);
271   rdn(fp, &(hmm->maxmixturenum), sizeof(int), 1);
272   return TRUE;
273 }
274 
275 
276 /* read transition data */
277 static HTK_HMM_Trans **tr_index; ///< Map transition matrix id to its pointer
278 static unsigned int tr_num;	///< Length of above
279 
280 /**
281  * @brief  Read a sequence of transition matrix data for @a tr_num.
282  *
283  * The transition matrixes are stored into @a hmm, and their pointers
284  * are also stored in @a tr_index for later data mapping operation
285  * from upper structure (state etc.).
286  *
287  * @param fp [in] file pointer
288  * @param hmm [out] %HMM definition structure to hold the read transitions.
289  */
290 static boolean
rd_trans(FILE * fp,HTK_HMM_INFO * hmm)291 rd_trans(FILE *fp, HTK_HMM_INFO *hmm)
292 {
293   HTK_HMM_Trans *t;
294   unsigned int idx;
295   int i;
296   PROB *atmp;
297   char *p;
298 
299   rdn(fp, &tr_num, sizeof(unsigned int), 1);
300   tr_index = (HTK_HMM_Trans **)mymalloc(sizeof(HTK_HMM_Trans *) * tr_num);
301 
302   hmm->trstart = NULL;
303   hmm->tr_root = NULL;
304   for (idx = 0; idx < tr_num; idx++) {
305     t = (HTK_HMM_Trans *)mybmalloc2(sizeof(HTK_HMM_Trans), &(hmm->mroot));
306     rdn_str(fp, hmm, p);
307     t->name = (*p == '\0') ? NULL : p;
308     rdn(fp, &(t->statenum), sizeof(short), 1);
309     t->a = (PROB **)mybmalloc2(sizeof(PROB *) * t->statenum, &(hmm->mroot));
310     atmp = (PROB *)mybmalloc2(sizeof(PROB) * t->statenum * t->statenum, &(hmm->mroot));
311     for (i=0;i<t->statenum;i++) {
312       t->a[i] = &(atmp[i*t->statenum]);
313       rdn(fp, t->a[i], sizeof(PROB), t->statenum);
314     }
315     trans_add(hmm, t);
316     tr_index[idx] = t;
317   }
318 
319 #ifdef DMES
320   jlog("Stat: read_binhmm: %d transition maxtix read\n", tr_num);
321 #endif
322   return TRUE;
323 }
324 
325 
326 static HTK_HMM_Var **vr_index;	///< Map variance id to its pointer
327 static unsigned int vr_num;	///< Length of above
328 
329 /**
330  * @brief  Read a sequence of variance vector for @a vr_num.
331  *
332  * The variance vectors are stored into @a hmm, and their pointers
333  * are also stored in @a vr_index for later data mapping operation
334  * from upper structure (density etc.).
335  *
336  * @param fp [in] file pointer
337  * @param hmm [out] %HMM definition structure to hold the read variance.
338  */
339 static boolean
rd_var(FILE * fp,HTK_HMM_INFO * hmm)340 rd_var(FILE *fp, HTK_HMM_INFO *hmm)
341 {
342   HTK_HMM_Var *v;
343   unsigned int idx;
344   char *p;
345 
346   rdn(fp, &vr_num, sizeof(unsigned int), 1);
347   vr_index = (HTK_HMM_Var **)mymalloc(sizeof(HTK_HMM_Var *) * vr_num);
348 
349   hmm->vrstart = NULL;
350   hmm->vr_root = NULL;
351   for (idx = 0; idx < vr_num; idx++) {
352     v = (HTK_HMM_Var *)mybmalloc2(sizeof(HTK_HMM_Var), &(hmm->mroot));
353     rdn_str(fp, hmm, p);
354     v->name = (*p == '\0') ? NULL : p;
355     rdn(fp, &(v->len), sizeof(short), 1);
356     v->vec = (VECT *)mybmalloc2(sizeof(VECT) * v->len, &(hmm->mroot));
357     rdn(fp, v->vec, sizeof(VECT), v->len);
358     vr_index[idx] = v;
359     var_add(hmm, v);
360   }
361 #ifdef DMES
362   jlog("Stat: read_binhmm: %d variance read\n", vr_num);
363 #endif
364   return TRUE;
365 }
366 
367 
368 /* read density data */
369 static HTK_HMM_Dens **dens_index; ///< Map density id to its pointer
370 static unsigned int dens_num;	///< Length of above
371 
372 /**
373  * @brief  Read a sequence of mixture densities for @a dens_num.
374  *
375  * The mixture densities are stored into @a hmm, and their references
376  * to lower structure (variance etc.) are recovered from the id-to-pointer
377  * index.  Their pointers are also stored in @a dens_index for
378  * later data mapping operation from upper structure (state etc.).
379  *
380  * @param fp [in] file pointer
381  * @param hmm [out] %HMM definition structure to hold the read densities.
382  */
383 static boolean
rd_dens(FILE * fp,HTK_HMM_INFO * hmm)384 rd_dens(FILE *fp, HTK_HMM_INFO *hmm)
385 {
386   HTK_HMM_Dens *d;
387   unsigned int idx;
388   unsigned int vid;
389   char *p;
390 
391   rdn(fp, &dens_num, sizeof(unsigned int), 1);
392   hmm->totalmixnum = dens_num;
393   dens_index = (HTK_HMM_Dens **)mymalloc(sizeof(HTK_HMM_Dens *) * dens_num);
394 
395   hmm->dnstart = NULL;
396   hmm->dn_root = NULL;
397   for (idx = 0; idx < dens_num; idx++) {
398     d = (HTK_HMM_Dens *)mybmalloc2(sizeof(HTK_HMM_Dens), &(hmm->mroot));
399     rdn_str(fp, hmm, p);
400     d->name = (*p == '\0') ? NULL : p;
401     rdn(fp, &(d->meanlen), sizeof(short), 1);
402     d->mean = (VECT *)mybmalloc2(sizeof(VECT) * d->meanlen, &(hmm->mroot));
403     rdn(fp, d->mean, sizeof(VECT), d->meanlen);
404     rdn(fp, &vid, sizeof(unsigned int), 1);
405     d->var = vr_index[vid];
406     rdn(fp, &(d->gconst), sizeof(LOGPROB), 1);
407     dens_index[idx] = d;
408     dens_add(hmm, d);
409   }
410 #ifdef DMES
411   jlog("Stat: read_binhmm: %d gaussian densities read\n", dens_num);
412 #endif
413   return TRUE;
414 }
415 
416 
417 /* read stream weight data */
418 static HTK_HMM_StreamWeight **streamweight_index; ///< Map stream weights id to its pointer
419 static unsigned int streamweight_num;	///< Length of above
420 
421 /**
422  * @brief  Read a sequence of stream weights for @a streamweight_num.
423  *
424  * The stream weights are stored into @a hmm, and their references
425  * to lower structure (variance etc.) are recovered from the id-to-pointer
426  * index.  Their pointers are also stored in @a dens_index for
427  * later data mapping operation from upper structure (state etc.).
428  *
429  * @param fp [in] file pointer
430  * @param hmm [out] %HMM definition structure to hold the read stream weights.
431  */
432 static boolean
rd_streamweight(FILE * fp,HTK_HMM_INFO * hmm)433 rd_streamweight(FILE *fp, HTK_HMM_INFO *hmm)
434 {
435   HTK_HMM_StreamWeight *sw;
436   unsigned int idx;
437   char *p;
438 
439   rdn(fp, &streamweight_num, sizeof(unsigned int), 1);
440   streamweight_index = (HTK_HMM_StreamWeight **)mymalloc(sizeof(HTK_HMM_StreamWeight *) * streamweight_num);
441 
442   hmm->swstart = NULL;
443   hmm->sw_root = NULL;
444   for (idx = 0; idx < streamweight_num; idx++) {
445     sw = (HTK_HMM_StreamWeight *)mybmalloc2(sizeof(HTK_HMM_StreamWeight), &(hmm->mroot));
446     rdn_str(fp, hmm, p);
447     sw->name = (*p == '\0') ? NULL : p;
448     rdn(fp, &(sw->len), sizeof(short), 1);
449     sw->weight = (VECT *)mybmalloc2(sizeof(VECT) * sw->len, &(hmm->mroot));
450     rdn(fp, sw->weight, sizeof(VECT), sw->len);
451     streamweight_index[idx] = sw;
452     sw_add(hmm, sw);
453   }
454 #ifdef DMES
455   jlog("Stat: read_binhmm: %d stream weights read\n", streamweight_num);
456 #endif
457   return TRUE;
458 }
459 
460 
461 /* read tmix data */
462 static GCODEBOOK **tm_index;	///< Map codebook id to its pointer
463 static unsigned int tm_num;	///< Length of above
464 
465 /**
466  * @brief  Read a sequence of mixture codebook for @a tm_num.
467  *
468  * The mixture codebook data are stored into @a hmm, and their references
469  * to lower structure (mixtures etc.) are recovered from the id-to-pointer
470  * index.  Their pointers are also stored in @a tm_index for
471  * later data mapping operation from upper structure (state etc.).
472  *
473  * @param fp [in] file pointer
474  * @param hmm [out] %HMM definition structure to hold the read codebooks.
475  */
476 static boolean
rd_tmix(FILE * fp,HTK_HMM_INFO * hmm)477 rd_tmix(FILE *fp, HTK_HMM_INFO *hmm)
478 {
479   GCODEBOOK *tm;
480   unsigned int idx;
481   unsigned int did;
482   int i;
483   char *p;
484 
485   rdn(fp, &tm_num, sizeof(unsigned int), 1);
486   hmm->codebooknum = tm_num;
487   tm_index = (GCODEBOOK **)mymalloc(sizeof(GCODEBOOK *) * tm_num);
488   hmm->maxcodebooksize = 0;
489 
490   hmm->codebook_root = NULL;
491   for (idx = 0; idx < tm_num; idx++) {
492     tm = (GCODEBOOK *)mybmalloc2(sizeof(GCODEBOOK), &(hmm->mroot));
493     rdn_str(fp, hmm, p);
494     tm->name = (*p == '\0') ? NULL : p;
495     rdn(fp, &(tm->num), sizeof(int), 1);
496     if (hmm->maxcodebooksize < tm->num) hmm->maxcodebooksize = tm->num;
497     tm->d = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * tm->num, &(hmm->mroot));
498     for(i=0;i<tm->num;i++) {
499       rdn(fp, &did, sizeof(unsigned int), 1);
500       if (did >= dens_num) {
501 	tm->d[i] = NULL;
502       } else {
503 	tm->d[i] = dens_index[did];
504       }
505     }
506     tm->id = idx;
507     tm_index[idx] = tm;
508     codebook_add(hmm, tm);
509   }
510 #ifdef DMES
511   jlog("Stat: read_binhmm: %d tied-mixture codebooks read\n", tm_num);
512 #endif
513   return TRUE;
514 }
515 
516 
517 /* read mpdf data */
518 static HTK_HMM_PDF **mpdf_index; ///< Map mixture pdf id to its pointer
519 static unsigned int mpdf_num;	///< Length of above
520 
521 /**
522  * Read a mixture PDF.
523  *
524  * @param fp [in] file pointer
525  * @param hmm [out] %HMM definition structure to hold the read codebooks.
526  * @param m [out] pointer where to store the input mixture PDF.
527  *
528  * @return TRUE on success, FALSE on error.
529  *
530  */
531 static boolean
rd_pdf_sub(FILE * fp,HTK_HMM_INFO * hmm,HTK_HMM_PDF * m)532 rd_pdf_sub(FILE *fp, HTK_HMM_INFO *hmm, HTK_HMM_PDF *m)
533 {
534   int i;
535   unsigned int did;
536 
537   rdn(fp, &(m->mix_num), sizeof(short), 1);
538   if (m->mix_num == -1) {
539     /* tmix */
540     rdn(fp, &did, sizeof(unsigned int), 1);
541     m->b = (HTK_HMM_Dens **)tm_index[did];
542     m->mix_num = (tm_index[did])->num;
543     m->tmix = TRUE;
544   } else {
545     /* mixture */
546     m->b = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * m->mix_num, &(hmm->mroot));
547     for (i=0;i<m->mix_num;i++) {
548       rdn(fp, &did, sizeof(unsigned int), 1);
549       if (did >= dens_num) {
550 	m->b[i] = NULL;
551       } else {
552 	m->b[i] = dens_index[did];
553       }
554     }
555     m->tmix = FALSE;
556   }
557   m->bweight = (PROB *)mybmalloc2(sizeof(PROB) * m->mix_num, &(hmm->mroot));
558   rdn(fp, m->bweight, sizeof(PROB), m->mix_num);
559 
560   return TRUE;
561 }
562 
563 
564 /**
565  * @brief  Read a sequence of mixture pdf for @a mpdf_num.
566  *
567  * The mixture pdfs are stored into @a hmm, and their references
568  * to lower structure (variance etc.) are recovered from the id-to-pointer
569  * index.  Their pointers are also stored in @a mpdf_index for
570  * later data mapping operation from upper structure (state etc.).
571  *
572  * @param fp [in] file pointer
573  * @param hmm [out] %HMM definition structure to hold the read data.
574  */
575 static boolean
rd_mpdf(FILE * fp,HTK_HMM_INFO * hmm)576 rd_mpdf(FILE *fp, HTK_HMM_INFO *hmm)
577 {
578   HTK_HMM_PDF *m;
579   unsigned int idx;
580   char *p;
581 
582   rdn(fp, &mpdf_num, sizeof(unsigned int), 1);
583   mpdf_index = (HTK_HMM_PDF **)mymalloc(sizeof(HTK_HMM_PDF *) * mpdf_num);
584 
585   hmm->pdfstart = NULL;
586   hmm->pdf_root = NULL;
587   for (idx = 0; idx < mpdf_num; idx++) {
588     m = (HTK_HMM_PDF *)mybmalloc2(sizeof(HTK_HMM_PDF), &(hmm->mroot));
589     rdn_str(fp, hmm, p);
590     m->name = (*p == '\0') ? NULL : p;
591     rdn(fp, &(m->stream_id), sizeof(short), 1);
592     if (rd_pdf_sub(fp, hmm, m) == FALSE) return FALSE;
593     mpdf_index[idx] = m;
594     mpdf_add(hmm, m);
595   }
596 #ifdef DMES
597   jlog("Stat: read_binhmm: %d mixture PDFs read\n", mpdf_num);
598 #endif
599   return TRUE;
600 }
601 
602 
603 /* read state data */
604 static HTK_HMM_State **st_index; ///< Map state id to its pointer
605 static unsigned int st_num;	///< Length of above
606 
607 /**
608  * @brief  Read a sequence of state data for @a st_num.
609  *
610  * The state data are stored into @a hmm, and their references
611  * to lower structure (mixture, codebook, etc.) are recovered
612  * from the id-to-pointer index.  Their pointers are also stored
613  * in @a st_index for later data mapping operation from
614  * upper structure (models etc.).
615  *
616  * @param fp [in] file pointer
617  * @param hmm [out] %HMM definition structure to hold the read states.
618  * @param mpdf_macro [in] TRUE if mixture pdfs are already read separatedly, or FALSE if they are all defined in-line
619  */
620 static boolean
rd_state(FILE * fp,HTK_HMM_INFO * hmm,boolean mpdf_macro)621 rd_state(FILE *fp, HTK_HMM_INFO *hmm, boolean mpdf_macro)
622 {
623   HTK_HMM_State *s;
624   unsigned int idx;
625   unsigned int mid, swid;
626   int m;
627   char *buf;
628 
629   rdn(fp, &st_num, sizeof(unsigned int), 1);
630   hmm->totalstatenum = st_num;
631   st_index = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * st_num);
632 
633   hmm->ststart = NULL;
634   hmm->st_root = NULL;
635   for (idx = 0; idx < st_num; idx++) {
636     s = (HTK_HMM_State *)mybmalloc2(sizeof(HTK_HMM_State), &(hmm->mroot));
637     rdn_str(fp, hmm, buf);
638     s->name = (*buf == '\0') ? NULL : buf;
639     s->nstream = hmm->opt.stream_info.num;
640     s->pdf = (HTK_HMM_PDF **)mybmalloc2(sizeof(HTK_HMM_PDF *) * s->nstream, &(hmm->mroot));
641     if (mpdf_macro) {
642       /* mpdf are stored separatedly, so read index */
643       for(m=0;m<s->nstream;m++) {
644 	rdn(fp, &mid, sizeof(unsigned int), 1);
645 	if (mid >= mpdf_num) {
646 	  s->pdf[m] = NULL;
647 	} else {
648 	  s->pdf[m] = mpdf_index[mid];
649 	}
650       }
651     } else {
652       /* mpdf are stored sequencially, so read the content here */
653       for(m=0;m<s->nstream;m++) {
654 	s->pdf[m] = (HTK_HMM_PDF *)mybmalloc2(sizeof(HTK_HMM_PDF), &(hmm->mroot));
655 	s->pdf[m]->name = NULL;
656 	if (rd_pdf_sub(fp, hmm, s->pdf[m]) == FALSE) return FALSE;
657 	s->pdf[m]->stream_id = m;
658 	mpdf_add(hmm, s->pdf[m]);
659       }
660     }
661     if (hmm->opt.stream_info.num > 1) {
662       /* read steam weight info */
663       rdn(fp, &swid, sizeof(unsigned int), 1);
664       if (swid >= streamweight_num) {
665 	s->w = NULL;
666       } else {
667 	s->w = streamweight_index[swid];
668       }
669     } else {
670       s->w = NULL;
671     }
672     s->id = idx;
673     st_index[idx] = s;
674     state_add(hmm, s);
675   }
676 #ifdef DMES
677   jlog("Stat: read_binhmm: %d states read\n", st_num);
678 #endif
679   return TRUE;
680 }
681 
682 /**
683  * @brief  Read a sequence of %HMM models.
684  *
685  * The models are stored into @a hmm.  Their references
686  * to lower structures (state, transition, etc.) are stored in schalar
687  * ID, and are recovered from the previously built id-to-pointer index.
688  * when reading the sub structures.
689  *
690  * @param fp [in] file pointer
691  * @param hmm [out] %HMM definition structure to hold the read models.
692  */
693 static boolean
rd_data(FILE * fp,HTK_HMM_INFO * hmm)694 rd_data(FILE *fp, HTK_HMM_INFO *hmm)
695 {
696   HTK_HMM_Data *d;
697   unsigned int md_num;
698   unsigned int sid, tid;
699   unsigned int idx;
700   int i;
701   char *p;
702 
703   rdn(fp, &(md_num), sizeof(unsigned int), 1);
704   hmm->totalhmmnum = md_num;
705 
706   hmm->start = NULL;
707   hmm->physical_root = NULL;
708   for (idx = 0; idx < md_num; idx++) {
709     d = (HTK_HMM_Data *)mybmalloc2(sizeof(HTK_HMM_Data), &(hmm->mroot));
710     rdn_str(fp, hmm, p);
711     d->name = (*p == '\0') ? NULL : p;
712     rdn(fp, &(d->state_num), sizeof(short), 1);
713     d->s = (HTK_HMM_State **)mybmalloc2(sizeof(HTK_HMM_State *) * d->state_num, &(hmm->mroot));
714     for (i=0;i<d->state_num;i++) {
715       rdn(fp, &sid, sizeof(unsigned int), 1);
716       if (sid > (unsigned int)hmm->totalstatenum) {
717 	d->s[i] = NULL;
718       } else {
719 	d->s[i] = st_index[sid];
720       }
721     }
722     rdn(fp, &tid, sizeof(unsigned int), 1);
723     d->tr = tr_index[tid];
724     htk_hmmdata_add(hmm, d);
725   }
726 #ifdef DMES
727   jlog("Stat: read_binhmm: %d HMM model definition read\n", md_num);
728 #endif
729   return TRUE;
730 }
731 
732 
733 
734 /**
735  * Top function to read a binary %HMM file from @a fp.
736  *
737  * @param fp [in] file pointer
738  * @param hmm [out] %HMM definition structure to hold the read models.
739  * @param gzfile_p [in] TRUE if the file pointer points to a gzip file
740  * @param para [out] store acoustic parameters if embedded in binhmm (V2)
741  *
742  * @return TRUE on success, FALSE on failure.
743  */
744 boolean
read_binhmm(FILE * fp,HTK_HMM_INFO * hmm,boolean gzfile_p,Value * para)745 read_binhmm(FILE *fp, HTK_HMM_INFO *hmm, boolean gzfile_p, Value *para)
746 {
747   boolean mpdf_macro = FALSE;
748 
749   gzfile = gzfile_p;
750 
751   /* read header */
752   if (rd_header(fp, hmm, para, &mpdf_macro) == FALSE) {
753     return FALSE;
754   }
755 
756   jlog("Stat: read_binhmm: binary format HMM definition\n");
757 
758   /* read option data */
759   if (rd_opt(fp, &(hmm->opt)) == FALSE) {
760     jlog("Error: read_binhmm: failed to read HMM options\n");
761     return FALSE;
762   }
763 
764   /* read type data */
765   if (rd_type(fp, hmm) == FALSE) {
766     jlog("Error: read_binhmm: failed to read HMM type of mixture tying\n");
767     return FALSE;
768   }
769 
770   /* read transition data */
771   if (rd_trans(fp, hmm) == FALSE) {
772     jlog("Error: read_binhmm: failed to read HMM transition data\n");
773     return FALSE;
774   }
775 
776   /* read variance data */
777   if (rd_var(fp, hmm) == FALSE) {
778     jlog("Error: read_binhmm: failed to read HMM variance data\n");
779     return FALSE;
780   }
781 
782   /* read density data */
783   if (rd_dens(fp, hmm) == FALSE) {
784     jlog("Error: read_binhmm: failed to read HMM density data\n");
785     return FALSE;
786   }
787 
788   /* read stream weight data */
789   if (hmm->opt.stream_info.num > 1) {
790     if (rd_streamweight(fp, hmm) == FALSE) {
791       jlog("Error: read_binhmm: failed to read stream weights data\n");
792       return FALSE;
793     }
794   }
795 
796   /* read tmix data */
797   if (hmm->is_tied_mixture) {
798     if (rd_tmix(fp, hmm) == FALSE) {
799       jlog("Error: read_binhmm: failed to read HMM tied-mixture codebook data\n");
800       return FALSE;
801     }
802   }
803 
804   /* read mixture pdf data */
805   if (mpdf_macro) {
806     if (rd_mpdf(fp, hmm) == FALSE) {
807       jlog("Error: read_binhmm: failed to read mixture PDF data\n");
808       return FALSE;
809     }
810   }
811 
812   /* read state data */
813   if (rd_state(fp, hmm, mpdf_macro) == FALSE) {
814     jlog("Error: read_binhmm: failed to read HMM state data\n");
815     return FALSE;
816   }
817 
818   /* read model data */
819   if (rd_data(fp, hmm) == FALSE) {
820     jlog("Error: read_binhmm: failed to read HMM data\n");
821     return FALSE;
822   }
823 
824   /* free pointer->index work area */
825   if (mpdf_macro) free(mpdf_index);
826   free(tr_index);
827   free(vr_index);
828   if (hmm->opt.stream_info.num > 1) free(streamweight_index);
829   free(dens_index);
830   if (hmm->is_tied_mixture) free(tm_index);
831   free(st_index);
832 
833   /* count maximum state num (it is not stored in binhmm... */
834   {
835     HTK_HMM_Data *dtmp;
836     int maxlen = 0;
837     for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
838       if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
839     }
840     hmm->maxstatenum = maxlen;
841   }
842 
843   /* compute total number of mixture PDFs */
844   {
845     HTK_HMM_PDF *p;
846     int n = 0;
847     for (p = hmm->pdfstart; p; p = p->next) {
848       n++;
849     }
850     hmm->totalpdfnum = n;
851   }
852 
853   /* determine whether this model needs multi-path handling */
854   hmm->need_multipath = htk_hmm_has_several_arc_on_edge(hmm);
855   if (hmm->need_multipath) {
856     jlog("Stat: read_binhmm: this HMM requires multipath handling at decoding\n");
857   } else {
858     jlog("Stat: read_binhmm: this HMM does not need multipath handling\n");
859   }
860 
861   if (! hmm->variance_inversed) {
862     /* inverse all variance values for faster computation */
863     htk_hmm_inverse_variances(hmm);
864     hmm->variance_inversed = TRUE;
865   }
866 
867 #ifdef ENABLE_MSD
868   /* check if MSD-HMM */
869   htk_hmm_check_msd(hmm);
870 #endif
871 
872   return (TRUE);
873 }
874