1 /** 2 * @file mfcc.h 3 * 4 * <JA> 5 * @brief MFCC���Τ������� 6 * 7 * ���Υե�����ˤϡ������ȷ��ǡ�������MFCC��������ħ�̥٥��ȥ����� 8 * �����뤿��ι�¤�Τ��������ӥǥե�����ͤ��ޤޤ�Ƥ��ޤ��� 9 * �ǥե�����ͤ� Julius �ȤȤ�����ۤ���Ƥ��벻����ǥ�ǻ��Ѥ��Ƥ��� 10 * �ͤǤ��ꡤHTK�Υǥե���ȤȤ��ͤ��ۤʤ���ʬ������ޤ��Τ���դ��Ʋ������� 11 * </JA> 12 * <EN> 13 * @brief Definitions for MFCC computation 14 * 15 * This file contains structures and default values for extracting speech 16 * parameter vectors of Mel-Frequency Cepstral Cefficients (MFCC). 17 * The default values here are the ones used in the standard acoustic models 18 * distributed together with Julius, and some of them have different value from 19 * HTK defaults. So be careful of the default values. 20 * </EN> 21 * 22 * @sa libsent/src/wav2mfcc/wav2mfcc.c 23 * @sa libsent/src/wav2mfcc/wav2mfcc-pipe.c 24 * @sa julius/wav2mfcc.c 25 * @sa julius/realtime-1stpass.c 26 * 27 * @author Akinobu LEE 28 * @date Fri Feb 11 03:40:52 2005 29 * 30 * $Revision: 1.4 $ 31 * 32 */ 33 34 35 /************************************************************************/ 36 /* mfcc.h */ 37 /* */ 38 /* Author : Yuichiro Nakano */ 39 /************************************************************************/ 40 41 #ifndef __MFCC_H__ 42 #define __MFCC_H__ 43 44 /// DEBUG: define if you want to enable debug messages for sin/cos table operation 45 #undef MFCC_TABLE_DEBUG 46 47 #define CPMAX 500 ///< Maximum number of frames to store ceptral mean for realtime CMN update 48 #define CPSTEP 5 ///< allocate step of cmean list per sentence 49 50 #include <sent/stddefs.h> 51 #include <sent/htk_defs.h> 52 #include <sent/htk_param.h> 53 #include <ctype.h> 54 55 #define DEF_SMPPERIOD 625 ///< Default sampling period in 100ns (625 = 16kHz) 56 #define DEF_FRAMESIZE 400 ///< Default Window size in samples, similar to WINDOWSIZE in HTK (unit is different) 57 #define DEF_FFTNUM 512 ///< Number of FFT steps 58 #define DEF_FRAMESHIFT 160 ///< Default frame shift length in samples 59 #define DEF_PREENPH 0.97 ///< Default pre-emphasis coefficient, corresponds to PREEMCOEF in HTK 60 #define DEF_MFCCDIM 12 ///< Default number of MFCC dimension, corresponds to NUMCEPS in HTK 61 #define DEF_CEPLIF 22 ///< Default cepstral Liftering coefficient, corresponds to CEPLIFTER in HTK 62 #define DEF_FBANK 24 ///< Default number of filterbank channels, corresponds to NUMCHANS in HTK 63 #define DEF_DELWIN 2 ///< Default delta window size, corresponds to DELTAWINDOW in HTK 64 #define DEF_ACCWIN 2 ///< Default acceleration window size, corresponds to ACCWINDOW in HTK 65 #define DEF_SILFLOOR 50.0 ///< Default energy silence floor in dBs, corresponds to SILFLOOR in HTK 66 #define DEF_ESCALE 1.0 ///< Default scaling coefficient of log energy, corresponds to ESCALE in HTK 67 68 #define DEF_SSALPHA 2.0 ///< Default alpha coefficient for spectral subtraction 69 #define DEF_SSFLOOR 0.5 ///< Default flooring coefficient for spectral subtraction 70 71 /* version 2 ... ss_floor and ss_alpha removed */ 72 /* version 3 add usepower */ 73 #define VALUE_VERSION 3 ///< Integer version number of Value, for embedding 74 75 /// mfcc configuration parameter values 76 typedef struct { 77 long smp_period; ///< Sampling period in 100ns units 78 long smp_freq; ///< Sampling frequency 79 int framesize; ///< Window size in samples, similar to WINDOWSIZE in HTK (unit is different) 80 int frameshift; ///< Frame shift length in samples 81 float preEmph; ///< Pre-emphasis coefficient, corresponds to PREEMCOEF in HTK 82 int lifter; ///< Cepstral liftering coefficient, corresponds to CEPLIFTER in HTK 83 int fbank_num; ///< Number of filterbank channels, corresponds to NUMCHANS in HTK 84 int delWin; ///< Delta window size, corresponds to DELTAWINDOW in HTK 85 int accWin; ///< Acceleration window size, corresponds to ACCWINDOW in HTK 86 float silFloor; ///< Energy silence floor in dBs, corresponds to SILFLOOR in HTK 87 float escale; ///< Scaling coefficient of log energy, corresponds to ESCALE in HTK 88 int hipass; ///< High frequency cut-off in fbank analysis, -1 if disabled, corresponds to HIFREQ in HTK 89 int lopass; ///< Low frequency cut-off in fbank analysis, -1 if disabled, corresponds to LOFREQ in HTK 90 int enormal; ///< 1 if normalise raw energy, 0 if disabled, corresponds to ENORMALISE in HTK 91 int raw_e; ///< 1 if using raw energy, 0 if disabled, corresponds to RAWENERGY in HTK 92 int zmeanframe; ///< 1 if apply zero mean frame like ZMEANSOURCE in HTK 93 int usepower; ///< 1 if use power instead of magnitude in filterbank analysis 94 float vtln_alpha; ///< warping factor for VTLN, corresponds to WARPFREQ in HTK 95 float vtln_upper; ///< hi freq. cut off for VTLN, corresponds to WARPUCUTOFF in HTK 96 float vtln_lower; ///< low freq. cut off for VTLN, corresponds to WARPLCUTOFF in HTK 97 98 /* items below does not need to be embedded, because they can be 99 detemined from the acoustic model header, or should be computed 100 from run-time variables */ 101 int delta; ///< 1 if delta coef. needs to be computed 102 int acc; ///< 1 if acceleration coef. needs to be computed 103 int energy; ///< 1 if energy coef. needs to be computed 104 int c0; ///< 1 if use 0'th cepstral parameter, 0 if disabled, corresponds to _0 qualifier in HTK 105 int absesup; ///< 1 if absolute energy should be suppressed 106 int cmn; ///< 1 if use Cepstrum Mean Normalization, 0 if disabled, corresponds to _Z qualifier in HTK 107 int cvn; ///< 1 if use cepstral variance normalization, else 0 */ 108 int mfcc_dim; ///< Number of MFCC dimensions 109 int baselen; ///< Number of base MFCC dimension with energies 110 int vecbuflen; ///< Vector length needed for computation 111 int veclen; ///< Resulting length of vector 112 113 int loaded; ///< 1 if these parameters were loaded from HTK config file or binhmm header 114 }Value; 115 116 /// Workspace for filterbank analysis 117 typedef struct { 118 int fftN; ///< Number of FFT point 119 int n; ///< log2(fftN) 120 int klo; ///< FFT indices of lopass cut-off 121 int khi; ///< FFT indices of hipass cut-off 122 float fres; ///< Scaled FFT resolution 123 float *cf; ///< Array[1..pOrder+1] of centre freqs 124 short *loChan; ///< Array[1..fftN/2] of loChan index 125 float *loWt; ///< Array[1..fftN/2] of loChan weighting 126 float *Re; ///< Array[1..fftN] of fftchans (real part) 127 float *Im; ///< Array[1..fftN] of fftchans (imag part) 128 } FBankInfo; 129 130 /// Cycle buffer for delta computation 131 typedef struct { 132 float **mfcc; ///< MFCC buffer 133 int veclen; ///< Vector length of above 134 float *vec; ///< Points to the current MFCC 135 int win; ///< Delta window length 136 int len; ///< Length of the buffer (= win*2+1) 137 int store; ///< Current next storing point 138 boolean *is_on; ///< TRUE if data filled 139 int B; ///< B coef. for delta computation 140 } DeltaBuf; 141 142 /// Work area for MFCC computation 143 typedef struct { 144 float *bf; ///< Local buffer to hold windowed waveform 145 double *fbank; ///< Local buffer to hold filterbank 146 FBankInfo fb; ///< Local buffer to hold filterbank information 147 int bflen; ///< Length of above 148 #ifdef MFCC_SINCOS_TABLE 149 double *costbl_hamming; ///< Cos table for hamming window 150 int costbl_hamming_len; ///< Length of above 151 /* cos/-sin table for FFT */ 152 double *costbl_fft; ///< Cos table for FFT 153 double *sintbl_fft; ///< Sin table for FFT 154 int tbllen; ///< Length of above 155 /* cos table for MakeMFCC */ 156 double *costbl_makemfcc; ///< Cos table for DCT 157 int costbl_makemfcc_len; ///< Length of above 158 /* sin table for WeightCepstrum */ 159 double *sintbl_wcep; ///< Sin table for cepstrum weighting 160 int sintbl_wcep_len; ///< Length of above 161 #endif /* MFCC_SINCOS_TABLE */ 162 float sqrt2var; ///< Work area that holds value of sqrt(2.0) / fbank_num 163 float *ssbuf; ///< Pointer to noise spectrum for SS 164 int ssbuflen; ///< length of @a ssbuf 165 float ss_floor; ///< flooring value for SS 166 float ss_alpha; ///< alpha scaling value for SS 167 } MFCCWork; 168 169 /** 170 * Structure to hold sentence sum of MFCC for realtime CMN 171 * 172 */ 173 typedef struct { 174 float *mfcc_sum; ///< Sum of MFCC parameters 175 float *mfcc_var; ///< Variance sum of MFCC parameters 176 int framenum; ///< summed number of frames 177 } CMEAN; 178 179 /** 180 * Work area for real-time CMN 181 * 182 */ 183 typedef struct { 184 CMEAN *clist; ///< List of MFCC sum for previous inputs 185 int clist_max; ///< Allocated number of CMEAN in clist 186 int clist_num; ///< Currentlly filled CMEAN in clist 187 float cweight; ///< Weight of initial cepstral mean 188 float *cmean_init; ///< Initial cepstral mean for each input 189 float *cvar_init; ///< Inisial cepstral standard deviation for each input 190 int mfcc_dim; ///< base MFCC dimension (to apply CMN) 191 int veclen; ///< full MFCC vector length 192 boolean mean; ///< TRUE if CMN is enabled 193 boolean var; ///< TRUE if CVN is enabled 194 boolean cmean_init_set; ///< TRUE if cmean_init (and cvar_init) was set 195 CMEAN now; ///< Work area to hold current cepstral mean 196 } CMNWork; 197 198 /** 199 * work area for energy normalization on real time input 200 * 201 */ 202 typedef struct { 203 LOGPROB max_last; ///< Maximum energy value of last input 204 LOGPROB min_last; ///< Minimum floored energy value of last input 205 LOGPROB max; ///< Maximum energy value of current input 206 } ENERGYWork; 207 208 /**** mfcc-core.c ****/ 209 MFCCWork *WMP_work_new(Value *para); 210 void WMP_calc(MFCCWork *w, float *mfcc, Value *para); 211 void WMP_free(MFCCWork *w); 212 /* Get filterbank information */ 213 boolean InitFBank(MFCCWork *w, Value *para); 214 void FreeFBank(FBankInfo *fb); 215 /* Apply hamming window */ 216 void Hamming (float *wave, int framesize, MFCCWork *w); 217 /* Apply pre-emphasis filter */ 218 void PreEmphasise (float *wave, int framesize, float preEmph); 219 /* Return mel-frequency */ 220 float Mel(int k, float fres); 221 /* Apply FFT */ 222 void FFT(float *xRe, float *xIm, int p, MFCCWork *w); 223 /* Convert wave -> mel-frequency filterbank */ 224 void MakeFBank(float *wave, MFCCWork *w, Value *para); 225 /* Apply the DCT to filterbank */ 226 void MakeMFCC(float *mfcc, Value *para, MFCCWork *w); 227 /* Calculate 0'th Cepstral parameter*/ 228 float CalcC0(MFCCWork *w, Value *para); 229 /* Calculate Log Raw Energy */ 230 float CalcLogRawE(float *wave, int framesize); 231 /* Zero Mean Souce by frame */ 232 void ZMeanFrame(float *wave, int framesize); 233 /* Re-scale cepstral coefficients */ 234 void WeightCepstrum (float *mfcc, Value *para, MFCCWork *w); 235 236 /**** wav2mfcc-buffer.c ****/ 237 /* Convert wave -> MFCC_E_D_(Z) (batch) */ 238 int Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w); 239 /* Calculate delta coefficients (batch) */ 240 void Delta(float **c, int frame, Value *para); 241 /* Calculate acceleration coefficients (batch) */ 242 void Accel(float **c, int frame, Value *para); 243 /* Normalise log energy (batch) */ 244 void NormaliseLogE(float **c, int frame_num, Value *para); 245 /* Cepstrum Mean Normalization (batch) */ 246 void CMN(float **mfcc, int frame_num, int dim); 247 void MVN(float **mfcc, int frame_num, Value *para); 248 249 /**** wav2mfcc-pipe.c ****/ 250 DeltaBuf *WMP_deltabuf_new(int veclen, int windowlen); 251 void WMP_deltabuf_free(DeltaBuf *db); 252 void WMP_deltabuf_prepare(DeltaBuf *db); 253 boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc); 254 boolean WMP_deltabuf_flush(DeltaBuf *db); 255 256 CMNWork *CMN_realtime_new(Value *para, float weight); 257 void CMN_realtime_free(CMNWork *c); 258 void CMN_realtime_prepare(CMNWork *c); 259 void CMN_realtime(CMNWork *c, float *mfcc); 260 void CMN_realtime_update(CMNWork *c, HTK_Param *param); 261 boolean CMN_load_from_file(CMNWork *c, char *filename); 262 boolean CMN_save_to_file(CMNWork *c, char *filename); 263 264 void energy_max_init(ENERGYWork *energy); 265 void energy_max_prepare(ENERGYWork *energy, Value *para); 266 LOGPROB energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para); 267 268 /**** ss.c ****/ 269 /* spectral subtraction */ 270 float *new_SS_load_from_file(char *filename, int *slen); 271 float *new_SS_calculate(SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para); 272 273 /**** para.c *****/ 274 void undef_para(Value *para); 275 void make_default_para(Value *para); 276 void make_default_para_htk(Value *para); 277 void apply_para(Value *dst, Value *src); 278 boolean htk_config_file_parse(char *HTKconffile, Value *para); 279 void calc_para_from_header(Value *para, short param_type, short vec_size); 280 void put_para(FILE *fp, Value *para); 281 282 283 #endif /* __MFCC_H__ */ 284