1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1996-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 #include <stdio.h>
38 #include <string.h>
39 #include <math.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 #ifdef _WIN32_WCE
43 #include <windows.h>
44 #else
45 #include <time.h>
46 #endif
47 
48 #ifdef HAVE_CONFIG_H
49 #include <config.h>
50 #endif
51 
52 #include "sphinxbase/prim_type.h"
53 #include "sphinxbase/byteorder.h"
54 #include "sphinxbase/fixpoint.h"
55 #include "sphinxbase/genrand.h"
56 #include "sphinxbase/err.h"
57 #include "sphinxbase/cmd_ln.h"
58 #include "sphinxbase/ckd_alloc.h"
59 
60 #include "fe_internal.h"
61 #include "fe_warp.h"
62 
63 static const arg_t fe_args[] = {
64     waveform_to_cepstral_command_line_macro(),
65     { NULL, 0, NULL, NULL }
66 };
67 
68 int
fe_parse_general_params(cmd_ln_t * config,fe_t * fe)69 fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
70 {
71     int j, frate;
72 
73     fe->config = config;
74     fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
75     frate = cmd_ln_int32_r(config, "-frate");
76     if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) {
77         E_ERROR
78             ("Frame rate %d can not be bigger than sample rate %.02f\n",
79              frate, fe->sampling_rate);
80         return -1;
81     }
82 
83     fe->frame_rate = (int16)frate;
84     if (cmd_ln_boolean_r(config, "-dither")) {
85         fe->dither = 1;
86         fe->seed = cmd_ln_int32_r(config, "-seed");
87     }
88 #ifdef WORDS_BIGENDIAN
89     fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
90 #else
91     fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
92 #endif
93     fe->window_length = cmd_ln_float32_r(config, "-wlen");
94     fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");
95 
96     fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
97     fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");
98 
99     /* Check FFT size, compute FFT order (log_2(n)) */
100     for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
101         if (((j % 2) != 0) || (fe->fft_size <= 0)) {
102             E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
103                     fe->fft_size);
104             return -1;
105         }
106     }
107     /* Verify that FFT size is greater or equal to window length. */
108     if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) {
109         E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
110                 (int)(fe->window_length * fe->sampling_rate));
111         return -1;
112     }
113 
114     fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");
115 
116     if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct"))
117         fe->transform = DCT_II;
118     else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy"))
119         fe->transform = LEGACY_DCT;
120     else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk"))
121         fe->transform = DCT_HTK;
122     else {
123         E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
124         return -1;
125     }
126 
127     if (cmd_ln_boolean_r(config, "-logspec"))
128         fe->log_spec = RAW_LOG_SPEC;
129     if (cmd_ln_boolean_r(config, "-smoothspec"))
130         fe->log_spec = SMOOTH_LOG_SPEC;
131 
132     return 0;
133 }
134 
135 static int
fe_parse_melfb_params(cmd_ln_t * config,fe_t * fe,melfb_t * mel)136 fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
137 {
138     mel->sampling_rate = fe->sampling_rate;
139     mel->fft_size = fe->fft_size;
140     mel->num_cepstra = fe->num_cepstra;
141     mel->num_filters = cmd_ln_int32_r(config, "-nfilt");
142 
143     if (fe->log_spec)
144         fe->feature_dimension = mel->num_filters;
145     else
146         fe->feature_dimension = fe->num_cepstra;
147 
148     mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
149     mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
150 
151     mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");
152 
153     mel->warp_type = cmd_ln_str_r(config, "-warp_type");
154     mel->warp_params = cmd_ln_str_r(config, "-warp_params");
155     mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
156 
157     mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
158     mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");
159 
160     if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
161         E_ERROR("Failed to initialize the warping function.\n");
162         return -1;
163     }
164     fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
165     return 0;
166 }
167 
168 void
fe_print_current(fe_t const * fe)169 fe_print_current(fe_t const *fe)
170 {
171     E_INFO("Current FE Parameters:\n");
172     E_INFO("\tSampling Rate:             %f\n", fe->sampling_rate);
173     E_INFO("\tFrame Size:                %d\n", fe->frame_size);
174     E_INFO("\tFrame Shift:               %d\n", fe->frame_shift);
175     E_INFO("\tFFT Size:                  %d\n", fe->fft_size);
176     E_INFO("\tLower Frequency:           %g\n",
177            fe->mel_fb->lower_filt_freq);
178     E_INFO("\tUpper Frequency:           %g\n",
179            fe->mel_fb->upper_filt_freq);
180     E_INFO("\tNumber of filters:         %d\n", fe->mel_fb->num_filters);
181     E_INFO("\tNumber of Overflow Samps:  %d\n", fe->num_overflow_samps);
182     E_INFO("\tStart Utt Status:          %d\n", fe->start_flag);
183     E_INFO("Will %sremove DC offset at frame level\n",
184            fe->remove_dc ? "" : "not ");
185     if (fe->dither) {
186         E_INFO("Will add dither to audio\n");
187         E_INFO("Dither seeded with %d\n", fe->seed);
188     }
189     else {
190         E_INFO("Will not add dither to audio\n");
191     }
192     if (fe->mel_fb->lifter_val) {
193         E_INFO("Will apply sine-curve liftering, period %d\n",
194                fe->mel_fb->lifter_val);
195     }
196     E_INFO("Will %snormalize filters to unit area\n",
197            fe->mel_fb->unit_area ? "" : "not ");
198     E_INFO("Will %sround filter frequencies to DFT points\n",
199            fe->mel_fb->round_filters ? "" : "not ");
200     E_INFO("Will %suse double bandwidth in mel filter\n",
201            fe->mel_fb->doublewide ? "" : "not ");
202 }
203 
204 fe_t *
fe_init_auto()205 fe_init_auto()
206 {
207     return fe_init_auto_r(cmd_ln_get());
208 }
209 
210 fe_t *
fe_init_auto_r(cmd_ln_t * config)211 fe_init_auto_r(cmd_ln_t *config)
212 {
213     fe_t *fe;
214 
215     fe = ckd_calloc(1, sizeof(*fe));
216     fe->refcount = 1;
217 
218     /* transfer params to front end */
219     if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) {
220         fe_free(fe);
221         return NULL;
222     }
223 
224     /* compute remaining fe parameters */
225     /* We add 0.5 so approximate the float with the closest
226      * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4
227      */
228     fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
229     fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
230     fe->prior = 0;
231     fe->frame_counter = 0;
232 
233     assert (fe->frame_shift > 1);
234 
235     if (fe->frame_size > (fe->fft_size)) {
236         E_WARN
237             ("Number of FFT points has to be a power of 2 higher than %d\n",
238              (fe->frame_size));
239         fe_free(fe);
240         return (NULL);
241     }
242 
243     if (fe->dither)
244         fe_init_dither(fe->seed);
245 
246     /* establish buffers for overflow samps and hamming window */
247     fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
248     fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));
249 
250     /* create hamming window */
251     fe_create_hamming(fe->hamming_window, fe->frame_size);
252 
253     /* init and fill appropriate filter structure */
254     fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
255 
256     /* transfer params to mel fb */
257     fe_parse_melfb_params(config, fe, fe->mel_fb);
258     fe_build_melfilters(fe->mel_fb);
259     fe_compute_melcosine(fe->mel_fb);
260 
261     /* Create temporary FFT, spectrum and mel-spectrum buffers. */
262     /* FIXME: Gosh there are a lot of these. */
263     fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
264     fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
265     fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
266     fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));
267 
268     /* create twiddle factors */
269     fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
270     fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
271     fe_create_twiddle(fe);
272 
273     if (cmd_ln_boolean_r(config, "-verbose")) {
274         fe_print_current(fe);
275     }
276 
277     /*** Z.A.B. ***/
278     /*** Initialize the overflow buffers ***/
279     fe_start_utt(fe);
280     return fe;
281 }
282 
283 arg_t const *
fe_get_args(void)284 fe_get_args(void)
285 {
286     return fe_args;
287 }
288 
289 const cmd_ln_t *
fe_get_config(fe_t * fe)290 fe_get_config(fe_t *fe)
291 {
292     return fe->config;
293 }
294 
295 void
fe_init_dither(int32 seed)296 fe_init_dither(int32 seed)
297 {
298     if (seed < 0) {
299         E_INFO("You are using the internal mechanism to generate the seed.\n");
300 #ifdef _WIN32_WCE
301         s3_rand_seed(GetTickCount());
302 #else
303         s3_rand_seed((long) time(0));
304 #endif
305     }
306     else {
307         E_INFO("You are using %d as the seed.\n", seed);
308         s3_rand_seed(seed);
309     }
310 }
311 
312 int32
fe_start_utt(fe_t * fe)313 fe_start_utt(fe_t * fe)
314 {
315     fe->num_overflow_samps = 0;
316     memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16));
317     fe->start_flag = 1;
318     fe->prior = 0;
319     return 0;
320 }
321 
322 int
fe_get_output_size(fe_t * fe)323 fe_get_output_size(fe_t *fe)
324 {
325     return (int)fe->feature_dimension;
326 }
327 
328 void
fe_get_input_size(fe_t * fe,int * out_frame_shift,int * out_frame_size)329 fe_get_input_size(fe_t *fe, int *out_frame_shift,
330                   int *out_frame_size)
331 {
332     if (out_frame_shift)
333         *out_frame_shift = fe->frame_shift;
334     if (out_frame_size)
335         *out_frame_size = fe->frame_size;
336 }
337 
338 int32
fe_process_frame(fe_t * fe,int16 const * spch,int32 nsamps,mfcc_t * fr_cep)339 fe_process_frame(fe_t * fe, int16 const *spch, int32 nsamps, mfcc_t * fr_cep)
340 {
341     fe_read_frame(fe, spch, nsamps);
342     return fe_write_frame(fe, fr_cep);
343 }
344 
345 int
fe_process_frames(fe_t * fe,int16 const ** inout_spch,size_t * inout_nsamps,mfcc_t ** buf_cep,int32 * inout_nframes)346 fe_process_frames(fe_t *fe,
347                   int16 const **inout_spch,
348                   size_t *inout_nsamps,
349                   mfcc_t **buf_cep,
350                   int32 *inout_nframes)
351 {
352     int32 frame_count;
353     int outidx, i, n, n_overflow, orig_n_overflow;
354     int16 const *orig_spch;
355 
356     /* In the special case where there is no output buffer, return the
357      * maximum number of frames which would be generated. */
358     if (buf_cep == NULL) {
359         if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size)
360             *inout_nframes = 0;
361         else
362             *inout_nframes = 1
363                 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
364                    / fe->frame_shift);
365         return *inout_nframes;
366     }
367 
368     /* Are there not enough samples to make at least 1 frame? */
369     if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) {
370         if (*inout_nsamps > 0) {
371             /* Append them to the overflow buffer. */
372             memcpy(fe->overflow_samps + fe->num_overflow_samps,
373                    *inout_spch, *inout_nsamps * (sizeof(int16)));
374             fe->num_overflow_samps += *inout_nsamps;
375             /* Update input-output pointers and counters. */
376             *inout_spch += *inout_nsamps;
377             *inout_nsamps = 0;
378         }
379         /* We produced no frames of output, sorry! */
380         *inout_nframes = 0;
381         return 0;
382     }
383 
384     /* Can't write a frame?  Then do nothing! */
385     if (*inout_nframes < 1) {
386         *inout_nframes = 0;
387         return 0;
388     }
389 
390     /* Keep track of the original start of the buffer. */
391     orig_spch = *inout_spch;
392     orig_n_overflow = fe->num_overflow_samps;
393     /* How many frames will we be able to get? */
394     frame_count = 1
395         + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
396            / fe->frame_shift);
397     /* Limit it to the number of output frames available. */
398     if (frame_count > *inout_nframes)
399         frame_count = *inout_nframes;
400     /* Index of output frame. */
401     outidx = 0;
402 
403     /* Start processing, taking care of any incoming overflow. */
404     if (fe->num_overflow_samps) {
405         int offset = fe->frame_size - fe->num_overflow_samps;
406 
407         /* Append start of spch to overflow samples to make a full frame. */
408         memcpy(fe->overflow_samps + fe->num_overflow_samps,
409                *inout_spch, offset * sizeof(**inout_spch));
410         fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
411         assert(outidx < frame_count);
412         if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
413             return -1;
414         outidx += n;
415         /* Update input-output pointers and counters. */
416         *inout_spch += offset;
417         *inout_nsamps -= offset;
418         fe->num_overflow_samps -= fe->frame_shift;
419     }
420     else {
421         fe_read_frame(fe, *inout_spch, fe->frame_size);
422         assert(outidx < frame_count);
423         if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
424             return -1;
425         outidx += n;
426         /* Update input-output pointers and counters. */
427         *inout_spch += fe->frame_size;
428         *inout_nsamps -= fe->frame_size;
429     }
430 
431     /* Process all remaining frames. */
432     for (i = 1; i < frame_count; ++i) {
433         assert(*inout_nsamps >= (size_t)fe->frame_shift);
434 
435         fe_shift_frame(fe, *inout_spch, fe->frame_shift);
436         assert(outidx < frame_count);
437         if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
438             return -1;
439         outidx += n;
440         /* Update input-output pointers and counters. */
441         *inout_spch += fe->frame_shift;
442         *inout_nsamps -= fe->frame_shift;
443         /* Amount of data behind the original input which is still needed. */
444         if (fe->num_overflow_samps > 0)
445             fe->num_overflow_samps -= fe->frame_shift;
446     }
447 
448     /* How many relevant overflow samples are there left? */
449     if (fe->num_overflow_samps <= 0) {
450         /* Maximum number of overflow samples past *inout_spch to save. */
451         n_overflow = *inout_nsamps;
452         if (n_overflow > fe->frame_shift)
453             n_overflow = fe->frame_shift;
454         fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
455         /* Make sure this isn't an illegal read! */
456         if (fe->num_overflow_samps > *inout_spch - orig_spch)
457             fe->num_overflow_samps = *inout_spch - orig_spch;
458         fe->num_overflow_samps += n_overflow;
459         if (fe->num_overflow_samps > 0) {
460             memcpy(fe->overflow_samps,
461                    *inout_spch - (fe->frame_size - fe->frame_shift),
462                    fe->num_overflow_samps * sizeof(**inout_spch));
463             /* Update the input pointer to cover this stuff. */
464             *inout_spch += n_overflow;
465             *inout_nsamps -= n_overflow;
466         }
467     }
468     else {
469         /* There is still some relevant data left in the overflow buffer. */
470         /* Shift existing data to the beginning. */
471         memmove(fe->overflow_samps,
472                 fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
473                 fe->num_overflow_samps * sizeof(*fe->overflow_samps));
474         /* Copy in whatever we had in the original speech buffer. */
475         n_overflow = *inout_spch - orig_spch + *inout_nsamps;
476         if (n_overflow > fe->frame_size - fe->num_overflow_samps)
477             n_overflow = fe->frame_size - fe->num_overflow_samps;
478         memcpy(fe->overflow_samps + fe->num_overflow_samps,
479                orig_spch, n_overflow * sizeof(*orig_spch));
480         fe->num_overflow_samps += n_overflow;
481         /* Advance the input pointers. */
482         if (n_overflow > *inout_spch - orig_spch) {
483             n_overflow -= (*inout_spch - orig_spch);
484             *inout_spch += n_overflow;
485             *inout_nsamps -= n_overflow;
486         }
487     }
488 
489     /* Finally update the frame counter with the number of frames we procesed. */
490     *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */
491     return 0;
492 }
493 
494 int
fe_process_utt(fe_t * fe,int16 const * spch,size_t nsamps,mfcc_t *** cep_block,int32 * nframes)495 fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps,
496                mfcc_t *** cep_block, int32 * nframes)
497 {
498     mfcc_t **cep;
499     int rv;
500 
501     /* Figure out how many frames we will need. */
502     fe_process_frames(fe, NULL, &nsamps, NULL, nframes);
503     /* Create the output buffer (it has to exist, even if there are no output frames). */
504     if (*nframes)
505         cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep));
506     else
507         cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep));
508     /* Now just call fe_process_frames() with the allocated buffer. */
509     rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes);
510     *cep_block = cep;
511 
512     return rv;
513 }
514 
515 
516 int32
fe_end_utt(fe_t * fe,mfcc_t * cepvector,int32 * nframes)517 fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
518 {
519     /* Process any remaining data. */
520     if (fe->num_overflow_samps > 0) {
521         fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
522         *nframes = fe_write_frame(fe, cepvector);
523     }
524     else {
525         *nframes = 0;
526     }
527 
528     /* reset overflow buffers... */
529     fe->num_overflow_samps = 0;
530     fe->start_flag = 0;
531 
532     return 0;
533 }
534 
535 fe_t *
fe_retain(fe_t * fe)536 fe_retain(fe_t *fe)
537 {
538     ++fe->refcount;
539     return fe;
540 }
541 
542 int
fe_free(fe_t * fe)543 fe_free(fe_t * fe)
544 {
545     if (fe == NULL)
546         return 0;
547     if (--fe->refcount > 0)
548         return fe->refcount;
549 
550     /* kill FE instance - free everything... */
551     if (fe->mel_fb) {
552         if (fe->mel_fb->mel_cosine)
553             fe_free_2d((void *) fe->mel_fb->mel_cosine);
554         ckd_free(fe->mel_fb->lifter);
555         ckd_free(fe->mel_fb->spec_start);
556         ckd_free(fe->mel_fb->filt_start);
557         ckd_free(fe->mel_fb->filt_width);
558         ckd_free(fe->mel_fb->filt_coeffs);
559         ckd_free(fe->mel_fb);
560     }
561     ckd_free(fe->spch);
562     ckd_free(fe->frame);
563     ckd_free(fe->ccc);
564     ckd_free(fe->sss);
565     ckd_free(fe->spec);
566     ckd_free(fe->mfspec);
567     ckd_free(fe->overflow_samps);
568     ckd_free(fe->hamming_window);
569     cmd_ln_free_r(fe->config);
570     ckd_free(fe);
571 
572     return 0;
573 }
574 
575 /**
576  * Convert a block of mfcc_t to float32 (can be done in-place)
577  **/
578 int32
fe_mfcc_to_float(fe_t * fe,mfcc_t ** input,float32 ** output,int32 nframes)579 fe_mfcc_to_float(fe_t * fe,
580                  mfcc_t ** input, float32 ** output, int32 nframes)
581 {
582     int32 i;
583 
584 #ifndef FIXED_POINT
585     if ((void *) input == (void *) output)
586         return nframes * fe->feature_dimension;
587 #endif
588     for (i = 0; i < nframes * fe->feature_dimension; ++i)
589         output[0][i] = MFCC2FLOAT(input[0][i]);
590 
591     return i;
592 }
593 
594 /**
595  * Convert a block of float32 to mfcc_t (can be done in-place)
596  **/
597 int32
fe_float_to_mfcc(fe_t * fe,float32 ** input,mfcc_t ** output,int32 nframes)598 fe_float_to_mfcc(fe_t * fe,
599                  float32 ** input, mfcc_t ** output, int32 nframes)
600 {
601     int32 i;
602 
603 #ifndef FIXED_POINT
604     if ((void *) input == (void *) output)
605         return nframes * fe->feature_dimension;
606 #endif
607     for (i = 0; i < nframes * fe->feature_dimension; ++i)
608         output[0][i] = FLOAT2MFCC(input[0][i]);
609 
610     return i;
611 }
612 
613 int32
fe_logspec_to_mfcc(fe_t * fe,const mfcc_t * fr_spec,mfcc_t * fr_cep)614 fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
615 {
616 #ifdef FIXED_POINT
617     fe_spec2cep(fe, fr_spec, fr_cep);
618 #else                           /* ! FIXED_POINT */
619     powspec_t *powspec;
620     int32 i;
621 
622     powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
623     for (i = 0; i < fe->mel_fb->num_filters; ++i)
624         powspec[i] = (powspec_t) fr_spec[i];
625     fe_spec2cep(fe, powspec, fr_cep);
626     ckd_free(powspec);
627 #endif                          /* ! FIXED_POINT */
628     return 0;
629 }
630 
631 int32
fe_logspec_dct2(fe_t * fe,const mfcc_t * fr_spec,mfcc_t * fr_cep)632 fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
633 {
634 #ifdef FIXED_POINT
635     fe_dct2(fe, fr_spec, fr_cep, 0);
636 #else                           /* ! FIXED_POINT */
637     powspec_t *powspec;
638     int32 i;
639 
640     powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
641     for (i = 0; i < fe->mel_fb->num_filters; ++i)
642         powspec[i] = (powspec_t) fr_spec[i];
643     fe_dct2(fe, powspec, fr_cep, 0);
644     ckd_free(powspec);
645 #endif                          /* ! FIXED_POINT */
646     return 0;
647 }
648 
649 int32
fe_mfcc_dct3(fe_t * fe,const mfcc_t * fr_cep,mfcc_t * fr_spec)650 fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec)
651 {
652 #ifdef FIXED_POINT
653     fe_dct3(fe, fr_cep, fr_spec);
654 #else                           /* ! FIXED_POINT */
655     powspec_t *powspec;
656     int32 i;
657 
658     powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
659     fe_dct3(fe, fr_cep, powspec);
660     for (i = 0; i < fe->mel_fb->num_filters; ++i)
661         fr_spec[i] = (mfcc_t) powspec[i];
662     ckd_free(powspec);
663 #endif                          /* ! FIXED_POINT */
664     return 0;
665 }
666