1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37 #include <stdio.h>
38 #include <string.h>
39 #include <math.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 #ifdef _WIN32_WCE
43 #include <windows.h>
44 #else
45 #include <time.h>
46 #endif
47
48 #ifdef HAVE_CONFIG_H
49 #include <config.h>
50 #endif
51
52 #include "sphinxbase/prim_type.h"
53 #include "sphinxbase/byteorder.h"
54 #include "sphinxbase/fixpoint.h"
55 #include "sphinxbase/genrand.h"
56 #include "sphinxbase/err.h"
57 #include "sphinxbase/cmd_ln.h"
58 #include "sphinxbase/ckd_alloc.h"
59
60 #include "fe_internal.h"
61 #include "fe_warp.h"
62
/*
 * Argument definition table for the front end, returned to callers by
 * fe_get_args().  The entries come from the
 * waveform_to_cepstral_command_line_macro() expansion and the table is
 * terminated by an all-NULL sentinel entry.
 */
static const arg_t fe_args[] = {
    waveform_to_cepstral_command_line_macro(),
    { NULL, 0, NULL, NULL }
};
67
68 int
fe_parse_general_params(cmd_ln_t * config,fe_t * fe)69 fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
70 {
71 int j, frate;
72
73 fe->config = config;
74 fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
75 frate = cmd_ln_int32_r(config, "-frate");
76 if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) {
77 E_ERROR
78 ("Frame rate %d can not be bigger than sample rate %.02f\n",
79 frate, fe->sampling_rate);
80 return -1;
81 }
82
83 fe->frame_rate = (int16)frate;
84 if (cmd_ln_boolean_r(config, "-dither")) {
85 fe->dither = 1;
86 fe->seed = cmd_ln_int32_r(config, "-seed");
87 }
88 #ifdef WORDS_BIGENDIAN
89 fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
90 #else
91 fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
92 #endif
93 fe->window_length = cmd_ln_float32_r(config, "-wlen");
94 fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");
95
96 fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
97 fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");
98
99 /* Check FFT size, compute FFT order (log_2(n)) */
100 for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
101 if (((j % 2) != 0) || (fe->fft_size <= 0)) {
102 E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
103 fe->fft_size);
104 return -1;
105 }
106 }
107 /* Verify that FFT size is greater or equal to window length. */
108 if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) {
109 E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
110 (int)(fe->window_length * fe->sampling_rate));
111 return -1;
112 }
113
114 fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");
115
116 if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct"))
117 fe->transform = DCT_II;
118 else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy"))
119 fe->transform = LEGACY_DCT;
120 else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk"))
121 fe->transform = DCT_HTK;
122 else {
123 E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
124 return -1;
125 }
126
127 if (cmd_ln_boolean_r(config, "-logspec"))
128 fe->log_spec = RAW_LOG_SPEC;
129 if (cmd_ln_boolean_r(config, "-smoothspec"))
130 fe->log_spec = SMOOTH_LOG_SPEC;
131
132 return 0;
133 }
134
135 static int
fe_parse_melfb_params(cmd_ln_t * config,fe_t * fe,melfb_t * mel)136 fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
137 {
138 mel->sampling_rate = fe->sampling_rate;
139 mel->fft_size = fe->fft_size;
140 mel->num_cepstra = fe->num_cepstra;
141 mel->num_filters = cmd_ln_int32_r(config, "-nfilt");
142
143 if (fe->log_spec)
144 fe->feature_dimension = mel->num_filters;
145 else
146 fe->feature_dimension = fe->num_cepstra;
147
148 mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
149 mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
150
151 mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");
152
153 mel->warp_type = cmd_ln_str_r(config, "-warp_type");
154 mel->warp_params = cmd_ln_str_r(config, "-warp_params");
155 mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
156
157 mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
158 mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");
159
160 if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
161 E_ERROR("Failed to initialize the warping function.\n");
162 return -1;
163 }
164 fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
165 return 0;
166 }
167
168 void
fe_print_current(fe_t const * fe)169 fe_print_current(fe_t const *fe)
170 {
171 E_INFO("Current FE Parameters:\n");
172 E_INFO("\tSampling Rate: %f\n", fe->sampling_rate);
173 E_INFO("\tFrame Size: %d\n", fe->frame_size);
174 E_INFO("\tFrame Shift: %d\n", fe->frame_shift);
175 E_INFO("\tFFT Size: %d\n", fe->fft_size);
176 E_INFO("\tLower Frequency: %g\n",
177 fe->mel_fb->lower_filt_freq);
178 E_INFO("\tUpper Frequency: %g\n",
179 fe->mel_fb->upper_filt_freq);
180 E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters);
181 E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps);
182 E_INFO("\tStart Utt Status: %d\n", fe->start_flag);
183 E_INFO("Will %sremove DC offset at frame level\n",
184 fe->remove_dc ? "" : "not ");
185 if (fe->dither) {
186 E_INFO("Will add dither to audio\n");
187 E_INFO("Dither seeded with %d\n", fe->seed);
188 }
189 else {
190 E_INFO("Will not add dither to audio\n");
191 }
192 if (fe->mel_fb->lifter_val) {
193 E_INFO("Will apply sine-curve liftering, period %d\n",
194 fe->mel_fb->lifter_val);
195 }
196 E_INFO("Will %snormalize filters to unit area\n",
197 fe->mel_fb->unit_area ? "" : "not ");
198 E_INFO("Will %sround filter frequencies to DFT points\n",
199 fe->mel_fb->round_filters ? "" : "not ");
200 E_INFO("Will %suse double bandwidth in mel filter\n",
201 fe->mel_fb->doublewide ? "" : "not ");
202 }
203
204 fe_t *
fe_init_auto()205 fe_init_auto()
206 {
207 return fe_init_auto_r(cmd_ln_get());
208 }
209
210 fe_t *
fe_init_auto_r(cmd_ln_t * config)211 fe_init_auto_r(cmd_ln_t *config)
212 {
213 fe_t *fe;
214
215 fe = ckd_calloc(1, sizeof(*fe));
216 fe->refcount = 1;
217
218 /* transfer params to front end */
219 if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) {
220 fe_free(fe);
221 return NULL;
222 }
223
224 /* compute remaining fe parameters */
225 /* We add 0.5 so approximate the float with the closest
226 * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4
227 */
228 fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
229 fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
230 fe->prior = 0;
231 fe->frame_counter = 0;
232
233 assert (fe->frame_shift > 1);
234
235 if (fe->frame_size > (fe->fft_size)) {
236 E_WARN
237 ("Number of FFT points has to be a power of 2 higher than %d\n",
238 (fe->frame_size));
239 fe_free(fe);
240 return (NULL);
241 }
242
243 if (fe->dither)
244 fe_init_dither(fe->seed);
245
246 /* establish buffers for overflow samps and hamming window */
247 fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
248 fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));
249
250 /* create hamming window */
251 fe_create_hamming(fe->hamming_window, fe->frame_size);
252
253 /* init and fill appropriate filter structure */
254 fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
255
256 /* transfer params to mel fb */
257 fe_parse_melfb_params(config, fe, fe->mel_fb);
258 fe_build_melfilters(fe->mel_fb);
259 fe_compute_melcosine(fe->mel_fb);
260
261 /* Create temporary FFT, spectrum and mel-spectrum buffers. */
262 /* FIXME: Gosh there are a lot of these. */
263 fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
264 fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
265 fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
266 fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));
267
268 /* create twiddle factors */
269 fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
270 fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
271 fe_create_twiddle(fe);
272
273 if (cmd_ln_boolean_r(config, "-verbose")) {
274 fe_print_current(fe);
275 }
276
277 /*** Z.A.B. ***/
278 /*** Initialize the overflow buffers ***/
279 fe_start_utt(fe);
280 return fe;
281 }
282
283 arg_t const *
fe_get_args(void)284 fe_get_args(void)
285 {
286 return fe_args;
287 }
288
289 const cmd_ln_t *
fe_get_config(fe_t * fe)290 fe_get_config(fe_t *fe)
291 {
292 return fe->config;
293 }
294
295 void
fe_init_dither(int32 seed)296 fe_init_dither(int32 seed)
297 {
298 if (seed < 0) {
299 E_INFO("You are using the internal mechanism to generate the seed.\n");
300 #ifdef _WIN32_WCE
301 s3_rand_seed(GetTickCount());
302 #else
303 s3_rand_seed((long) time(0));
304 #endif
305 }
306 else {
307 E_INFO("You are using %d as the seed.\n", seed);
308 s3_rand_seed(seed);
309 }
310 }
311
312 int32
fe_start_utt(fe_t * fe)313 fe_start_utt(fe_t * fe)
314 {
315 fe->num_overflow_samps = 0;
316 memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16));
317 fe->start_flag = 1;
318 fe->prior = 0;
319 return 0;
320 }
321
322 int
fe_get_output_size(fe_t * fe)323 fe_get_output_size(fe_t *fe)
324 {
325 return (int)fe->feature_dimension;
326 }
327
328 void
fe_get_input_size(fe_t * fe,int * out_frame_shift,int * out_frame_size)329 fe_get_input_size(fe_t *fe, int *out_frame_shift,
330 int *out_frame_size)
331 {
332 if (out_frame_shift)
333 *out_frame_shift = fe->frame_shift;
334 if (out_frame_size)
335 *out_frame_size = fe->frame_size;
336 }
337
338 int32
fe_process_frame(fe_t * fe,int16 const * spch,int32 nsamps,mfcc_t * fr_cep)339 fe_process_frame(fe_t * fe, int16 const *spch, int32 nsamps, mfcc_t * fr_cep)
340 {
341 fe_read_frame(fe, spch, nsamps);
342 return fe_write_frame(fe, fr_cep);
343 }
344
/**
 * Process a block of samples into as many feature frames as possible.
 *
 * Behaviour, in the order the cases are checked:
 *  - buf_cep == NULL: only computes how many frames the *inout_nsamps
 *    new samples plus the buffered overflow samples would produce,
 *    stores that in *inout_nframes and returns it.  inout_spch is not
 *    touched in this mode.
 *  - Not enough samples (input plus overflow) for one frame: the input
 *    is appended to the overflow buffer, *inout_spch is advanced past
 *    it, *inout_nsamps becomes 0, *inout_nframes becomes 0; returns 0.
 *  - *inout_nframes < 1: *inout_nframes is set to 0; returns 0 without
 *    consuming input.
 *  - Otherwise: writes up to min(available frames, *inout_nframes)
 *    frames into buf_cep, advances *inout_spch and decreases
 *    *inout_nsamps by the samples consumed, saves samples still needed
 *    for the next frame in the overflow buffer, and stores the number
 *    of frames written in *inout_nframes.
 *
 * @param fe            Front-end object.
 * @param inout_spch    In/out pointer to the input sample pointer.
 * @param inout_nsamps  In/out number of samples available at *inout_spch.
 * @param buf_cep       Array of output frame buffers, or NULL (see above).
 * @param inout_nframes In: capacity of buf_cep in frames.  Out: number of
 *                      frames written (or the frame estimate).
 * @return 0 on success (or the frame estimate when buf_cep is NULL),
 *         -1 if fe_write_frame() returned a negative value.
 */
int
fe_process_frames(fe_t *fe,
                  int16 const **inout_spch,
                  size_t *inout_nsamps,
                  mfcc_t **buf_cep,
                  int32 *inout_nframes)
{
    int32 frame_count;
    int outidx, i, n, n_overflow, orig_n_overflow;
    int16 const *orig_spch;

    /* In the special case where there is no output buffer, return the
     * maximum number of frames which would be generated. */
    if (buf_cep == NULL) {
        if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size)
            *inout_nframes = 0;
        else
            *inout_nframes = 1
                + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
                   / fe->frame_shift);
        return *inout_nframes;
    }

    /* Are there not enough samples to make at least 1 frame? */
    if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) {
        if (*inout_nsamps > 0) {
            /* Append them to the overflow buffer. */
            memcpy(fe->overflow_samps + fe->num_overflow_samps,
                   *inout_spch, *inout_nsamps * (sizeof(int16)));
            fe->num_overflow_samps += *inout_nsamps;
            /* Update input-output pointers and counters. */
            *inout_spch += *inout_nsamps;
            *inout_nsamps = 0;
        }
        /* We produced no frames of output, sorry! */
        *inout_nframes = 0;
        return 0;
    }

    /* Can't write a frame?  Then do nothing! */
    if (*inout_nframes < 1) {
        *inout_nframes = 0;
        return 0;
    }

    /* Keep track of the original start of the buffer. */
    orig_spch = *inout_spch;
    orig_n_overflow = fe->num_overflow_samps;
    /* How many frames will we be able to get? */
    frame_count = 1
        + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
           / fe->frame_shift);
    /* Limit it to the number of output frames available. */
    if (frame_count > *inout_nframes)
        frame_count = *inout_nframes;
    /* Index of output frame. */
    outidx = 0;

    /* Start processing, taking care of any incoming overflow. */
    if (fe->num_overflow_samps) {
        /* Number of new input samples needed to complete the first frame. */
        int offset = fe->frame_size - fe->num_overflow_samps;

        /* Append start of spch to overflow samples to make a full frame. */
        memcpy(fe->overflow_samps + fe->num_overflow_samps,
               *inout_spch, offset * sizeof(**inout_spch));
        fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
        assert(outidx < frame_count);
        if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
            return -1;
        outidx += n;
        /* Update input-output pointers and counters. */
        *inout_spch += offset;
        *inout_nsamps -= offset;
        /* May go to zero or below; that is what the checks further down
         * test for. */
        fe->num_overflow_samps -= fe->frame_shift;
    }
    else {
        /* No buffered samples: the first frame comes straight from the input. */
        fe_read_frame(fe, *inout_spch, fe->frame_size);
        assert(outidx < frame_count);
        if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
            return -1;
        outidx += n;
        /* Update input-output pointers and counters. */
        *inout_spch += fe->frame_size;
        *inout_nsamps -= fe->frame_size;
    }

    /* Process all remaining frames. */
    for (i = 1; i < frame_count; ++i) {
        assert(*inout_nsamps >= (size_t)fe->frame_shift);

        fe_shift_frame(fe, *inout_spch, fe->frame_shift);
        assert(outidx < frame_count);
        if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
            return -1;
        outidx += n;
        /* Update input-output pointers and counters. */
        *inout_spch += fe->frame_shift;
        *inout_nsamps -= fe->frame_shift;
        /* Amount of data behind the original input which is still needed. */
        if (fe->num_overflow_samps > 0)
            fe->num_overflow_samps -= fe->frame_shift;
    }

    /* How many relevant overflow samples are there left? */
    if (fe->num_overflow_samps <= 0) {
        /* Nothing from the old overflow buffer is needed any more, so the
         * new overflow contents are taken from the input buffer only. */
        /* Maximum number of overflow samples past *inout_spch to save. */
        n_overflow = *inout_nsamps;
        if (n_overflow > fe->frame_shift)
            n_overflow = fe->frame_shift;
        fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
        /* Make sure this isn't an illegal read! */
        if (fe->num_overflow_samps > *inout_spch - orig_spch)
            fe->num_overflow_samps = *inout_spch - orig_spch;
        fe->num_overflow_samps += n_overflow;
        if (fe->num_overflow_samps > 0) {
            memcpy(fe->overflow_samps,
                   *inout_spch - (fe->frame_size - fe->frame_shift),
                   fe->num_overflow_samps * sizeof(**inout_spch));
            /* Update the input pointer to cover this stuff. */
            *inout_spch += n_overflow;
            *inout_nsamps -= n_overflow;
        }
    }
    else {
        /* There is still some relevant data left in the overflow buffer. */
        /* Shift existing data to the beginning. */
        memmove(fe->overflow_samps,
                fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
                fe->num_overflow_samps * sizeof(*fe->overflow_samps));
        /* Copy in whatever we had in the original speech buffer. */
        n_overflow = *inout_spch - orig_spch + *inout_nsamps;
        if (n_overflow > fe->frame_size - fe->num_overflow_samps)
            n_overflow = fe->frame_size - fe->num_overflow_samps;
        memcpy(fe->overflow_samps + fe->num_overflow_samps,
               orig_spch, n_overflow * sizeof(*orig_spch));
        fe->num_overflow_samps += n_overflow;
        /* Advance the input pointers. */
        if (n_overflow > *inout_spch - orig_spch) {
            /* Only the part beyond what was already consumed counts as
             * newly consumed input. */
            n_overflow -= (*inout_spch - orig_spch);
            *inout_spch += n_overflow;
            *inout_nsamps -= n_overflow;
        }
    }

    /* Finally update the frame counter with the number of frames we processed. */
    *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */
    return 0;
}
493
494 int
fe_process_utt(fe_t * fe,int16 const * spch,size_t nsamps,mfcc_t *** cep_block,int32 * nframes)495 fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps,
496 mfcc_t *** cep_block, int32 * nframes)
497 {
498 mfcc_t **cep;
499 int rv;
500
501 /* Figure out how many frames we will need. */
502 fe_process_frames(fe, NULL, &nsamps, NULL, nframes);
503 /* Create the output buffer (it has to exist, even if there are no output frames). */
504 if (*nframes)
505 cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep));
506 else
507 cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep));
508 /* Now just call fe_process_frames() with the allocated buffer. */
509 rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes);
510 *cep_block = cep;
511
512 return rv;
513 }
514
515
516 int32
fe_end_utt(fe_t * fe,mfcc_t * cepvector,int32 * nframes)517 fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
518 {
519 /* Process any remaining data. */
520 if (fe->num_overflow_samps > 0) {
521 fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
522 *nframes = fe_write_frame(fe, cepvector);
523 }
524 else {
525 *nframes = 0;
526 }
527
528 /* reset overflow buffers... */
529 fe->num_overflow_samps = 0;
530 fe->start_flag = 0;
531
532 return 0;
533 }
534
535 fe_t *
fe_retain(fe_t * fe)536 fe_retain(fe_t *fe)
537 {
538 ++fe->refcount;
539 return fe;
540 }
541
542 int
fe_free(fe_t * fe)543 fe_free(fe_t * fe)
544 {
545 if (fe == NULL)
546 return 0;
547 if (--fe->refcount > 0)
548 return fe->refcount;
549
550 /* kill FE instance - free everything... */
551 if (fe->mel_fb) {
552 if (fe->mel_fb->mel_cosine)
553 fe_free_2d((void *) fe->mel_fb->mel_cosine);
554 ckd_free(fe->mel_fb->lifter);
555 ckd_free(fe->mel_fb->spec_start);
556 ckd_free(fe->mel_fb->filt_start);
557 ckd_free(fe->mel_fb->filt_width);
558 ckd_free(fe->mel_fb->filt_coeffs);
559 ckd_free(fe->mel_fb);
560 }
561 ckd_free(fe->spch);
562 ckd_free(fe->frame);
563 ckd_free(fe->ccc);
564 ckd_free(fe->sss);
565 ckd_free(fe->spec);
566 ckd_free(fe->mfspec);
567 ckd_free(fe->overflow_samps);
568 ckd_free(fe->hamming_window);
569 cmd_ln_free_r(fe->config);
570 ckd_free(fe);
571
572 return 0;
573 }
574
575 /**
576 * Convert a block of mfcc_t to float32 (can be done in-place)
577 **/
578 int32
fe_mfcc_to_float(fe_t * fe,mfcc_t ** input,float32 ** output,int32 nframes)579 fe_mfcc_to_float(fe_t * fe,
580 mfcc_t ** input, float32 ** output, int32 nframes)
581 {
582 int32 i;
583
584 #ifndef FIXED_POINT
585 if ((void *) input == (void *) output)
586 return nframes * fe->feature_dimension;
587 #endif
588 for (i = 0; i < nframes * fe->feature_dimension; ++i)
589 output[0][i] = MFCC2FLOAT(input[0][i]);
590
591 return i;
592 }
593
594 /**
595 * Convert a block of float32 to mfcc_t (can be done in-place)
596 **/
597 int32
fe_float_to_mfcc(fe_t * fe,float32 ** input,mfcc_t ** output,int32 nframes)598 fe_float_to_mfcc(fe_t * fe,
599 float32 ** input, mfcc_t ** output, int32 nframes)
600 {
601 int32 i;
602
603 #ifndef FIXED_POINT
604 if ((void *) input == (void *) output)
605 return nframes * fe->feature_dimension;
606 #endif
607 for (i = 0; i < nframes * fe->feature_dimension; ++i)
608 output[0][i] = FLOAT2MFCC(input[0][i]);
609
610 return i;
611 }
612
613 int32
fe_logspec_to_mfcc(fe_t * fe,const mfcc_t * fr_spec,mfcc_t * fr_cep)614 fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
615 {
616 #ifdef FIXED_POINT
617 fe_spec2cep(fe, fr_spec, fr_cep);
618 #else /* ! FIXED_POINT */
619 powspec_t *powspec;
620 int32 i;
621
622 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
623 for (i = 0; i < fe->mel_fb->num_filters; ++i)
624 powspec[i] = (powspec_t) fr_spec[i];
625 fe_spec2cep(fe, powspec, fr_cep);
626 ckd_free(powspec);
627 #endif /* ! FIXED_POINT */
628 return 0;
629 }
630
631 int32
fe_logspec_dct2(fe_t * fe,const mfcc_t * fr_spec,mfcc_t * fr_cep)632 fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
633 {
634 #ifdef FIXED_POINT
635 fe_dct2(fe, fr_spec, fr_cep, 0);
636 #else /* ! FIXED_POINT */
637 powspec_t *powspec;
638 int32 i;
639
640 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
641 for (i = 0; i < fe->mel_fb->num_filters; ++i)
642 powspec[i] = (powspec_t) fr_spec[i];
643 fe_dct2(fe, powspec, fr_cep, 0);
644 ckd_free(powspec);
645 #endif /* ! FIXED_POINT */
646 return 0;
647 }
648
649 int32
fe_mfcc_dct3(fe_t * fe,const mfcc_t * fr_cep,mfcc_t * fr_spec)650 fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec)
651 {
652 #ifdef FIXED_POINT
653 fe_dct3(fe, fr_cep, fr_spec);
654 #else /* ! FIXED_POINT */
655 powspec_t *powspec;
656 int32 i;
657
658 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
659 fe_dct3(fe, fr_cep, powspec);
660 for (i = 0; i < fe->mel_fb->num_filters; ++i)
661 fr_spec[i] = (mfcc_t) powspec[i];
662 ckd_free(powspec);
663 #endif /* ! FIXED_POINT */
664 return 0;
665 }
666