1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37 /*
38 * feat.c -- Feature vector description and cepstra->feature computation.
39 *
40 * **********************************************
41 * CMU ARPA Speech Project
42 *
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
46 *
47 * HISTORY
48 * $Log$
49 * Revision 1.22 2006/02/23 03:59:40 arthchan2003
50 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
51 *
52 * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003
53 * Free stuffs in cmn and feat corectly.
54 *
55 * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003
56 * Add message to show the directory which the feature is searched for.
57 *
58 * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003
59 * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point.
60 *
61 * Revision 1.21 2005/06/22 03:29:35 arthchan2003
62 * Makefile.am s for all subdirectory of libs3decoder/
63 *
64 * Revision 1.4 2005/04/21 23:50:26 archan
65 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
66 *
67 * Revision 1.3 2005/03/30 01:22:46 archan
68 * Fixed mistakes in last updates. Add
69 *
70 *
71 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
72 * Adding feat_free() to free allocated memory
73 *
74 * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
75 * Modified feat_s2mfc2feat_block() to handle empty buffers at
76 * the end of an utterance
77 *
78 * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
79 * Added feat_s2mfc2feat_block() to allow feature computation
80 * from sequences of blocks of cepstral vectors
81 *
82 * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
83 * Major changes to accommodate arbitrary feature input types. Added
84 * feat_read(), moved various cep2feat functions from other files into
85 * this one. Also, made this module object-oriented with the feat_t type.
86 * Changed definition of s2mfc_read to let the caller manage MFC buffers.
87 *
88 * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
89 * Added unistd.h include.
90 *
91 * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
92 * Added check for sf argument to s2mfc_read being within file size.
93 *
94 * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
95 * Added sf, ef parameters to s2mfc_read().
96 *
97 * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
98 * Added feat_cepsize().
99 * Added different feature-handling (s2_4x, s3_1x39 at this point).
100 * Moved feature-dependent functions to feature-dependent files.
101 *
102 * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
103 * Moved constant declarations from feat.h into here.
104 *
105 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
106 * Created.
107 */
108
109
110 /*
111 * This module encapsulates different feature streams used by the Sphinx group. New
112 * stream types can be added by augmenting feat_init() and providing an accompanying
113 * compute_feat function. It also provides a "generic" feature vector definition for
114 * handling "arbitrary" speech input feature types (see the last section in feat_init()).
115 * In this case the speech input data should already be feature vectors; no computation,
116 * such as MFC->feature conversion, is available or needed.
117 */
118
119 #include <assert.h>
120 #include <string.h>
121 #ifdef HAVE_CONFIG_H
122 #include <config.h>
123 #endif
124
125 #ifdef _MSC_VER
126 #pragma warning (disable: 4244 4996)
127 #endif
128
129 #include "sphinxbase/fe.h"
130 #include "sphinxbase/feat.h"
131 #include "sphinxbase/bio.h"
132 #include "sphinxbase/pio.h"
133 #include "sphinxbase/cmn.h"
134 #include "sphinxbase/agc.h"
135 #include "sphinxbase/err.h"
136 #include "sphinxbase/ckd_alloc.h"
137 #include "sphinxbase/prim_type.h"
138 #include "sphinxbase/glist.h"
139
140 #define FEAT_VERSION "1.0"
141 #define FEAT_DCEP_WIN 2
142
#ifdef DUMP_FEATURES
/*
 * Debug helper: log a caption with E_INFO, then write nfr cepstral frames
 * to stderr, one frame per line, fcb->cepsize values per frame.
 */
static void
cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text)
{
    int32 frame;

    E_INFO("%s\n", text);
    for (frame = 0; frame < nfr; ++frame) {
        int32 coef = 0;

        while (coef < fcb->cepsize) {
            fprintf(stderr, "%f ", MFCC2FLOAT(mfc[frame][coef]));
            ++coef;
        }
        fprintf(stderr, "\n");
    }
}

/*
 * Debug helper: log a caption with E_INFO, then hand nfr feature frames
 * to feat_print() with stderr as the destination.
 */
static void
feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text)
{
    E_INFO("%s\n", text);
    feat_print(fcb, feat, nfr, stderr);
}
#else /* !DUMP_FEATURES */
/* Without DUMP_FEATURES both debug hooks expand to nothing. */
#define cep_dump_dbg(fcb,mfc,nfr,text)
#define feat_print_dbg(fcb,mfc,nfr,text)
#endif
167
168 int32 **
parse_subvecs(char const * str)169 parse_subvecs(char const *str)
170 {
171 char const *strp;
172 int32 n, n2, l;
173 glist_t dimlist; /* List of dimensions in one subvector */
174 glist_t veclist; /* List of dimlists (subvectors) */
175 int32 **subvec;
176 gnode_t *gn, *gn2;
177
178 veclist = NULL;
179
180 strp = str;
181 for (;;) {
182 dimlist = NULL;
183
184 for (;;) {
185 if (sscanf(strp, "%d%n", &n, &l) != 1)
186 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
187 strp - str);
188 strp += l;
189
190 if (*strp == '-') {
191 strp++;
192
193 if (sscanf(strp, "%d%n", &n2, &l) != 1)
194 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
195 strp - str);
196 strp += l;
197 }
198 else
199 n2 = n;
200
201 if ((n < 0) || (n > n2))
202 E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str,
203 strp - str);
204
205 for (; n <= n2; n++) {
206 gnode_t *gn;
207 for (gn = dimlist; gn; gn = gnode_next(gn))
208 if (gnode_int32(gn) == n)
209 break;
210 if (gn != NULL)
211 E_FATAL("'%s': Duplicate dimension ending @pos %d\n",
212 str, strp - str);
213
214 dimlist = glist_add_int32(dimlist, n);
215 }
216
217 if ((*strp == '\0') || (*strp == '/'))
218 break;
219
220 if (*strp != ',')
221 E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str);
222
223 strp++;
224 }
225
226 veclist = glist_add_ptr(veclist, (void *) dimlist);
227
228 if (*strp == '\0')
229 break;
230
231 assert(*strp == '/');
232 strp++;
233 }
234
235 /* Convert the glists to arrays; remember the glists are in reverse order of the input! */
236 n = glist_count(veclist); /* #Subvectors */
237 subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */
238 subvec[n] = NULL; /* sentinel */
239
240 for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) {
241 gn2 = (glist_t) gnode_ptr(gn);
242
243 n2 = glist_count(gn2); /* Length of this subvector */
244 if (n2 <= 0)
245 E_FATAL("'%s': 0-length subvector\n", str);
246
247 subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */
248 subvec[n][n2] = -1; /* sentinel */
249
250 for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2)
251 subvec[n][n2] = gnode_int32(gn2);
252 assert((n2 < 0) && (!gn2));
253 }
254 assert((n < 0) && (!gn));
255
256 /* Free the glists */
257 for (gn = veclist; gn; gn = gnode_next(gn)) {
258 gn2 = (glist_t) gnode_ptr(gn);
259 glist_free(gn2);
260 }
261 glist_free(veclist);
262
263 return subvec;
264 }
265
266 void
subvecs_free(int32 ** subvecs)267 subvecs_free(int32 **subvecs)
268 {
269 int32 **sv;
270
271 for (sv = subvecs; sv && *sv; ++sv)
272 ckd_free(*sv);
273 ckd_free(subvecs);
274 }
275
276 int
feat_set_subvecs(feat_t * fcb,int32 ** subvecs)277 feat_set_subvecs(feat_t *fcb, int32 **subvecs)
278 {
279 int32 **sv;
280 uint32 n_sv, n_dim, i;
281
282 if (subvecs == NULL) {
283 subvecs_free(fcb->subvecs);
284 ckd_free(fcb->sv_buf);
285 ckd_free(fcb->sv_len);
286 fcb->n_sv = 0;
287 fcb->subvecs = NULL;
288 fcb->sv_len = NULL;
289 fcb->sv_buf = NULL;
290 fcb->sv_dim = 0;
291 return 0;
292 }
293
294 if (fcb->n_stream != 1) {
295 E_ERROR("Subvector specifications require single-stream features!");
296 return -1;
297 }
298
299 n_sv = 0;
300 n_dim = 0;
301 for (sv = subvecs; sv && *sv; ++sv) {
302 int32 *d;
303
304 for (d = *sv; d && *d != -1; ++d) {
305 ++n_dim;
306 }
307 ++n_sv;
308 }
309 if (n_dim > feat_dimension(fcb)) {
310 E_ERROR("Total dimensionality of subvector specification %d "
311 "> feature dimensionality %d\n", n_dim, feat_dimension(fcb));
312 return -1;
313 }
314
315 fcb->n_sv = n_sv;
316 fcb->subvecs = subvecs;
317 fcb->sv_len = (uint32 *)ckd_calloc(n_sv, sizeof(*fcb->sv_len));
318 fcb->sv_buf = (mfcc_t *)ckd_calloc(n_dim, sizeof(*fcb->sv_buf));
319 fcb->sv_dim = n_dim;
320 for (i = 0; i < n_sv; ++i) {
321 int32 *d;
322 for (d = subvecs[i]; d && *d != -1; ++d) {
323 ++fcb->sv_len[i];
324 }
325 }
326
327 return 0;
328 }
329
330 /**
331 * Project feature components to subvectors (if any).
332 */
333 static void
feat_subvec_project(feat_t * fcb,mfcc_t *** inout_feat,uint32 nfr)334 feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
335 {
336 uint32 i;
337
338 if (fcb->subvecs == NULL)
339 return;
340 for (i = 0; i < nfr; ++i) {
341 mfcc_t *out;
342 int32 j;
343
344 out = fcb->sv_buf;
345 for (j = 0; j < fcb->n_sv; ++j) {
346 int32 *d;
347 for (d = fcb->subvecs[j]; d && *d != -1; ++d) {
348 *out++ = inout_feat[i][0][*d];
349 }
350 }
351 memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf));
352 }
353 }
354
355 mfcc_t ***
feat_array_alloc(feat_t * fcb,int32 nfr)356 feat_array_alloc(feat_t * fcb, int32 nfr)
357 {
358 int32 i, j, k;
359 mfcc_t *data, *d, ***feat;
360
361 assert(fcb);
362 assert(nfr > 0);
363 assert(feat_dimension(fcb) > 0);
364
365 /* Make sure to use the dimensionality of the features *before*
366 LDA and subvector projection. */
367 k = 0;
368 for (i = 0; i < fcb->n_stream; ++i)
369 k += fcb->stream_len[i];
370 assert(k >= feat_dimension(fcb));
371 assert(k >= fcb->sv_dim);
372
373 feat =
374 (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *));
375 data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t));
376
377 for (i = 0; i < nfr; i++) {
378 d = data + i * k;
379 for (j = 0; j < feat_dimension1(fcb); j++) {
380 feat[i][j] = d;
381 d += feat_dimension2(fcb, j);
382 }
383 }
384
385 return feat;
386 }
387
388 mfcc_t ***
feat_array_realloc(feat_t * fcb,mfcc_t *** old_feat,int32 ofr,int32 nfr)389 feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
390 {
391 int32 i, k, cf;
392 mfcc_t*** new_feat;
393
394 assert(fcb);
395 assert(nfr > 0);
396 assert(ofr > 0);
397 assert(feat_dimension(fcb) > 0);
398
399 /* Make sure to use the dimensionality of the features *before*
400 LDA and subvector projection. */
401 k = 0;
402 for (i = 0; i < fcb->n_stream; ++i)
403 k += fcb->stream_len[i];
404 assert(k >= feat_dimension(fcb));
405 assert(k >= fcb->sv_dim);
406
407 new_feat = feat_array_alloc(fcb, nfr);
408
409 cf = (nfr < ofr) ? nfr : ofr;
410 memcpy(new_feat[0][0], old_feat[0][0], cf * k * sizeof(mfcc_t));
411
412 feat_array_free(old_feat);
413
414 return new_feat;
415 }
416
417 void
feat_array_free(mfcc_t *** feat)418 feat_array_free(mfcc_t ***feat)
419 {
420 ckd_free(feat[0][0]);
421 ckd_free_2d((void **)feat);
422 }
423
424 static void
feat_s2_4x_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)425 feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
426 {
427 mfcc_t *f;
428 mfcc_t *w, *_w;
429 mfcc_t *w1, *w_1, *_w1, *_w_1;
430 mfcc_t d1, d2;
431 int32 i, j;
432
433 assert(fcb);
434 assert(feat_cepsize(fcb) == 13);
435 assert(feat_n_stream(fcb) == 4);
436 assert(feat_stream_len(fcb, 0) == 12);
437 assert(feat_stream_len(fcb, 1) == 24);
438 assert(feat_stream_len(fcb, 2) == 3);
439 assert(feat_stream_len(fcb, 3) == 12);
440 assert(feat_window_size(fcb) == 4);
441
442 /* CEP; skip C0 */
443 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
444
445 /*
446 * DCEP(SHORT): mfc[2] - mfc[-2]
447 * DCEP(LONG): mfc[4] - mfc[-4]
448 */
449 w = mfc[2] + 1; /* +1 to skip C0 */
450 _w = mfc[-2] + 1;
451
452 f = feat[1];
453 for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */
454 f[i] = w[i] - _w[i];
455
456 w = mfc[4] + 1; /* +1 to skip C0 */
457 _w = mfc[-4] + 1;
458
459 for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */
460 f[i] = w[j] - _w[j];
461
462 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
463 w1 = mfc[3] + 1; /* Final +1 to skip C0 */
464 _w1 = mfc[-1] + 1;
465 w_1 = mfc[1] + 1;
466 _w_1 = mfc[-3] + 1;
467
468 f = feat[3];
469 for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
470 d1 = w1[i] - _w1[i];
471 d2 = w_1[i] - _w_1[i];
472
473 f[i] = d1 - d2;
474 }
475
476 /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
477 f = feat[2];
478 f[0] = mfc[0][0];
479 f[1] = mfc[2][0] - mfc[-2][0];
480
481 d1 = mfc[3][0] - mfc[-1][0];
482 d2 = mfc[1][0] - mfc[-3][0];
483 f[2] = d1 - d2;
484 }
485
486
487 static void
feat_s3_1x39_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)488 feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
489 {
490 mfcc_t *f;
491 mfcc_t *w, *_w;
492 mfcc_t *w1, *w_1, *_w1, *_w_1;
493 mfcc_t d1, d2;
494 int32 i;
495
496 assert(fcb);
497 assert(feat_cepsize(fcb) == 13);
498 assert(feat_n_stream(fcb) == 1);
499 assert(feat_stream_len(fcb, 0) == 39);
500 assert(feat_window_size(fcb) == 3);
501
502 /* CEP; skip C0 */
503 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
504 /*
505 * DCEP: mfc[2] - mfc[-2];
506 */
507 f = feat[0] + feat_cepsize(fcb) - 1;
508 w = mfc[2] + 1; /* +1 to skip C0 */
509 _w = mfc[-2] + 1;
510
511 for (i = 0; i < feat_cepsize(fcb) - 1; i++)
512 f[i] = w[i] - _w[i];
513
514 /* POW: C0, DC0, D2C0 */
515 f += feat_cepsize(fcb) - 1;
516
517 f[0] = mfc[0][0];
518 f[1] = mfc[2][0] - mfc[-2][0];
519
520 d1 = mfc[3][0] - mfc[-1][0];
521 d2 = mfc[1][0] - mfc[-3][0];
522 f[2] = d1 - d2;
523
524 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
525 f += 3;
526
527 w1 = mfc[3] + 1; /* Final +1 to skip C0 */
528 _w1 = mfc[-1] + 1;
529 w_1 = mfc[1] + 1;
530 _w_1 = mfc[-3] + 1;
531
532 for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
533 d1 = w1[i] - _w1[i];
534 d2 = w_1[i] - _w_1[i];
535
536 f[i] = d1 - d2;
537 }
538 }
539
540
541 static void
feat_s3_cep(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)542 feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
543 {
544 assert(fcb);
545 assert(feat_n_stream(fcb) == 1);
546 assert(feat_window_size(fcb) == 0);
547
548 /* CEP */
549 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
550 }
551
552 static void
feat_s3_cep_dcep(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)553 feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
554 {
555 mfcc_t *f;
556 mfcc_t *w, *_w;
557 int32 i;
558
559 assert(fcb);
560 assert(feat_n_stream(fcb) == 1);
561 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
562 assert(feat_window_size(fcb) == 2);
563
564 /* CEP */
565 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
566
567 /*
568 * DCEP: mfc[2] - mfc[-2];
569 */
570 f = feat[0] + feat_cepsize(fcb);
571 w = mfc[2];
572 _w = mfc[-2];
573
574 for (i = 0; i < feat_cepsize(fcb); i++)
575 f[i] = w[i] - _w[i];
576 }
577
578 static void
feat_1s_c_d_dd_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)579 feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
580 {
581 mfcc_t *f;
582 mfcc_t *w, *_w;
583 mfcc_t *w1, *w_1, *_w1, *_w_1;
584 mfcc_t d1, d2;
585 int32 i;
586
587 assert(fcb);
588 assert(feat_n_stream(fcb) == 1);
589 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3);
590 assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1);
591
592 /* CEP */
593 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
594
595 /*
596 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
597 */
598 f = feat[0] + feat_cepsize(fcb);
599 w = mfc[FEAT_DCEP_WIN];
600 _w = mfc[-FEAT_DCEP_WIN];
601
602 for (i = 0; i < feat_cepsize(fcb); i++)
603 f[i] = w[i] - _w[i];
604
605 /*
606 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
607 * where w = FEAT_DCEP_WIN
608 */
609 f += feat_cepsize(fcb);
610
611 w1 = mfc[FEAT_DCEP_WIN + 1];
612 _w1 = mfc[-FEAT_DCEP_WIN + 1];
613 w_1 = mfc[FEAT_DCEP_WIN - 1];
614 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
615
616 for (i = 0; i < feat_cepsize(fcb); i++) {
617 d1 = w1[i] - _w1[i];
618 d2 = w_1[i] - _w_1[i];
619
620 f[i] = d1 - d2;
621 }
622 }
623
624 static void
feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)625 feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
626 {
627 mfcc_t *f;
628 mfcc_t *w, *_w;
629 mfcc_t *w1, *w_1, *_w1, *_w_1;
630 mfcc_t d1, d2;
631 int32 i;
632
633 assert(fcb);
634 assert(feat_n_stream(fcb) == 1);
635 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
636 assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);
637
638 /* CEP */
639 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
640
641 /*
642 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
643 */
644 f = feat[0] + feat_cepsize(fcb);
645 w = mfc[FEAT_DCEP_WIN];
646 _w = mfc[-FEAT_DCEP_WIN];
647
648 for (i = 0; i < feat_cepsize(fcb); i++)
649 f[i] = w[i] - _w[i];
650
651 /*
652 * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2;
653 */
654 f += feat_cepsize(fcb);
655 w = mfc[FEAT_DCEP_WIN * 2];
656 _w = mfc[-FEAT_DCEP_WIN * 2];
657
658 for (i = 0; i < feat_cepsize(fcb); i++)
659 f[i] = w[i] - _w[i];
660
661 /*
662 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
663 * where w = FEAT_DCEP_WIN
664 */
665 f += feat_cepsize(fcb);
666
667 w1 = mfc[FEAT_DCEP_WIN + 1];
668 _w1 = mfc[-FEAT_DCEP_WIN + 1];
669 w_1 = mfc[FEAT_DCEP_WIN - 1];
670 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
671
672 for (i = 0; i < feat_cepsize(fcb); i++) {
673 d1 = w1[i] - _w1[i];
674 d2 = w_1[i] - _w_1[i];
675
676 f[i] = d1 - d2;
677 }
678 }
679
680 static void
feat_copy(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)681 feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
682 {
683 int32 win, i, j;
684
685 win = feat_window_size(fcb);
686
687 /* Concatenate input features */
688 for (i = -win; i <= win; ++i) {
689 uint32 spos = 0;
690
691 for (j = 0; j < feat_n_stream(fcb); ++j) {
692 uint32 stream_len;
693
694 /* Unscale the stream length by the window. */
695 stream_len = feat_stream_len(fcb, j) / (2 * win + 1);
696 memcpy(feat[j] + ((i + win) * stream_len),
697 mfc[i] + spos,
698 stream_len * sizeof(mfcc_t));
699 spos += stream_len;
700 }
701 }
702 }
703
704 feat_t *
feat_init(char const * type,cmn_type_t cmn,int32 varnorm,agc_type_t agc,int32 breport,int32 cepsize)705 feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
706 agc_type_t agc, int32 breport, int32 cepsize)
707 {
708 feat_t *fcb;
709
710 if (cepsize == 0)
711 cepsize = 13;
712 if (breport)
713 E_INFO
714 ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
715 type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);
716
717 fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
718 fcb->refcount = 1;
719 fcb->name = (char *) ckd_salloc(type);
720 if (strcmp(type, "s2_4x") == 0) {
721 /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
722 if (cepsize != 13) {
723 E_ERROR("s2_4x features require cepsize == 13\n");
724 ckd_free(fcb);
725 return NULL;
726 }
727 fcb->cepsize = 13;
728 fcb->n_stream = 4;
729 fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
730 fcb->stream_len[0] = 12;
731 fcb->stream_len[1] = 24;
732 fcb->stream_len[2] = 3;
733 fcb->stream_len[3] = 12;
734 fcb->out_dim = 51;
735 fcb->window_size = 4;
736 fcb->compute_feat = feat_s2_4x_cep2feat;
737 }
738 else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
739 /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
740 if (cepsize != 13) {
741 E_ERROR("s2_4x features require cepsize == 13\n");
742 ckd_free(fcb);
743 return NULL;
744 }
745 fcb->cepsize = 13;
746 fcb->n_stream = 1;
747 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
748 fcb->stream_len[0] = 39;
749 fcb->out_dim = 39;
750 fcb->window_size = 3;
751 fcb->compute_feat = feat_s3_1x39_cep2feat;
752 }
753 else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
754 fcb->cepsize = cepsize;
755 fcb->n_stream = 1;
756 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
757 fcb->stream_len[0] = cepsize * 3;
758 fcb->out_dim = cepsize * 3;
759 fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
760 fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
761 }
762 else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
763 fcb->cepsize = cepsize;
764 fcb->n_stream = 1;
765 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
766 fcb->stream_len[0] = cepsize * 4;
767 fcb->out_dim = cepsize * 4;
768 fcb->window_size = FEAT_DCEP_WIN * 2;
769 fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
770 }
771 else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
772 /* 1-stream cep/dcep */
773 fcb->cepsize = cepsize;
774 fcb->n_stream = 1;
775 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
776 fcb->stream_len[0] = feat_cepsize(fcb) * 2;
777 fcb->out_dim = fcb->stream_len[0];
778 fcb->window_size = 2;
779 fcb->compute_feat = feat_s3_cep_dcep;
780 }
781 else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
782 /* 1-stream cep */
783 fcb->cepsize = cepsize;
784 fcb->n_stream = 1;
785 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
786 fcb->stream_len[0] = feat_cepsize(fcb);
787 fcb->out_dim = fcb->stream_len[0];
788 fcb->window_size = 0;
789 fcb->compute_feat = feat_s3_cep;
790 }
791 else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
792 /* 1-stream cep with frames concatenated, so called cepwin features */
793 if (strncmp(type, "1s_3c", 5) == 0)
794 fcb->window_size = 3;
795 else
796 fcb->window_size = 4;
797
798 fcb->cepsize = cepsize;
799 fcb->n_stream = 1;
800 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
801 fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
802 fcb->out_dim = fcb->stream_len[0];
803 fcb->compute_feat = feat_copy;
804 }
805 else {
806 int32 i, k, l;
807 size_t len;
808 char *strp;
809 char *mtype = ckd_salloc(type);
810 char *wd = ckd_salloc(type);
811 /*
812 * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
813 * comma separated list of feature stream widths; #items =
814 * #streams). An optional window size (frames will be
815 * concatenated) is also allowed, which can be specified with
816 * a colon after the list of feature streams.
817 */
818 len = strlen(mtype);
819 k = 0;
820 for (i = 1; i < len - 1; i++) {
821 if (mtype[i] == ',') {
822 mtype[i] = ' ';
823 k++;
824 }
825 else if (mtype[i] == ':') {
826 mtype[i] = '\0';
827 fcb->window_size = atoi(mtype + i + 1);
828 break;
829 }
830 }
831 k++; /* Presumably there are (#commas+1) streams */
832 fcb->n_stream = k;
833 fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));
834
835 /* Scan individual feature stream lengths */
836 strp = mtype;
837 i = 0;
838 fcb->out_dim = 0;
839 fcb->cepsize = 0;
840 while (sscanf(strp, "%s%n", wd, &l) == 1) {
841 strp += l;
842 if ((i >= fcb->n_stream)
843 || (sscanf(wd, "%u", &(fcb->stream_len[i])) != 1)
844 || (fcb->stream_len[i] <= 0))
845 E_FATAL("Bad feature type argument\n");
846 /* Input size before windowing */
847 fcb->cepsize += fcb->stream_len[i];
848 if (fcb->window_size > 0)
849 fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
850 /* Output size after windowing */
851 fcb->out_dim += fcb->stream_len[i];
852 i++;
853 }
854 if (i != fcb->n_stream)
855 E_FATAL("Bad feature type argument\n");
856 if (fcb->cepsize != cepsize)
857 E_FATAL("Bad feature type argument\n");
858
859 /* Input is already the feature stream */
860 fcb->compute_feat = feat_copy;
861 ckd_free(mtype);
862 ckd_free(wd);
863 }
864
865 if (cmn != CMN_NONE)
866 fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
867 fcb->cmn = cmn;
868 fcb->varnorm = varnorm;
869 if (agc != AGC_NONE) {
870 fcb->agc_struct = agc_init();
871 /*
872 * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
873 * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
874 * switches to EMAX
875 */
876 /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
877 agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
878 }
879 fcb->agc = agc;
880 /*
881 * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
882 */
883 fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
884 feat_cepsize(fcb),
885 sizeof(mfcc_t));
886 /* This one is actually just an array of pointers to "flatten out"
887 * wraparounds. */
888 fcb->tmpcepbuf = (mfcc_t** )ckd_calloc(2 * feat_window_size(fcb) + 1,
889 sizeof(*fcb->tmpcepbuf));
890
891 return fcb;
892 }
893
894
895 void
feat_print(feat_t * fcb,mfcc_t *** feat,int32 nfr,FILE * fp)896 feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
897 {
898 uint32 i, j, k;
899
900 for (i = 0; i < nfr; i++) {
901 fprintf(fp, "%8d:\n", i);
902
903 for (j = 0; j < feat_dimension1(fcb); j++) {
904 fprintf(fp, "\t%2d:", j);
905
906 for (k = 0; k < feat_dimension2(fcb, j); k++)
907 fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
908 fprintf(fp, "\n");
909 }
910 }
911
912 fflush(fp);
913 }
914
915 static void
feat_cmn(feat_t * fcb,mfcc_t ** mfc,int32 nfr,int32 beginutt,int32 endutt)916 feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
917 {
918 cmn_type_t cmn_type = fcb->cmn;
919
920 if (!(beginutt && endutt)
921 && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */
922 fcb->cmn = cmn_type = CMN_PRIOR;
923
924 switch (cmn_type) {
925 case CMN_CURRENT:
926 cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
927 break;
928 case CMN_PRIOR:
929 cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
930 if (endutt)
931 cmn_prior_update(fcb->cmn_struct);
932 break;
933 default:
934 ;
935 }
936 cep_dump_dbg(fcb, mfc, nfr, "After CMN");
937 }
938
939 static void
feat_agc(feat_t * fcb,mfcc_t ** mfc,int32 nfr,int32 beginutt,int32 endutt)940 feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
941 {
942 agc_type_t agc_type = fcb->agc;
943
944 if (!(beginutt && endutt)
945 && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. */
946 agc_type = AGC_EMAX;
947
948 switch (agc_type) {
949 case AGC_MAX:
950 agc_max(fcb->agc_struct, mfc, nfr);
951 break;
952 case AGC_EMAX:
953 agc_emax(fcb->agc_struct, mfc, nfr);
954 if (endutt)
955 agc_emax_update(fcb->agc_struct);
956 break;
957 case AGC_NOISE:
958 agc_noise(fcb->agc_struct, mfc, nfr);
959 break;
960 default:
961 ;
962 }
963 cep_dump_dbg(fcb, mfc, nfr, "After AGC");
964 }
965
966 static void
feat_compute_utt(feat_t * fcb,mfcc_t ** mfc,int32 nfr,int32 win,mfcc_t *** feat)967 feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
968 {
969 int32 i;
970
971 cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)");
972
973 /* Create feature vectors */
974 for (i = win; i < nfr - win; i++) {
975 fcb->compute_feat(fcb, mfc + i, feat[i - win]);
976 }
977
978 feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation");
979
980 if (fcb->lda) {
981 feat_lda_transform(fcb, feat, nfr - win * 2);
982 feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA");
983 }
984
985 if (fcb->subvecs) {
986 feat_subvec_project(fcb, feat, nfr - win * 2);
987 feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection");
988 }
989 }
990
991
992 /**
993 * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data).
994 * If out_mfc is NULL, no actual reading will be done, and the number of
995 * frames (plus padding) that would be read is returned.
996 *
997 * It's important that normalization is done before padding because
998 * frames outside the data we are interested in shouldn't be taken
999 * into normalization stats.
1000 *
1001 * @return # frames read (plus padding) if successful, -1 if
1002 * error (e.g., mfc array too small).
1003 */
1004 static int32
feat_s2mfc_read_norm_pad(feat_t * fcb,char * file,int32 win,int32 sf,int32 ef,mfcc_t *** out_mfc,int32 maxfr,int32 cepsize)1005 feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
1006 int32 sf, int32 ef,
1007 mfcc_t ***out_mfc,
1008 int32 maxfr,
1009 int32 cepsize)
1010 {
1011 FILE *fp;
1012 int32 n_float32;
1013 float32 *float_feat;
1014 struct stat statbuf;
1015 int32 i, n, byterev;
1016 int32 start_pad, end_pad;
1017 mfcc_t **mfc;
1018
1019 /* Initialize the output pointer to NULL, so that any attempts to
1020 free() it if we fail before allocating it will not segfault! */
1021 if (out_mfc)
1022 *out_mfc = NULL;
1023 E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
1024 if (ef >= 0 && ef <= sf) {
1025 E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
1026 return -1;
1027 }
1028
1029 /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
1030 if ((stat_retry(file, &statbuf) < 0)
1031 || ((fp = fopen(file, "rb")) == NULL)) {
1032 E_ERROR_SYSTEM("Failed to open file '%s' for reading", file);
1033 return -1;
1034 }
1035
1036 /* Read #floats in header */
1037 if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
1038 E_ERROR("%s: fread(#floats) failed\n", file);
1039 fclose(fp);
1040 return -1;
1041 }
1042
1043 /* Check if n_float32 matches file size */
1044 byterev = 0;
1045 if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
1046 n = n_float32;
1047 SWAP_INT32(&n);
1048
1049 if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */
1050 E_ERROR
1051 ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
1052 file, n_float32, n_float32, statbuf.st_size,
1053 statbuf.st_size);
1054 fclose(fp);
1055 return -1;
1056 }
1057
1058 n_float32 = n;
1059 byterev = 1;
1060 }
1061 if (n_float32 <= 0) {
1062 E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
1063 fclose(fp);
1064 return -1;
1065 }
1066
1067 /* Convert n to #frames of input */
1068 n = n_float32 / cepsize;
1069 if (n * cepsize != n_float32) {
1070 E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
1071 cepsize);
1072 fclose(fp);
1073 return -1;
1074 }
1075
1076 /* Check start and end frames */
1077 if (sf > 0) {
1078 if (sf >= n) {
1079 E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
1080 sf, n);
1081 fclose(fp);
1082 return -1;
1083 }
1084 }
1085 if (ef < 0)
1086 ef = n-1;
1087 else if (ef >= n) {
1088 E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
1089 file, ef, n);
1090 ef = n-1;
1091 }
1092
1093 /* Add window to start and end frames */
1094 sf -= win;
1095 ef += win;
1096 if (sf < 0) {
1097 start_pad = -sf;
1098 sf = 0;
1099 }
1100 else
1101 start_pad = 0;
1102 if (ef >= n) {
1103 end_pad = ef - n + 1;
1104 ef = n - 1;
1105 }
1106 else
1107 end_pad = 0;
1108
1109 /* Limit n if indicated by [sf..ef] */
1110 if ((ef - sf + 1) < n)
1111 n = (ef - sf + 1);
1112 if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
1113 E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
1114 file, maxfr, n + start_pad + end_pad);
1115 fclose(fp);
1116 return -1;
1117 }
1118
1119 /* If no output buffer was supplied, then skip the actual data reading. */
1120 if (out_mfc != NULL) {
1121 /* Position at desired start frame and read actual MFC data */
1122 mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
1123 if (sf > 0)
1124 fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR);
1125 n_float32 = n * cepsize;
1126 #ifdef FIXED_POINT
1127 float_feat = ckd_calloc(n_float32, sizeof(float32));
1128 #else
1129 float_feat = mfc[start_pad];
1130 #endif
1131 if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
1132 E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
1133 ckd_free_2d(mfc);
1134 fclose(fp);
1135 return -1;
1136 }
1137 if (byterev) {
1138 for (i = 0; i < n_float32; i++) {
1139 SWAP_FLOAT32(&float_feat[i]);
1140 }
1141 }
1142 #ifdef FIXED_POINT
1143 for (i = 0; i < n_float32; ++i) {
1144 mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
1145 }
1146 ckd_free(float_feat);
1147 #endif
1148
1149 /* Normalize */
1150 feat_cmn(fcb, mfc + start_pad, n, 1, 1);
1151 feat_agc(fcb, mfc + start_pad, n, 1, 1);
1152
1153 /* Replicate start and end frames if necessary. */
1154 for (i = 0; i < start_pad; ++i)
1155 memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
1156 for (i = 0; i < end_pad; ++i)
1157 memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
1158 cepsize * sizeof(mfcc_t));
1159
1160 *out_mfc = mfc;
1161 }
1162
1163 fclose(fp);
1164 return n + start_pad + end_pad;
1165 }
1166
1167
1168
1169 int32
feat_s2mfc2feat(feat_t * fcb,const char * file,const char * dir,const char * cepext,int32 sf,int32 ef,mfcc_t *** feat,int32 maxfr)1170 feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
1171 int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
1172 {
1173 char *path;
1174 char *ps = "/";
1175 int32 win, nfr;
1176 size_t file_length, cepext_length, path_length = 0;
1177 mfcc_t **mfc;
1178
1179 if (fcb->cepsize <= 0) {
1180 E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
1181 return -1;
1182 }
1183
1184 if (cepext == NULL)
1185 cepext = "";
1186
1187 /*
1188 * Create mfc filename, combining file, dir and extension if
1189 * necessary
1190 */
1191
1192 /*
1193 * First we decide about the path. If dir is defined, then use
1194 * it. Otherwise assume the filename already contains the path.
1195 */
1196 if (dir == NULL) {
1197 dir = "";
1198 ps = "";
1199 /*
1200 * This is not true but some 3rd party apps
1201 * may parse the output explicitly checking for this line
1202 */
1203 E_INFO("At directory . (current directory)\n");
1204 }
1205 else {
1206 E_INFO("At directory %s\n", dir);
1207 /*
1208 * Do not forget the path separator!
1209 */
1210 path_length += strlen(dir) + 1;
1211 }
1212
1213 /*
1214 * Include cepext, if it's not already part of the filename.
1215 */
1216 file_length = strlen(file);
1217 cepext_length = strlen(cepext);
1218 if ((file_length > cepext_length)
1219 && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
1220 cepext = "";
1221 cepext_length = 0;
1222 }
1223
1224 /*
1225 * Do not forget the '\0'
1226 */
1227 path_length += file_length + cepext_length + 1;
1228 path = (char*) ckd_calloc(path_length, sizeof(char));
1229
1230 #ifdef HAVE_SNPRINTF
1231 /*
1232 * Paranoia is our best friend...
1233 */
1234 while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) {
1235 path_length = file_length;
1236 path = (char*) ckd_realloc(path, path_length * sizeof(char));
1237 }
1238 #else
1239 sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
1240 #endif
1241
1242 win = feat_window_size(fcb);
1243 /* Pad maxfr with win, so we read enough raw feature data to
1244 * calculate the requisite number of dynamic features. */
1245 if (maxfr >= 0)
1246 maxfr += win * 2;
1247
1248 if (feat != NULL) {
1249 /* Read mfc file including window or padding if necessary. */
1250 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
1251 ckd_free(path);
1252 if (nfr < 0) {
1253 ckd_free_2d((void **) mfc);
1254 return -1;
1255 }
1256
1257 /* Actually compute the features */
1258 feat_compute_utt(fcb, mfc, nfr, win, feat);
1259
1260 ckd_free_2d((void **) mfc);
1261 }
1262 else {
1263 /* Just calculate the number of frames we would need. */
1264 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
1265 ckd_free(path);
1266 if (nfr < 0)
1267 return nfr;
1268 }
1269
1270
1271 return (nfr - win * 2);
1272 }
1273
1274 static int32
feat_s2mfc2feat_block_utt(feat_t * fcb,mfcc_t ** uttcep,int32 nfr,mfcc_t *** ofeat)1275 feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
1276 int32 nfr, mfcc_t *** ofeat)
1277 {
1278 mfcc_t **cepbuf;
1279 int32 i, win, cepsize;
1280
1281 win = feat_window_size(fcb);
1282 cepsize = feat_cepsize(fcb);
1283
1284 /* Copy and pad out the utterance (this requires that the
1285 * feature computation functions always access the buffer via
1286 * the frame pointers, which they do) */
1287 cepbuf = (mfcc_t **)ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
1288 memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));
1289
1290 /* Do normalization before we interpolate on the boundary */
1291 feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
1292 feat_agc(fcb, cepbuf + win, nfr, 1, 1);
1293
1294 /* Now interpolate */
1295 for (i = 0; i < win; ++i) {
1296 cepbuf[i] = fcb->cepbuf[i];
1297 memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
1298 cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
1299 memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
1300 }
1301 /* Compute as usual. */
1302 feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
1303 ckd_free(cepbuf);
1304 return nfr;
1305 }
1306
1307 int32
feat_s2mfc2feat_live(feat_t * fcb,mfcc_t ** uttcep,int32 * inout_ncep,int32 beginutt,int32 endutt,mfcc_t *** ofeat)1308 feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
1309 int32 beginutt, int32 endutt, mfcc_t *** ofeat)
1310 {
1311 int32 win, cepsize, nbufcep;
1312 int32 i, j, nfeatvec;
1313 int32 zero = 0;
1314
1315 /* Avoid having to check this everywhere. */
1316 if (inout_ncep == NULL) inout_ncep = &zero;
1317
1318 /* Special case for entire utterances. */
1319 if (beginutt && endutt && *inout_ncep > 0)
1320 return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);
1321
1322 win = feat_window_size(fcb);
1323 cepsize = feat_cepsize(fcb);
1324
1325 /* Empty the input buffer on start of utterance. */
1326 if (beginutt)
1327 fcb->bufpos = fcb->curpos;
1328
1329 /* Calculate how much data is in the buffer already. */
1330 nbufcep = fcb->bufpos - fcb->curpos;
1331 if (nbufcep < 0)
1332 nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
1333 /* Add any data that we have to replicate. */
1334 if (beginutt && *inout_ncep > 0)
1335 nbufcep += win;
1336 if (endutt)
1337 nbufcep += win;
1338
1339 /* Only consume as much input as will fit in the buffer. */
1340 if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
1341 /* We also can't overwrite the trailing window, hence the
1342 * reason why win is subtracted here. */
1343 *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
1344 /* Cancel end of utterance processing. */
1345 endutt = FALSE;
1346 }
1347
1348 /* FIXME: Don't modify the input! */
1349 feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
1350 feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);
1351
1352 /* Replicate first frame into the first win frames if we're at the
1353 * beginning of the utterance and there was some actual input to
1354 * deal with. (FIXME: Not entirely sure why that condition) */
1355 if (beginutt && *inout_ncep > 0) {
1356 for (i = 0; i < win; i++) {
1357 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
1358 cepsize * sizeof(mfcc_t));
1359 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1360 }
1361 /* Move the current pointer past this data. */
1362 fcb->curpos = fcb->bufpos;
1363 nbufcep -= win;
1364 }
1365
1366 /* Copy in frame data to the circular buffer. */
1367 for (i = 0; i < *inout_ncep; ++i) {
1368 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
1369 cepsize * sizeof(mfcc_t));
1370 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1371 ++nbufcep;
1372 }
1373
1374 /* Replicate last frame into the last win frames if we're at the
1375 * end of the utterance (even if there was no input, so we can
1376 * flush the output). */
1377 if (endutt) {
1378 int32 tpos; /* Index of last input frame. */
1379 if (fcb->bufpos == 0)
1380 tpos = LIVEBUFBLOCKSIZE - 1;
1381 else
1382 tpos = fcb->bufpos - 1;
1383 for (i = 0; i < win; ++i) {
1384 memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
1385 cepsize * sizeof(mfcc_t));
1386 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1387 }
1388 }
1389
1390 /* We have to leave the trailing window of frames. */
1391 nfeatvec = nbufcep - win;
1392 if (nfeatvec <= 0)
1393 return 0; /* Do nothing. */
1394
1395 for (i = 0; i < nfeatvec; ++i) {
1396 /* Handle wraparound cases. */
1397 if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
1398 /* Use tmpcepbuf for this case. Actually, we just need the pointers. */
1399 for (j = -win; j <= win; ++j) {
1400 int32 tmppos =
1401 (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
1402 fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
1403 }
1404 fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
1405 }
1406 else {
1407 fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
1408 }
1409 /* Move the read pointer forward. */
1410 ++fcb->curpos;
1411 fcb->curpos %= LIVEBUFBLOCKSIZE;
1412 }
1413
1414 if (fcb->lda)
1415 feat_lda_transform(fcb, ofeat, nfeatvec);
1416
1417 if (fcb->subvecs)
1418 feat_subvec_project(fcb, ofeat, nfeatvec);
1419
1420 return nfeatvec;
1421 }
1422
1423 void
feat_update_stats(feat_t * fcb)1424 feat_update_stats(feat_t *fcb)
1425 {
1426 if (fcb->cmn == CMN_PRIOR) {
1427 cmn_prior_update(fcb->cmn_struct);
1428 }
1429 if (fcb->agc == AGC_EMAX || fcb->agc == AGC_MAX) {
1430 agc_emax_update(fcb->agc_struct);
1431 }
1432 }
1433
1434 feat_t *
feat_retain(feat_t * f)1435 feat_retain(feat_t *f)
1436 {
1437 ++f->refcount;
1438 return f;
1439 }
1440
1441 int
feat_free(feat_t * f)1442 feat_free(feat_t * f)
1443 {
1444 if (f == NULL)
1445 return 0;
1446 if (--f->refcount > 0)
1447 return f->refcount;
1448
1449 if (f->cepbuf)
1450 ckd_free_2d((void **) f->cepbuf);
1451 ckd_free(f->tmpcepbuf);
1452
1453 if (f->name) {
1454 ckd_free((void *) f->name);
1455 }
1456 if (f->lda)
1457 ckd_free_3d((void ***) f->lda);
1458
1459 ckd_free(f->stream_len);
1460 ckd_free(f->sv_len);
1461 ckd_free(f->sv_buf);
1462 subvecs_free(f->subvecs);
1463
1464 cmn_free(f->cmn_struct);
1465 agc_free(f->agc_struct);
1466
1467 ckd_free(f);
1468 return 0;
1469 }
1470
1471
1472 void
feat_report(feat_t * f)1473 feat_report(feat_t * f)
1474 {
1475 int i;
1476 E_INFO_NOFN("Initialization of feat_t, report:\n");
1477 E_INFO_NOFN("Feature type = %s\n", f->name);
1478 E_INFO_NOFN("Cepstral size = %d\n", f->cepsize);
1479 E_INFO_NOFN("Number of streams = %d\n", f->n_stream);
1480 for (i = 0; i < f->n_stream; i++) {
1481 E_INFO_NOFN("Vector size of stream[%d]: %d\n", i,
1482 f->stream_len[i]);
1483 }
1484 E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv);
1485 for (i = 0; i < f->n_sv; i++) {
1486 int32 *sv;
1487
1488 E_INFO_NOFN("Components of subvector[%d]:", i);
1489 for (sv = f->subvecs[i]; sv && *sv != -1; ++sv)
1490 E_INFOCONT(" %d", *sv);
1491 E_INFOCONT("\n");
1492 }
1493 E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn);
1494 E_INFO_NOFN("Whether AGC is used = %d\n", f->agc);
1495 E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm);
1496 E_INFO_NOFN("\n");
1497 }
1498