1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * feat.c -- Feature vector description and cepstra->feature computation.
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log$
49  * Revision 1.22  2006/02/23  03:59:40  arthchan2003
50  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
51  *
52  * Revision 1.21.4.3  2005/10/17 04:45:57  arthchan2003
53  * Free stuffs in cmn and feat corectly.
54  *
55  * Revision 1.21.4.2  2005/09/26 02:19:57  arthchan2003
56  * Add message to show the directory which the feature is searched for.
57  *
58  * Revision 1.21.4.1  2005/07/03 22:55:50  arthchan2003
59  * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point.
60  *
61  * Revision 1.21  2005/06/22 03:29:35  arthchan2003
62  * Makefile.am s  for all subdirectory of libs3decoder/
63  *
64  * Revision 1.4  2005/04/21 23:50:26  archan
65  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
66  *
67  * Revision 1.3  2005/03/30 01:22:46  archan
68  * Fixed mistakes in last updates. Add
69  *
70  *
71  * 20.Apr.2001  RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
72  *              Adding feat_free() to free allocated memory
73  *
74  * 02-Jan-2001	Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
75  *		Modified feat_s2mfc2feat_block() to handle empty buffers at
76  *		the end of an utterance
77  *
78  * 30-Dec-2000	Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
79  *		Added feat_s2mfc2feat_block() to allow feature computation
80  *		from sequences of blocks of cepstral vectors
81  *
82  * 12-Jun-98	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
83  * 		Major changes to accommodate arbitrary feature input types.  Added
84  * 		feat_read(), moved various cep2feat functions from other files into
85  *		this one.  Also, made this module object-oriented with the feat_t type.
86  * 		Changed definition of s2mfc_read to let the caller manage MFC buffers.
87  *
88  * 03-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
89  * 		Added unistd.h include.
90  *
91  * 02-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
92  * 		Added check for sf argument to s2mfc_read being within file size.
93  *
94  * 18-Sep-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
95  * 		Added sf, ef parameters to s2mfc_read().
96  *
97  * 10-Jan-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
98  * 		Added feat_cepsize().
99  * 		Added different feature-handling (s2_4x, s3_1x39 at this point).
100  * 		Moved feature-dependent functions to feature-dependent files.
101  *
102  * 09-Jan-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
103  * 		Moved constant declarations from feat.h into here.
104  *
105  * 04-Nov-95	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
106  * 		Created.
107  */
108 
109 
110 /*
111  * This module encapsulates different feature streams used by the Sphinx group.  New
112  * stream types can be added by augmenting feat_init() and providing an accompanying
113  * compute_feat function.  It also provides a "generic" feature vector definition for
114  * handling "arbitrary" speech input feature types (see the last section in feat_init()).
115  * In this case the speech input data should already be feature vectors; no computation,
116  * such as MFC->feature conversion, is available or needed.
117  */
118 
119 #include <assert.h>
120 #include <string.h>
121 #ifdef HAVE_CONFIG_H
122 #include <config.h>
123 #endif
124 
125 #ifdef _MSC_VER
126 #pragma warning (disable: 4244 4996)
127 #endif
128 
129 #include "sphinxbase/fe.h"
130 #include "sphinxbase/feat.h"
131 #include "sphinxbase/bio.h"
132 #include "sphinxbase/pio.h"
133 #include "sphinxbase/cmn.h"
134 #include "sphinxbase/agc.h"
135 #include "sphinxbase/err.h"
136 #include "sphinxbase/ckd_alloc.h"
137 #include "sphinxbase/prim_type.h"
138 #include "sphinxbase/glist.h"
139 
140 #define FEAT_VERSION	"1.0"
141 #define FEAT_DCEP_WIN		2
142 
143 #ifdef DUMP_FEATURES
144 static void
cep_dump_dbg(feat_t * fcb,mfcc_t ** mfc,int32 nfr,const char * text)145 cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text)
146 {
147     int32 i, j;
148 
149     E_INFO("%s\n", text);
150     for (i = 0; i < nfr; i++) {
151         for (j = 0; j < fcb->cepsize; j++) {
152             fprintf(stderr, "%f ", MFCC2FLOAT(mfc[i][j]));
153         }
154         fprintf(stderr, "\n");
155     }
156 }
157 static void
feat_print_dbg(feat_t * fcb,mfcc_t *** feat,int32 nfr,const char * text)158 feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text)
159 {
160     E_INFO("%s\n", text);
161     feat_print(fcb, feat, nfr, stderr);
162 }
163 #else /* !DUMP_FEATURES */
164 #define cep_dump_dbg(fcb,mfc,nfr,text)
165 #define feat_print_dbg(fcb,mfc,nfr,text)
166 #endif
167 
168 int32 **
parse_subvecs(char const * str)169 parse_subvecs(char const *str)
170 {
171     char const *strp;
172     int32 n, n2, l;
173     glist_t dimlist;            /* List of dimensions in one subvector */
174     glist_t veclist;            /* List of dimlists (subvectors) */
175     int32 **subvec;
176     gnode_t *gn, *gn2;
177 
178     veclist = NULL;
179 
180     strp = str;
181     for (;;) {
182         dimlist = NULL;
183 
184         for (;;) {
185             if (sscanf(strp, "%d%n", &n, &l) != 1)
186                 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
187                         strp - str);
188             strp += l;
189 
190             if (*strp == '-') {
191                 strp++;
192 
193                 if (sscanf(strp, "%d%n", &n2, &l) != 1)
194                     E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
195                             strp - str);
196                 strp += l;
197             }
198             else
199                 n2 = n;
200 
201             if ((n < 0) || (n > n2))
202                 E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str,
203                         strp - str);
204 
205             for (; n <= n2; n++) {
206 		gnode_t *gn;
207 		for (gn = dimlist; gn; gn = gnode_next(gn))
208 		    if (gnode_int32(gn) == n)
209 			break;
210 		if (gn != NULL)
211                     E_FATAL("'%s': Duplicate dimension ending @pos %d\n",
212                             str, strp - str);
213 
214                 dimlist = glist_add_int32(dimlist, n);
215             }
216 
217             if ((*strp == '\0') || (*strp == '/'))
218                 break;
219 
220             if (*strp != ',')
221                 E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str);
222 
223             strp++;
224         }
225 
226         veclist = glist_add_ptr(veclist, (void *) dimlist);
227 
228         if (*strp == '\0')
229             break;
230 
231         assert(*strp == '/');
232         strp++;
233     }
234 
235     /* Convert the glists to arrays; remember the glists are in reverse order of the input! */
236     n = glist_count(veclist);   /* #Subvectors */
237     subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *));     /* +1 for sentinel */
238     subvec[n] = NULL;           /* sentinel */
239 
240     for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) {
241         gn2 = (glist_t) gnode_ptr(gn);
242 
243         n2 = glist_count(gn2);  /* Length of this subvector */
244         if (n2 <= 0)
245             E_FATAL("'%s': 0-length subvector\n", str);
246 
247         subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32));        /* +1 for sentinel */
248         subvec[n][n2] = -1;     /* sentinel */
249 
250         for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2)
251             subvec[n][n2] = gnode_int32(gn2);
252         assert((n2 < 0) && (!gn2));
253     }
254     assert((n < 0) && (!gn));
255 
256     /* Free the glists */
257     for (gn = veclist; gn; gn = gnode_next(gn)) {
258         gn2 = (glist_t) gnode_ptr(gn);
259         glist_free(gn2);
260     }
261     glist_free(veclist);
262 
263     return subvec;
264 }
265 
266 void
subvecs_free(int32 ** subvecs)267 subvecs_free(int32 **subvecs)
268 {
269     int32 **sv;
270 
271     for (sv = subvecs; sv && *sv; ++sv)
272         ckd_free(*sv);
273     ckd_free(subvecs);
274 }
275 
276 int
feat_set_subvecs(feat_t * fcb,int32 ** subvecs)277 feat_set_subvecs(feat_t *fcb, int32 **subvecs)
278 {
279     int32 **sv;
280     uint32 n_sv, n_dim, i;
281 
282     if (subvecs == NULL) {
283         subvecs_free(fcb->subvecs);
284         ckd_free(fcb->sv_buf);
285         ckd_free(fcb->sv_len);
286         fcb->n_sv = 0;
287         fcb->subvecs = NULL;
288         fcb->sv_len = NULL;
289         fcb->sv_buf = NULL;
290         fcb->sv_dim = 0;
291         return 0;
292     }
293 
294     if (fcb->n_stream != 1) {
295         E_ERROR("Subvector specifications require single-stream features!");
296         return -1;
297     }
298 
299     n_sv = 0;
300     n_dim = 0;
301     for (sv = subvecs; sv && *sv; ++sv) {
302         int32 *d;
303 
304         for (d = *sv; d && *d != -1; ++d) {
305             ++n_dim;
306         }
307         ++n_sv;
308     }
309     if (n_dim > feat_dimension(fcb)) {
310         E_ERROR("Total dimensionality of subvector specification %d "
311                 "> feature dimensionality %d\n", n_dim, feat_dimension(fcb));
312         return -1;
313     }
314 
315     fcb->n_sv = n_sv;
316     fcb->subvecs = subvecs;
317     fcb->sv_len = (uint32 *)ckd_calloc(n_sv, sizeof(*fcb->sv_len));
318     fcb->sv_buf = (mfcc_t *)ckd_calloc(n_dim, sizeof(*fcb->sv_buf));
319     fcb->sv_dim = n_dim;
320     for (i = 0; i < n_sv; ++i) {
321         int32 *d;
322         for (d = subvecs[i]; d && *d != -1; ++d) {
323             ++fcb->sv_len[i];
324         }
325     }
326 
327     return 0;
328 }
329 
330 /**
331  * Project feature components to subvectors (if any).
332  */
333 static void
feat_subvec_project(feat_t * fcb,mfcc_t *** inout_feat,uint32 nfr)334 feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
335 {
336     uint32 i;
337 
338     if (fcb->subvecs == NULL)
339         return;
340     for (i = 0; i < nfr; ++i) {
341         mfcc_t *out;
342         int32 j;
343 
344         out = fcb->sv_buf;
345         for (j = 0; j < fcb->n_sv; ++j) {
346             int32 *d;
347             for (d = fcb->subvecs[j]; d && *d != -1; ++d) {
348                 *out++ = inout_feat[i][0][*d];
349             }
350         }
351         memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf));
352     }
353 }
354 
355 mfcc_t ***
feat_array_alloc(feat_t * fcb,int32 nfr)356 feat_array_alloc(feat_t * fcb, int32 nfr)
357 {
358     int32 i, j, k;
359     mfcc_t *data, *d, ***feat;
360 
361     assert(fcb);
362     assert(nfr > 0);
363     assert(feat_dimension(fcb) > 0);
364 
365     /* Make sure to use the dimensionality of the features *before*
366        LDA and subvector projection. */
367     k = 0;
368     for (i = 0; i < fcb->n_stream; ++i)
369         k += fcb->stream_len[i];
370     assert(k >= feat_dimension(fcb));
371     assert(k >= fcb->sv_dim);
372 
373     feat =
374         (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *));
375     data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t));
376 
377     for (i = 0; i < nfr; i++) {
378         d = data + i * k;
379         for (j = 0; j < feat_dimension1(fcb); j++) {
380             feat[i][j] = d;
381             d += feat_dimension2(fcb, j);
382         }
383     }
384 
385     return feat;
386 }
387 
388 mfcc_t ***
feat_array_realloc(feat_t * fcb,mfcc_t *** old_feat,int32 ofr,int32 nfr)389 feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
390 {
391     int32 i, k, cf;
392     mfcc_t*** new_feat;
393 
394     assert(fcb);
395     assert(nfr > 0);
396     assert(ofr > 0);
397     assert(feat_dimension(fcb) > 0);
398 
399     /* Make sure to use the dimensionality of the features *before*
400        LDA and subvector projection. */
401     k = 0;
402     for (i = 0; i < fcb->n_stream; ++i)
403         k += fcb->stream_len[i];
404     assert(k >= feat_dimension(fcb));
405     assert(k >= fcb->sv_dim);
406 
407     new_feat = feat_array_alloc(fcb, nfr);
408 
409     cf = (nfr < ofr) ? nfr : ofr;
410     memcpy(new_feat[0][0], old_feat[0][0], cf * k * sizeof(mfcc_t));
411 
412     feat_array_free(old_feat);
413 
414     return new_feat;
415 }
416 
417 void
feat_array_free(mfcc_t *** feat)418 feat_array_free(mfcc_t ***feat)
419 {
420     ckd_free(feat[0][0]);
421     ckd_free_2d((void **)feat);
422 }
423 
424 static void
feat_s2_4x_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)425 feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
426 {
427     mfcc_t *f;
428     mfcc_t *w, *_w;
429     mfcc_t *w1, *w_1, *_w1, *_w_1;
430     mfcc_t d1, d2;
431     int32 i, j;
432 
433     assert(fcb);
434     assert(feat_cepsize(fcb) == 13);
435     assert(feat_n_stream(fcb) == 4);
436     assert(feat_stream_len(fcb, 0) == 12);
437     assert(feat_stream_len(fcb, 1) == 24);
438     assert(feat_stream_len(fcb, 2) == 3);
439     assert(feat_stream_len(fcb, 3) == 12);
440     assert(feat_window_size(fcb) == 4);
441 
442     /* CEP; skip C0 */
443     memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
444 
445     /*
446      * DCEP(SHORT): mfc[2] - mfc[-2]
447      * DCEP(LONG):  mfc[4] - mfc[-4]
448      */
449     w = mfc[2] + 1;             /* +1 to skip C0 */
450     _w = mfc[-2] + 1;
451 
452     f = feat[1];
453     for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */
454         f[i] = w[i] - _w[i];
455 
456     w = mfc[4] + 1;             /* +1 to skip C0 */
457     _w = mfc[-4] + 1;
458 
459     for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++)    /* Long-term */
460         f[i] = w[j] - _w[j];
461 
462     /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
463     w1 = mfc[3] + 1;            /* Final +1 to skip C0 */
464     _w1 = mfc[-1] + 1;
465     w_1 = mfc[1] + 1;
466     _w_1 = mfc[-3] + 1;
467 
468     f = feat[3];
469     for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
470         d1 = w1[i] - _w1[i];
471         d2 = w_1[i] - _w_1[i];
472 
473         f[i] = d1 - d2;
474     }
475 
476     /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
477     f = feat[2];
478     f[0] = mfc[0][0];
479     f[1] = mfc[2][0] - mfc[-2][0];
480 
481     d1 = mfc[3][0] - mfc[-1][0];
482     d2 = mfc[1][0] - mfc[-3][0];
483     f[2] = d1 - d2;
484 }
485 
486 
487 static void
feat_s3_1x39_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)488 feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
489 {
490     mfcc_t *f;
491     mfcc_t *w, *_w;
492     mfcc_t *w1, *w_1, *_w1, *_w_1;
493     mfcc_t d1, d2;
494     int32 i;
495 
496     assert(fcb);
497     assert(feat_cepsize(fcb) == 13);
498     assert(feat_n_stream(fcb) == 1);
499     assert(feat_stream_len(fcb, 0) == 39);
500     assert(feat_window_size(fcb) == 3);
501 
502     /* CEP; skip C0 */
503     memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
504     /*
505      * DCEP: mfc[2] - mfc[-2];
506      */
507     f = feat[0] + feat_cepsize(fcb) - 1;
508     w = mfc[2] + 1;             /* +1 to skip C0 */
509     _w = mfc[-2] + 1;
510 
511     for (i = 0; i < feat_cepsize(fcb) - 1; i++)
512         f[i] = w[i] - _w[i];
513 
514     /* POW: C0, DC0, D2C0 */
515     f += feat_cepsize(fcb) - 1;
516 
517     f[0] = mfc[0][0];
518     f[1] = mfc[2][0] - mfc[-2][0];
519 
520     d1 = mfc[3][0] - mfc[-1][0];
521     d2 = mfc[1][0] - mfc[-3][0];
522     f[2] = d1 - d2;
523 
524     /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
525     f += 3;
526 
527     w1 = mfc[3] + 1;            /* Final +1 to skip C0 */
528     _w1 = mfc[-1] + 1;
529     w_1 = mfc[1] + 1;
530     _w_1 = mfc[-3] + 1;
531 
532     for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
533         d1 = w1[i] - _w1[i];
534         d2 = w_1[i] - _w_1[i];
535 
536         f[i] = d1 - d2;
537     }
538 }
539 
540 
541 static void
feat_s3_cep(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)542 feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
543 {
544     assert(fcb);
545     assert(feat_n_stream(fcb) == 1);
546     assert(feat_window_size(fcb) == 0);
547 
548     /* CEP */
549     memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
550 }
551 
552 static void
feat_s3_cep_dcep(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)553 feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
554 {
555     mfcc_t *f;
556     mfcc_t *w, *_w;
557     int32 i;
558 
559     assert(fcb);
560     assert(feat_n_stream(fcb) == 1);
561     assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
562     assert(feat_window_size(fcb) == 2);
563 
564     /* CEP */
565     memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
566 
567     /*
568      * DCEP: mfc[2] - mfc[-2];
569      */
570     f = feat[0] + feat_cepsize(fcb);
571     w = mfc[2];
572     _w = mfc[-2];
573 
574     for (i = 0; i < feat_cepsize(fcb); i++)
575         f[i] = w[i] - _w[i];
576 }
577 
578 static void
feat_1s_c_d_dd_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)579 feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
580 {
581     mfcc_t *f;
582     mfcc_t *w, *_w;
583     mfcc_t *w1, *w_1, *_w1, *_w_1;
584     mfcc_t d1, d2;
585     int32 i;
586 
587     assert(fcb);
588     assert(feat_n_stream(fcb) == 1);
589     assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3);
590     assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1);
591 
592     /* CEP */
593     memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
594 
595     /*
596      * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
597      */
598     f = feat[0] + feat_cepsize(fcb);
599     w = mfc[FEAT_DCEP_WIN];
600     _w = mfc[-FEAT_DCEP_WIN];
601 
602     for (i = 0; i < feat_cepsize(fcb); i++)
603         f[i] = w[i] - _w[i];
604 
605     /*
606      * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
607      * where w = FEAT_DCEP_WIN
608      */
609     f += feat_cepsize(fcb);
610 
611     w1 = mfc[FEAT_DCEP_WIN + 1];
612     _w1 = mfc[-FEAT_DCEP_WIN + 1];
613     w_1 = mfc[FEAT_DCEP_WIN - 1];
614     _w_1 = mfc[-FEAT_DCEP_WIN - 1];
615 
616     for (i = 0; i < feat_cepsize(fcb); i++) {
617         d1 = w1[i] - _w1[i];
618         d2 = w_1[i] - _w_1[i];
619 
620         f[i] = d1 - d2;
621     }
622 }
623 
624 static void
feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)625 feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
626 {
627     mfcc_t *f;
628     mfcc_t *w, *_w;
629     mfcc_t *w1, *w_1, *_w1, *_w_1;
630     mfcc_t d1, d2;
631     int32 i;
632 
633     assert(fcb);
634     assert(feat_n_stream(fcb) == 1);
635     assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
636     assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);
637 
638     /* CEP */
639     memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
640 
641     /*
642      * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
643      */
644     f = feat[0] + feat_cepsize(fcb);
645     w = mfc[FEAT_DCEP_WIN];
646     _w = mfc[-FEAT_DCEP_WIN];
647 
648     for (i = 0; i < feat_cepsize(fcb); i++)
649         f[i] = w[i] - _w[i];
650 
651     /*
652      * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2;
653      */
654     f += feat_cepsize(fcb);
655     w = mfc[FEAT_DCEP_WIN * 2];
656     _w = mfc[-FEAT_DCEP_WIN * 2];
657 
658     for (i = 0; i < feat_cepsize(fcb); i++)
659         f[i] = w[i] - _w[i];
660 
661     /*
662      * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
663      * where w = FEAT_DCEP_WIN
664      */
665     f += feat_cepsize(fcb);
666 
667     w1 = mfc[FEAT_DCEP_WIN + 1];
668     _w1 = mfc[-FEAT_DCEP_WIN + 1];
669     w_1 = mfc[FEAT_DCEP_WIN - 1];
670     _w_1 = mfc[-FEAT_DCEP_WIN - 1];
671 
672     for (i = 0; i < feat_cepsize(fcb); i++) {
673         d1 = w1[i] - _w1[i];
674         d2 = w_1[i] - _w_1[i];
675 
676         f[i] = d1 - d2;
677     }
678 }
679 
680 static void
feat_copy(feat_t * fcb,mfcc_t ** mfc,mfcc_t ** feat)681 feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
682 {
683     int32 win, i, j;
684 
685     win = feat_window_size(fcb);
686 
687     /* Concatenate input features */
688     for (i = -win; i <= win; ++i) {
689         uint32 spos = 0;
690 
691         for (j = 0; j < feat_n_stream(fcb); ++j) {
692             uint32 stream_len;
693 
694             /* Unscale the stream length by the window. */
695             stream_len = feat_stream_len(fcb, j) / (2 * win + 1);
696             memcpy(feat[j] + ((i + win) * stream_len),
697                    mfc[i] + spos,
698                    stream_len * sizeof(mfcc_t));
699             spos += stream_len;
700         }
701     }
702 }
703 
704 feat_t *
feat_init(char const * type,cmn_type_t cmn,int32 varnorm,agc_type_t agc,int32 breport,int32 cepsize)705 feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
706           agc_type_t agc, int32 breport, int32 cepsize)
707 {
708     feat_t *fcb;
709 
710     if (cepsize == 0)
711         cepsize = 13;
712     if (breport)
713         E_INFO
714             ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
715              type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);
716 
717     fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
718     fcb->refcount = 1;
719     fcb->name = (char *) ckd_salloc(type);
720     if (strcmp(type, "s2_4x") == 0) {
721         /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
722         if (cepsize != 13) {
723             E_ERROR("s2_4x features require cepsize == 13\n");
724             ckd_free(fcb);
725             return NULL;
726         }
727         fcb->cepsize = 13;
728         fcb->n_stream = 4;
729         fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
730         fcb->stream_len[0] = 12;
731         fcb->stream_len[1] = 24;
732         fcb->stream_len[2] = 3;
733         fcb->stream_len[3] = 12;
734         fcb->out_dim = 51;
735         fcb->window_size = 4;
736         fcb->compute_feat = feat_s2_4x_cep2feat;
737     }
738     else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
739         /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
740         if (cepsize != 13) {
741             E_ERROR("s2_4x features require cepsize == 13\n");
742             ckd_free(fcb);
743             return NULL;
744         }
745         fcb->cepsize = 13;
746         fcb->n_stream = 1;
747         fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
748         fcb->stream_len[0] = 39;
749         fcb->out_dim = 39;
750         fcb->window_size = 3;
751         fcb->compute_feat = feat_s3_1x39_cep2feat;
752     }
753     else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
754         fcb->cepsize = cepsize;
755         fcb->n_stream = 1;
756         fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
757         fcb->stream_len[0] = cepsize * 3;
758         fcb->out_dim = cepsize * 3;
759         fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
760         fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
761     }
762     else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
763         fcb->cepsize = cepsize;
764         fcb->n_stream = 1;
765         fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
766         fcb->stream_len[0] = cepsize * 4;
767         fcb->out_dim = cepsize * 4;
768         fcb->window_size = FEAT_DCEP_WIN * 2;
769         fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
770     }
771     else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
772         /* 1-stream cep/dcep */
773         fcb->cepsize = cepsize;
774         fcb->n_stream = 1;
775         fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
776         fcb->stream_len[0] = feat_cepsize(fcb) * 2;
777         fcb->out_dim = fcb->stream_len[0];
778         fcb->window_size = 2;
779         fcb->compute_feat = feat_s3_cep_dcep;
780     }
781     else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
782         /* 1-stream cep */
783         fcb->cepsize = cepsize;
784         fcb->n_stream = 1;
785         fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
786         fcb->stream_len[0] = feat_cepsize(fcb);
787         fcb->out_dim = fcb->stream_len[0];
788         fcb->window_size = 0;
789         fcb->compute_feat = feat_s3_cep;
790     }
791     else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
792 	/* 1-stream cep with frames concatenated, so called cepwin features */
793         if (strncmp(type, "1s_3c", 5) == 0)
794             fcb->window_size = 3;
795         else
796     	    fcb->window_size = 4;
797 
798         fcb->cepsize = cepsize;
799         fcb->n_stream = 1;
800         fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
801         fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
802         fcb->out_dim = fcb->stream_len[0];
803         fcb->compute_feat = feat_copy;
804     }
805     else {
806         int32 i, k, l;
807         size_t len;
808         char *strp;
809         char *mtype = ckd_salloc(type);
810         char *wd = ckd_salloc(type);
811         /*
812          * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
813          * comma separated list of feature stream widths; #items =
814          * #streams).  An optional window size (frames will be
815          * concatenated) is also allowed, which can be specified with
816          * a colon after the list of feature streams.
817          */
818         len = strlen(mtype);
819         k = 0;
820         for (i = 1; i < len - 1; i++) {
821             if (mtype[i] == ',') {
822                 mtype[i] = ' ';
823                 k++;
824             }
825             else if (mtype[i] == ':') {
826                 mtype[i] = '\0';
827                 fcb->window_size = atoi(mtype + i + 1);
828                 break;
829             }
830         }
831         k++;                    /* Presumably there are (#commas+1) streams */
832         fcb->n_stream = k;
833         fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));
834 
835         /* Scan individual feature stream lengths */
836         strp = mtype;
837         i = 0;
838         fcb->out_dim = 0;
839         fcb->cepsize = 0;
840         while (sscanf(strp, "%s%n", wd, &l) == 1) {
841             strp += l;
842             if ((i >= fcb->n_stream)
843                 || (sscanf(wd, "%u", &(fcb->stream_len[i])) != 1)
844                 || (fcb->stream_len[i] <= 0))
845                 E_FATAL("Bad feature type argument\n");
846             /* Input size before windowing */
847             fcb->cepsize += fcb->stream_len[i];
848             if (fcb->window_size > 0)
849                 fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
850             /* Output size after windowing */
851             fcb->out_dim += fcb->stream_len[i];
852             i++;
853         }
854         if (i != fcb->n_stream)
855             E_FATAL("Bad feature type argument\n");
856         if (fcb->cepsize != cepsize)
857     	    E_FATAL("Bad feature type argument\n");
858 
859         /* Input is already the feature stream */
860         fcb->compute_feat = feat_copy;
861         ckd_free(mtype);
862         ckd_free(wd);
863     }
864 
865     if (cmn != CMN_NONE)
866         fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
867     fcb->cmn = cmn;
868     fcb->varnorm = varnorm;
869     if (agc != AGC_NONE) {
870         fcb->agc_struct = agc_init();
871         /*
872          * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
873          * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
874          * switches to EMAX
875          */
876         /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
877         agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
878     }
879     fcb->agc = agc;
880     /*
881      * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
882      */
883     fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
884                                             feat_cepsize(fcb),
885                                             sizeof(mfcc_t));
886     /* This one is actually just an array of pointers to "flatten out"
887      * wraparounds. */
888     fcb->tmpcepbuf = (mfcc_t** )ckd_calloc(2 * feat_window_size(fcb) + 1,
889                                 sizeof(*fcb->tmpcepbuf));
890 
891     return fcb;
892 }
893 
894 
895 void
feat_print(feat_t * fcb,mfcc_t *** feat,int32 nfr,FILE * fp)896 feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
897 {
898     uint32 i, j, k;
899 
900     for (i = 0; i < nfr; i++) {
901         fprintf(fp, "%8d:\n", i);
902 
903         for (j = 0; j < feat_dimension1(fcb); j++) {
904             fprintf(fp, "\t%2d:", j);
905 
906             for (k = 0; k < feat_dimension2(fcb, j); k++)
907                 fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
908             fprintf(fp, "\n");
909         }
910     }
911 
912     fflush(fp);
913 }
914 
915 static void
feat_cmn(feat_t * fcb,mfcc_t ** mfc,int32 nfr,int32 beginutt,int32 endutt)916 feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
917 {
918     cmn_type_t cmn_type = fcb->cmn;
919 
920     if (!(beginutt && endutt)
921         && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */
922         fcb->cmn = cmn_type = CMN_PRIOR;
923 
924     switch (cmn_type) {
925     case CMN_CURRENT:
926         cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
927         break;
928     case CMN_PRIOR:
929         cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
930         if (endutt)
931             cmn_prior_update(fcb->cmn_struct);
932         break;
933     default:
934         ;
935     }
936     cep_dump_dbg(fcb, mfc, nfr, "After CMN");
937 }
938 
939 static void
feat_agc(feat_t * fcb,mfcc_t ** mfc,int32 nfr,int32 beginutt,int32 endutt)940 feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
941 {
942     agc_type_t agc_type = fcb->agc;
943 
944     if (!(beginutt && endutt)
945         && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. */
946         agc_type = AGC_EMAX;
947 
948     switch (agc_type) {
949     case AGC_MAX:
950         agc_max(fcb->agc_struct, mfc, nfr);
951         break;
952     case AGC_EMAX:
953         agc_emax(fcb->agc_struct, mfc, nfr);
954         if (endutt)
955             agc_emax_update(fcb->agc_struct);
956         break;
957     case AGC_NOISE:
958         agc_noise(fcb->agc_struct, mfc, nfr);
959         break;
960     default:
961         ;
962     }
963     cep_dump_dbg(fcb, mfc, nfr, "After AGC");
964 }
965 
966 static void
feat_compute_utt(feat_t * fcb,mfcc_t ** mfc,int32 nfr,int32 win,mfcc_t *** feat)967 feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
968 {
969     int32 i;
970 
971     cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)");
972 
973     /* Create feature vectors */
974     for (i = win; i < nfr - win; i++) {
975         fcb->compute_feat(fcb, mfc + i, feat[i - win]);
976     }
977 
978     feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation");
979 
980     if (fcb->lda) {
981         feat_lda_transform(fcb, feat, nfr - win * 2);
982         feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA");
983     }
984 
985     if (fcb->subvecs) {
986         feat_subvec_project(fcb, feat, nfr - win * 2);
987         feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection");
988     }
989 }
990 
991 
992 /**
993  * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data).
994  * If out_mfc is NULL, no actual reading will be done, and the number of
995  * frames (plus padding) that would be read is returned.
996  *
997  * It's important that normalization is done before padding because
998  * frames outside the data we are interested in shouldn't be taken
999  * into normalization stats.
1000  *
1001  * @return # frames read (plus padding) if successful, -1 if
1002  * error (e.g., mfc array too small).
1003  */
1004 static int32
feat_s2mfc_read_norm_pad(feat_t * fcb,char * file,int32 win,int32 sf,int32 ef,mfcc_t *** out_mfc,int32 maxfr,int32 cepsize)1005 feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
1006             		 int32 sf, int32 ef,
1007             		 mfcc_t ***out_mfc,
1008             		 int32 maxfr,
1009             		 int32 cepsize)
1010 {
1011     FILE *fp;
1012     int32 n_float32;
1013     float32 *float_feat;
1014     struct stat statbuf;
1015     int32 i, n, byterev;
1016     int32 start_pad, end_pad;
1017     mfcc_t **mfc;
1018 
1019     /* Initialize the output pointer to NULL, so that any attempts to
1020        free() it if we fail before allocating it will not segfault! */
1021     if (out_mfc)
1022         *out_mfc = NULL;
1023     E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
1024     if (ef >= 0 && ef <= sf) {
1025         E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
1026         return -1;
1027     }
1028 
1029     /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
1030     if ((stat_retry(file, &statbuf) < 0)
1031         || ((fp = fopen(file, "rb")) == NULL)) {
1032         E_ERROR_SYSTEM("Failed to open file '%s' for reading", file);
1033         return -1;
1034     }
1035 
1036     /* Read #floats in header */
1037     if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
1038         E_ERROR("%s: fread(#floats) failed\n", file);
1039         fclose(fp);
1040         return -1;
1041     }
1042 
1043     /* Check if n_float32 matches file size */
1044     byterev = 0;
1045     if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
1046         n = n_float32;
1047         SWAP_INT32(&n);
1048 
1049         if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) {   /* RAH, typecast both sides to remove compile warning */
1050             E_ERROR
1051                 ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
1052                  file, n_float32, n_float32, statbuf.st_size,
1053                  statbuf.st_size);
1054             fclose(fp);
1055             return -1;
1056         }
1057 
1058         n_float32 = n;
1059         byterev = 1;
1060     }
1061     if (n_float32 <= 0) {
1062         E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
1063         fclose(fp);
1064         return -1;
1065     }
1066 
1067     /* Convert n to #frames of input */
1068     n = n_float32 / cepsize;
1069     if (n * cepsize != n_float32) {
1070         E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
1071                 cepsize);
1072         fclose(fp);
1073         return -1;
1074     }
1075 
1076     /* Check start and end frames */
1077     if (sf > 0) {
1078         if (sf >= n) {
1079             E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
1080                     sf, n);
1081             fclose(fp);
1082             return -1;
1083         }
1084     }
1085     if (ef < 0)
1086         ef = n-1;
1087     else if (ef >= n) {
1088         E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
1089                file, ef, n);
1090         ef = n-1;
1091     }
1092 
1093     /* Add window to start and end frames */
1094     sf -= win;
1095     ef += win;
1096     if (sf < 0) {
1097         start_pad = -sf;
1098         sf = 0;
1099     }
1100     else
1101         start_pad = 0;
1102     if (ef >= n) {
1103         end_pad = ef - n + 1;
1104         ef = n - 1;
1105     }
1106     else
1107         end_pad = 0;
1108 
1109     /* Limit n if indicated by [sf..ef] */
1110     if ((ef - sf + 1) < n)
1111         n = (ef - sf + 1);
1112     if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
1113         E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
1114                 file, maxfr, n + start_pad + end_pad);
1115         fclose(fp);
1116         return -1;
1117     }
1118 
1119     /* If no output buffer was supplied, then skip the actual data reading. */
1120     if (out_mfc != NULL) {
1121         /* Position at desired start frame and read actual MFC data */
1122         mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
1123         if (sf > 0)
1124             fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR);
1125         n_float32 = n * cepsize;
1126 #ifdef FIXED_POINT
1127         float_feat = ckd_calloc(n_float32, sizeof(float32));
1128 #else
1129         float_feat = mfc[start_pad];
1130 #endif
1131         if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
1132             E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
1133             ckd_free_2d(mfc);
1134             fclose(fp);
1135             return -1;
1136         }
1137         if (byterev) {
1138             for (i = 0; i < n_float32; i++) {
1139                 SWAP_FLOAT32(&float_feat[i]);
1140             }
1141         }
1142 #ifdef FIXED_POINT
1143         for (i = 0; i < n_float32; ++i) {
1144             mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
1145         }
1146         ckd_free(float_feat);
1147 #endif
1148 
1149         /* Normalize */
1150         feat_cmn(fcb, mfc + start_pad, n, 1, 1);
1151         feat_agc(fcb, mfc + start_pad, n, 1, 1);
1152 
1153         /* Replicate start and end frames if necessary. */
1154         for (i = 0; i < start_pad; ++i)
1155             memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
1156         for (i = 0; i < end_pad; ++i)
1157             memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
1158                    cepsize * sizeof(mfcc_t));
1159 
1160         *out_mfc = mfc;
1161     }
1162 
1163     fclose(fp);
1164     return n + start_pad + end_pad;
1165 }
1166 
1167 
1168 
1169 int32
feat_s2mfc2feat(feat_t * fcb,const char * file,const char * dir,const char * cepext,int32 sf,int32 ef,mfcc_t *** feat,int32 maxfr)1170 feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
1171                 int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
1172 {
1173     char *path;
1174     char *ps = "/";
1175     int32 win, nfr;
1176     size_t file_length, cepext_length, path_length = 0;
1177     mfcc_t **mfc;
1178 
1179     if (fcb->cepsize <= 0) {
1180         E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
1181         return -1;
1182     }
1183 
1184     if (cepext == NULL)
1185         cepext = "";
1186 
1187     /*
1188      * Create mfc filename, combining file, dir and extension if
1189      * necessary
1190      */
1191 
1192     /*
1193      * First we decide about the path. If dir is defined, then use
1194      * it. Otherwise assume the filename already contains the path.
1195      */
1196     if (dir == NULL) {
1197         dir = "";
1198         ps = "";
1199         /*
1200          * This is not true but some 3rd party apps
1201          * may parse the output explicitly checking for this line
1202          */
1203         E_INFO("At directory . (current directory)\n");
1204     }
1205     else {
1206         E_INFO("At directory %s\n", dir);
1207         /*
1208          * Do not forget the path separator!
1209          */
1210         path_length += strlen(dir) + 1;
1211     }
1212 
1213     /*
1214      * Include cepext, if it's not already part of the filename.
1215      */
1216     file_length = strlen(file);
1217     cepext_length = strlen(cepext);
1218     if ((file_length > cepext_length)
1219         && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
1220         cepext = "";
1221         cepext_length = 0;
1222     }
1223 
1224     /*
1225      * Do not forget the '\0'
1226      */
1227     path_length += file_length + cepext_length + 1;
1228     path = (char*) ckd_calloc(path_length, sizeof(char));
1229 
1230 #ifdef HAVE_SNPRINTF
1231     /*
1232      * Paranoia is our best friend...
1233      */
1234     while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) {
1235         path_length = file_length;
1236         path = (char*) ckd_realloc(path, path_length * sizeof(char));
1237     }
1238 #else
1239     sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
1240 #endif
1241 
1242     win = feat_window_size(fcb);
1243     /* Pad maxfr with win, so we read enough raw feature data to
1244      * calculate the requisite number of dynamic features. */
1245     if (maxfr >= 0)
1246         maxfr += win * 2;
1247 
1248     if (feat != NULL) {
1249         /* Read mfc file including window or padding if necessary. */
1250         nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
1251         ckd_free(path);
1252         if (nfr < 0) {
1253             ckd_free_2d((void **) mfc);
1254             return -1;
1255         }
1256 
1257         /* Actually compute the features */
1258         feat_compute_utt(fcb, mfc, nfr, win, feat);
1259 
1260         ckd_free_2d((void **) mfc);
1261     }
1262     else {
1263         /* Just calculate the number of frames we would need. */
1264         nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
1265         ckd_free(path);
1266         if (nfr < 0)
1267             return nfr;
1268     }
1269 
1270 
1271     return (nfr - win * 2);
1272 }
1273 
1274 static int32
feat_s2mfc2feat_block_utt(feat_t * fcb,mfcc_t ** uttcep,int32 nfr,mfcc_t *** ofeat)1275 feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
1276 			  int32 nfr, mfcc_t *** ofeat)
1277 {
1278     mfcc_t **cepbuf;
1279     int32 i, win, cepsize;
1280 
1281     win = feat_window_size(fcb);
1282     cepsize = feat_cepsize(fcb);
1283 
1284     /* Copy and pad out the utterance (this requires that the
1285      * feature computation functions always access the buffer via
1286      * the frame pointers, which they do)  */
1287     cepbuf = (mfcc_t **)ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
1288     memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));
1289 
1290     /* Do normalization before we interpolate on the boundary */
1291     feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
1292     feat_agc(fcb, cepbuf + win, nfr, 1, 1);
1293 
1294     /* Now interpolate */
1295     for (i = 0; i < win; ++i) {
1296         cepbuf[i] = fcb->cepbuf[i];
1297         memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
1298         cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
1299         memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
1300     }
1301     /* Compute as usual. */
1302     feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
1303     ckd_free(cepbuf);
1304     return nfr;
1305 }
1306 
1307 int32
feat_s2mfc2feat_live(feat_t * fcb,mfcc_t ** uttcep,int32 * inout_ncep,int32 beginutt,int32 endutt,mfcc_t *** ofeat)1308 feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
1309 		     int32 beginutt, int32 endutt, mfcc_t *** ofeat)
1310 {
1311     int32 win, cepsize, nbufcep;
1312     int32 i, j, nfeatvec;
1313     int32 zero = 0;
1314 
1315     /* Avoid having to check this everywhere. */
1316     if (inout_ncep == NULL) inout_ncep = &zero;
1317 
1318     /* Special case for entire utterances. */
1319     if (beginutt && endutt && *inout_ncep > 0)
1320         return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);
1321 
1322     win = feat_window_size(fcb);
1323     cepsize = feat_cepsize(fcb);
1324 
1325     /* Empty the input buffer on start of utterance. */
1326     if (beginutt)
1327         fcb->bufpos = fcb->curpos;
1328 
1329     /* Calculate how much data is in the buffer already. */
1330     nbufcep = fcb->bufpos - fcb->curpos;
1331     if (nbufcep < 0)
1332 	nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
1333     /* Add any data that we have to replicate. */
1334     if (beginutt && *inout_ncep > 0)
1335         nbufcep += win;
1336     if (endutt)
1337         nbufcep += win;
1338 
1339     /* Only consume as much input as will fit in the buffer. */
1340     if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
1341         /* We also can't overwrite the trailing window, hence the
1342          * reason why win is subtracted here. */
1343         *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
1344         /* Cancel end of utterance processing. */
1345         endutt = FALSE;
1346     }
1347 
1348     /* FIXME: Don't modify the input! */
1349     feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
1350     feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);
1351 
1352     /* Replicate first frame into the first win frames if we're at the
1353      * beginning of the utterance and there was some actual input to
1354      * deal with.  (FIXME: Not entirely sure why that condition) */
1355     if (beginutt && *inout_ncep > 0) {
1356         for (i = 0; i < win; i++) {
1357             memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
1358                    cepsize * sizeof(mfcc_t));
1359             fcb->bufpos %= LIVEBUFBLOCKSIZE;
1360         }
1361         /* Move the current pointer past this data. */
1362         fcb->curpos = fcb->bufpos;
1363         nbufcep -= win;
1364     }
1365 
1366     /* Copy in frame data to the circular buffer. */
1367     for (i = 0; i < *inout_ncep; ++i) {
1368         memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
1369                cepsize * sizeof(mfcc_t));
1370         fcb->bufpos %= LIVEBUFBLOCKSIZE;
1371 	++nbufcep;
1372     }
1373 
1374     /* Replicate last frame into the last win frames if we're at the
1375      * end of the utterance (even if there was no input, so we can
1376      * flush the output). */
1377     if (endutt) {
1378         int32 tpos; /* Index of last input frame. */
1379         if (fcb->bufpos == 0)
1380             tpos = LIVEBUFBLOCKSIZE - 1;
1381         else
1382             tpos = fcb->bufpos - 1;
1383         for (i = 0; i < win; ++i) {
1384             memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
1385                    cepsize * sizeof(mfcc_t));
1386             fcb->bufpos %= LIVEBUFBLOCKSIZE;
1387         }
1388     }
1389 
1390     /* We have to leave the trailing window of frames. */
1391     nfeatvec = nbufcep - win;
1392     if (nfeatvec <= 0)
1393         return 0; /* Do nothing. */
1394 
1395     for (i = 0; i < nfeatvec; ++i) {
1396         /* Handle wraparound cases. */
1397         if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
1398             /* Use tmpcepbuf for this case.  Actually, we just need the pointers. */
1399             for (j = -win; j <= win; ++j) {
1400                 int32 tmppos =
1401                     (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
1402 		fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
1403             }
1404             fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
1405         }
1406         else {
1407             fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
1408         }
1409 	/* Move the read pointer forward. */
1410         ++fcb->curpos;
1411         fcb->curpos %= LIVEBUFBLOCKSIZE;
1412     }
1413 
1414     if (fcb->lda)
1415         feat_lda_transform(fcb, ofeat, nfeatvec);
1416 
1417     if (fcb->subvecs)
1418         feat_subvec_project(fcb, ofeat, nfeatvec);
1419 
1420     return nfeatvec;
1421 }
1422 
1423 void
feat_update_stats(feat_t * fcb)1424 feat_update_stats(feat_t *fcb)
1425 {
1426     if (fcb->cmn == CMN_PRIOR) {
1427         cmn_prior_update(fcb->cmn_struct);
1428     }
1429     if (fcb->agc == AGC_EMAX || fcb->agc == AGC_MAX) {
1430 	agc_emax_update(fcb->agc_struct);
1431     }
1432 }
1433 
1434 feat_t *
feat_retain(feat_t * f)1435 feat_retain(feat_t *f)
1436 {
1437     ++f->refcount;
1438     return f;
1439 }
1440 
1441 int
feat_free(feat_t * f)1442 feat_free(feat_t * f)
1443 {
1444     if (f == NULL)
1445         return 0;
1446     if (--f->refcount > 0)
1447         return f->refcount;
1448 
1449     if (f->cepbuf)
1450         ckd_free_2d((void **) f->cepbuf);
1451     ckd_free(f->tmpcepbuf);
1452 
1453     if (f->name) {
1454         ckd_free((void *) f->name);
1455     }
1456     if (f->lda)
1457         ckd_free_3d((void ***) f->lda);
1458 
1459     ckd_free(f->stream_len);
1460     ckd_free(f->sv_len);
1461     ckd_free(f->sv_buf);
1462     subvecs_free(f->subvecs);
1463 
1464     cmn_free(f->cmn_struct);
1465     agc_free(f->agc_struct);
1466 
1467     ckd_free(f);
1468     return 0;
1469 }
1470 
1471 
1472 void
feat_report(feat_t * f)1473 feat_report(feat_t * f)
1474 {
1475     int i;
1476     E_INFO_NOFN("Initialization of feat_t, report:\n");
1477     E_INFO_NOFN("Feature type         = %s\n", f->name);
1478     E_INFO_NOFN("Cepstral size        = %d\n", f->cepsize);
1479     E_INFO_NOFN("Number of streams    = %d\n", f->n_stream);
1480     for (i = 0; i < f->n_stream; i++) {
1481         E_INFO_NOFN("Vector size of stream[%d]: %d\n", i,
1482                     f->stream_len[i]);
1483     }
1484     E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv);
1485     for (i = 0; i < f->n_sv; i++) {
1486         int32 *sv;
1487 
1488         E_INFO_NOFN("Components of subvector[%d]:", i);
1489         for (sv = f->subvecs[i]; sv && *sv != -1; ++sv)
1490             E_INFOCONT(" %d", *sv);
1491         E_INFOCONT("\n");
1492     }
1493     E_INFO_NOFN("Whether CMN is used  = %d\n", f->cmn);
1494     E_INFO_NOFN("Whether AGC is used  = %d\n", f->agc);
1495     E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm);
1496     E_INFO_NOFN("\n");
1497 }
1498