1 /*
2  * This software has been licensed to the Centre of Speech Technology, KTH
3  * by Microsoft Corp. with the terms in the accompanying file BSD.txt,
4  * which is a BSD style license.
5  *
6  *    "Copyright (c) 1990-1996 Entropic Research Laboratory, Inc.
7  *                   All rights reserved"
8  *
9  * Written by:  Derek Lin
10  * Checked by:
11  * Revised by:  David Talkin
12  *
13  * Brief description:  Estimates F0 using normalized cross correlation and
14  *   dynamic programming.
15  *
16  */
17 
18 
19 #include "snack.h"
20 #include <math.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <limits.h>
25 #ifndef TRUE
26 # define TRUE 1
27 # define FALSE 0
28 #endif
29 #ifndef FLT_MAX
30 # define FLT_MAX (3.40282347E+38f)
31 #endif
32 #ifndef M_PI
33 # define M_PI (3.1415926536f)
34 #endif
35 #include "jkGetF0.h"
36 
37 int	    debug_level = 0;
38 
39 void free_dp_f0();
40 static int check_f0_params();
41 
42 int
43 Get_f0(Sound *sound, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])
44 {
45   float *fdata;
46   int done;
47   long buff_size, actsize;
48   double sf, start_time;
49   F0_params *par, *read_f0_params();
50   float *f0p, *vuvp, *rms_speech, *acpkp;
51   int i, vecsize;
52   int init_dp_f0(), dp_f0();
53   static int framestep = -1;
54   long sdstep = 0, total_samps;
55   int ndone = 0;
56   Tcl_Obj *list;
57   double framestep2 = 0.0, wind_dur;
58 
59   int arg, startpos = 0, endpos = -1, fmax, fmin;
60   static CONST84 char *subOptionStrings[] = {
61     "-start", "-end", "-maxpitch", "-minpitch", "-progress",
62     "-framelength", "-method", "-windowlength", NULL
63   };
64   enum subOptions {
65     START, END, F0MAX, F0MIN, PROGRESS, FRAME, METHOD, WINLEN
66   };
67 
68   if (sound->cmdPtr != NULL) {
69     Tcl_DecrRefCount(sound->cmdPtr);
70     sound->cmdPtr = NULL;
71   }
72 
73   par = (F0_params *) ckalloc(sizeof(F0_params));
74   par->cand_thresh = 0.3f;
75   par->lag_weight = 0.3f;
76   par->freq_weight = 0.02f;
77   par->trans_cost = 0.005f;
78   par->trans_amp = 0.5f;
79   par->trans_spec = 0.5f;
80   par->voice_bias = 0.0f;
81   par->double_cost = 0.35f;
82   par->min_f0 = 50;
83   par->max_f0 = 550;
84   par->frame_step = 0.01f;
85   par->wind_dur = 0.0075f;
86   par->n_cands = 20;
87   par->mean_f0 = 200;     /* unused */
88   par->mean_f0_weight = 0.0f;  /* unused */
89   par->conditioning = 0;    /*unused */
90 
91   for (arg = 2; arg < objc; arg += 2) {
92     int index;
93 
94     if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings,
95 			    "option", 0, &index) != TCL_OK) {
96       return TCL_ERROR;
97     }
98 
99     if (arg + 1 == objc) {
100       Tcl_AppendResult(interp, "No argument given for ",
101 		       subOptionStrings[index], " option", (char *) NULL);
102       return TCL_ERROR;
103     }
104 
105     switch ((enum subOptions) index) {
106     case START:
107       {
108 	if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK)
109 	  return TCL_ERROR;
110 	break;
111       }
112     case END:
113       {
114 	if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK)
115 	  return TCL_ERROR;
116 	break;
117       }
118     case F0MAX:
119       {
120 	if (Tcl_GetIntFromObj(interp, objv[arg+1], &fmax) != TCL_OK)
121 	  return TCL_ERROR;
122 	par->max_f0 = (float) fmax;
123 	break;
124       }
125     case F0MIN:
126       {
127 	if (Tcl_GetIntFromObj(interp, objv[arg+1], &fmin) != TCL_OK)
128 	  return TCL_ERROR;
129 	par->min_f0 = (float) fmin;
130 	break;
131       }
132     case PROGRESS:
133       {
134 	char *str = Tcl_GetStringFromObj(objv[arg+1], NULL);
135 
136 	if (strlen(str) > 0) {
137 	  Tcl_IncrRefCount(objv[arg+1]);
138 	  sound->cmdPtr = objv[arg+1];
139 	}
140 	break;
141       }
142     case FRAME:
143       {
144 	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &framestep2)
145 	    != TCL_OK)
146 	  return TCL_ERROR;
147 	par->frame_step = (float) framestep2;
148 	break;
149       }
150     case METHOD:
151       {
152 	break;
153       }
154     case WINLEN:
155       {
156 	if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &wind_dur)
157 	    != TCL_OK)
158 	  return TCL_ERROR;
159 	par->wind_dur = (float) wind_dur;
160 	break;
161       }
162     }
163   }
164   if (startpos < 0) startpos = 0;
165   if (endpos >= (sound->length - 1) || endpos == -1)
166     endpos = sound->length - 1;
167   if (startpos > endpos) return TCL_OK;
168 
169   sf = (double) sound->samprate;
170 
171   if (framestep > 0)  /* If a value was specified with -S, use it. */
172     par->frame_step = (float) (framestep / sf);
173   start_time = 0.0f;
174   if(check_f0_params(interp, par, sf)){
175     Tcl_AppendResult(interp, "invalid/inconsistent parameters -- exiting.", NULL);
176     return TCL_ERROR;
177   }
178 
179   total_samps = endpos - startpos + 1;
180   if(total_samps < ((par->frame_step * 2.0) + par->wind_dur) * sf) {
181     Tcl_AppendResult(interp, "input range too small for analysis by get_f0.", NULL);
182     return TCL_ERROR;
183   }
184   /* Initialize variables in get_f0.c; allocate data structures;
185    * determine length and overlap of input frames to read.
186    */
187   if (init_dp_f0(sf, par, &buff_size, &sdstep)
188       || buff_size > INT_MAX || sdstep > INT_MAX)
189   {
190     Tcl_AppendResult(interp, "problem in init_dp_f0().", NULL);
191     return TCL_ERROR;
192   }
193 
194   if (debug_level)
195     Fprintf(stderr, "init_dp_f0 returned buff_size %ld, sdstep %ld.\n",buff_size, sdstep);
196 
197   if (buff_size > total_samps)
198     buff_size = total_samps;
199 
200   actsize = min(buff_size, sound->length);
201   fdata = (float *) ckalloc(sizeof(float) * max(buff_size, sdstep));
202   list = Tcl_NewListObj(0, NULL);
203   Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 0.0);
204   ndone = startpos;
205 
206   while (TRUE) {
207     done = (actsize < buff_size) || (total_samps == buff_size);
208     Snack_GetSoundData(sound, ndone, fdata, actsize);
209     /*if (sound->debug > 0) Snack_WriteLog("dp_f0...\n");*/
210     if (dp_f0(fdata, (int) actsize, (int) sdstep, sf, par,
211 	      &f0p, &vuvp, &rms_speech, &acpkp, &vecsize, done)) {
212       Tcl_AppendResult(interp, "problem in dp_f0().", NULL);
213       return TCL_ERROR;
214     }
215     /*if (sound->debug > 0) Snack_WriteLogInt("done dp_f0",vecsize);*/
216     for (i = vecsize - 1; i >= 0; i--) {
217       Tcl_Obj *frameList;
218       frameList = Tcl_NewListObj(0, NULL);
219       Tcl_ListObjAppendElement(interp, list, frameList);
220       Tcl_ListObjAppendElement(interp, frameList,
221 			       Tcl_NewDoubleObj((double)f0p[i]));
222       Tcl_ListObjAppendElement(interp, frameList,
223 			       Tcl_NewDoubleObj((double)vuvp[i]));
224       Tcl_ListObjAppendElement(interp, frameList,
225 			       Tcl_NewDoubleObj((double)rms_speech[i]));
226       Tcl_ListObjAppendElement(interp, frameList,
227 			       Tcl_NewDoubleObj((double)acpkp[i]));
228     }
229 
230     if (done) break;
231 
232     ndone += sdstep;
233     actsize = min(buff_size, sound->length - ndone);
234     total_samps -= sdstep;
235 
236     if (actsize > total_samps)
237       actsize = total_samps;
238 
239     if (1) {
240       int res = Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", (double) ndone / sound->length);
241       if (res != TCL_OK) {
242 	return TCL_ERROR;
243       }
244     }
245   }
246 
247   Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 1.0);
248 
249   ckfree((void *)fdata);
250 
251   ckfree((void *)par);
252 
253   free_dp_f0();
254 
255   Tcl_SetObjResult(interp, list);
256 
257   return TCL_OK;
258 }
259 
260 
261 /*
262  * Some consistency checks on parameter values.
263  * Return a positive integer if any errors detected, 0 if none.
264  */
265 
266 static int
267 check_f0_params(Tcl_Interp *interp, F0_params *par, double sample_freq)
268 {
269   int	  error = 0;
270   double  dstep;
271 
272   if((par->cand_thresh < 0.01) || (par->cand_thresh > 0.99)) {
273     Tcl_AppendResult(interp,
274 	  "ERROR: cand_thresh parameter must be between [0.01, 0.99].",NULL);
275     error++;
276   }
277   if((par->wind_dur > .1) || (par->wind_dur < .0001)) {
278     Tcl_AppendResult(interp,"ERROR: wind_dur parameter must be between [0.0001, 0.1].",NULL);
279     error++;
280   }
281   if((par->n_cands > 100) || (par->n_cands < 3)){
282     Tcl_AppendResult(interp,
283 	    "ERROR: n_cands parameter must be between [3,100].",NULL);
284     error++;
285   }
286   if((par->max_f0 <= par->min_f0) || (par->max_f0 >= (sample_freq/2.0)) ||
287      (par->min_f0 < (sample_freq/10000.0))){
288     Tcl_AppendResult(interp,
289 	    "ERROR: min(max)_f0 parameter inconsistent with sampling frequency.",NULL);
290     error++;
291   }
292   dstep = ((double)((int)(0.5 + (sample_freq * par->frame_step))))/sample_freq;
293   if(dstep != par->frame_step) {
294     if(debug_level)
295       Tcl_AppendResult(interp,
296 	      "Frame step set to exactly match signal sample rate.",NULL);
297     par->frame_step = (float) dstep;
298   }
299   if((par->frame_step > 0.1) || (par->frame_step < (1.0/sample_freq))){
300     Tcl_AppendResult(interp,
301 	    "ERROR: frame_step parameter must be between [1/sampling rate, 0.1].",NULL);
302     error++;
303   }
304 
305   return(error);
306 }
307 
308 static void get_cand(), peak(), do_ffir();
309 static int lc_lin_fir(), downsamp();
310 
311 /* ----------------------------------------------------------------------- */
312 void get_fast_cands(fdata, fdsdata, ind, step, size, dec, start, nlags, engref, maxloc, maxval, cp, peaks, locs, ncand, par)
313      float *fdata, *fdsdata, *engref, *maxval, *peaks;
314      int size, start, nlags, *maxloc, *locs, *ncand, ind, step, dec;
315      Cross *cp;
316      F0_params *par;
317 {
318   int decind, decstart, decnlags, decsize, i, j, *lp;
319   float *corp, xp, yp, lag_wt;
320   register float *pe;
321 
322   lag_wt = par->lag_weight/nlags;
323   decnlags = 1 + (nlags/dec);
324   if((decstart = start/dec) < 1) decstart = 1;
325   decind = (ind * step)/dec;
326   decsize = 1 + (size/dec);
327   corp = cp->correl;
328 
329   crossf(fdsdata + decind, decsize, decstart, decnlags, engref, maxloc,
330 	maxval, corp);
331   cp->maxloc = *maxloc;	/* location of maximum in correlation */
332   cp->maxval = *maxval;	/* max. correlation value (found at maxloc) */
333   cp->rms = (float) sqrt(*engref/size); /* rms in reference window */
334   cp->firstlag = decstart;
335 
336   get_cand(cp,peaks,locs,decnlags,ncand,par->cand_thresh); /* return high peaks in xcorr */
337 
338   /* Interpolate to estimate peak locations and values at high sample rate. */
339   for(i = *ncand, lp = locs, pe = peaks; i--; pe++, lp++) {
340     j = *lp - decstart - 1;
341     peak(&corp[j],&xp,&yp);
342     *lp = (*lp * dec) + (int)(0.5+(xp*dec)); /* refined lag */
343     *pe = yp*(1.0f - (lag_wt* *lp)); /* refined amplitude */
344   }
345 
346   if(*ncand >= par->n_cands) {	/* need to prune candidates? */
347     register int *loc, *locm, lt;
348     register float smaxval, *pem;
349     register int outer, inner, lim;
350     for(outer=0, lim = par->n_cands-1; outer < lim; outer++)
351       for(inner = *ncand - 1 - outer,
352 	  pe = peaks + (*ncand) -1, pem = pe-1,
353 	  loc = locs + (*ncand) - 1, locm = loc-1;
354 	  inner--;
355 	  pe--,pem--,loc--,locm--)
356 	if((smaxval = *pe) > *pem) {
357 	  *pe = *pem;
358 	  *pem = smaxval;
359 	  lt = *loc;
360 	  *loc = *locm;
361 	  *locm = lt;
362 	}
363     *ncand = par->n_cands-1;  /* leave room for the unvoiced hypothesis */
364   }
365   crossfi(fdata + (ind * step), size, start, nlags, 7, engref, maxloc,
366 	  maxval, corp, locs, *ncand);
367 
368   cp->maxloc = *maxloc;	/* location of maximum in correlation */
369   cp->maxval = *maxval;	/* max. correlation value (found at maxloc) */
370   cp->rms = (float) sqrt(*engref/size); /* rms in reference window */
371   cp->firstlag = start;
372   get_cand(cp,peaks,locs,nlags,ncand,par->cand_thresh); /* return high peaks in xcorr */
373     if(*ncand >= par->n_cands) {	/* need to prune candidates again? */
374     register int *loc, *locm, lt;
375     register float smaxval, *pe, *pem;
376     register int outer, inner, lim;
377     for(outer=0, lim = par->n_cands-1; outer < lim; outer++)
378       for(inner = *ncand - 1 - outer,
379 	  pe = peaks + (*ncand) -1, pem = pe-1,
380 	  loc = locs + (*ncand) - 1, locm = loc-1;
381 	  inner--;
382 	  pe--,pem--,loc--,locm--)
383 	if((smaxval = *pe) > *pem) {
384 	  *pe = *pem;
385 	  *pem = smaxval;
386 	  lt = *loc;
387 	  *loc = *locm;
388 	  *locm = lt;
389 	}
390     *ncand = par->n_cands - 1;  /* leave room for the unvoiced hypothesis */
391   }
392 }
393 
394 /* ----------------------------------------------------------------------- */
395 float *downsample(input,samsin,state_idx,freq,samsout,decimate, first_time, last_time)
396      double freq;
397      float *input;
398       int samsin, *samsout, decimate, state_idx, first_time, last_time;
399 {
400   static float	b[2048];
401   static float *foutput = NULL;
402   float	beta = 0.0f;
403   static int	ncoeff = 127, ncoefft = 0;
404   int init;
405 
406   if(input && (samsin > 0) && (decimate > 0) && *samsout) {
407     if(decimate == 1) {
408       return(input);
409     }
410 
411     if(first_time){
412       int nbuff = (samsin/decimate) + (2*ncoeff);
413 
414       ncoeff = ((int)(freq * .005)) | 1;
415       beta = .5f/decimate;
416       foutput = (float*)ckrealloc((void *)foutput, sizeof(float) * nbuff);
417       /*      spsassert(foutput, "Can't allocate foutput in downsample");*/
418       for( ; nbuff > 0 ;)
419 	foutput[--nbuff] = 0.0;
420 
421       if( !lc_lin_fir(beta,&ncoeff,b)) {
422 	fprintf(stderr,"\nProblems computing interpolation filter\n");
423 	ckfree((void *)foutput);
424 	return(NULL);
425       }
426       ncoefft = (ncoeff/2) + 1;
427     }		    /*  endif new coefficients need to be computed */
428 
429     if(first_time) init = 1;
430     else if (last_time) init = 2;
431     else init = 0;
432 
433     if(downsamp(input,foutput,samsin,samsout,state_idx,decimate,ncoefft,b,init)) {
434       return(foutput);
435     } else
436       Fprintf(stderr,"Problems in downsamp() in downsample()\n");
437   } else
438     Fprintf(stderr,"Bad parameters passed to downsample()\n");
439 
440   return(NULL);
441 }
442 
443 /* ----------------------------------------------------------------------- */
444 /* Get likely candidates for F0 peaks. */
445 static void get_cand(cross,peak,loc,nlags,ncand,cand_thresh)
446      Cross *cross;
447      float *peak, cand_thresh;
448      int *loc;
449      int  *ncand, nlags;
450 {
451   register int i, lastl, *t;
452   register float o, p, q, *r, *s, clip;
453   int start, ncan, maxl;
454 
455   clip = (float) (cand_thresh * cross->maxval);
456   maxl = cross->maxloc;
457   lastl = nlags - 2;
458   start = cross->firstlag;
459 
460   r = cross->correl;
461   o= *r++;			/* first point */
462   q = *r++;	                /* middle point */
463   p = *r++;
464   s = peak;
465   t = loc;
466   ncan=0;
467   for(i=1; i < lastl; i++, o=q, q=p, p= *r++){
468     if((q > clip) &&		/* is this a high enough value? */
469       (q >= p) && (q >= o)){ /* NOTE: this finds SHOLDERS and PLATEAUS
470 				      as well as peaks (is this a good idea?) */
471 	*s++ = q;		/* record the peak value */
472 	*t++ = i + start;	/* and its location */
473 	ncan++;			/* count number of peaks found */
474       }
475   }
476 /*
477   o = q;
478   q = p;
479   if( (q > clip) && (q >=0)){
480     *s++ = q;
481     *t++ = i+start;
482     ncan++;
483   }
484 */
485   *ncand = ncan;
486 }
487 
488 /* ----------------------------------------------------------------------- */
489 /* buffer-to-buffer downsample operation */
490 /* This is STRICTLY a decimator! (no upsample) */
491 static int downsamp(in, out, samples, outsamps, state_idx, decimate, ncoef, fc, init)
492      float *in, *out;
493      int samples, *outsamps, decimate, ncoef, state_idx;
494      float fc[];
495      int init;
496 {
497   if(in && out) {
498     do_ffir(in, samples, out, outsamps, state_idx, ncoef, fc, 0, decimate, init);
499     return(TRUE);
500   } else
501     printf("Bad signal(s) passed to downsamp()\n");
502   return(FALSE);
503 }
504 
505 /*      ----------------------------------------------------------      */
506 static void do_ffir(buf,in_samps,bufo,out_samps,idx, ncoef,fc,invert,skip,init)
507 /* fc contains 1/2 the coefficients of a symmetric FIR filter with unity
508     passband gain.  This filter is convolved with the signal in buf.
509     The output is placed in buf2.  If(invert), the filter magnitude
510     response will be inverted.  If(init&1), beginning of signal is in buf;
511     if(init&2), end of signal is in buf.  out_samps is set to the number of
512     output points placed in bufo. */
513 register float	*buf, *bufo;
514 float *fc;
515 register int in_samps, ncoef, invert, skip, init, *out_samps;
516 int idx;
517 {
518   register float *dp1, *dp2, *dp3, sum, integral;
519   static float *co=NULL, *mem=NULL;
520   static float state[1000];
521   static int fsize=0, resid=0;
522   register int i, j, k, l;
523   register float *sp;
524   register float *buf1;
525 
526   buf1 = buf;
527   if(ncoef > fsize) {/*allocate memory for full coeff. array and filter memory */    fsize = 0;
528     i = (ncoef+1)*2;
529     if(!((co = (float *)ckrealloc((void *)co, sizeof(float)*i)) &&
530 	 (mem = (float *)ckrealloc((void *)mem, sizeof(float)*i)))) {
531       fprintf(stderr,"allocation problems in do_fir()\n");
532       return;
533     }
534     fsize = ncoef;
535   }
536 
537   /* fill 2nd half with data */
538   for(i=ncoef, dp1=mem+ncoef-1; i-- > 0; )  *dp1++ = *buf++;
539 
540   if(init & 1) {	/* Is the beginning of the signal in buf? */
541     /* Copy the half-filter and its mirror image into the coefficient array. */
542     for(i=ncoef-1, dp3=fc+ncoef-1, dp2=co, dp1 = co+((ncoef-1)*2),
543 	integral = 0.0; i-- > 0; )
544       if(!invert) *dp1-- = *dp2++ = *dp3--;
545       else {
546 	integral += (sum = *dp3--);
547 	*dp1-- = *dp2++ = -sum;
548       }
549     if(!invert)  *dp1 = *dp3;	/* point of symmetry */
550     else {
551       integral *= 2;
552       integral += *dp3;
553       *dp1 = integral - *dp3;
554     }
555 
556     for(i=ncoef-1, dp1=mem; i-- > 0; ) *dp1++ = 0;
557   }
558   else
559     for(i=ncoef-1, dp1=mem, sp=state; i-- > 0; ) *dp1++ = *sp++;
560 
561   i = in_samps;
562   resid = 0;
563 
564   k = (ncoef << 1) -1;	/* inner-product loop limit */
565 
566   if(skip <= 1) {       /* never used */
567 /*    *out_samps = i;
568     for( ; i-- > 0; ) {
569       for(j=k, dp1=mem, dp2=co, dp3=mem+1, sum = 0.0; j-- > 0;
570 	  *dp1++ = *dp3++ )
571 	sum += *dp2++ * *dp1;
572 
573       *--dp1 = *buf++;
574       *bufo++ = (sum < 0.0)? sum -0.5 : sum +0.5;
575     }
576     if(init & 2) {
577       for(i=ncoef; i-- > 0; ) {
578 	for(j=k, dp1=mem, dp2=co, dp3=mem+1, sum = 0.0; j-- > 0;
579 	    *dp1++ = *dp3++ )
580 	  sum += *dp2++ * *dp1;
581 	*--dp1 = 0.0;
582 	*bufo++ = (sum < 0)? sum -0.5 : sum +0.5;
583       }
584       *out_samps += ncoef;
585     }
586     return;
587 */
588   }
589   else {			/* skip points (e.g. for downsampling) */
590     /* the buffer end is padded with (ncoef-1) data points */
591     for( l=0 ; l < *out_samps; l++ ) {
592       for(j=k-skip, dp1=mem, dp2=co, dp3=mem+skip, sum=0.0; j-- >0;
593 	  *dp1++ = *dp3++)
594 	sum += *dp2++ * *dp1;
595       for(j=skip; j-- >0; *dp1++ = *buf++) /* new data to memory */
596 	sum += *dp2++ * *dp1;
597       *bufo++ = (sum<0.0) ? sum -0.5f : sum +0.5f;
598     }
599     if(init & 2){
600       resid = in_samps - *out_samps * skip;
601       for(l=resid/skip; l-- >0; ){
602 	for(j=k-skip, dp1=mem, dp2=co, dp3=mem+skip, sum=0.0; j-- >0;
603 	    *dp1++ = *dp3++)
604 	    sum += *dp2++ * *dp1;
605 	for(j=skip; j-- >0; *dp1++ = 0.0)
606 	  sum += *dp2++ * *dp1;
607 	*bufo++ = (sum<0.0) ? sum -0.5f : sum +0.5f;
608 	(*out_samps)++;
609       }
610     }
611     else
612       for(dp3=buf1+idx-ncoef+1, l=ncoef-1, sp=state; l-- >0; ) *sp++ = *dp3++;
613   }
614 }
615 
616 /*      ----------------------------------------------------------      */
617 static int lc_lin_fir(fc,nf,coef)
618 /* create the coefficients for a symmetric FIR lowpass filter using the
619    window technique with a Hanning window. */
620 register float	fc;
621 float	*coef;
622 int	*nf;
623 {
624     register int	i, n;
625     register double	twopi, fn, c;
626 
627     if(((*nf % 2) != 1))
628 	*nf = *nf + 1;
629     n = (*nf + 1)/2;
630 
631     /*  Compute part of the ideal impulse response (the sin(x)/x kernel). */
632     twopi = M_PI * 2.0;
633     coef[0] = (float) (2.0 * fc);
634     c = M_PI;
635     fn = twopi * fc;
636     for(i=1;i < n; i++) coef[i] = (float)(sin(i * fn)/(c * i));
637 
638     /* Now apply a Hanning window to the (infinite) impulse response. */
639     /* (Probably should use a better window, like Kaiser...) */
640     fn = twopi/(double)(*nf);
641     for(i=0;i<n;i++)
642 	coef[n-i-1] *= (float)((.5 - (.5 * cos(fn * ((double)i + 0.5)))));
643 
644     return(TRUE);
645 }
646 
647 
648 /* ----------------------------------------------------------------------- */
649 /* Use parabolic interpolation over the three points defining the peak
650  * vicinity to estimate the "true" peak. */
651 static void peak(y, xp, yp)
652      float *y,			/* vector of length 3 defining peak */
653        *xp, *yp;  /* x,y values of parabolic peak fitting the input points. */
654 {
655   register float a, c;
656 
657   a = (float)((y[2]-y[1])+(.5*(y[0]-y[2])));
658   if(fabs(a) > .000001) {
659     *xp = c = (float)((y[0]-y[2])/(4.0*a));
660     *yp = y[1] - (a*c*c);
661   } else {
662     *xp = 0.0;
663     *yp = y[1];
664   }
665 }
666 
667 /* A fundamental frequency estimation algorithm using the normalized
668    cross correlation function and dynamic programming.  The algorithm
669    implemented here is similar to that presented by B. Secrest and
670    G. Doddington, "An integrated pitch tracking algorithm for speech
671    systems", Proc. ICASSP-83, pp.1352-1355.  It is fully described
672    by D. Talkin, "A robust algorithm for ptich tracking (RAPT)", in
673    W. B. Kleijn & K. K. Paliwal (eds.) Speech Coding and Synthesis,
674    (New York: Elsevier, 1995). */
675 
676 /* For each frame, up to par->n_cands cross correlation peaks are
677    considered as F0 intervals.  Each is scored according to its within-
678    frame properties (relative amplitude, relative location), and
679    according to its connectivity with each of the candidates in the
680    previous frame.  An unvoiced hypothesis is also generated at each
681    frame and is considered in the light of voicing state change cost,
682    the quality of the cross correlation peak, and frequency continuity. */
683 
684 /* At each frame, each candidate has associated with it the following
685    items:
686 	its peak value
687 	its peak value modified by its within-frame properties
688 	its location
689 	the candidate # in the previous frame yielding the min. err.
690 		(this is the optimum path pointer!)
691 	its cumulative cost: (local cost + connectivity cost +
692 		cumulative cost of its best-previous-frame-match). */
693 
694 /* Dynamic programming is then used to pick the best F0 trajectory and voicing
695    state given the local and transition costs for the entire utterance. */
696 
697 /* To avoid the necessity of computing the full crosscorrelation at
698    the input sample rate, the signal is downsampled; a full ccf is
699    computed at the lower frequency; interpolation is used to estimate the
700    location of the peaks at the higher sample rate; and the fine-grained
701    ccf is computed only in the vicinity of these estimated peak
702    locations. */
703 
704 
705 /*
706  * READ_SIZE: length of input data frame in sec to read
707  * DP_CIRCULAR: determines the initial size of DP circular buffer in sec
708  * DP_HIST: stored frame history in second before checking for common path
709  *      DP_CIRCULAR > READ_SIZE, DP_CIRCULAR at least 2 times of DP_HIST
710  * DP_LIMIT: in case no convergence is found, DP frames of DP_LIMIT secs
711  *      are kept before output is forced by simply picking the lowest cost
712  *      path
713  */
714 
715 #define READ_SIZE 0.2
716 #define DP_CIRCULAR 1.5
717 #define DP_HIST 0.5
718 #define DP_LIMIT 1.0
719 
720 /*
721  * stationarity parameters -
722  * STAT_WSIZE: window size in sec used in measuring frame energy/stationarity
723  * STAT_AINT: analysis interval in sec in measuring frame energy/stationarity
724  */
725 #define STAT_WSIZE 0.030
726 #define STAT_AINT 0.020
727 
728 /*
729  * headF points to current frame in the circular buffer,
730  * tailF points to the frame where tracks start
731  * cmpthF points to starting frame of converged path to backtrack
732  */
733 
734 static Frame *headF = NULL, *tailF = NULL, *cmpthF = NULL;
735 
736 static  int *pcands = NULL;	/* array for backtracking in convergence check */
737 static int cir_buff_growth_count = 0;
738 
739 static int size_cir_buffer,	/* # of frames in circular DP buffer */
740            size_frame_hist,	/* # of frames required before convergence test */
741            size_frame_out,	/* # of frames before forcing output */
742            num_active_frames,	/* # of frames from tailF to headF */
743            output_buf_size;	/* # of frames allocated to output buffers */
744 
745 /*
746  * DP parameters
747  */
748 static float tcost, tfact_a, tfact_s, frame_int, vbias, fdouble, wdur, ln2,
749              freqwt, lagwt;
750 static int step, size, nlags, start, stop, ncomp, *locs = NULL;
751 static short maxpeaks;
752 
753 static int wReuse = 0;  /* number of windows seen before resued */
754 static Windstat *windstat = NULL;
755 
756 static float *f0p = NULL, *vuvp = NULL, *rms_speech = NULL,
757              *acpkp = NULL, *peaks = NULL;
758 static int first_time = 1, pad;
759 
760 
761 /*--------------------------------------------------------------------*/
762 int
763 get_Nframes(buffsize, pad, step)
764     long    buffsize;
765     int     pad, step;
766 {
767   if (buffsize < pad)
768     return (0);
769   else
770     return ((buffsize - pad)/step);
771 }
772 
773 
774 /*--------------------------------------------------------------------*/
775 int
776 init_dp_f0(freq, par, buffsize, sdstep)
777     double	freq;
778     F0_params	*par;
779     long	*buffsize, *sdstep;
780 {
781   int nframes;
782   int i;
783   int stat_wsize, agap, ind, downpatch;
784 
785 /*
786  * reassigning some constants
787  */
788 
789   tcost = par->trans_cost;
790   tfact_a = par->trans_amp;
791   tfact_s = par->trans_spec;
792   vbias = par->voice_bias;
793   fdouble = par->double_cost;
794   frame_int = par->frame_step;
795 
796   step = eround(frame_int * freq);
797   size = eround(par->wind_dur * freq);
798   frame_int = (float)(((float)step)/freq);
799   wdur = (float)(((float)size)/freq);
800   start = eround(freq / par->max_f0);
801   stop = eround(freq / par->min_f0);
802   nlags = stop - start + 1;
803   ncomp = size + stop + 1; /* # of samples required by xcorr
804 			      comp. per fr. */
805   maxpeaks = 2 + (nlags/2);	/* maximum number of "peaks" findable in ccf */
806   ln2 = (float)log(2.0);
807   size_frame_hist = (int) (DP_HIST / frame_int);
808   size_frame_out = (int) (DP_LIMIT / frame_int);
809 
810 /*
811  * SET UP THE D.P. WEIGHTING FACTORS:
812  *      The intent is to make the effectiveness of the various fudge factors
813  *      independent of frame rate or sampling frequency.
814  */
815 
816   /* Lag-dependent weighting factor to emphasize early peaks (higher freqs)*/
817   lagwt = par->lag_weight/stop;
818 
819   /* Penalty for a frequency skip in F0 per frame */
820   freqwt = par->freq_weight/frame_int;
821 
822   i = (int) (READ_SIZE *freq);
823   if(ncomp >= step) nframes = ((i-ncomp)/step ) + 1;
824   else nframes = i / step;
825 
826   /* *buffsize is the number of samples needed to make F0 computation
827      of nframes DP frames possible.  The last DP frame is patched with
828      enough points so that F0 computation on it can be carried.  F0
829      computaion on each frame needs enough points to do
830 
831      1) xcross or cross correlation measure:
832            enough points to do xcross - ncomp
833 
834      2) stationarity measure:
835            enough to make 30 msec windowing possible - ind
836 
837      3) downsampling:
838            enough to make filtering possible -- downpatch
839 
840      So there are nframes whole DP frames, padded with pad points
841      to make the last frame F0 computation ok.
842 
843   */
844 
845   /* last point in data frame needs points of 1/2 downsampler filter length
846      long, 0.005 is the filter length used in downsampler */
847   downpatch = (((int) (freq * 0.005))+1) / 2;
848 
849   stat_wsize = (int) (STAT_WSIZE * freq);
850   agap = (int) (STAT_AINT * freq);
851   ind = ( agap - stat_wsize ) / 2;
852   i = stat_wsize + ind;
853   pad = downpatch + ((i>ncomp) ? i:ncomp);
854   *buffsize = nframes * step + pad;
855   *sdstep = nframes * step;
856 
857   /* Allocate space for the DP storage circularly linked data structure */
858 
859   size_cir_buffer = (int) (DP_CIRCULAR / frame_int);
860 
861   /* creating circularly linked data structures */
862   tailF = alloc_frame(nlags, par->n_cands);
863   headF = tailF;
864 
865   /* link them up */
866   for(i=1; i<size_cir_buffer; i++){
867     headF->next = alloc_frame(nlags, par->n_cands);
868     headF->next->prev = headF;
869     headF = headF->next;
870   }
871   headF->next = tailF;
872   tailF->prev = headF;
873 
874   headF = tailF;
875 
876   /* Allocate sscratch array to use during backtrack convergence test. */
877   if( ! pcands ) {
878     pcands = (int *) ckalloc( par->n_cands * sizeof(int));
879     /*    spsassert(pcands,"can't allocate pathcands");*/
880   }
881 
882   /* Allocate arrays to return F0 and related signals. */
883 
884   /* Note: remember to compare *vecsize with size_frame_out, because
885      size_cir_buffer is not constant */
886   output_buf_size = size_cir_buffer;
887   rms_speech = (float*)ckalloc(sizeof(float) * output_buf_size);
888   /*  spsassert(rms_speech,"rms_speech ckalloc failed");*/
889   f0p = (float*)ckalloc(sizeof(float) * output_buf_size);
890   /*  spsassert(f0p,"f0p ckalloc failed");*/
891   vuvp = (float*)ckalloc(sizeof(float)* output_buf_size);
892   /*  spsassert(vuvp,"vuvp ckalloc failed");*/
893   acpkp = (float*)ckalloc(sizeof(float) * output_buf_size);
894   /*  spsassert(acpkp,"acpkp ckalloc failed");*/
895 
896   /* Allocate space for peak location and amplitude scratch arrays. */
897   peaks = (float*)ckalloc(sizeof(float) * maxpeaks);
898   /*  spsassert(peaks,"peaks ckalloc failed");*/
899   locs = (int*)ckalloc(sizeof(int) * maxpeaks);
900   /*  spsassert(locs, "locs ckalloc failed");*/
901 
902   /* Initialise the retrieval/saving scheme of window statistic measures */
903   wReuse = agap / step;
904   if (wReuse){
905       windstat = (Windstat *) ckalloc( wReuse * sizeof(Windstat));
906       /*      spsassert(windstat, "windstat ckalloc failed");*/
907       for(i=0; i<wReuse; i++){
908 	  windstat[i].err = 0;
909 	  windstat[i].rms = 0;
910       }
911   }
912 
913   if(debug_level){
914     Fprintf(stderr, "done with initialization:\n");
915     Fprintf(stderr,
916 	    " size_cir_buffer:%d  xcorr frame size:%d start lag:%d nlags:%d\n",
917 	    size_cir_buffer, size, start, nlags);
918   }
919 
920   num_active_frames = 0;
921   first_time = 1;
922 
923   return(0);
924 }
925 
926 static Stat *get_stationarity();
927 
928 /*--------------------------------------------------------------------*/
929 int
930 dp_f0(fdata, buff_size, sdstep, freq,
931       par, f0p_pt, vuvp_pt, rms_speech_pt, acpkp_pt, vecsize, last_time)
932     float	*fdata;
933     int		buff_size, sdstep;
934     double	freq;
935     F0_params	*par;		/* analysis control parameters */
936     float	**f0p_pt, **vuvp_pt, **rms_speech_pt, **acpkp_pt;
937     int		*vecsize, last_time;
938 {
939   float  maxval, engref, *sta, *rms_ratio, *dsdata, *downsample();
940   register float ttemp, ftemp, ft1, ferr, err, errmin;
941   register int  i, j, k, loc1, loc2;
942   int   nframes, maxloc, ncand, ncandp, minloc,
943         decimate, samsds;
944 
945   Stat *stat = NULL;
946 
947   nframes = get_Nframes((long) buff_size, pad, step); /* # of whole frames */
948 
949   if(debug_level)
950     Fprintf(stderr,
951 	    "******* Computing %d dp frames ******** from %d points\n", nframes, buff_size);
952 
953   /* Now downsample the signal for coarse peak estimates. */
954 
955   decimate = (int)(freq/2000.0);    /* downsample to about 2kHz */
956   if (decimate <= 1)
957     dsdata = fdata;
958   else {
959     samsds = ((nframes-1) * step + ncomp) / decimate;
960     dsdata = downsample(fdata, buff_size, sdstep, freq, &samsds, decimate,
961 			first_time, last_time);
962     if (!dsdata) {
963       Fprintf(stderr, "can't get downsampled data.\n");
964       return 1;
965     }
966   }
967 
968   /* Get a function of the "stationarity" of the speech signal. */
969 
970   stat = get_stationarity(fdata, freq, buff_size, nframes, step, first_time);
971   if (!stat) {
972     Fprintf(stderr, "can't get stationarity\n");
973     return(1);
974   }
975   sta = stat->stat;
976   rms_ratio = stat->rms_ratio;
977 
978   /***********************************************************************/
979   /* MAIN FUNDAMENTAL FREQUENCY ESTIMATION LOOP */
980   /***********************************************************************/
981   if(!first_time && nframes > 0) headF = headF->next;
982 
983   for(i = 0; i < nframes; i++) {
984 
985     /* NOTE: This buffer growth provision is probably not necessary.
986        It was put in (with errors) by Derek Lin and apparently never
987        tested.  My tests and analysis suggest it is completely
988        superfluous. DT 9/5/96 */
989     /* Dynamically allocating more space for the circular buffer */
990     if(headF == tailF->prev){
991       Frame *frm;
992 
993       if(cir_buff_growth_count > 5){
994 	Fprintf(stderr,
995 		"too many requests (%d) for dynamically allocating space.\n   There may be a problem in finding converged path.\n",cir_buff_growth_count);
996 	return(1);
997       }
998       if(debug_level)
999 	Fprintf(stderr, "allocating %d more frames for DP circ. buffer.\n", size_cir_buffer);
1000       frm = alloc_frame(nlags, par->n_cands);
1001       headF->next = frm;
1002       frm->prev = headF;
1003       for(k=1; k<size_cir_buffer; k++){
1004 	frm->next = alloc_frame(nlags, par->n_cands);
1005 	frm->next->prev = frm;
1006 	frm = frm->next;
1007       }
1008       frm->next = tailF;
1009       tailF->prev = frm;
1010       cir_buff_growth_count++;
1011     }
1012 
1013     headF->rms = stat->rms[i];
1014     get_fast_cands(fdata, dsdata, i, step, size, decimate, start,
1015 		   nlags, &engref, &maxloc,
1016 		   &maxval, headF->cp, peaks, locs, &ncand, par);
1017 
1018     /*    Move the peak value and location arrays into the dp structure */
1019     {
1020       register float *ftp1, *ftp2;
1021       register short *sp1;
1022       register int *sp2;
1023 
1024       for(ftp1 = headF->dp->pvals, ftp2 = peaks,
1025 	  sp1 = headF->dp->locs, sp2 = locs, j=ncand; j--; ) {
1026 	*ftp1++ = *ftp2++;
1027 	*sp1++ = *sp2++;
1028       }
1029       *sp1 = -1;		/* distinguish the UNVOICED candidate */
1030       *ftp1 = maxval;
1031       headF->dp->mpvals[ncand] = vbias+maxval; /* (high cost if cor. is high)*/
1032     }
1033 
1034     /* Apply a lag-dependent weight to the peaks to encourage the selection
1035        of the first major peak.  Translate the modified peak values into
1036        costs (high peak ==> low cost). */
1037     for(j=0; j < ncand; j++){
1038       ftemp = 1.0f - ((float)locs[j] * lagwt);
1039       headF->dp->mpvals[j] = 1.0f - (peaks[j] * ftemp);
1040     }
1041     ncand++;			/* include the unvoiced candidate */
1042     headF->dp->ncands = ncand;
1043 
1044     /*********************************************************************/
1045     /*    COMPUTE THE DISTANCE MEASURES AND ACCUMULATE THE COSTS.       */
1046     /*********************************************************************/
1047 
1048     ncandp = headF->prev->dp->ncands;
1049     for(k=0; k<ncand; k++){	/* for each of the current candidates... */
1050       minloc = 0;
1051       errmin = FLT_MAX;
1052       if((loc2 = headF->dp->locs[k]) > 0) { /* current cand. is voiced */
1053 	for(j=0; j<ncandp; j++){ /* for each PREVIOUS candidate... */
1054 	  /*    Get cost due to inter-frame period change. */
1055 	  loc1 = headF->prev->dp->locs[j];
1056 	  if (loc1 > 0) { /* prev. was voiced */
1057 	    ftemp = (float) log(((double) loc2) / loc1);
1058 	    ttemp = (float) fabs(ftemp);
1059 	    ft1 = (float) (fdouble + fabs(ftemp + ln2));
1060 	    if (ttemp > ft1)
1061 	      ttemp = ft1;
1062 	    ft1 = (float) (fdouble + fabs(ftemp - ln2));
1063 	    if (ttemp > ft1)
1064 	      ttemp = ft1;
1065 	    ferr = ttemp * freqwt;
1066 	  } else {		/* prev. was unvoiced */
1067 	    ferr = tcost + (tfact_s * sta[i]) + (tfact_a / rms_ratio[i]);
1068 	  }
1069 	  /*    Add in cumulative cost associated with previous peak. */
1070 	  err = ferr + headF->prev->dp->dpvals[j];
1071 	  if(err < errmin){	/* find min. cost */
1072 	    errmin = err;
1073 	    minloc = j;
1074 	  }
1075 	}
1076       } else {			/* this is the unvoiced candidate */
1077 	for(j=0; j<ncandp; j++){ /* for each PREVIOUS candidate... */
1078 
1079 	  /*    Get voicing transition cost. */
1080 	  if (headF->prev->dp->locs[j] > 0) { /* previous was voiced */
1081 	    ferr = tcost + (tfact_s * sta[i]) + (tfact_a * rms_ratio[i]);
1082 	  }
1083 	  else
1084 	    ferr = 0.0;
1085 	  /*    Add in cumulative cost associated with previous peak. */
1086 	  err = ferr + headF->prev->dp->dpvals[j];
1087 	  if(err < errmin){	/* find min. cost */
1088 	    errmin = err;
1089 	    minloc = j;
1090 	  }
1091 	}
1092       }
1093       /* Now have found the best path from this cand. to prev. frame */
1094       if (first_time && i==0) {		/* this is the first frame */
1095 	headF->dp->dpvals[k] = headF->dp->mpvals[k];
1096 	headF->dp->prept[k] = 0;
1097       } else {
1098 	headF->dp->dpvals[k] = errmin + headF->dp->mpvals[k];
1099 	headF->dp->prept[k] = minloc;
1100       }
1101     } /*    END OF THIS DP FRAME */
1102 
1103     if (i < nframes - 1)
1104       headF = headF->next;
1105 
1106     if (debug_level >= 2) {
1107       Fprintf(stderr,"%d engref:%10.0f max:%7.5f loc:%4d\n",
1108 	      i,engref,maxval,maxloc);
1109     }
1110 
1111   } /* end for (i ...) */
1112 
1113   /***************************************************************/
1114   /* DONE WITH FILLING DP STRUCTURES FOR THE SET OF SAMPLED DATA */
1115   /*    NOW FIND A CONVERGED DP PATH                             */
1116   /***************************************************************/
1117 
1118   *vecsize = 0;			/* # of output frames returned */
1119 
1120   num_active_frames += nframes;
1121 
1122   if( num_active_frames >= size_frame_hist  || last_time ){
1123     Frame *frm;
1124     int  num_paths, best_cand, frmcnt, checkpath_done = 1;
1125     float patherrmin;
1126 
1127     if(debug_level)
1128       Fprintf(stderr, "available frames for backtracking: %d\n",
1129 num_active_frames);
1130 
1131     patherrmin = FLT_MAX;
1132     best_cand = 0;
1133     num_paths = headF->dp->ncands;
1134 
1135     /* Get the best candidate for the final frame and initialize the
1136        paths' backpointers. */
1137     frm = headF;
1138     for(k=0; k < num_paths; k++) {
1139       if (patherrmin > headF->dp->dpvals[k]){
1140 	patherrmin = headF->dp->dpvals[k];
1141 	best_cand = k;	/* index indicating the best candidate at a path */
1142       }
1143       pcands[k] = frm->dp->prept[k];
1144     }
1145 
1146     if(last_time){     /* Input data was exhausted. force final outputs. */
1147       cmpthF = headF;		/* Use the current frame as starting point. */
1148     } else {
1149       /* Starting from the most recent frame, trace back each candidate's
1150 	 best path until reaching a common candidate at some past frame. */
1151       frmcnt = 0;
1152       while (1) {
1153 	frm = frm->prev;
1154 	frmcnt++;
1155 	checkpath_done = 1;
1156 	for(k=1; k < num_paths; k++){ /* Check for convergence. */
1157 	  if(pcands[0] != pcands[k])
1158 	    checkpath_done = 0;
1159 	}
1160 	if( ! checkpath_done) { /* Prepare for checking at prev. frame. */
1161 	  for(k=0; k < num_paths; k++){
1162 	    pcands[k] = frm->dp->prept[pcands[k]];
1163 	  }
1164 	} else {	/* All paths have converged. */
1165 	  cmpthF = frm;
1166 	  best_cand = pcands[0];
1167 	  if(debug_level)
1168 	    Fprintf(stderr,
1169 		    "paths went back %d frames before converging\n",frmcnt);
1170 	  break;
1171 	}
1172 	if(frm == tailF){	/* Used all available data? */
1173 	  if( num_active_frames < size_frame_out) { /* Delay some more? */
1174 	    checkpath_done = 0; /* Yes, don't backtrack at this time. */
1175 	    cmpthF = NULL;
1176 	  } else {		/* No more delay! Force best-guess output. */
1177 	    checkpath_done = 1;
1178 	    cmpthF = headF;
1179 	    /*	    Fprintf(stderr,
1180 		    "WARNING: no converging path found after going back %d frames, will use the lowest cost path\n",num_active_frames);*/
1181 	  }
1182 	  break;
1183 	} /* end if (frm ...) */
1184       }	/* end while (1) */
1185     } /* end if (last_time) ... else */
1186 
1187     /*************************************************************/
1188     /* BACKTRACKING FROM cmpthF (best_cand) ALL THE WAY TO tailF    */
1189     /*************************************************************/
1190     i = 0;
1191     frm = cmpthF;	/* Start where convergence was found (or faked). */
1192     while( frm != tailF->prev && checkpath_done){
1193       if( i == output_buf_size ){ /* Need more room for outputs? */
1194 	output_buf_size *= 2;
1195 	if(debug_level)
1196 	  Fprintf(stderr,
1197 		  "reallocating space for output frames: %d\n",
1198 		  output_buf_size);
1199 	rms_speech = (float *) ckrealloc((void *) rms_speech,
1200 				       sizeof(float) * output_buf_size);
1201 	/*	spsassert(rms_speech, "rms_speech realloc failed in dp_f0()");*/
1202 	f0p = (float *) ckrealloc((void *) f0p,
1203 				sizeof(float) * output_buf_size);
1204 	/*	spsassert(f0p, "f0p realloc failed in dp_f0()");*/
1205 	vuvp = (float *) ckrealloc((void *) vuvp, sizeof(float) * output_buf_size);
1206 	/*	spsassert(vuvp, "vuvp realloc failed in dp_f0()");*/
1207 	acpkp = (float *) ckrealloc((void *) acpkp, sizeof(float) * output_buf_size);
1208 	/*	spsassert(acpkp, "acpkp realloc failed in dp_f0()");*/
1209       }
1210       rms_speech[i] = frm->rms;
1211       acpkp[i] =  frm->dp->pvals[best_cand];
1212       loc1 = frm->dp->locs[best_cand];
1213       vuvp[i] = 1.0;
1214       best_cand = frm->dp->prept[best_cand];
1215       ftemp = (float) loc1;
1216       if(loc1 > 0) {		/* Was f0 actually estimated for this frame? */
1217 	if (loc1 > start && loc1 < stop) { /* loc1 must be a local maximum. */
1218 	  float cormax, cprev, cnext, den;
1219 
1220 	  j = loc1 - start;
1221 	  cormax = frm->cp->correl[j];
1222 	  cprev = frm->cp->correl[j+1];
1223 	  cnext = frm->cp->correl[j-1];
1224 	  den = (float) (2.0 * ( cprev + cnext - (2.0 * cormax) ));
1225 	  /*
1226 	   * Only parabolic interpolate if cormax is indeed a local
1227 	   * turning point. Find peak of curve that goes though the 3 points
1228 	   */
1229 
1230 	  if (fabs(den) > 0.000001)
1231 	    ftemp += 2.0f - ((((5.0f*cprev)+(3.0f*cnext)-(8.0f*cormax))/den));
1232 	}
1233 	f0p[i] = (float) (freq/ftemp);
1234       } else {		/* No valid estimate; just fake some arbitrary F0. */
1235 	f0p[i] = 0;
1236 	vuvp[i] = 0.0;
1237       }
1238       frm = frm->prev;
1239 
1240       if (debug_level >= 2)
1241 	Fprintf(stderr," i:%4d%8.1f%8.1f\n",i,f0p[i],vuvp[i]);
1242       /* f0p[i] starts from the most recent one */
1243       /* Need to reverse the order in the calling function */
1244       i++;
1245     } /* end while() */
1246     if (checkpath_done){
1247       *vecsize = i;
1248       tailF = cmpthF->next;
1249       num_active_frames -= *vecsize;
1250     }
1251   } /* end if() */
1252 
1253   if (debug_level)
1254     Fprintf(stderr, "writing out %d frames.\n", *vecsize);
1255 
1256   *f0p_pt = f0p;
1257   *vuvp_pt = vuvp;
1258   *acpkp_pt = acpkp;
1259   *rms_speech_pt = rms_speech;
1260   /*  *acpkp_pt = acpkp;*/
1261 
1262   if(first_time) first_time = 0;
1263   return(0);
1264 }
1265 
1266 
1267 /*--------------------------------------------------------------------*/
1268 Frame *
1269 alloc_frame(nlags, ncands)
1270     int     nlags, ncands;
1271 {
1272   Frame *frm;
1273   int j;
1274 
1275   frm = (Frame*)ckalloc(sizeof(Frame));
1276   frm->dp = (Dprec *) ckalloc(sizeof(Dprec));
1277   /*  spsassert(frm->dp,"frm->dp ckalloc failed in alloc_frame");*/
1278   frm->dp->ncands = 0;
1279   frm->cp = (Cross *) ckalloc(sizeof(Cross));
1280   /*  spsassert(frm->cp,"frm->cp ckalloc failed in alloc_frame");*/
1281   frm->cp->correl = (float *) ckalloc(sizeof(float) * nlags);
1282   /*  spsassert(frm->cp->correl, "frm->cp->correl ckalloc failed");*/
1283   /* Allocate space for candidates and working arrays. */
1284   frm->dp->locs = (short*)ckalloc(sizeof(short) * ncands);
1285   /*  spsassert(frm->dp->locs,"frm->dp->locs ckalloc failed in alloc_frame()");*/
1286   frm->dp->pvals = (float*)ckalloc(sizeof(float) * ncands);
1287 /*  spsassert(frm->dp->pvals,"frm->dp->pvals ckalloc failed in alloc_frame()");*/
1288   frm->dp->mpvals = (float*)ckalloc(sizeof(float) * ncands);
1289   /*  spsassert(frm->dp->mpvals,"frm->dp->mpvals ckalloc failed in alloc_frame()");*/
1290   frm->dp->prept = (short*)ckalloc(sizeof(short) * ncands);
1291   /*  spsassert(frm->dp->prept,"frm->dp->prept ckalloc failed in alloc_frame()");*/
1292   frm->dp->dpvals = (float*)ckalloc(sizeof(float) * ncands);
1293   /*  spsassert(frm->dp->dpvals,"frm->dp->dpvals ckalloc failed in alloc_frame()");*/
1294 
1295   /*  Initialize the cumulative DP costs to zero */
1296   for(j = ncands-1; j >= 0; j--)
1297     frm->dp->dpvals[j] = 0.0;
1298 
1299   return(frm);
1300 }
1301 
1302 
1303 /*--------------------------------------------------------------------*/
1304 /* push window stat to stack, and pop the oldest one */
1305 
1306 static int
1307 save_windstat(rho, order, err, rms)
1308     float   *rho;
1309     int     order;
1310     float   err;
1311     float   rms;
1312 {
1313     int i,j;
1314 
1315     if(wReuse > 1){               /* push down the stack */
1316 	for(j=1; j<wReuse; j++){
1317 	    for(i=0;i<=order; i++) windstat[j-1].rho[i] = windstat[j].rho[i];
1318 	    windstat[j-1].err = windstat[j].err;
1319 	    windstat[j-1].rms = windstat[j].rms;
1320 	}
1321 	for(i=0;i<=order; i++) windstat[wReuse-1].rho[i] = rho[i]; /*save*/
1322 	windstat[wReuse-1].err = (float) err;
1323 	windstat[wReuse-1].rms = (float) rms;
1324 	return 1;
1325     } else if (wReuse == 1) {
1326 	for(i=0;i<=order; i++) windstat[0].rho[i] = rho[i];  /* save */
1327 	windstat[0].err = (float) err;
1328 	windstat[0].rms = (float) rms;
1329 	return 1;
1330     } else
1331 	return 0;
1332 }
1333 
1334 
1335 /*--------------------------------------------------------------------*/
1336 static int
1337 retrieve_windstat(rho, order, err, rms)
1338     float   *rho;
1339     int     order;
1340     float   *err;
1341     float   *rms;
1342 {
1343     Windstat wstat;
1344     int i;
1345 
1346     if(wReuse){
1347 	wstat = windstat[0];
1348 	for(i=0; i<=order; i++) rho[i] = wstat.rho[i];
1349 	*err = wstat.err;
1350 	*rms = wstat.rms;
1351 	return 1;
1352     }
1353     else return 0;
1354 }
1355 
1356 
1357 /*--------------------------------------------------------------------*/
1358 static float
1359 get_similarity(order, size, pdata, cdata,
1360 	       rmsa, rms_ratio, pre, stab, w_type, init)
1361     int     order, size;
1362     float   *pdata, *cdata;
1363     float   *rmsa, *rms_ratio, pre, stab;
1364     int     w_type, init;
1365 {
1366   float rho3[BIGSORD+1], err3, rms3, rmsd3, b0, t, a2[BIGSORD+1],
1367       rho1[BIGSORD+1], a1[BIGSORD+1], b[BIGSORD+1], err1, rms1, rmsd1;
1368   float xitakura(), wind_energy();
1369   void xa_to_aca ();
1370   int xlpc();
1371 
1372 /* (In the lpc() calls below, size-1 is used, since the windowing and
1373    preemphasis function assumes an extra point is available in the
1374    input data array.  This condition is apparently no longer met after
1375    Derek's modifications.) */
1376 
1377   /* get current window stat */
1378   xlpc(order, stab, size-1, cdata,
1379       a2, rho3, (float *) NULL, &err3, &rmsd3, pre, w_type);
1380   rms3 = wind_energy(cdata, size, w_type);
1381 
1382   if(!init) {
1383       /* get previous window stat */
1384       if( !retrieve_windstat(rho1, order, &err1, &rms1)){
1385 	  xlpc(order, stab, size-1, pdata,
1386 	      a1, rho1, (float *) NULL, &err1, &rmsd1, pre, w_type);
1387 	  rms1 = wind_energy(pdata, size, w_type);
1388       }
1389       xa_to_aca(a2+1,b,&b0,order);
1390       t = xitakura(order,b,&b0,rho1+1,&err1) - .8f;
1391       if(rms1 > 0.0)
1392 	  *rms_ratio = (0.001f + rms3)/rms1;
1393       else
1394 	  if(rms3 > 0.0)
1395 	      *rms_ratio = 2.0;	/* indicate some energy increase */
1396 	  else
1397 	      *rms_ratio = 1.0;	/* no change */
1398   } else {
1399       *rms_ratio = 1.0;
1400       t = 10.0;
1401   }
1402   *rmsa = rms3;
1403   save_windstat( rho3, order, err3, rms3);
1404   return((float)(0.2/t));
1405 }
1406 
1407 
1408 /* -------------------------------------------------------------------- */
1409 /* This is an ad hoc signal stationarity function based on Itakura
1410  * distance and relative amplitudes.
1411  */
1412 /*
1413   This illustrates the window locations when the very first frame is read.
1414   It shows an example where each frame step |  .  | is 10 msec.  The
1415   frame step size is variable.  The window size is always 30 msec.
1416   The window centers '*' is always 20 msec apart.
1417   The windows cross each other right at the center of the DP frame, or
1418   where the '.' is.
1419 
1420                           ---------*---------   current window
1421 
1422               ---------*---------  previous window
1423 
1424   |  .  |  .  |  .  |  .  |  .  |  .  |  .  |  .  |  .  |
1425               ^           ^  ^
1426               ^           ^  ^
1427               ^           ^  fdata
1428               ^           ^
1429               ^           q
1430 	      p
1431 
1432                           ---
1433                           ind
1434 
1435   fdata, q, p, ind, are variables used below.
1436 
1437 */
1438 
1439 static Stat *stat = NULL;
1440 static float *mem = NULL;
1441 
1442 static Stat*
1443 get_stationarity(fdata, freq, buff_size, nframes, frame_step, first_time)
1444     float   *fdata;
1445     double  freq;
1446     int     buff_size, nframes, frame_step, first_time;
1447 {
1448   static int nframes_old = 0, memsize;
1449   float preemp = 0.4f, stab = 30.0f;
1450   float *p, *q, *r, *datend;
1451   int ind, i, j, m, size, order, agap, w_type = 3;
1452 
1453   agap = (int) (STAT_AINT *freq);
1454   size = (int) (STAT_WSIZE * freq);
1455   ind = (agap - size) / 2;
1456 
1457   if( nframes_old < nframes || !stat || first_time){
1458     /* move this to init_dp_f0() later */
1459     nframes_old = nframes;
1460     if(stat){
1461       ckfree((char *) stat->stat);
1462       ckfree((char *) stat->rms);
1463       ckfree((char *) stat->rms_ratio);
1464       ckfree((char *) stat);
1465     }
1466     if (mem) ckfree((void *)mem);
1467     stat = (Stat *) ckalloc(sizeof(Stat));
1468     /*    spsassert(stat,"stat ckalloc failed in get_stationarity");*/
1469     stat->stat = (float*)ckalloc(sizeof(float)*nframes);
1470     /*    spsassert(stat->stat,"stat->stat ckalloc failed in get_stationarity");*/
1471     stat->rms = (float*)ckalloc(sizeof(float)*nframes);
1472     /*    spsassert(stat->rms,"stat->rms ckalloc failed in get_stationarity");*/
1473     stat->rms_ratio = (float*)ckalloc(sizeof(float)*nframes);
1474     /*    spsassert(stat->rms_ratio,"stat->ratio ckalloc failed in get_stationarity");*/
1475     memsize = (int) (STAT_WSIZE * freq) + (int) (STAT_AINT * freq);
1476     mem = (float *) ckalloc( sizeof(float) * memsize);
1477     /*    spsassert(mem, "mem ckalloc failed in get_stationarity()");*/
1478     for(j=0; j<memsize; j++) mem[j] = 0;
1479   }
1480 
1481   if(nframes == 0) return(stat);
1482 
1483   q = fdata + ind;
1484   datend = fdata + buff_size;
1485 
1486   if((order = (int) (2.0 + (freq/1000.0))) > BIGSORD) {
1487     Fprintf(stderr,
1488 	    "Optimim order (%d) exceeds that allowable (%d); reduce Fs\n",order, BIGSORD);
1489     order = BIGSORD;
1490   }
1491 
1492   /* prepare for the first frame */
1493   for(j=memsize/2, i=0; j<memsize; j++, i++) mem[j] = fdata[i];
1494 
1495   /* never run over end of frame, should already taken care of when read */
1496 
1497   for(j=0, p = q - agap; j < nframes; j++, p += frame_step, q += frame_step){
1498       if( (p >= fdata) && (q >= fdata) && ( q + size <= datend) )
1499 	  stat->stat[j] = get_similarity(order,size, p, q,
1500 					     &(stat->rms[j]),
1501 					     &(stat->rms_ratio[j]),preemp,
1502 					     stab,w_type, 0);
1503       else {
1504 	  if(first_time) {
1505 	      if( (p < fdata) && (q >= fdata) && (q+size <=datend) )
1506 		  stat->stat[j] = get_similarity(order,size, NULL, q,
1507 						     &(stat->rms[j]),
1508 						     &(stat->rms_ratio[j]),
1509 						     preemp,stab,w_type, 1);
1510 	      else{
1511 		  stat->rms[j] = 0.0;
1512 		  stat->stat[j] = 0.01f * 0.2f;   /* a big transition */
1513 		  stat->rms_ratio[j] = 1.0;   /* no amplitude change */
1514 	      }
1515 	  } else {
1516 	      if( (p<fdata) && (q+size <=datend) ){
1517 		  stat->stat[j] = get_similarity(order,size, mem,
1518 						     mem + (memsize/2) + ind,
1519 						     &(stat->rms[j]),
1520 						     &(stat->rms_ratio[j]),
1521 						     preemp, stab,w_type, 0);
1522 		  /* prepare for the next frame_step if needed */
1523 		  if(p + frame_step < fdata ){
1524 		      for( m=0; m<(memsize-frame_step); m++)
1525 			  mem[m] = mem[m+frame_step];
1526 		      r = q + size;
1527 		      for( m=0; m<frame_step; m++)
1528 			  mem[memsize-frame_step+m] = *r++;
1529 		  }
1530 	      }
1531 	  }
1532       }
1533   }
1534 
1535   /* last frame, prepare for next call */
1536   for(j=(memsize/2)-1, p=fdata + (nframes * frame_step)-1; j>=0 && p>=fdata; j--)
1537     mem[j] = *p--;
1538   return(stat);
1539 }
1540 
1541 
1542 /* -------------------------------------------------------------------- */
1543 /*	Round the argument to the nearest integer.			*/
1544 /*
1545 int
1546 eround(flnum)
1547     double  flnum;
1548 {
1549   return((flnum >= 0.0) ? (int)(flnum + 0.5) : (int)(flnum - 0.5));
1550 }
1551 
1552 */
1553 void free_dp_f0()
1554 {
1555   int i;
1556   Frame *frm, *next;
1557 
1558   ckfree((void *)pcands);
1559   pcands = NULL;
1560 
1561   ckfree((void *)rms_speech);
1562   rms_speech = NULL;
1563 
1564   ckfree((void *)f0p);
1565   f0p = NULL;
1566 
1567   ckfree((void *)vuvp);
1568   vuvp = NULL;
1569 
1570   ckfree((void *)acpkp);
1571   acpkp = NULL;
1572 
1573   ckfree((void *)peaks);
1574   peaks = NULL;
1575 
1576   ckfree((void *)locs);
1577   locs = NULL;
1578 
1579   if (wReuse) {
1580     ckfree((void *)windstat);
1581     windstat = NULL;
1582   }
1583 
1584   frm = headF;
1585 
1586   for(i = 0; i < size_cir_buffer; i++) {
1587     next = frm->next;
1588     ckfree((void *)frm->cp->correl);
1589     ckfree((void *)frm->dp->locs);
1590     ckfree((void *)frm->dp->pvals);
1591     ckfree((void *)frm->dp->mpvals);
1592     ckfree((void *)frm->dp->prept);
1593     ckfree((void *)frm->dp->dpvals);
1594     ckfree((void *)frm->cp);
1595     ckfree((void *)frm->dp);
1596     ckfree((void *)frm);
1597     frm = next;
1598   }
1599   headF = NULL;
1600   tailF = NULL;
1601 
1602   ckfree((void *)stat->stat);
1603   ckfree((void *)stat->rms);
1604   ckfree((void *)stat->rms_ratio);
1605 
1606   ckfree((void *)stat);
1607   stat = NULL;
1608 
1609   ckfree((void *)mem);
1610   mem = NULL;
1611 }
1612 
1613 int
1614 cGet_f0(Sound *sound, Tcl_Interp *interp, float **outlist, int *length)
1615 {
1616   float *fdata;
1617   int done;
1618   long buff_size, actsize;
1619   double sf, start_time;
1620   F0_params *par, *read_f0_params();
1621   float *f0p, *vuvp, *rms_speech, *acpkp;
1622   int i, vecsize;
1623   int init_dp_f0(), dp_f0();
1624   static int framestep = -1;
1625   long sdstep = 0, total_samps;
1626   int ndone = 0;
1627   Tcl_Obj *list;
1628   float *tmp = (float *)ckalloc(sizeof(float) * (5 + sound->length / 80));
1629   int count = 0;
1630   int startpos = 0, endpos = -1;
1631 
1632   if (sound->cmdPtr != NULL) {
1633     Tcl_DecrRefCount(sound->cmdPtr);
1634     sound->cmdPtr = NULL;
1635   }
1636 
1637   par = (F0_params *) ckalloc(sizeof(F0_params));
1638   par->cand_thresh = 0.3f;
1639   par->lag_weight = 0.3f;
1640   par->freq_weight = 0.02f;
1641   par->trans_cost = 0.005f;
1642   par->trans_amp = 0.5f;
1643   par->trans_spec = 0.5f;
1644   par->voice_bias = 0.0f;
1645   par->double_cost = 0.35f;
1646   par->min_f0 = 50;
1647   par->max_f0 = 550;
1648   par->frame_step = 0.01f;
1649   par->wind_dur = 0.0075f;
1650   par->n_cands = 20;
1651   par->mean_f0 = 200;          /* unused */
1652   par->mean_f0_weight = 0.0f;  /* unused */
1653   par->conditioning = 0;       /* unused */
1654 
1655   if (startpos < 0) startpos = 0;
1656   if (endpos >= (sound->length - 1) || endpos == -1)
1657     endpos = sound->length - 1;
1658   if (startpos > endpos) return TCL_OK;
1659 
1660   sf = (double) sound->samprate;
1661 
1662   if (framestep > 0)  /* If a value was specified with -S, use it. */
1663     par->frame_step = (float) (framestep / sf);
1664   start_time = 0.0f;
1665   if(check_f0_params(interp, par, sf)){
1666     Tcl_AppendResult(interp, "invalid/inconsistent parameters -- exiting.", NULL);
1667     return TCL_ERROR;
1668   }
1669 
1670   total_samps = endpos - startpos + 1;
1671   if(total_samps < ((par->frame_step * 2.0) + par->wind_dur) * sf) {
1672     Tcl_AppendResult(interp, "input range too small for analysis by get_f0.", NULL);
1673     return TCL_ERROR;
1674   }
1675   /* Initialize variables in get_f0.c; allocate data structures;
1676    * determine length and overlap of input frames to read.
1677    */
1678   if (init_dp_f0(sf, par, &buff_size, &sdstep)
1679       || buff_size > INT_MAX || sdstep > INT_MAX)
1680   {
1681     Tcl_AppendResult(interp, "problem in init_dp_f0().", NULL);
1682     return TCL_ERROR;
1683   }
1684 
1685   if (debug_level)
1686     Fprintf(stderr, "init_dp_f0 returned buff_size %ld, sdstep %ld.\n",buff_size, sdstep);
1687 
1688   if (buff_size > total_samps)
1689     buff_size = total_samps;
1690 
1691   actsize = min(buff_size, sound->length);
1692   fdata = (float *) ckalloc(sizeof(float) * max(buff_size, sdstep));
1693   list = Tcl_NewListObj(0, NULL);
1694   /*  Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 0.0);*/
1695   ndone = startpos;
1696 
1697   while (TRUE) {
1698     done = (actsize < buff_size) || (total_samps == buff_size);
1699     Snack_GetSoundData(sound, ndone, fdata, actsize);
1700     /*if (sound->debug > 0) Snack_WriteLog("dp_f0...\n");*/
1701     if (dp_f0(fdata, (int) actsize, (int) sdstep, sf, par,
1702 	      &f0p, &vuvp, &rms_speech, &acpkp, &vecsize, done)) {
1703       Tcl_AppendResult(interp, "problem in dp_f0().", NULL);
1704       return TCL_ERROR;
1705     }
1706     /*if (sound->debug > 0) Snack_WriteLogInt("done dp_f0",vecsize);*/
1707     for (i = vecsize - 1; i >= 0; i--) {
1708       tmp[count] = f0p[i];
1709       count++;
1710     }
1711 
1712     if (done) break;
1713 
1714     ndone += sdstep;
1715     actsize = min(buff_size, sound->length - ndone);
1716     total_samps -= sdstep;
1717 
1718     if (actsize > total_samps)
1719       actsize = total_samps;
1720 
1721     /*    if (1) {
1722       int res = Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", (double) ndone / sound->length);
1723       if (res != TCL_OK) {
1724 	return TCL_ERROR;
1725       }
1726       }*/
1727   }
1728 
1729   /*Snack_ProgressCallback(sound->cmdPtr, interp, "Computing pitch", 1.0);*/
1730 
1731   ckfree((void *)fdata);
1732 
1733   ckfree((void *)par);
1734 
1735   free_dp_f0();
1736 
1737   *outlist = tmp;
1738   *length = count;
1739   /*Tcl_SetObjResult(interp, list);*/
1740 
1741   return TCL_OK;
1742 }
1743