1 /* formant.c */ 2 /* 3 * This software has been licensed to the Centre of Speech Technology, KTH 4 * by AT&T Corp. and Microsoft Corp. with the terms in the accompanying 5 * file BSD.txt, which is a BSD style license. 6 * 7 * "Copyright (c) 1987-1990 AT&T, Inc. 8 * "Copyright (c) 1986-1990 Entropic Speech, Inc. 9 * "Copyright (c) 1990-1994 Entropic Research Laboratory, Inc. 10 * All rights reserved" 11 * 12 * Written by: David Talkin 13 * Revised by: John Shore 14 * 15 */ 16 17 #include "snack.h" 18 #include <stdio.h> 19 #include <math.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include "jkFormant.h" 23 24 25 int debug = 0; 26 int w_verbose = 0; 27 28 /* dpform.c */ 29 30 /* a formant tracker based on LPC polynomial roots and dynamic programming */ 31 /***/ 32 /* At each frame, the LPC poles are ordered by increasing frequency. All 33 "reasonable" mappings of the poles to F1, F2, ... are performed. 34 The cost of "connecting" each of these mappings with each of the mappings 35 in the previous frame is computed. The lowest cost connection is then 36 chosen as the optimum one. At each frame, each mapping has associated 37 with it a cost based on the formant bandwidths and frequencies. This 38 "local" cost is finally added to the cost of the best "connection." At 39 end of utterance (or after a reasonable delay like .5sec) the best 40 mappings for the entire utterance may be found by retracing back through 41 best candidate mappings, starting at end of utterance (or current frame). 42 */ 43 44 /* Here are the major fudge factors for tweaking the formant tracker. */ 45 #define MAXCAN 300 /* maximum number of candidate mappings allowed */ 46 static double MISSING = 1, /* equivalent delta-Hz cost for missing formant */ 47 NOBAND = 1000, /* equivalent bandwidth cost of a missing formant */ 48 DF_FACT = 20.0, /* cost for proportional frequency changes */ 49 /* with good "stationarity" function:*/ 50 /* DF_FACT = 80.0, *//* cost for proportional frequency changes */ 51 DFN_FACT = 0.3, /* cost for proportional dev. from nominal freqs. */ 52 BAND_FACT = .002, /* cost per Hz of bandwidth in the poles */ 53 /* F_BIAS = 0.0004, bias toward selecting low-freq. poles */ 54 F_BIAS = 0.000, /* bias toward selecting low-freq. poles */ 55 F_MERGE = 2000.0; /* cost of mapping f1 and f2 to same frequency */ 56 static double *fre, 57 fnom[] = { 500, 1500, 2500, 3500, 4500, 5500, 6500},/* "nominal" freqs.*/ 58 fmins[] = { 50, 400, 1000, 2000, 2000, 3000, 3000}, /* frequency bounds */ 59 fmaxs[] = { 1500, 3500, 4500, 5000, 6000, 6000, 8000}; /* for 1st 5 formants */ 60 61 static int maxp, /* number of poles to consider */ 62 maxf, /* number of formants to find */ 63 ncan, domerge = TRUE; 64 65 static short **pc; 66 67 static int canbe(pnumb, fnumb) /* can this pole be this freq.? */ 68 int pnumb, fnumb; 69 { 70 return((fre[pnumb] >= fmins[fnumb])&&(fre[pnumb] <= fmaxs[fnumb])); 71 } 72 73 /* This does the real work of mapping frequencies to formants. */ 74 static void candy(cand,pnumb,fnumb) 75 int cand, /* candidate number being considered */ 76 pnumb, /* pole number under consideration */ 77 fnumb; /* formant number under consideration */ 78 { 79 int i,j; 80 81 if(fnumb < maxf) pc[cand][fnumb] = -1; 82 if((pnumb < maxp)&&(fnumb < maxf)){ 83 /* printf("\ncan:%3d pnumb:%3d fnumb:%3d",cand,pnumb,fnumb); */ 84 if(canbe(pnumb,fnumb)){ 85 pc[cand][fnumb] = pnumb; 86 if(domerge&&(fnumb==0)&&(canbe(pnumb,fnumb+1))){ /* allow for f1,f2 merger */ 87 ncan++; 88 pc[ncan][0] = pc[cand][0]; 89 candy(ncan,pnumb,fnumb+1); /* same pole, next formant */ 90 } 91 candy(cand,pnumb+1,fnumb+1); /* next formant; next pole */ 92 if(((pnumb+1) < maxp) && canbe(pnumb+1,fnumb)){ 93 /* try other frequencies for this formant */ 94 ncan++; /* add one to the candidate index/tally */ 95 /* printf("\n%4d %4d %4d",ncan,pnumb+1,fnumb); */ 96 for(i=0; i<fnumb; i++) /* clone the lower formants */ 97 pc[ncan][i] = pc[cand][i]; 98 candy(ncan,pnumb+1,fnumb); 99 } 100 } else { 101 candy(cand,pnumb+1,fnumb); 102 } 103 } 104 /* If all pole frequencies have been examined without finding one which 105 will map onto the current formant, go on to the next formant leaving the 106 current formant null. */ 107 if((pnumb >= maxp) && (fnumb < maxf-1) && (pc[cand][fnumb] < 0)){ 108 if(fnumb){ 109 j=fnumb-1; 110 while((j>0) && pc[cand][j] < 0) j--; 111 i = ((j=pc[cand][j]) >= 0)? j : 0; 112 } else i = 0; 113 candy(cand,i,fnumb+1); 114 } 115 } 116 117 /* Given a set of pole frequencies and allowable formant frequencies 118 for nform formants, calculate all possible mappings of pole frequencies 119 to formants, including, possibly, mappings with missing formants. */ 120 void get_fcand(npole,freq,band,nform,pcan) 121 int npole, nform; 122 short **pcan; 123 double *freq, *band; /* poles ordered by increasing FREQUENCY */ 124 { 125 126 ncan = 0; 127 pc = pcan; 128 fre = freq; 129 maxp = npole; 130 maxf = nform; 131 candy(ncan, 0, 0); 132 ncan++; /* (converts ncan as an index to ncan as a candidate count) */ 133 } 134 135 void set_nominal_freqs(f1) 136 double f1; 137 { 138 int i; 139 for(i=0; i < MAXFORMANTS; i++) { 140 fnom[i] = ((i * 2) + 1) * f1; 141 fmins[i] = fnom[i] - ((i+1) * f1) + 50.0; 142 fmaxs[i] = fnom[i] + (i * f1) + 1000.0; 143 } 144 } 145 146 /* ---------------------------------------------------------- */ 147 /* find the maximum in the "stationarity" function (stored in rms) */ 148 double get_stat_max(pole, nframes) 149 register POLE **pole; 150 register int nframes; 151 { 152 register int i; 153 register double amax, t; 154 155 for(i=1, amax = (*pole++)->rms; i++ < nframes; ) 156 if((t = (*pole++)->rms) > amax) amax = t; 157 158 return(amax); 159 } 160 161 Sound *dpform(ps, nform, nom_f1) 162 Sound *ps; 163 int nform; 164 double nom_f1; 165 { 166 double pferr, conerr, minerr, dffact, ftemp, berr, ferr, bfact, ffact, 167 rmsmax, fbias, **fr, **ba, rmsdffact, merger=0.0, merge_cost, 168 FBIAS; 169 register int i, j, k, l, ic, ip, mincan=0; 170 short **pcan; 171 FORM **fl; 172 POLE **pole; /* raw LPC pole data structure array */ 173 Sound *fbs; 174 int dmaxc,dminc,dcountc,dcountf; 175 176 if(ps) { 177 if(nom_f1 > 0.0) 178 set_nominal_freqs(nom_f1); 179 pole = (POLE**)ps->extHead; 180 rmsmax = get_stat_max(pole, ps->length); 181 FBIAS = F_BIAS /(.01 * ps->samprate); 182 /* Setup working values of the cost weights. */ 183 dffact = (DF_FACT * .01) * ps->samprate; /* keep dffact scaled to frame rate */ 184 bfact = BAND_FACT /(.01 * ps->samprate); 185 ffact = DFN_FACT /(.01 * ps->samprate); 186 merge_cost = F_MERGE; 187 if(merge_cost > 1000.0) domerge = FALSE; 188 189 /* Allocate space for the formant and bandwidth arrays to be passed back. */ 190 if(debug & DEB_ENTRY){ 191 printf("Allocating formant and bandwidth arrays in dpform()\n"); 192 } 193 fr = (double**)ckalloc(sizeof(double*) * nform * 2); 194 ba = fr + nform; 195 for(i=0;i < nform*2; i++){ 196 fr[i] = (double*)ckalloc(sizeof(double) * ps->length); 197 } 198 /* cp = new_ext(ps->name,"fb");*/ 199 /* if((fbs=new_signal(cp,SIG_UNKNOWN,dup_header(ps->header),fr,ps->length, ps->samprate, nform * 2))) {*/ 200 if (1) { 201 /* Allocate space for the raw candidate array. */ 202 if(debug & DEB_ENTRY){ 203 printf("Allocating raw candidate array in dpform()\n"); 204 } 205 pcan = (short**)ckalloc(sizeof(short*) * MAXCAN); 206 for(i=0;i<MAXCAN;i++) pcan[i] = (short*)ckalloc(sizeof(short) * nform); 207 208 /* Allocate space for the dp lattice */ 209 if(debug & DEB_ENTRY){ 210 printf("Allocating DP lattice structure in dpform()\n"); 211 } 212 fl = (FORM**)ckalloc(sizeof(FORM*) * ps->length); 213 for(i=0;i < ps->length; i++) 214 fl[i] = (FORM*)ckalloc(sizeof(FORM)); 215 216 /*******************************************************************/ 217 /* main formant tracking loop */ 218 /*******************************************************************/ 219 if(debug & DEB_ENTRY){ 220 printf("Entering main computation loop in dpform()\n"); 221 } 222 for(i=0; i < ps->length; i++){ /* for all analysis frames... */ 223 224 ncan = 0; /* initialize candidate mapping count to 0 */ 225 226 /* moderate the cost of frequency jumps by the relative amplitude */ 227 rmsdffact = pole[i]->rms; 228 rmsdffact = rmsdffact/rmsmax; 229 rmsdffact = rmsdffact * dffact; 230 231 /* Get all likely mappings of the poles onto formants for this frame. */ 232 if(pole[i]->npoles){ /* if there ARE pole frequencies available... */ 233 get_fcand(pole[i]->npoles,pole[i]->freq,pole[i]->band,nform,pcan); 234 235 /* Allocate space for this frame's candidates in the dp lattice. */ 236 fl[i]->prept = (short*)ckalloc(sizeof(short) * ncan); 237 fl[i]->cumerr = (double*)ckalloc(sizeof(double) * ncan); 238 fl[i]->cand = (short**)ckalloc(sizeof(short*) * ncan); 239 for(j=0;j<ncan;j++){ /* allocate cand. slots and install candidates */ 240 fl[i]->cand[j] = (short*)ckalloc(sizeof(short) * nform); 241 for(k=0; k<nform; k++) 242 fl[i]->cand[j][k] = pcan[j][k]; 243 } 244 } 245 fl[i]->ncand = ncan; 246 /* compute the distance between the current and previous mappings */ 247 for(j=0;j<ncan;j++){ /* for each CURRENT mapping... */ 248 if( i ){ /* past the first frame? */ 249 minerr = 0; 250 if(fl[i-1]->ncand) minerr = 2.0e30; 251 mincan = -1; 252 for(k=0; k < fl[i-1]->ncand; k++){ /* for each PREVIOUS map... */ 253 for(pferr=0.0, l=0; l<nform; l++){ 254 ic = fl[i]->cand[j][l]; 255 ip = fl[i-1]->cand[k][l]; 256 if((ic >= 0) && (ip >= 0)){ 257 ftemp = 2.0 * fabs(pole[i]->freq[ic] - pole[i-1]->freq[ip])/ 258 (pole[i]->freq[ic] + pole[i-1]->freq[ip]); 259 /* ftemp = pole[i]->freq[ic] - pole[i-1]->freq[ip]; 260 if(ftemp >= 0.0) 261 ftemp = ftemp/pole[i-1]->freq[ip]; 262 else 263 ftemp = ftemp/pole[i]->freq[ic]; */ 264 /* cost prop. to SQUARE of deviation to discourage large jumps */ 265 pferr += ftemp * ftemp; 266 } 267 else pferr += MISSING; 268 } 269 /* scale delta-frequency cost and add in prev. cum. cost */ 270 conerr = (rmsdffact * pferr) + fl[i-1]->cumerr[k]; 271 if(conerr < minerr){ 272 minerr = conerr; 273 mincan = k; 274 } 275 } /* end for each PREVIOUS mapping... */ 276 } else { /* (i.e. if this is the first frame... ) */ 277 minerr = 0; 278 } 279 280 fl[i]->prept[j] = mincan; /* point to best previous mapping */ 281 /* (Note that mincan=-1 if there were no candidates in prev. fr.) */ 282 /* Compute the local costs for this current mapping. */ 283 for(k=0, berr=0, ferr=0, fbias=0; k<nform; k++){ 284 ic = fl[i]->cand[j][k]; 285 if(ic >= 0){ 286 if( !k ){ /* F1 candidate? */ 287 ftemp = pole[i]->freq[ic]; 288 merger = (domerge && 289 (ftemp == pole[i]->freq[fl[i]->cand[j][1]]))? 290 merge_cost: 0.0; 291 } 292 berr += pole[i]->band[ic]; 293 ferr += (fabs(pole[i]->freq[ic]-fnom[k])/fnom[k]); 294 fbias += pole[i]->freq[ic]; 295 } else { /* if there was no freq. for this formant */ 296 fbias += fnom[k]; 297 berr += NOBAND; 298 ferr += MISSING; 299 } 300 } 301 302 /* Compute the total cost of this mapping and best previous. */ 303 fl[i]->cumerr[j] = (FBIAS * fbias) + (bfact * berr) + merger + 304 (ffact * ferr) + minerr; 305 } /* end for each CURRENT mapping... */ 306 307 if(debug & DEB_LPC_PARS){ 308 printf("\nFrame %4d # candidates:%3d stat:%f prms:%f",i,ncan,rmsdffact,pole[i]->rms); 309 for (j=0; j<ncan; j++){ 310 printf("\n "); 311 for(k=0; k<nform; k++) 312 if(pcan[j][k] >= 0) 313 printf("%6.0f ",pole[i]->freq[fl[i]->cand[j][k]]); 314 else 315 printf(" NA "); 316 printf(" cum:%7.2f pp:%d",fl[i]->cumerr[j], fl[i]->prept[j]); 317 } 318 } 319 } /* end for all analysis frames... */ 320 /**************************************************************************/ 321 322 /* Pick the candidate in the final frame with the lowest cost. */ 323 /* Starting with that min.-cost cand., work back thru the lattice. */ 324 if(debug & DEB_ENTRY){ 325 printf("Entering backtrack loop in dpform()\n"); 326 } 327 dmaxc = 0; 328 dminc = 100; 329 dcountc = dcountf = 0; 330 for(mincan = -1, i=ps->length - 1; i>=0; i--){ 331 if(debug & DEB_LPC_PARS){ 332 printf("\nFrame:%4d mincan:%2d ncand:%2d ",i,mincan,fl[i]->ncand); 333 } 334 if(mincan < 0) /* need to find best starting candidate? */ 335 if(fl[i]->ncand){ /* have candidates at this frame? */ 336 minerr = fl[i]->cumerr[0]; 337 mincan = 0; 338 for(j=1; j<fl[i]->ncand; j++) 339 if( fl[i]->cumerr[j] < minerr ){ 340 minerr = fl[i]->cumerr[j]; 341 mincan = j; 342 } 343 } 344 if(mincan >= 0){ /* if there is a "best" candidate at this frame */ 345 if((j = fl[i]->ncand) > dmaxc) dmaxc = j; 346 else 347 if( j < dminc) dminc = j; 348 dcountc += j; 349 dcountf++; 350 for(j=0; j<nform; j++){ 351 k = fl[i]->cand[mincan][j]; 352 if(k >= 0){ 353 fr[j][i] = pole[i]->freq[k]; 354 if(debug & DEB_LPC_PARS){ 355 printf("%6.0f",fr[j][i]); 356 } 357 ba[j][i] = pole[i]->band[k]; 358 } else { /* IF FORMANT IS MISSING... */ 359 if(i < ps->length - 1){ 360 fr[j][i] = fr[j][i+1]; /* replicate backwards */ 361 ba[j][i] = ba[j][i+1]; 362 } else { 363 fr[j][i] = fnom[j]; /* or insert neutral values */ 364 ba[j][i] = NOBAND; 365 } 366 if(debug & DEB_LPC_PARS){ 367 printf("%6.0f",fr[j][i]); 368 } 369 } 370 } 371 mincan = fl[i]->prept[mincan]; 372 } else { /* if no candidates, fake with "nominal" frequencies. */ 373 for(j=0; j < nform; j++){ 374 fr[j][i] = fnom[j]; 375 ba[j][i] = NOBAND; 376 if(debug & DEB_LPC_PARS){ 377 printf("%6.0f",fr[j][i]); 378 } 379 } 380 } /* note that mincan will remain =-1 if no candidates */ 381 } /* end unpacking formant tracks from the dp lattice */ 382 /* Deallocate all the DP lattice work space. */ 383 /*if(debug & DEB_ENTRY){ 384 printf("%s complete; max. cands:%d min. cands.:%d average cands.:%f\n", 385 fbs->name,dmaxc,dminc,((double)dcountc)/dcountf); 386 printf("Entering memory deallocation in dpform()\n"); 387 }*/ 388 for(i=ps->length - 1; i>=0; i--){ 389 if(fl[i]->ncand){ 390 if(fl[i]->cand) { 391 for(j=0; j<fl[i]->ncand; j++) ckfree((void *)fl[i]->cand[j]); 392 ckfree((void *)fl[i]->cand); 393 ckfree((void *)fl[i]->cumerr); 394 ckfree((void *)fl[i]->prept); 395 } 396 } 397 } 398 for(i=0; i<ps->length; i++) ckfree((void *)fl[i]); 399 ckfree((void *)fl); 400 fl = 0; 401 402 for(i=0; i<ps->length; i++) { 403 ckfree((void *)pole[i]->freq); 404 ckfree((void *)pole[i]->band); 405 ckfree((void *)pole[i]); 406 } 407 ckfree((void *)pole); 408 409 /* Deallocate space for the raw candidate aray. */ 410 for(i=0;i<MAXCAN;i++) ckfree((void *)pcan[i]); 411 ckfree((void *)pcan); 412 413 fbs = Snack_NewSound(ps->samprate, SNACK_FLOAT, nform * 2); 414 Snack_ResizeSoundStorage(fbs, ps->length); 415 for (i = 0; i < ps->length; i++) { 416 for (j = 0; j < nform * 2; j++) { 417 Snack_SetSample(fbs, j, i, (float)fr[j][i]); 418 } 419 } 420 fbs->length = ps->length; 421 422 for(i = 0; i < nform*2; i++) ckfree((void *)fr[i]); 423 ckfree((void *)fr); 424 425 return(fbs); 426 } else 427 printf("Can't create a new Signal in dpform()\n"); 428 } else 429 printf("Bad data pointers passed into dpform()\n"); 430 return(NULL); 431 } 432 433 /* lpc_poles.c */ 434 435 /* computation and I/O routines for dealing with LPC poles */ 436 437 #define MAXORDER 30 438 439 extern int formant(), lpc(), lpcbsa(), dlpcwtd(), w_covar(); 440 441 /*************************************************************************/ 442 double integerize(time, freq) 443 register double time, freq; 444 { 445 register int i; 446 447 i = (int) (.5 + (freq * time)); 448 return(((double)i)/freq); 449 } 450 451 /* Round the argument to the nearest integer. */ 452 int eround(flnum) 453 register double flnum; 454 { 455 return((flnum >= 0.0) ? (int)(flnum + 0.5) : (int)(flnum - 0.5)); 456 } 457 458 /*************************************************************************/ 459 Sound *lpc_poles(sp,wdur,frame_int,lpc_ord,preemp,lpc_type,w_type) 460 Sound *sp; 461 int lpc_ord, lpc_type, w_type; 462 double wdur, frame_int, preemp; 463 { 464 int i, j, size, step, nform, init, nfrm; 465 POLE **pole; 466 double lpc_stabl = 70.0, energy, lpca[MAXORDER], normerr, 467 *bap=NULL, *frp=NULL, *rhp=NULL; 468 short *datap, *dporg; 469 Sound *lp; 470 471 if(lpc_type == 1) { /* force "standard" stabilized covariance (ala bsa) */ 472 wdur = 0.025; 473 preemp = exp(-62.831853 * 90. / sp->samprate); /* exp(-1800*pi*T) */ 474 } 475 if((lpc_ord > MAXORDER) || (lpc_ord < 2)/* || (! ((short**)sp->data)[0])*/) 476 return(NULL); 477 /* np = (char*)new_ext(sp->name,"pole");*/ 478 wdur = integerize(wdur,(double)sp->samprate); 479 frame_int = integerize(frame_int,(double)sp->samprate); 480 nfrm= 1 + (int) (((((double)sp->length)/sp->samprate) - wdur)/(frame_int)); 481 if(nfrm >= 1/*lp->buff_size >= 1*/) { 482 size = (int) (.5 + (wdur * sp->samprate)); 483 step = (int) (.5 + (frame_int * sp->samprate)); 484 pole = (POLE**)ckalloc(nfrm/*lp->buff_size*/ * sizeof(POLE*)); 485 datap = dporg = (short *) ckalloc(sizeof(short) * sp->length); 486 for (i = 0; i < Snack_GetLength(sp); i++) { 487 datap[i] = (short) Snack_GetSample(sp, 0, i); 488 } 489 for(j=0, init=TRUE/*, datap=((short**)sp->data)[0]*/; j < nfrm/*lp->buff_size*/;j++, datap += step){ 490 pole[j] = (POLE*)ckalloc(sizeof(POLE)); 491 pole[j]->freq = frp = (double*)ckalloc(sizeof(double)*lpc_ord); 492 pole[j]->band = bap = (double*)ckalloc(sizeof(double)*lpc_ord); 493 494 switch(lpc_type) { 495 case 0: 496 if(! lpc(lpc_ord,lpc_stabl,size,datap,lpca,rhp,NULL,&normerr, 497 &energy, preemp, w_type)){ 498 printf("Problems with lpc in lpc_poles()"); 499 break; 500 } 501 break; 502 case 1: 503 if(! lpcbsa(lpc_ord,lpc_stabl,size,datap,lpca,rhp,NULL,&normerr, 504 &energy, preemp)){ 505 printf("Problems with lpc in lpc_poles()"); 506 break; 507 } 508 break; 509 case 2: 510 { 511 int Ord = lpc_ord; 512 double alpha, r0; 513 514 w_covar(datap, &Ord, size, 0, lpca, &alpha, &r0, preemp, 0); 515 if((Ord != lpc_ord) || (alpha <= 0.0)) 516 printf("Problems with covar(); alpha:%f Ord:%d\n",alpha,Ord); 517 energy = sqrt(r0/(size-Ord)); 518 } 519 break; 520 } 521 pole[j]->change = 0.0; 522 /* don't waste time on low energy frames */ 523 if((pole[j]->rms = energy) > 1.0){ 524 formant(lpc_ord,(double)sp->samprate, lpca, &nform, frp, bap, init); 525 pole[j]->npoles = nform; 526 init=FALSE; /* use old poles to start next search */ 527 } else { /* write out no pole frequencies */ 528 pole[j]->npoles = 0; 529 init = TRUE; /* restart root search in a neutral zone */ 530 } 531 /* if(debug & 4) { 532 printf("\nfr:%4d np:%4d rms:%7.0f ",j,pole[j]->npoles,pole[j]->rms); 533 for(k=0; k<pole[j]->npoles; k++) 534 printf(" %7.1f",pole[j]->freq[k]); 535 printf("\n "); 536 for(k=0; k<pole[j]->npoles; k++) 537 printf(" %7.1f",pole[j]->band[k]); 538 printf("\n"); 539 }*/ 540 } /* end LPC pole computation for all lp->buff_size frames */ 541 /* lp->data = (caddr_t)pole;*/ 542 ckfree((void *)dporg); 543 lp = Snack_NewSound((int)(1.0/frame_int), LIN16, lpc_ord); 544 Snack_ResizeSoundStorage(lp, nfrm); 545 for (i = 0; i < nfrm; i++) { 546 for (j = 0; j < lpc_ord; j++) { 547 Snack_SetSample(lp, j, i, (float)pole[i]->freq[j]); 548 } 549 } 550 lp->length = nfrm; 551 lp->extHead = (char *)pole; 552 return(lp); 553 } else { 554 printf("Bad buffer size in lpc_poles()\n"); 555 } 556 return(NULL); 557 } 558 559 /**********************************************************************/ 560 double frand() 561 { 562 return (((double)rand())/(double)RAND_MAX); 563 } 564 565 /**********************************************************************/ 566 /* a quick and dirty interface to bsa's stabilized covariance LPC */ 567 #define NPM 30 /* max lpc order */ 568 569 int lpcbsa(np, lpc_stabl, wind, data, lpc, rho, nul1, nul2, energy, preemp) 570 int np, wind; 571 short *data; 572 double *lpc, *rho, *nul1, *nul2, *energy, lpc_stabl, preemp; 573 { 574 static int i, mm, owind=0, wind1; 575 static double w[1000]; 576 double rc[NPM],phi[NPM*NPM],shi[NPM],sig[1000]; 577 double xl = .09, fham, amax; 578 register double *psp1, *psp3, *pspl; 579 580 if(owind != wind) { /* need to compute a new window? */ 581 fham = 6.28318506 / wind; 582 for(psp1=w,i=0;i<wind;i++,psp1++) 583 *psp1 = .54 - .46 * cos(i * fham); 584 owind = wind; 585 } 586 wind += np + 1; 587 wind1 = wind-1; 588 589 for(psp3=sig,pspl=sig+wind; psp3 < pspl; ) 590 *psp3++ = (double)(*data++) + .016 * frand() - .008; 591 for(psp3=sig+1,pspl=sig+wind;psp3<pspl;psp3++) 592 *(psp3-1) = *psp3 - preemp * *(psp3-1); 593 for(amax = 0.,psp3=sig+np,pspl=sig+wind1;psp3<pspl;psp3++) 594 amax += *psp3 * *psp3; 595 *energy = sqrt(amax / (double)owind); 596 amax = 1.0/(*energy); 597 598 for(psp3=sig,pspl=sig+wind1;psp3<pspl;psp3++) 599 *psp3 *= amax; 600 if((mm=dlpcwtd(sig,&wind1,lpc,&np,rc,phi,shi,&xl,w))!=np) { 601 printf("LPCWTD error mm<np %d %d\n",mm,np); 602 return(FALSE); 603 } 604 return(TRUE); 605 } 606 607 /* Copyright (c) 1987, 1988, 1989 AT&T */ 608 /* All Rights Reserved */ 609 610 /* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T */ 611 /* The copyright notice above does not evidence any */ 612 /* actual or intended publication of such source code. */ 613 614 /* downsample.c */ 615 /* a quick and dirty downsampler */ 616 617 #ifndef TRUE 618 # define TRUE 1 619 # define FALSE 0 620 #endif 621 622 #define PI 3.1415927 623 624 /* ---------------------------------------------------------- */ 625 int lc_lin_fir(fc,nf,coef) 626 /* create the coefficients for a symmetric FIR lowpass filter using the 627 window technique with a Hanning window. */ 628 register double fc; 629 double coef[]; 630 int *nf; 631 { 632 register int i, n; 633 register double twopi, fn, c; 634 635 if(((*nf % 2) != 1) || (*nf > 127)) { 636 if(*nf <= 126) *nf = *nf + 1; 637 else *nf = 127; 638 } 639 n = (*nf + 1)/2; 640 641 /* compute part of the ideal impulse response */ 642 twopi = PI * 2.0; 643 coef[0] = 2.0 * fc; 644 c = PI; 645 fn = twopi * fc; 646 for(i=1;i < n; i++) coef[i] = sin(i * fn)/(c * i); 647 648 /* Now apply a Hanning window to the (infinite) impulse response. */ 649 fn = twopi/((double)(*nf - 1)); 650 for(i=0;i<n;i++) 651 coef[i] *= (.5 + (.5 * cos(fn * ((double)i)))); 652 653 return(TRUE); 654 } 655 656 /* ---------------------------------------------------------- */ 657 658 void do_fir(buf,in_samps,bufo,ncoef,ic,invert) 659 /* ic contains 1/2 the coefficients of a symmetric FIR filter with unity 660 passband gain. This filter is convolved with the signal in buf. 661 The output is placed in buf2. If invert != 0, the filter magnitude 662 response will be inverted. */ 663 short *buf, *bufo, ic[]; 664 int in_samps, ncoef, invert; 665 { 666 register short *buft, *bufp, *bufp2, stem; 667 short co[256], mem[256]; 668 register int i, j, k, l, m, sum, integral; 669 670 for(i=ncoef-1, bufp=ic+ncoef-1, bufp2=co, buft = co+((ncoef-1)*2), 671 integral = 0; i-- > 0; ) 672 if(!invert) *buft-- = *bufp2++ = *bufp--; 673 else { 674 integral += (stem = *bufp--); 675 *buft-- = *bufp2++ = -stem; 676 } 677 if(!invert) *buft-- = *bufp2++ = *bufp--; /* point of symmetry */ 678 else { 679 integral *= 2; 680 integral += *bufp; 681 *buft-- = integral - *bufp; 682 } 683 /* for(i=(ncoef*2)-2; i >= 0; i--) printf("\n%4d%7d",i,co[i]); */ 684 for(i=ncoef-1, buft=mem; i-- > 0; ) *buft++ = 0; 685 for(i=ncoef; i-- > 0; ) *buft++ = *buf++; 686 l = 16384; 687 m = 15; 688 k = (ncoef << 1) -1; 689 for(i=in_samps-ncoef; i-- > 0; ) { 690 for(j=k, buft=mem, bufp=co, bufp2=mem+1, sum = 0; j-- > 0; 691 *buft++ = *bufp2++ ) 692 sum += (((*bufp++ * *buft) + l) >> m); 693 694 *--buft = *buf++; /* new data to memory */ 695 *bufo++ = sum; 696 } 697 for(i=ncoef; i-- > 0; ) { /* pad data end with zeros */ 698 for(j=k, buft=mem, bufp=co, bufp2=mem+1, sum = 0; j-- > 0; 699 *buft++ = *bufp2++ ) 700 sum += (((*bufp++ * *buft) + l) >> m); 701 *--buft = 0; 702 *bufo++ = sum; 703 } 704 } 705 706 /* ******************************************************************** */ 707 708 int get_abs_maximum(d,n) 709 register short *d; 710 register int n; 711 { 712 register int i; 713 register short amax, t; 714 715 if((t = *d++) >= 0) amax = t; 716 else amax = -t; 717 718 for(i = n-1; i-- > 0; ) { 719 if((t = *d++) > amax) amax = t; 720 else { 721 if(-t > amax) amax = -t; 722 } 723 } 724 return((int)amax); 725 } 726 727 /* ******************************************************************** */ 728 729 int dwnsamp(buf,in_samps,buf2,out_samps,insert,decimate,ncoef,ic,smin,smax) 730 short *buf, **buf2; 731 int in_samps, *out_samps, insert, decimate, ncoef, *smin, *smax; 732 short ic[]; 733 { 734 register short *bufp, *bufp2; 735 short *buft; 736 register int i, j, k, l, m; 737 int imax, imin; 738 739 if(!(*buf2 = buft = (short*)ckalloc(sizeof(short)*insert*in_samps))) { 740 perror("ckalloc() in dwnsamp()"); 741 return(FALSE); 742 } 743 744 k = imax = get_abs_maximum(buf,in_samps); 745 if (k == 0) k = 1; 746 if(insert > 1) k = (32767 * 32767)/k; /* prepare to scale data */ 747 else k = (16384 * 32767)/k; 748 l = 16384; 749 m = 15; 750 751 752 /* Insert zero samples to boost the sampling frequency and scale the 753 signal to maintain maximum precision. */ 754 for(i=0, bufp=buft, bufp2=buf; i < in_samps; i++) { 755 *bufp++ = ((k * (*bufp2++)) + l) >> m ; 756 for(j=1; j < insert; j++) *bufp++ = 0; 757 } 758 759 do_fir(buft,in_samps*insert,buft,ncoef,ic,0); 760 761 /* Finally, decimate and return the downsampled signal. */ 762 *out_samps = j = (in_samps * insert)/decimate; 763 k = decimate; 764 for(i=0, bufp=buft, imax = imin = *bufp; i < j; bufp += k,i++) { 765 *buft++ = *bufp; 766 if(imax < *bufp) imax = *bufp; 767 else 768 if(imin > *bufp) imin = *bufp; 769 } 770 *smin = imin; 771 *smax = imax; 772 *buf2 = (short*)ckrealloc((void *) *buf2,sizeof(short) * (*out_samps)); 773 return(TRUE); 774 } 775 776 /* ---------------------------------------------------------- */ 777 778 int ratprx(a,k,l,qlim) 779 double a; 780 int *l, *k, qlim; 781 { 782 double aa, af, q, em, qq = 0, pp = 0, ps, e; 783 int ai, ip, i; 784 785 aa = fabs(a); 786 ai = (int) aa; 787 /* af = fmod(aa,1.0); */ 788 i = (int) aa; 789 af = aa - i; 790 q = 0; 791 em = 1.0; 792 while(++q <= qlim) { 793 ps = q * af; 794 ip = (int) (ps + 0.5); 795 e = fabs((ps - (double)ip)/q); 796 if(e < em) { 797 em = e; 798 pp = ip; 799 qq = q; 800 } 801 }; 802 *k = (int) ((ai * qq) + pp); 803 *k = (a > 0)? *k : -(*k); 804 *l = (int) qq; 805 return(TRUE); 806 } 807 808 /* ----------------------------------------------------------------------- */ 809 810 Sound *Fdownsample(s,freq2,start,end) 811 double freq2; 812 Sound *s; 813 int start; 814 int end; 815 { 816 short *bufin, **bufout; 817 static double beta = 0.0, b[256]; 818 double ratio_t, maxi, ratio, beta_new, freq1; 819 static int ncoeff = 127, ncoefft = 0, nbits = 15; 820 static short ic[256]; 821 int insert, decimate, out_samps, smin, smax; 822 Sound *so; 823 824 register int i, j; 825 826 freq1 = s->samprate; 827 828 if((bufout = (short**)ckalloc(sizeof(short*)))) { 829 bufin = (short *) ckalloc(sizeof(short) * (end - start + 1)); 830 for (i = start; i <= end; i++) { 831 bufin[i-start] = (short) Snack_GetSample(s, 0, i); 832 } 833 834 ratio = freq2/freq1; 835 ratprx(ratio,&insert,&decimate,10); 836 ratio_t = ((double)insert)/((double)decimate); 837 838 if(ratio_t > .99) return(s); 839 840 freq2 = ratio_t * freq1; 841 beta_new = (.5 * freq2)/(insert * freq1); 842 843 if(beta != beta_new){ 844 beta = beta_new; 845 if( !lc_lin_fir(beta,&ncoeff,b)) { 846 printf("\nProblems computing interpolation filter\n"); 847 return(FALSE); 848 } 849 maxi = (1 << nbits) - 1; 850 j = (ncoeff/2) + 1; 851 for(ncoefft = 0, i=0; i < j; i++){ 852 ic[i] = (int) (0.5 + (maxi * b[i])); 853 if(ic[i]) ncoefft = i+1; 854 } 855 } /* endif new coefficients need to be computed */ 856 857 if(dwnsamp(bufin,end-start+1,bufout,&out_samps,insert,decimate,ncoefft,ic, 858 &smin,&smax)){ 859 /* so->buff_size = so->file_size = out_samps;*/ 860 so = Snack_NewSound(0, LIN16, s->nchannels); 861 Snack_ResizeSoundStorage(so, out_samps); 862 for (i = 0; i < out_samps; i++) { 863 Snack_SetSample(so, 0, i, (float)(*bufout)[i]); 864 } 865 so->length = out_samps; 866 so->samprate = (int)freq2; 867 ckfree((void *)*bufout); 868 ckfree((void *)bufout); 869 ckfree((void *)bufin); 870 return(so); 871 } else 872 printf("Problems in dwnsamp() in downsample()\n"); 873 } else 874 printf("Can't create a new Signal in downsample()\n"); 875 876 return(NULL); 877 } 878 879 /* ---------------------------------------------------------- */ 880 881 Sound 882 *highpass(s) 883 Sound *s; 884 { 885 886 short *datain, *dataout; 887 static short *lcf; 888 static int len = 0; 889 double scale, fn; 890 register int i; 891 Sound *so; 892 893 /* Header *h, *dup_header();*/ 894 895 #define LCSIZ 101 896 /* This assumes the sampling frequency is 10kHz and that the FIR 897 is a Hanning function of (LCSIZ/10)ms duration. */ 898 899 datain = (short *) ckalloc(sizeof(short) * s->length); 900 dataout = (short *) ckalloc(sizeof(short) * s->length); 901 for (i = 0; i < Snack_GetLength(s); i++) { 902 datain[i] = (short) Snack_GetSample(s, 0, i); 903 } 904 905 if(!len) { /* need to create a Hanning FIR? */ 906 lcf = (short*)ckalloc(sizeof(short) * LCSIZ); 907 len = 1 + (LCSIZ/2); 908 fn = PI * 2.0 / (LCSIZ - 1); 909 scale = 32767.0/(.5 * LCSIZ); 910 for(i=0; i < len; i++) 911 lcf[i] = (short) (scale * (.5 + (.4 * cos(fn * ((double)i))))); 912 } 913 do_fir(datain,s->length,dataout,len,lcf,1); /* in downsample.c */ 914 so = Snack_NewSound(s->samprate, LIN16, s->nchannels); 915 if (so == NULL) return(NULL); 916 Snack_ResizeSoundStorage(so, s->length); 917 for (i = 0; i < s->length; i++) { 918 Snack_SetSample(so, 0, i, (float)dataout[i]); 919 } 920 so->length = s->length; 921 ckfree((void *)dataout); 922 ckfree((void *)datain); 923 return(so); 924 } 925 926 int 927 formantCmd(Sound *s, Tcl_Interp *interp, int objc, 928 Tcl_Obj *CONST objv[]) 929 { 930 int nform, i,j, lpc_ord, lpc_type, w_type; 931 char *w_type_str = NULL; 932 double frame_int, wdur, 933 ds_freq, nom_f1 = -10.0, preemp; 934 double cor_wdur; 935 Sound *dssnd = NULL, *hpsnd = NULL, *polesnd = NULL; 936 Sound *formantsnd = NULL, *hpsrcsnd, *polesrcsnd; 937 Tcl_Obj *list; 938 int arg, startpos = 0, endpos = -1; 939 static CONST84 char *subOptionStrings[] = { 940 "-start", "-end", "-progress", 941 "-framelength", "-preemphasisfactor", "-numformants", 942 "-lpcorder", "-windowlength", "-windowtype", "-lpctype", 943 "-ds_freq", "-nom_f1_freq", NULL 944 }; 945 enum subOptions { 946 START, END, PROGRESS, FRAME, PREEMP, NUMFORM, ORDER, WINLEN, 947 WINTYPE, LPCTYPE, DSFREQ, NOMFREQ 948 }; 949 950 lpc_ord = 12; 951 lpc_type = 0; /* use bsa's stabilized covariance if != 0 */ 952 w_type = 2; /* window type: 0=rectangular; 1=Hamming; 2=cos**4 */ 953 ds_freq = 10000.0; 954 wdur = .049; /* for LPC analysis */ 955 cor_wdur = .01; /* for crosscorrelation F0 estimator */ 956 frame_int = .01; 957 preemp = .7; 958 nform = 4; 959 960 for (arg = 2; arg < objc; arg += 2) { 961 int index; 962 963 if (Tcl_GetIndexFromObj(interp, objv[arg], subOptionStrings, 964 "option", 0, &index) != TCL_OK) { 965 return TCL_ERROR; 966 } 967 968 if (arg + 1 == objc) { 969 Tcl_AppendResult(interp, "No argument given for ", 970 subOptionStrings[index], " option", (char *) NULL); 971 return TCL_ERROR; 972 } 973 974 switch ((enum subOptions) index) { 975 case START: 976 { 977 if (Tcl_GetIntFromObj(interp, objv[arg+1], &startpos) != TCL_OK) 978 return TCL_ERROR; 979 break; 980 } 981 case END: 982 { 983 if (Tcl_GetIntFromObj(interp, objv[arg+1], &endpos) != TCL_OK) 984 return TCL_ERROR; 985 break; 986 } 987 case PROGRESS: 988 { 989 char *str = Tcl_GetStringFromObj(objv[arg+1], NULL); 990 991 if (strlen(str) > 0) { 992 Tcl_IncrRefCount(objv[arg+1]); 993 s->cmdPtr = objv[arg+1]; 994 } 995 break; 996 } 997 case FRAME: 998 { 999 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &frame_int) 1000 != TCL_OK) 1001 return TCL_ERROR; 1002 break; 1003 } 1004 case PREEMP: 1005 { 1006 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &preemp) 1007 != TCL_OK) 1008 return TCL_ERROR; 1009 break; 1010 } 1011 case NUMFORM: 1012 { 1013 if (Tcl_GetIntFromObj(interp, objv[arg+1], &nform) != TCL_OK) 1014 return TCL_ERROR; 1015 break; 1016 } 1017 case ORDER: 1018 { 1019 if (Tcl_GetIntFromObj(interp, objv[arg+1], &lpc_ord) != TCL_OK) 1020 return TCL_ERROR; 1021 break; 1022 } 1023 case WINLEN: 1024 { 1025 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &wdur) 1026 != TCL_OK) 1027 return TCL_ERROR; 1028 break; 1029 } 1030 case WINTYPE: 1031 { 1032 w_type_str = Tcl_GetStringFromObj(objv[arg+1], NULL); 1033 break; 1034 } 1035 case LPCTYPE: 1036 { 1037 if (Tcl_GetIntFromObj(interp, objv[arg+1], &lpc_type) != TCL_OK) 1038 return TCL_ERROR; 1039 break; 1040 } 1041 case DSFREQ: 1042 { 1043 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &ds_freq) 1044 != TCL_OK) 1045 return TCL_ERROR; 1046 break; 1047 } 1048 case NOMFREQ: 1049 { 1050 if (Tcl_GetDoubleFromObj(interp, objv[arg+1], &nom_f1) 1051 != TCL_OK) 1052 return TCL_ERROR; 1053 break; 1054 } 1055 } 1056 } 1057 if (startpos < 0) startpos = 0; 1058 if (endpos >= (s->length - 1) || endpos == -1) 1059 endpos = s->length - 1; 1060 if (startpos > endpos) return TCL_OK; 1061 1062 /* 1063 * Check for errors in specifying parameters 1064 */ 1065 1066 if(nform > (lpc_ord-4)/2){ 1067 Tcl_AppendResult(interp, "Number of formants must be <= (lpc order - 4)/2", NULL); 1068 return TCL_ERROR; 1069 } 1070 1071 if(nform > MAXFORMANTS){ 1072 Tcl_AppendResult(interp, "A maximum of 7 formants are supported at this time", NULL); 1073 return TCL_ERROR; 1074 } 1075 1076 if (s->storeType != SOUND_IN_MEMORY ) { 1077 Tcl_AppendResult(interp, "formant only works with in-memory sounds", 1078 (char *) NULL); 1079 return TCL_ERROR; 1080 } 1081 1082 if (w_type_str != NULL) { 1083 int len = strlen(w_type_str); 1084 if (strncasecmp(w_type_str, "rectangular", len) == 0 || 1085 strncasecmp(w_type_str, "0", len) == 0) { 1086 w_type = 0; 1087 } else if (strncasecmp(w_type_str, "hamming", len) == 0 || 1088 strncasecmp(w_type_str, "1", len) == 0) { 1089 w_type = 1; 1090 } else if (strncasecmp(w_type_str, "cos^4", len) == 0 || 1091 strncasecmp(w_type_str, "2", len) == 0) { 1092 w_type = 2; 1093 } else if (strncasecmp(w_type_str, "hanning", len) == 0 || 1094 strncasecmp(w_type_str, "3", len) == 0) { 1095 w_type = 3; 1096 } else { 1097 Tcl_AppendResult(interp, "unknown window type: ", w_type_str, 1098 (char *) NULL); 1099 return TCL_ERROR; 1100 } 1101 } 1102 1103 Snack_ProgressCallback(s->cmdPtr, interp,"Computing formants",0.05); 1104 1105 if(ds_freq < s->samprate) { 1106 dssnd = Fdownsample(s,ds_freq, startpos, endpos); 1107 } 1108 1109 Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants", 1110 0.5); 1111 1112 hpsrcsnd = (dssnd ? dssnd : s); 1113 1114 if (preemp < 1.0) { /* be sure DC and rumble are gone! */ 1115 hpsnd = highpass(hpsrcsnd); 1116 } 1117 1118 Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants", 1119 0.6); 1120 1121 polesrcsnd = (hpsnd ? hpsnd : s); 1122 1123 if(!(polesnd = lpc_poles(polesrcsnd, wdur, frame_int, lpc_ord, 1124 preemp, lpc_type, w_type))) { 1125 Tcl_AppendResult(interp, "Problems in lpc_poles()", NULL); 1126 return TCL_ERROR; 1127 } 1128 1129 Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants", 1130 0.7); 1131 1132 /* LPC poles are now available for the formant estimator. */ 1133 if (!(formantsnd = dpform(polesnd, nform, nom_f1))) { 1134 Tcl_AppendResult(interp, "Problems in dpform()", NULL); 1135 return TCL_ERROR; 1136 } 1137 1138 Snack_ProgressCallback(s->cmdPtr, interp, "Computing formants", 1139 0.95); 1140 1141 /* 1142 SaveSound(formantsnd, interp, "outt.wav", NULL, 1143 0, NULL, 0, formantsnd->length, WAV_STRING); 1144 */ 1145 1146 if (dssnd) Snack_DeleteSound(dssnd); 1147 if (hpsnd) Snack_DeleteSound(hpsnd); 1148 Snack_DeleteSound(polesnd); 1149 1150 list = Tcl_NewListObj(0, NULL); 1151 1152 for (i = 0; i < formantsnd->length; i++) { 1153 Tcl_Obj *frameList; 1154 frameList = Tcl_NewListObj(0, NULL); 1155 Tcl_ListObjAppendElement(interp, list, frameList); 1156 for (j = 0; j < nform * 2; j++) { 1157 Tcl_ListObjAppendElement(interp, frameList, 1158 Tcl_NewDoubleObj((double) Snack_GetSample(formantsnd, j, i))); 1159 } 1160 } 1161 1162 Snack_DeleteSound(formantsnd); 1163 1164 Snack_ProgressCallback(s->cmdPtr, interp,"Computing formants",1.0); 1165 1166 Tcl_SetObjResult(interp, list); 1167 1168 return TCL_OK; 1169 } 1170