1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author :  Alistair Conkie                                 */
34 /*             Date   :  August 1996                                     */
35 /*-----------------------------------------------------------------------*/
36 /*                                                                       */
37 /*  The groupfile stuff is all awb's fault.                              */
38 /*                                                                       */
39 /*************************************************************************/
40 #include <cstdio>
41 #include "EST_unix.h"
42 #include <cstdlib>
43 #include "festival.h"
44 #include "diphone.h"
45 
/* Magic number expected in the first word of a group file; its bytes   */
/* spell "FTDI".  di_load_grouped_db() also accepts the byte-swapped    */
/* value, in which case it sets db->swap.                               */
static unsigned int DIPHONE_MAGIC=0x46544449;  /* FTDI */

/* Prototypes for file-local functions defined later in this file */
static void load_index(DIPHONE_DATABASE *database);
static void load_diphs(DIPHONE_DATABASE *database);
static void load_lpc_file(DIPHONE_DATABASE *db,int diph,int mode);
static void lpc2ref(const float *lpc, float *rfc, int order);
static void extract_lpc_frames(DIPHONE_DATABASE *db, int diph, EST_Track &lpc);
static void load_signal_file(DIPHONE_DATABASE *db, int i, int mode);
static void database_malloc(int ndiphs, DIPHONE_DATABASE *database);
static void di_group_load_signal(DIPHONE_DATABASE *db);
static void di_group_load_lpc_params(DIPHONE_DATABASE *db);
static void di_group_load_pm(DIPHONE_DATABASE *db);
58 
di_load_database(DIPHONE_DATABASE * database)59 void di_load_database(DIPHONE_DATABASE *database)
60 {
61     // Load the ungrouped form
62     database_malloc(database->ndiphs,database);
63 
64     load_index(database);
65     load_diphs(database);
66 
67 }
68 
database_malloc(int ndiphs,DIPHONE_DATABASE * database)69 static void database_malloc(int ndiphs, DIPHONE_DATABASE *database)
70 {
71     // So why am I not using all those cute C++ classes ?
72     // well I suppose I just don't know enough about binary loading
73     // and saving to trust them, but that's a poor excuse.
74     int i;
75 
76     database->nindex = 0;
77     database->zone = 0;
78 
79     database->indx = walloc(DI_INDEX *,ndiphs);
80     database->vox = walloc(DI_VOX *,ndiphs);
81     database->pm = walloc(DI_PM *,ndiphs);
82     database->lpc = walloc(DI_LPC *,ndiphs);
83 
84     for(i=0;i<ndiphs;i++)
85     {
86 	database->indx[i] = walloc(DI_INDEX,1);
87 	database->vox[i] = walloc(DI_VOX,1);
88 	database->vox[i]->signal = 0;
89 	database->pm[i] = walloc(DI_PM,1);
90 	database->pm[i]->mark = 0;
91 	database->lpc[i] = walloc(DI_LPC,1);
92 	database->lpc[i]->f = 0;
93     }
94 
95 }
96 
load_index(DIPHONE_DATABASE * database)97 static void load_index(DIPHONE_DATABASE *database)
98 {
99     EST_TokenStream ts;
100     int i;
101     EST_String line;
102 
103     if (ts.open(database->index_file) == -1)
104     {
105 	cerr << "Diphone: Can't open file " << database->index_file << endl;
106 	festival_error();
107     }
108 
109     for (i=0; (!ts.eof()) && (i<database->ndiphs);)
110     {
111 	line = ts.get_upto_eoln();
112 	if ((line.length() > 0) && (line[0] != ';'))
113 	{
114 	    EST_TokenStream ls;
115 	    ls.open_string(line);
116 	    database->indx[i]->diph = wstrdup(ls.get().string());
117 	    database->indx[i]->file = wstrdup(ls.get().string());
118 	    database->indx[i]->beg = atof(ls.get().string());
119 	    database->indx[i]->mid = atof(ls.get().string());
120 	    database->indx[i]->end = atof(ls.get().string());
121 	    ls.close();
122 	    i++;
123 	}
124     }
125 
126     if (i == database->ndiphs)
127     {
128 	cerr << "Diphone: too many diphones in DB" << endl;
129 	festival_error();
130     }
131 
132     database->nindex = i;
133     database->ndiphs = i;
134 
135     ts.close();
136 }
137 
load_diphs(DIPHONE_DATABASE * database)138 static void load_diphs(DIPHONE_DATABASE *database)
139 {
140     int i;
141 
142     for(i=0;i<database->nindex;i++)
143     {
144 	load_signal_file(database,i,database->sig_access_type);
145 //	if (database->type = di_lpc)
146 //	    load_lpc_file(database,i,database->sig_access_type);
147 	load_pitch_file(database,i,database->sig_access_type);
148     }
149 }
150 
load_lpc_file(DIPHONE_DATABASE * db,int diph,int mode)151 static void load_lpc_file(DIPHONE_DATABASE *db,int diph,int mode)
152 {
153     // Load LPC coefficients
154     EST_String lpc_file;
155     EST_Track lpc;
156 
157     if (db->lpc[diph]->f != 0)
158 	return;   // already loaded
159 
160     if (mode == di_direct)
161     {
162 	lpc_file = EST_String(db->lpc_dir) +
163 	    db->indx[diph]->file + db->lpc_ext;
164 
165 	if (lpc.load(lpc_file) != format_ok)
166 	{
167 	    cerr << "Diphone: failed to read lpc file " <<
168 		lpc_file << endl;
169 	    festival_error();
170 	}
171 	if (lpc.num_channels() != db->lpc_order)
172 	{
173 	    cerr << "Diphone: lpc file " <<
174 		lpc_file << " has order " << lpc.num_channels() <<
175 		    " while database has " << db->lpc_order << endl;
176 	    festival_error();
177 	}
178 	// Extract frames (pitch synchronously)
179 	extract_lpc_frames(db,diph,lpc);
180     }
181 
182     return;
183 }
184 
// Convert `order` reflection coefficients in rfc[] into LPC coefficients
// in lpc[].
//
// This is awb's equivalent of Christopher Longuet-Higgins' algorithm; it
// needs neither the reversed ordering nor the negation of the original.
// Stage n copies rfc[n] into lpc[n] and then updates the coefficients
// below it in mirrored pairs (k, n-1-k), working inwards.
static void ref2lpc(const float *rfc, float *lpc, int order)
{
    int stage = 0;

    while (stage < order)
    {
	// pairs to update: same bound as 2*(k+1) <= stage+1
	const int pairs = (stage + 1) / 2;
	int front;

	lpc[stage] = rfc[stage];
	for (front = 0; front < pairs; front++)
	{
	    const int back = stage - (front + 1);
	    const float lo = lpc[front];
	    const float hi = lpc[back];

	    lpc[front] = lo - hi * lpc[stage];
	    lpc[back] = hi - lo * lpc[stage];
	}
	stage++;
    }
}
205 
extract_lpc_frames(DIPHONE_DATABASE * db,int diph,EST_Track & lpc)206 static void extract_lpc_frames(DIPHONE_DATABASE *db, int diph, EST_Track &lpc)
207 {
208     // Extract LPC frames from lpc, one for each pitch mark
209     int frame_num;
210     float pos,factor;
211     float ps_pos;
212     int i,j,k;
213 
214     db->lpc[diph]->f = walloc(float *,db->pm[diph]->nmark);
215     float *lpcs = walloc(float,lpc.num_channels());
216     for (i=0; i < db->pm[diph]->nmark; i++)
217     {
218 	if (db->lpc_pitch_synch)
219 	{
220 	    db->lpc[diph]->f[i] = walloc(float,lpc.num_channels());
221 	    pos = (((float)db->pm[diph]->mark[i]-db->sig_band)/
222 		   (float)db->samp_freq)
223 		+ (db->indx[diph]->beg/1000.0);
224 	    for (j=1,ps_pos=0; j<lpc.num_frames(); j++)
225 	    {
226 		ps_pos = lpc.t(j);
227 		if (fabs(pos-ps_pos) < 0.003)  // within 3 ms
228 		{
229 		    for (k=0; k < lpc.num_channels(); k++)
230 			lpcs[k] = lpc(j,k);
231 		    // need to get reflection coefficients
232 		    db->lpc[diph]->f[i][0] = lpcs[0];
233 		    lpc2ref(&lpcs[1],&db->lpc[diph]->f[i][1],
234 			    lpc.num_channels()-1);
235 		    break;
236 		}
237 	    }
238 	    if (j==lpc.num_frames())
239 	    {
240 		cerr << "Diphone: lpc access, failed to find lpc coeffs"
241 		    << endl;
242 		festival_error();
243 	    }
244 	}
245 	else
246 	{   // Not pitch synchronous so find closest frames and
247 	    // interpolate between them
248 	    db->lpc[diph]->f[i] = walloc(float,lpc.num_channels());
249 	    // position of current mark in seconds
250 	    pos = (((float)db->pm[diph]->mark[i]-db->sig_band)/
251 		   (float)db->samp_freq)
252 		+ (db->indx[diph]->beg/1000.0);
253 	    // Convert to frames, rounding and subtracting start offset
254 	    frame_num =  (int)((pos/db->lpc_frame_shift))
255 		           - db->lpc_frame_offset;
256 	    if (frame_num+1 < lpc.num_frames())
257 	    {   //  Interpolate between them
258 		factor = (pos - ((1+frame_num)*db->lpc_frame_shift))/
259 		    db->lpc_frame_shift;
260 		for (j=0; j < lpc.num_channels(); j++)
261 		{
262 		    db->lpc[diph]->f[i][j] =
263 			lpc(frame_num,j) +
264 			    (factor * (lpc(frame_num+1,j)-
265 				       lpc(frame_num,j)));
266 		}
267 	    }
268 	    if (frame_num >= lpc.num_frames())
269 	    {
270 		cerr << "Diphone: LPC frame past end of file \"" <<
271 		    db->indx[diph]->file << "\"" << endl;
272 		memset(db->lpc[diph]->f[i],0,sizeof(float)*lpc.num_channels());
273 	    }
274 	    else  // Last one so just take it as is
275 	      {
276 		lpc.copy_frame_out(frame_num, db->lpc[diph]->f[i],
277 				   0, lpc.num_channels());
278 	      }
279 	}
280     }
281     db->lpc[diph]->nframes = db->pm[diph]->nmark;
282     wfree(lpcs);
283 //    db->lpc_order = lpc.num_channels();
284 
285 }
286 
// Convert `order` LPC coefficients in lpc[] into reflection coefficients
// in rfc[] (step-down recursion, after code from Borja Etxebarria).
//
// lpc and rfc must each hold `order` floats; nothing is done when `order`
// is not positive.  The highest coefficient is its own reflection
// coefficient.  Each pass then reduces the predictor order by one, and
// the last coefficient of the reduced set is the next reflection
// coefficient.
//
// The reduction is done in place on rfc[], updating mirrored pairs
// (j, i-j) together so both old values are read before either is
// overwritten.  The earlier version ping-ponged between rfc[] and a heap
// buffer and released whichever pointer the swapping left in `vn`; after
// an odd number of passes that pointer was the caller's rfc[], so the
// caller's memory was freed and the scratch buffer leaked.
static void lpc2ref(const float *lpc, float *rfc, int order)
{
    int i,lo,hi;
    float f,ai;
    float a,b;

    if (order <= 0)
	return;    // nothing to convert (and rfc[order-1] would be invalid)

    i = order-1;
    rfc[i] = ai = lpc[i];
    f = 1-ai*ai;
    i--;

    // first reduction reads from lpc[] so the input is left untouched
    for (lo=0; lo<=i; lo++)
	rfc[lo] = (lpc[lo]+((ai*lpc[i-lo])))/f;

    while (i > 0)
    {
	ai = rfc[i];      // already in its final place
	f = 1-ai*ai;
	i--;
	for (lo=0,hi=i; lo<=hi; lo++,hi--)
	{
	    a = rfc[lo];
	    b = rfc[hi];
	    rfc[lo] = (a+((ai*b)))/f;
	    rfc[hi] = (b+((ai*a)))/f;
	}
    }
}
324 
load_signal_file(DIPHONE_DATABASE * db,int i,int mode)325 static void load_signal_file(DIPHONE_DATABASE *db, int i, int mode)
326 {
327     //  Load signal (or lpc residual) file)
328     int beg_samp,end_samp,zone,nsamples;
329     EST_String signal_file;
330     EST_String sig_type;
331     int offset,error;
332     beg_samp = 0;
333     zone = 0;
334     nsamples = 0;
335 
336     if (db->gtype == di_ungrouped)
337     {
338 	beg_samp = (int)((db->indx[i]->beg)/1000.0*db->samp_freq);
339 	end_samp = (int)((db->indx[i]->end)/1000.0*db->samp_freq);
340 
341 	nsamples = end_samp - beg_samp;
342 
343 	zone = db->sig_band;
344 	db->zone = zone;
345 	db->vox[i]->nsamples = nsamples+(2*zone);
346 	db->vox[i]->signal = 0;
347     }
348 
349     if (mode == di_direct)
350     {
351 	if (db->gtype == di_ungrouped)
352 	{
353 	    EST_Wave w;
354 	    if (db->type == di_lpc)
355 	    {
356 		signal_file = EST_String(db->lpc_dir) +
357 		    EST_String(db->indx[i]->file) +
358 			EST_String(db->lpc_res_ext);
359 		sig_type = db->lpc_res_type;
360 		// Different LPC techniques will leave various offsets
361 		// in the residule, you have to specify this explicitly
362 		beg_samp -= (int)(db->lpc_res_offset * db->samp_freq);
363 	    }
364 	    else
365 	    {
366 		signal_file = EST_String(db->signal_dir) +
367 		    EST_String(db->indx[i]->file) +
368 			EST_String(db->signal_ext);
369 		sig_type = db->signal_type;
370 	    }
371 	    offset = beg_samp-zone;
372 	    if (offset < 0)
373 		offset = 0;
374 	    if (w.load_file(signal_file,sig_type,
375 			    db->samp_freq, "short", EST_NATIVE_BO,
376 			    1, offset, nsamples+2*zone) != format_ok)
377 	    {
378 		cerr << "Diphone: failed to read " << sig_type
379 		    << " format signal file " << signal_file << endl;
380 		festival_error();
381 	    }
382 	    db->vox[i]->signal = walloc(short,w.num_samples());
383 	    if (beg_samp-zone < 0)   // wasn't enough space at beginning
384 		error = abs(beg_samp-zone);
385 	    else
386 		error = 0;
387 	    memset(db->vox[i]->signal,0,error*sizeof(short));
388 	    for (int j=0; j < w.num_samples()-error; j++)
389 		db->vox[i]->signal[error+j] = w(j);
390 	    db->vox[i]->nsamples = w.num_samples()-error;
391 	}
392 	else //  grouped so have to access the group file
393 	{
394 	    if (db->gfd == NULL)
395 	    {
396 		cerr << "Diphone: can no longer access the group file" << endl;
397 		festival_error();
398 	    }
399 	    if (db->group_encoding == di_raw)
400 	    {
401 		db->vox[i]->signal = walloc(short,db->vox[i]->nsamples);
402 		fseek(db->gfd,db->gsignalbase+(db->offsets[i]*2),SEEK_SET);
403 		fread(db->vox[i]->signal,sizeof(short),
404 		      db->vox[i]->nsamples,db->gfd);
405 		if (db->swap)
406 		    swap_bytes_short(db->vox[i]->signal,db->vox[i]->nsamples);
407 	    }
408 	    else if (db->group_encoding == di_ulaw)
409 	    {
410 		unsigned char *ulaw =
411 		    walloc(unsigned char,db->vox[i]->nsamples);
412 		db->vox[i]->signal = walloc(short,db->vox[i]->nsamples);
413 		fseek(db->gfd,db->gsignalbase+(db->offsets[i]),SEEK_SET);
414 		fread(ulaw,sizeof(unsigned char),db->vox[i]->nsamples,db->gfd);
415 		ulaw_to_short(ulaw,db->vox[i]->signal,db->vox[i]->nsamples);
416 		wfree(ulaw);
417 	    }
418 	    else if (db->group_encoding == di_alaw)
419 	    {
420 		unsigned char *alaw =
421 		    walloc(unsigned char,db->vox[i]->nsamples);
422 		db->vox[i]->signal = walloc(short,db->vox[i]->nsamples);
423 		fseek(db->gfd,db->gsignalbase+(db->offsets[i]),SEEK_SET);
424 		fread(alaw,sizeof(unsigned char),db->vox[i]->nsamples,db->gfd);
425 		alaw_to_short(alaw,db->vox[i]->signal,db->vox[i]->nsamples);
426 		wfree(alaw);
427 	    }
428 	    else
429 	    {
430 		cerr << "Diphone: unknown group type" << endl;
431 		festival_error();
432 	    }
433 	}
434     }
435 
436 }
437 
load_pitch_file(DIPHONE_DATABASE * database,int i,int mode)438 void load_pitch_file(DIPHONE_DATABASE *database, int i, int mode)
439 {
440     // load files from newer Track format
441     int mark[5000];
442     EST_String pitch_file;
443     EST_Track pms;
444     float fnum;
445     int k,k1,k2,m,zone,beg_samp,p;
446 
447     if ((database->pm[i]->mark != 0) ||
448 	(mode != di_direct))
449 	return;
450 
451     pitch_file = EST_String(database->pitch_dir)+database->indx[i]->file+
452 	database->pitch_ext;
453     if (pms.load(pitch_file) != format_ok)
454     {
455 	cerr << "Diphone: Can't open pitch file " << pitch_file << endl;
456 	festival_error();
457     }
458     /* assumptions.. only those within the limits of the diphone  */
459 
460     beg_samp = (int)((database->indx[i]->beg)/1000.0*database->samp_freq);
461 
462     zone = database->sig_band;
463 
464     k = 0;
465     k1 = 0;
466     k2 = 0;
467     for (p=0; p < pms.num_frames(); p++)
468     {
469 	fnum = pms.t(p)*1000.0;
470 	if((fnum>database->indx[i]->beg) && (fnum<database->indx[i]->mid))
471 	{
472 	    mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
473 	    if ((mark[k] >= database->vox[i]->nsamples+zone) ||
474 		(mark[k] > 64534))
475 	    {
476 		fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
477 			(const char *)pitch_file);
478 		k--; k1--;
479 	    }
480 	    if(mark[k] < zone)
481 	    {
482 		fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
483 			(const char *)pitch_file);
484 		k--; k1--;
485 	    }
486 	    k++;
487 	    k1++;
488 	}
489 	else if((fnum>=database->indx[i]->mid) &&
490 		(fnum<database->indx[i]->end))
491 	{
492 	    mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
493 	    if ((mark[k] >= database->vox[i]->nsamples+zone) ||
494 		(mark[k] > 64534))
495 	    {
496 		fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
497 			(const char *)pitch_file);
498 		k--; k2--;
499 	    }
500 	    if(mark[k] < zone)
501 	    {
502 		fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
503 			(const char *)pitch_file);
504 		k--; k2--;
505 	    }
506 	    k++;
507 	    k2++;
508 	}
509     }
510     database->pm[i]->mark = walloc(unsigned short,k);
511     for(m=0;m<k;m++)
512 	database->pm[i]->mark[m] = (unsigned short)mark[m];
513 
514     database->pm[i]->nmark = (unsigned short)k;
515     database->pm[i]->lmark = (unsigned short)k1;
516     database->pm[i]->rmark = (unsigned short)k2;
517     if (database->pm[i]->rmark == 0)
518     {
519 	*cdebug << "Diphone: modifying edge pms for "
520 	    << database->indx[i]->diph << endl;
521 	database->pm[i]->rmark = 1;
522 	database->pm[i]->lmark -= 1;
523     }
524     if (database->pm[i]->nmark <= 0)
525     {
526 	cerr << "Diphone: diphone " << database->indx[i]->diph <<
527           " has 0 pitchmarks" << endl;
528 	festival_error();
529     }
530 
531 }
532 
/* Disabled (#if 0) variant of load_pitch_file().  It reads the pitch   */
/* file as text with fgets/sscanf, taking the first floating-point      */
/* value on each line, instead of loading it through EST_Track, and     */
/* compares that value directly with the index beg/mid/end values.      */
/* It is never compiled.                                                */
#if 0
void load_pitch_file(DIPHONE_DATABASE *database, int i, int mode)
{
    char s[100];
    int mark[5000];
    EST_String pitch_file;
    FILE *pfd;
    float fnum;
    int k,k1,k2,m,zone,beg_samp;

    if ((database->pm[i]->mark != 0) ||
	(mode != di_direct))
	return;

    pitch_file = EST_String(database->pitch_dir)+database->indx[i]->file+
	database->pitch_ext;
    if((pfd=fopen(pitch_file,"rb")) == NULL)
    {
	cerr << "Diphone: Can't open pitch file " << pitch_file << endl;
	festival_error();
    }
    /* assumptions.. only those within the limits of the diphone  */

    beg_samp = (int)((database->indx[i]->beg)/1000.0*database->samp_freq);

    zone = database->sig_band;

    k = 0;
    k1 = 0;
    k2 = 0;
    while(fgets(s,100,pfd) != NULL)
    {
	sscanf(s,"%f",&fnum);
	if((fnum>database->indx[i]->beg) && (fnum<database->indx[i]->mid))
	{
	    mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
	    if ((mark[k] >= database->vox[i]->nsamples+zone) ||
		(mark[k] > 64534))
	    {
		fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
			(const char *)pitch_file);
		k--; k1--;
	    }
	    if(mark[k] < zone)
	    {
		fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
			(const char *)pitch_file);
		k--; k1--;
	    }
	    k++;
	    k1++;
	}
	else if((fnum>=database->indx[i]->mid) &&
		(fnum<database->indx[i]->end))
	{
	    mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
	    if ((mark[k] >= database->vox[i]->nsamples+zone) ||
		(mark[k] > 64534))
	    {
		fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
			(const char *)pitch_file);
		k--; k2--;
	    }
	    if(mark[k] < zone)
	    {
		fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
			(const char *)pitch_file);
		k--; k2--;
	    }
	    k++;
	    k2++;
	}
    }
    database->pm[i]->mark = walloc(unsigned short,k);
    for(m=0;m<k;m++)
	database->pm[i]->mark[m] = (unsigned short)mark[m];

    database->pm[i]->nmark = (unsigned short)k;
    database->pm[i]->lmark = (unsigned short)k1;
    database->pm[i]->rmark = (unsigned short)k2;
    if (database->pm[i]->rmark == 0)
    {
	*cdebug << "Diphone: modifying edge pms for "
	    << database->indx[i]->diph << endl;
	database->pm[i]->rmark = 1;
	database->pm[i]->lmark -= 1;
    }
    if (database->pm[i]->nmark <= 0)
    {
	cerr << "Diphone: diphone " << database->indx[i]->diph <<
          " has 0 pitchmarks" << endl;
	festival_error();
    }

    fclose(pfd);
}
#endif
630 
631 /* Buffer to hold current diphone signal when using ondemand access   */
632 /* method.  It remembers the last phone accessed as typical access is */
633 /* the same one for a few times                                       */
634 static short *diph_buffer = 0;
635 static int diph_max_size = 0;
636 static int last_diph = -1;
637 static DIPHONE_DATABASE *last_db = 0;
638 
di_get_diph_signal(int diph,DIPHONE_DATABASE * db)639 short *di_get_diph_signal(int diph,DIPHONE_DATABASE *db)
640 {
641     // Get the diphone signal (or residual) from wherever
642 
643     if (db->sig_access_type == di_direct)         // all pre-loaded
644 	return db->vox[diph]->signal;
645     else if (db->sig_access_type == di_dynamic)   // Load and keep
646     {
647 	if (db->vox[diph]->signal == 0)
648 	    load_signal_file(db,diph,di_direct);
649 	return db->vox[diph]->signal;
650     }
651     else if (db->sig_access_type == di_ondemand)  // Load and free afterwards
652     {   // Loads it into a common buffer, over written each time
653 	if ((diph == last_diph) &&
654 	    (db == last_db))    // ensure db hasn't changed
655 	    return diph_buffer;
656 	load_signal_file(db,diph,di_direct);
657 	if (diph_max_size < db->vox[diph]->nsamples)
658 	{
659 	    wfree(diph_buffer);
660 	    diph_buffer = walloc(short,db->vox[diph]->nsamples);
661 	    diph_max_size = db->vox[diph]->nsamples;
662 	}
663 	memmove(diph_buffer,db->vox[diph]->signal,
664 		db->vox[diph]->nsamples*sizeof(short));
665 	wfree(db->vox[diph]->signal);
666 	db->vox[diph]->signal = 0;
667 	last_db = db; last_diph = diph;
668 	return diph_buffer;
669     }
670     else
671     {
672 	cerr << "Diphone: unknown diphone signal access strategy" << endl;
673 	festival_error();
674     }
675     return NULL;
676 }
677 
678 /* The buffer used to hold the requested frame */
679 static float frame_buff[128];
680 
di_get_diph_lpc_mark(int diph,int mark,DIPHONE_DATABASE * db)681 float *di_get_diph_lpc_mark(int diph,int mark,DIPHONE_DATABASE *db)
682 {
683     // Get the coeff frame for diph at mark
684 
685     load_lpc_file(db,diph,di_direct);
686 
687     memmove(frame_buff,
688 	    db->lpc[diph]->f[mark],
689 	    sizeof(float)*db->lpc_order);
690 
691     return frame_buff;
692 }
693 
di_get_diph_res_mark(int diph,int mark,int size,DIPHONE_DATABASE * db)694 short *di_get_diph_res_mark(int diph,int mark,int size,DIPHONE_DATABASE *db)
695 {
696     // Get the residual for diph at mark, use the signal field
697     // to hold it as they are so similar.
698     short *residual;
699 
700     residual = di_get_diph_signal(diph,db);
701 
702     // Take residual around this midpoint
703 
704     int pos_samp = db->pm[diph]->mark[mark] - size/2;
705 
706     if (pos_samp < 0)
707     {
708 	pos_samp = 0;
709 	*cdebug << "DIPHONE: sig_band too short to the left" << endl;
710     }
711     if (pos_samp+size >= db->vox[diph]->nsamples)
712     {
713 	pos_samp = db->vox[diph]->nsamples - size;
714 	*cdebug << "DIPHONE: sig_band too short to the right" << endl;
715     }
716 
717     return &residual[pos_samp];
718 }
719 
di_load_grouped_db(const EST_Pathname & filename,DIPHONE_DATABASE * db,LISP global_params)720 void di_load_grouped_db(const EST_Pathname &filename, DIPHONE_DATABASE *db,
721 			LISP global_params)
722 {
723     // Get index file and saved data from grouped file
724     int i,j;
725     unsigned int magic;
726     int strsize;
727     char *diphnames;
728     LISP params;
729 
730     if ((db->gfd=fopen(filename,"rb")) == NULL)
731     {
732 	cerr << "Diphone: cannot open group file " <<
733 	    filename << " for reading" << endl;
734 	festival_error();
735     }
736 
737     fread(&magic,sizeof(int),1,db->gfd);
738     if (magic == SWAPINT(DIPHONE_MAGIC))
739 	db->swap = TRUE;
740     else if (magic != DIPHONE_MAGIC)
741     {
742 	cerr << "Diphone: " << filename << " not a group file" << endl;
743 	festival_error();
744     }
745 
746     params = lreadf(db->gfd);  // read the parameters in LISP form
747 
748     di_general_parameters(db,params);  // some may be reset later
749     di_fixed_parameters(db,params);
750     di_general_parameters(db,global_params);  // reset some params
751 
752     database_malloc(db->ndiphs,db);
753     db->nindex = db->ndiphs;  // we can trust that number this time
754 
755     fread(&strsize,sizeof(int),1,db->gfd);  // number of chars in diph names
756     if (db->swap)
757 	strsize = SWAPINT(strsize);
758     diphnames = walloc(char,strsize);
759     fread(diphnames,sizeof(char),strsize,db->gfd);
760     for (j=i=0;i<db->nindex;i++)
761     {
762 	db->indx[i]->diph = &diphnames[j];
763 	db->indx[i]->file = 0;
764 	for ( ; diphnames[j] != '\0'; j++) // skip to next diphname
765 	    if (j > strsize)
766 	    {
767 		cerr << "Diphone: group file diphone name table corrupted"
768 		    << endl;
769 		festival_error();
770 	    }
771 	j++;
772     }
773 
774     // Diphone signals
775     di_group_load_signal(db);
776     // Diphone LPC parameters
777     if (db->type == di_lpc)
778 	di_group_load_lpc_params(db);
779     // Diphone Pitch marks
780     di_group_load_pm(db);
781 
782     if (db->sig_access_type == di_direct)
783     {
784 	fclose(db->gfd);  // read eveything
785 	db->gfd = 0;
786     }
787 
788 }
789 
di_group_load_signal(DIPHONE_DATABASE * db)790 static void di_group_load_signal(DIPHONE_DATABASE *db)
791 {
792     int i;
793     unsigned short *samp_counts;
794     int sample_offset,totsamples;
795 
796     samp_counts = walloc(unsigned short,db->nindex);
797     fread(samp_counts,sizeof(unsigned short),db->nindex,db->gfd);
798     if (db->swap) swap_bytes_ushort(samp_counts,db->nindex);
799     fread(&totsamples,sizeof(int),1,db->gfd);
800     if (db->swap)
801 	totsamples = SWAPINT(totsamples);
802     if (db->sig_access_type == di_direct)
803     {
804 	if (db->group_encoding == di_raw)
805 	{
806 	    db->allsignal = walloc(short,totsamples);
807 	    fread(db->allsignal,sizeof(short),totsamples,db->gfd);
808 	    if (db->swap)
809 		swap_bytes_short(db->allsignal,totsamples);
810 	}
811 	else if (db->group_encoding == di_ulaw)
812 	{
813 	    db->allualawsignal = walloc(unsigned char,totsamples);
814 	    fread(db->allualawsignal,sizeof(unsigned char),totsamples,db->gfd);
815 	}
816 	else if (db->group_encoding == di_alaw)
817 	{
818 	    db->allualawsignal = walloc(unsigned char,totsamples);
819 	    fread(db->allualawsignal,sizeof(unsigned char),totsamples,db->gfd);
820 	}
821     }
822     else
823     {
824 	db->gsignalbase = ftell(db->gfd);
825 	db->offsets = walloc(int,db->nindex);
826     }
827 
828     sample_offset = 0;
829     for (i=0; i < db->nindex; i++)
830     {
831 	db->vox[i]->nsamples = samp_counts[i];
832 	if (db->sig_access_type == di_direct)
833 	{
834 	    if (db->group_encoding == di_raw)
835 		db->vox[i]->signal = &db->allsignal[sample_offset];
836 	    else if (db->group_encoding == di_ulaw)
837 	    {
838 		db->vox[i]->signal = walloc(short,samp_counts[i]);
839 		ulaw_to_short(&db->allualawsignal[sample_offset],
840 			      db->vox[i]->signal,samp_counts[i]);
841 	    }
842 	    else if (db->group_encoding == di_alaw)
843 	    {
844 		db->vox[i]->signal = walloc(short,samp_counts[i]);
845 		alaw_to_short(&db->allualawsignal[sample_offset],
846 			      db->vox[i]->signal,samp_counts[i]);
847 	    }
848 	    else
849 	    {
850 		cerr << "Diphone: unknown group type to unpack" << endl;
851 		festival_error();
852 	    }
853 	}
854 	else
855 	{
856 	    db->offsets[i] = sample_offset;
857 	    db->vox[i]->signal = 0;
858 	}
859 	sample_offset += samp_counts[i];
860     }
861     if (db->sig_access_type != di_direct)
862 	if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw)
863 	    fseek(db->gfd,(long)sample_offset,SEEK_CUR);
864 	else
865 	    fseek(db->gfd,(long)sample_offset*sizeof(short),SEEK_CUR);
866     wfree(samp_counts);
867 }
868 
di_group_load_lpc_params(DIPHONE_DATABASE * db)869 static void di_group_load_lpc_params(DIPHONE_DATABASE *db)
870 {
871     // LPC params are always fully loaded
872     int totframes;
873     int i,j,k;
874     unsigned short *frame_counts;
875     int frame_offset;
876     int this_frame;
877 
878     frame_counts = walloc(unsigned short, db->nindex);
879     fread(frame_counts,sizeof(unsigned short),db->nindex,db->gfd);
880     if (db->swap) swap_bytes_ushort(frame_counts,db->nindex);
881     fread(&totframes,sizeof(int),1,db->gfd);
882     if (db->swap) totframes = SWAPINT(totframes);
883     if (db->group_encoding == di_raw) // its as floats
884     {
885 	db->allframes = walloc(float,totframes*db->lpc_order);
886 	fread(db->allframes,sizeof(float),
887 	      totframes*db->lpc_order,db->gfd);
888 	if (db->swap)
889 	    swap_bytes_float(db->allframes,totframes*db->lpc_order);
890     }
891     else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) // its as shorts
892     {
893 	db->allframesshort = walloc(short,totframes*db->lpc_order);
894 	fread(db->allframesshort,sizeof(short),
895 	      totframes*db->lpc_order,db->gfd);
896 	if (db->swap)
897 	    swap_bytes_short(db->allframesshort,
898 			     totframes*db->lpc_order);
899     }
900     frame_offset = 0;
901     for (i=0; i < db->nindex; i++)
902     {
903 	db->lpc[i]->nframes = frame_counts[i];
904 	db->lpc[i]->f = walloc(float *,frame_counts[i]);
905 	if (db->group_encoding == di_raw)
906 	    for (j=0;j<db->lpc[i]->nframes;j++)
907 		db->lpc[i]->f[j] =
908 		    &db->allframes[(frame_offset+j)*db->lpc_order];
909 	else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw)
910 	{
911 	    int fixedpoint = FALSE;
912 	    if (siod_get_lval("lpc_fixedpoint",NULL) != NIL)
913 		fixedpoint = TRUE;
914 	    for (j=0;j<db->lpc[i]->nframes;j++)
915 	    {
916 		db->lpc[i]->f[j] = walloc(float,db->lpc_order);
917 		this_frame = (frame_offset+j)*db->lpc_order;
918 		if (fixedpoint)
919 		    for (k=0;k<db->lpc_order;k++)
920 			db->lpc[i]->f[j][k] =
921 			    (float)db->allframesshort[this_frame+k];
922 		else
923 		    for (k=0;k<db->lpc_order;k++)
924 			db->lpc[i]->f[j][k] =
925 			    (float)db->allframesshort[this_frame+k]/32766.0;
926 	    }
927 	}
928 	else
929 	{
930 	    cerr << "Diphone: unknown group type to unpack" << endl;
931 	    festival_error();
932 	}
933 	frame_offset += frame_counts[i];
934     }
935     wfree(db->allframesshort);
936     db->allframesshort = 0;      // don't really need this any more
937     wfree(frame_counts);
938 }
939 
di_group_load_pm(DIPHONE_DATABASE * db)940 static void di_group_load_pm(DIPHONE_DATABASE *db)
941 {
942     unsigned short *pm_info;
943     int i,j;
944 
945     pm_info = walloc(unsigned short,db->nindex*3);
946     if (fread(pm_info,sizeof(unsigned short),db->nindex*3,db->gfd) !=
947 	(unsigned int)(db->nindex*3))
948     {
949 	cerr << "DIPHONE: short group file, can't read pm\n";
950 	festival_error();
951     }
952     if (db->swap)
953 	for (i=0; i < db->nindex*3; i++)
954 	    pm_info[i] = SWAPSHORT(pm_info[i]);
955     for (i=0; i < db->nindex; i++)
956     {
957 	db->pm[i]->mark = walloc(unsigned short,pm_info[i*3]);
958 	db->pm[i]->nmark = pm_info[i*3];
959 	db->pm[i]->lmark = pm_info[(i*3)+1];
960 	db->pm[i]->rmark = pm_info[(i*3)+2];
961 	fread(db->pm[i]->mark,sizeof(unsigned short),db->pm[i]->nmark,db->gfd);
962 	if (db->swap)
963 	    for (j=0; j < db->pm[i]->nmark; j++)
964 		db->pm[i]->mark[j] = SWAPSHORT(db->pm[i]->mark[j]);
965     }
966 }
967 
di_enlispen_params(DIPHONE_DATABASE * db)968 static LISP di_enlispen_params(DIPHONE_DATABASE *db)
969 {
970     // Return lisp representation of the parameters in db
971 
972     return cons(make_param_str("name",db->name),
973 	   cons(make_param_str("type",db->type_str),
974 	   cons(make_param_str("index_file",db->index_file),
975 	   cons(make_param_str("signal_dir",db->signal_dir),
976 	   cons(make_param_str("signal_ext",db->signal_ext),
977 	   cons(make_param_str("signal_type",db->signal_type),
978 	   cons(make_param_str("pitch_dir",db->pitch_dir),
979 	   cons(make_param_str("pitch_ext",db->pitch_ext),
980 	   cons(make_param_str("lpc_dir",db->lpc_dir),
981 	   cons(make_param_str("lpc_ext",db->lpc_ext),
982 	   cons(make_param_str("lpc_res_ext",db->lpc_res_ext),
983 	   cons(make_param_str("lpc_type",db->lpc_type),
984 	   cons(make_param_str("lpc_res_type",db->lpc_res_type),
985 	   cons(make_param_float("lpc_res_offset",db->lpc_res_offset),
986 	   cons(make_param_int("lpc_frame_offset",db->lpc_frame_offset),
987 	   cons(make_param_int("lpc_order",db->lpc_order),
988 	   cons(make_param_float("lpc_frame_shift",db->lpc_frame_shift),
989 	   cons(make_param_int("samp_freq",db->samp_freq),
990 	   cons(make_param_str("phoneset",db->phoneset),
991 	   cons(make_param_str("access_type",db->sig_access_type_str),
992 	   cons(make_param_str("group_encoding",db->group_encoding_str),
993 	   cons(make_param_int("num_diphones",db->nindex),
994 	   cons(make_param_int("sig_band",db->sig_band),
995 	   cons(make_param_int("def_f0",db->def_f0),
996 	   cons(make_param_str("default_diphone",db->default_diphone),
997 	   cons(make_param_lisp("alternates_before",db->alternates_before),
998 	   cons(make_param_lisp("alternates_after",db->alternates_after),
999 	   NIL)))))))))))))))))))))))))));
1000 }
1001 
di_save_grouped_db(const EST_Pathname & filename,DIPHONE_DATABASE * db)1002 void di_save_grouped_db(const EST_Pathname &filename, DIPHONE_DATABASE *db)
1003 {
1004     // Get index file and saved data from grouped file
1005     FILE *fd;
1006     LISP params;
1007     int strsize,totsamples,totframes;
1008     int i,j,k;
1009 
1010     if ((fd=fopen(filename,"wb")) == NULL)
1011     {
1012 	cerr << "Diphone: cannot open group file " <<
1013 	    filename << " for saving" << endl;
1014 	festival_error();
1015     }
1016 
1017     fwrite(&DIPHONE_MAGIC,sizeof(int),1,fd);
1018     params = di_enlispen_params(db); // get lisp representation of parameters
1019     lprin1f(params,fd);
1020 
1021     // Only need to dump the diphone names, not the rest of the indx info
1022     strsize = 0;
1023     for (i=0;i<db->nindex;i++)
1024 	strsize += strlen(db->indx[i]->diph)+1;
1025     fwrite(&strsize,sizeof(int),1,fd);
1026     for (i=0;i<db->nindex;i++)
1027 	fwrite(db->indx[i]->diph,sizeof(char),strlen(db->indx[i]->diph)+1,fd);
1028 
1029     // Diphone Signals
1030     // Dump the signal sizes first to make reading easier
1031     totsamples = 0;
1032     for (i=0;i<db->nindex;i++)
1033     {
1034 	if (db->vox[i]->signal == 0) // in case it isn't loaded yet
1035 	{
1036 	    load_pitch_file(db,i,di_direct);
1037 	    load_signal_file(db,i,di_direct);
1038 	}
1039 	fwrite(&db->vox[i]->nsamples,sizeof(unsigned short),1,fd);
1040 	totsamples += db->vox[i]->nsamples;
1041     }
1042     fwrite(&totsamples,sizeof(int),1,fd);
1043     // Dump signals (compressed if necessary)
1044     for (i=0;i<db->nindex;i++)
1045     {
1046 	if (db->group_encoding == di_raw)
1047 	    fwrite(db->vox[i]->signal,sizeof(short),db->vox[i]->nsamples,fd);
1048 	else if (db->group_encoding == di_ulaw)
1049 	{
1050 	    unsigned char *ulaw = walloc(unsigned char,db->vox[i]->nsamples);
1051 	    short_to_ulaw(db->vox[i]->signal,ulaw,db->vox[i]->nsamples);
1052 	    fwrite(ulaw,sizeof(unsigned char),db->vox[i]->nsamples,fd);
1053 	    wfree(ulaw);
1054 	}
1055 	else if (db->group_encoding == di_alaw)
1056 	{
1057 	    unsigned char *alaw = walloc(unsigned char,db->vox[i]->nsamples);
1058 	    short_to_alaw(db->vox[i]->signal,alaw,db->vox[i]->nsamples);
1059 	    fwrite(alaw,sizeof(unsigned char),db->vox[i]->nsamples,fd);
1060 	    wfree(alaw);
1061 	}
1062 	else
1063 	{
1064 	    cerr << "Diphone: unknown group type for dumping" << endl;
1065 	    festival_error();
1066 	}
1067 
1068     }
1069 
1070     // Diphone LPC parameters
1071     if (db->type == di_lpc)
1072     {
1073 	for (i=0;i<db->nindex;i++)	// ensure they are all loaded
1074 	    load_lpc_file(db,i,di_direct);
1075 	totframes = 0;
1076 	for (i=0;i<db->nindex;i++)
1077 	{
1078 	    fwrite(&db->lpc[i]->nframes,sizeof(unsigned short),1,fd);
1079 	    totframes += db->lpc[i]->nframes;
1080 	}
1081 	fwrite(&totframes,sizeof(int),1,fd);
1082 	for (i=0;i<db->nindex;i++)
1083 	{
1084 	    if (db->group_encoding == di_raw) // saved as floats
1085 	    {
1086 		for (j=0; j<db->lpc[i]->nframes; j++)
1087 		    fwrite(db->lpc[i]->f[j],sizeof(float),db->lpc_order,fd);
1088 	    }
1089 	    else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) // saved as shorts
1090 	    {
1091 		short *sh = new short[db->lpc_order];
1092 
1093 		for (j=0; j<db->lpc[i]->nframes; j++)
1094 		{
1095 		    for (k=0; k<db->lpc_order; k++)
1096 			sh[k] = (short)(db->lpc[i]->f[j][k]*32766.0);
1097 		    fwrite(sh,sizeof(short),db->lpc_order,fd);
1098 		}
1099 		delete sh;
1100 	    }
1101 	    else
1102 	    {
1103 		cerr << "Diphone: unknown group type for dumping" << endl;
1104 		festival_error();
1105 	    }
1106 	}
1107     }
1108 
1109     // Diphone Pitch Marks
1110     for (i=0;i<db->nindex;i++)
1111     {
1112 	fwrite(&db->pm[i]->nmark,sizeof(unsigned short),1,fd);
1113 	fwrite(&db->pm[i]->lmark,sizeof(unsigned short),1,fd);
1114 	fwrite(&db->pm[i]->rmark,sizeof(unsigned short),1,fd);
1115     }
1116     for (i=0;i<db->nindex;i++)
1117     {
1118 	fwrite(db->pm[i]->mark,sizeof(unsigned short),db->pm[i]->nmark,fd);
1119 	i=i;
1120     }
1121 
1122     fclose(fd);
1123 
1124 }
1125 
1126