1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alistair Conkie */
34 /* Date : August 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* The groupfile stuff is all awb's fault. */
38 /* */
39 /*************************************************************************/
40 #include <cstdio>
41 #include "EST_unix.h"
42 #include <cstdlib>
43 #include "festival.h"
44 #include "diphone.h"
45
46 static unsigned int DIPHONE_MAGIC=0x46544449; /* FTDI */
47
48 static void load_index(DIPHONE_DATABASE *database);
49 static void load_diphs(DIPHONE_DATABASE *database);
50 static void load_lpc_file(DIPHONE_DATABASE *db,int diph,int mode);
51 static void lpc2ref(const float *lpc, float *rfc, int order);
52 static void extract_lpc_frames(DIPHONE_DATABASE *db, int diph, EST_Track &lpc);
53 static void load_signal_file(DIPHONE_DATABASE *db, int i, int mode);
54 static void database_malloc(int ndiphs, DIPHONE_DATABASE *database);
55 static void di_group_load_signal(DIPHONE_DATABASE *db);
56 static void di_group_load_lpc_params(DIPHONE_DATABASE *db);
57 static void di_group_load_pm(DIPHONE_DATABASE *db);
58
di_load_database(DIPHONE_DATABASE * database)59 void di_load_database(DIPHONE_DATABASE *database)
60 {
61 // Load the ungrouped form
62 database_malloc(database->ndiphs,database);
63
64 load_index(database);
65 load_diphs(database);
66
67 }
68
database_malloc(int ndiphs,DIPHONE_DATABASE * database)69 static void database_malloc(int ndiphs, DIPHONE_DATABASE *database)
70 {
71 // So why am I not using all those cute C++ classes ?
72 // well I suppose I just don't know enough about binary loading
73 // and saving to trust them, but that's a poor excuse.
74 int i;
75
76 database->nindex = 0;
77 database->zone = 0;
78
79 database->indx = walloc(DI_INDEX *,ndiphs);
80 database->vox = walloc(DI_VOX *,ndiphs);
81 database->pm = walloc(DI_PM *,ndiphs);
82 database->lpc = walloc(DI_LPC *,ndiphs);
83
84 for(i=0;i<ndiphs;i++)
85 {
86 database->indx[i] = walloc(DI_INDEX,1);
87 database->vox[i] = walloc(DI_VOX,1);
88 database->vox[i]->signal = 0;
89 database->pm[i] = walloc(DI_PM,1);
90 database->pm[i]->mark = 0;
91 database->lpc[i] = walloc(DI_LPC,1);
92 database->lpc[i]->f = 0;
93 }
94
95 }
96
load_index(DIPHONE_DATABASE * database)97 static void load_index(DIPHONE_DATABASE *database)
98 {
99 EST_TokenStream ts;
100 int i;
101 EST_String line;
102
103 if (ts.open(database->index_file) == -1)
104 {
105 cerr << "Diphone: Can't open file " << database->index_file << endl;
106 festival_error();
107 }
108
109 for (i=0; (!ts.eof()) && (i<database->ndiphs);)
110 {
111 line = ts.get_upto_eoln();
112 if ((line.length() > 0) && (line[0] != ';'))
113 {
114 EST_TokenStream ls;
115 ls.open_string(line);
116 database->indx[i]->diph = wstrdup(ls.get().string());
117 database->indx[i]->file = wstrdup(ls.get().string());
118 database->indx[i]->beg = atof(ls.get().string());
119 database->indx[i]->mid = atof(ls.get().string());
120 database->indx[i]->end = atof(ls.get().string());
121 ls.close();
122 i++;
123 }
124 }
125
126 if (i == database->ndiphs)
127 {
128 cerr << "Diphone: too many diphones in DB" << endl;
129 festival_error();
130 }
131
132 database->nindex = i;
133 database->ndiphs = i;
134
135 ts.close();
136 }
137
load_diphs(DIPHONE_DATABASE * database)138 static void load_diphs(DIPHONE_DATABASE *database)
139 {
140 int i;
141
142 for(i=0;i<database->nindex;i++)
143 {
144 load_signal_file(database,i,database->sig_access_type);
145 // if (database->type = di_lpc)
146 // load_lpc_file(database,i,database->sig_access_type);
147 load_pitch_file(database,i,database->sig_access_type);
148 }
149 }
150
load_lpc_file(DIPHONE_DATABASE * db,int diph,int mode)151 static void load_lpc_file(DIPHONE_DATABASE *db,int diph,int mode)
152 {
153 // Load LPC coefficients
154 EST_String lpc_file;
155 EST_Track lpc;
156
157 if (db->lpc[diph]->f != 0)
158 return; // already loaded
159
160 if (mode == di_direct)
161 {
162 lpc_file = EST_String(db->lpc_dir) +
163 db->indx[diph]->file + db->lpc_ext;
164
165 if (lpc.load(lpc_file) != format_ok)
166 {
167 cerr << "Diphone: failed to read lpc file " <<
168 lpc_file << endl;
169 festival_error();
170 }
171 if (lpc.num_channels() != db->lpc_order)
172 {
173 cerr << "Diphone: lpc file " <<
174 lpc_file << " has order " << lpc.num_channels() <<
175 " while database has " << db->lpc_order << endl;
176 festival_error();
177 }
178 // Extract frames (pitch synchronously)
179 extract_lpc_frames(db,diph,lpc);
180 }
181
182 return;
183 }
184
static void ref2lpc(const float *rfc, float *lpc, int order)
{
    // Convert `order` reflection coefficients in rfc into LPC
    // coefficients in lpc.  (Christopher Longuet-Higgins' algorithm in
    // an equivalent form by awb, which needs neither reversed order nor
    // negation.)
    //
    // Each stage appends the next reflection coefficient and then
    // updates the coefficients before it in mirrored pairs, working
    // from both ends towards the middle.
    int stage;

    for (stage = 0; stage < order; stage++)
    {
        int lo = 0;
        int hi = stage - 1;

        lpc[stage] = rfc[stage];
        while (lo <= hi)
        {
            const float front = lpc[lo];
            const float back = lpc[hi];

            // when lo == hi both stores hit the same element and the
            // second one is the value that stays
            lpc[lo] = front - back * lpc[stage];
            lpc[hi] = back - front * lpc[stage];
            lo++;
            hi--;
        }
    }
}
205
extract_lpc_frames(DIPHONE_DATABASE * db,int diph,EST_Track & lpc)206 static void extract_lpc_frames(DIPHONE_DATABASE *db, int diph, EST_Track &lpc)
207 {
208 // Extract LPC frames from lpc, one for each pitch mark
209 int frame_num;
210 float pos,factor;
211 float ps_pos;
212 int i,j,k;
213
214 db->lpc[diph]->f = walloc(float *,db->pm[diph]->nmark);
215 float *lpcs = walloc(float,lpc.num_channels());
216 for (i=0; i < db->pm[diph]->nmark; i++)
217 {
218 if (db->lpc_pitch_synch)
219 {
220 db->lpc[diph]->f[i] = walloc(float,lpc.num_channels());
221 pos = (((float)db->pm[diph]->mark[i]-db->sig_band)/
222 (float)db->samp_freq)
223 + (db->indx[diph]->beg/1000.0);
224 for (j=1,ps_pos=0; j<lpc.num_frames(); j++)
225 {
226 ps_pos = lpc.t(j);
227 if (fabs(pos-ps_pos) < 0.003) // within 3 ms
228 {
229 for (k=0; k < lpc.num_channels(); k++)
230 lpcs[k] = lpc(j,k);
231 // need to get reflection coefficients
232 db->lpc[diph]->f[i][0] = lpcs[0];
233 lpc2ref(&lpcs[1],&db->lpc[diph]->f[i][1],
234 lpc.num_channels()-1);
235 break;
236 }
237 }
238 if (j==lpc.num_frames())
239 {
240 cerr << "Diphone: lpc access, failed to find lpc coeffs"
241 << endl;
242 festival_error();
243 }
244 }
245 else
246 { // Not pitch synchronous so find closest frames and
247 // interpolate between them
248 db->lpc[diph]->f[i] = walloc(float,lpc.num_channels());
249 // position of current mark in seconds
250 pos = (((float)db->pm[diph]->mark[i]-db->sig_band)/
251 (float)db->samp_freq)
252 + (db->indx[diph]->beg/1000.0);
253 // Convert to frames, rounding and subtracting start offset
254 frame_num = (int)((pos/db->lpc_frame_shift))
255 - db->lpc_frame_offset;
256 if (frame_num+1 < lpc.num_frames())
257 { // Interpolate between them
258 factor = (pos - ((1+frame_num)*db->lpc_frame_shift))/
259 db->lpc_frame_shift;
260 for (j=0; j < lpc.num_channels(); j++)
261 {
262 db->lpc[diph]->f[i][j] =
263 lpc(frame_num,j) +
264 (factor * (lpc(frame_num+1,j)-
265 lpc(frame_num,j)));
266 }
267 }
268 if (frame_num >= lpc.num_frames())
269 {
270 cerr << "Diphone: LPC frame past end of file \"" <<
271 db->indx[diph]->file << "\"" << endl;
272 memset(db->lpc[diph]->f[i],0,sizeof(float)*lpc.num_channels());
273 }
274 else // Last one so just take it as is
275 {
276 lpc.copy_frame_out(frame_num, db->lpc[diph]->f[i],
277 0, lpc.num_channels());
278 }
279 }
280 }
281 db->lpc[diph]->nframes = db->pm[diph]->nmark;
282 wfree(lpcs);
283 // db->lpc_order = lpc.num_channels();
284
285 }
286
static void lpc2ref(const float *lpc, float *rfc, int order)
{
    // LPC to reflection coefficients
    // from code from Borja Etxebarria
    //
    //   lpc    input, `order` LPC coefficients
    //   rfc    output, `order` reflection coefficients
    //   order  number of coefficients; nothing is written if <= 0
    //
    // The recursion is done in place in rfc.  The previous version
    // ping-ponged between rfc and a scratch array and then deleted
    // whichever pointer ended up in the scratch variable; after an odd
    // number of passes that was the caller's rfc, not the scratch
    // array.  Working in place needs no allocation and performs the
    // same arithmetic on the same values.
    int top, lo, hi;

    if (order <= 0)
        return;

    for (lo = 0; lo < order; lo++)
        rfc[lo] = lpc[lo];

    // Each pass fixes rfc[top] as a reflection coefficient and reduces
    // the coefficients below it by one order.
    for (top = order-1; top > 0; top--)
    {
        const float ai = rfc[top];
        const float f = 1-ai*ai;   // as before, not guarded against 0

        // entries lo and hi = (top-1)-lo depend on each other, so each
        // pair is updated together from saved copies
        for (lo = 0, hi = top-1; lo < hi; lo++, hi--)
        {
            const float front = rfc[lo];
            const float back = rfc[hi];

            rfc[lo] = (front+((ai*back)))/f;
            rfc[hi] = (back+((ai*front)))/f;
        }
        if (lo == hi)              // middle entry pairs with itself
            rfc[lo] = (rfc[lo]+((ai*rfc[lo])))/f;
    }
}
324
load_signal_file(DIPHONE_DATABASE * db,int i,int mode)325 static void load_signal_file(DIPHONE_DATABASE *db, int i, int mode)
326 {
327 // Load signal (or lpc residual) file)
328 int beg_samp,end_samp,zone,nsamples;
329 EST_String signal_file;
330 EST_String sig_type;
331 int offset,error;
332 beg_samp = 0;
333 zone = 0;
334 nsamples = 0;
335
336 if (db->gtype == di_ungrouped)
337 {
338 beg_samp = (int)((db->indx[i]->beg)/1000.0*db->samp_freq);
339 end_samp = (int)((db->indx[i]->end)/1000.0*db->samp_freq);
340
341 nsamples = end_samp - beg_samp;
342
343 zone = db->sig_band;
344 db->zone = zone;
345 db->vox[i]->nsamples = nsamples+(2*zone);
346 db->vox[i]->signal = 0;
347 }
348
349 if (mode == di_direct)
350 {
351 if (db->gtype == di_ungrouped)
352 {
353 EST_Wave w;
354 if (db->type == di_lpc)
355 {
356 signal_file = EST_String(db->lpc_dir) +
357 EST_String(db->indx[i]->file) +
358 EST_String(db->lpc_res_ext);
359 sig_type = db->lpc_res_type;
360 // Different LPC techniques will leave various offsets
361 // in the residule, you have to specify this explicitly
362 beg_samp -= (int)(db->lpc_res_offset * db->samp_freq);
363 }
364 else
365 {
366 signal_file = EST_String(db->signal_dir) +
367 EST_String(db->indx[i]->file) +
368 EST_String(db->signal_ext);
369 sig_type = db->signal_type;
370 }
371 offset = beg_samp-zone;
372 if (offset < 0)
373 offset = 0;
374 if (w.load_file(signal_file,sig_type,
375 db->samp_freq, "short", EST_NATIVE_BO,
376 1, offset, nsamples+2*zone) != format_ok)
377 {
378 cerr << "Diphone: failed to read " << sig_type
379 << " format signal file " << signal_file << endl;
380 festival_error();
381 }
382 db->vox[i]->signal = walloc(short,w.num_samples());
383 if (beg_samp-zone < 0) // wasn't enough space at beginning
384 error = abs(beg_samp-zone);
385 else
386 error = 0;
387 memset(db->vox[i]->signal,0,error*sizeof(short));
388 for (int j=0; j < w.num_samples()-error; j++)
389 db->vox[i]->signal[error+j] = w(j);
390 db->vox[i]->nsamples = w.num_samples()-error;
391 }
392 else // grouped so have to access the group file
393 {
394 if (db->gfd == NULL)
395 {
396 cerr << "Diphone: can no longer access the group file" << endl;
397 festival_error();
398 }
399 if (db->group_encoding == di_raw)
400 {
401 db->vox[i]->signal = walloc(short,db->vox[i]->nsamples);
402 fseek(db->gfd,db->gsignalbase+(db->offsets[i]*2),SEEK_SET);
403 fread(db->vox[i]->signal,sizeof(short),
404 db->vox[i]->nsamples,db->gfd);
405 if (db->swap)
406 swap_bytes_short(db->vox[i]->signal,db->vox[i]->nsamples);
407 }
408 else if (db->group_encoding == di_ulaw)
409 {
410 unsigned char *ulaw =
411 walloc(unsigned char,db->vox[i]->nsamples);
412 db->vox[i]->signal = walloc(short,db->vox[i]->nsamples);
413 fseek(db->gfd,db->gsignalbase+(db->offsets[i]),SEEK_SET);
414 fread(ulaw,sizeof(unsigned char),db->vox[i]->nsamples,db->gfd);
415 ulaw_to_short(ulaw,db->vox[i]->signal,db->vox[i]->nsamples);
416 wfree(ulaw);
417 }
418 else if (db->group_encoding == di_alaw)
419 {
420 unsigned char *alaw =
421 walloc(unsigned char,db->vox[i]->nsamples);
422 db->vox[i]->signal = walloc(short,db->vox[i]->nsamples);
423 fseek(db->gfd,db->gsignalbase+(db->offsets[i]),SEEK_SET);
424 fread(alaw,sizeof(unsigned char),db->vox[i]->nsamples,db->gfd);
425 alaw_to_short(alaw,db->vox[i]->signal,db->vox[i]->nsamples);
426 wfree(alaw);
427 }
428 else
429 {
430 cerr << "Diphone: unknown group type" << endl;
431 festival_error();
432 }
433 }
434 }
435
436 }
437
load_pitch_file(DIPHONE_DATABASE * database,int i,int mode)438 void load_pitch_file(DIPHONE_DATABASE *database, int i, int mode)
439 {
440 // load files from newer Track format
441 int mark[5000];
442 EST_String pitch_file;
443 EST_Track pms;
444 float fnum;
445 int k,k1,k2,m,zone,beg_samp,p;
446
447 if ((database->pm[i]->mark != 0) ||
448 (mode != di_direct))
449 return;
450
451 pitch_file = EST_String(database->pitch_dir)+database->indx[i]->file+
452 database->pitch_ext;
453 if (pms.load(pitch_file) != format_ok)
454 {
455 cerr << "Diphone: Can't open pitch file " << pitch_file << endl;
456 festival_error();
457 }
458 /* assumptions.. only those within the limits of the diphone */
459
460 beg_samp = (int)((database->indx[i]->beg)/1000.0*database->samp_freq);
461
462 zone = database->sig_band;
463
464 k = 0;
465 k1 = 0;
466 k2 = 0;
467 for (p=0; p < pms.num_frames(); p++)
468 {
469 fnum = pms.t(p)*1000.0;
470 if((fnum>database->indx[i]->beg) && (fnum<database->indx[i]->mid))
471 {
472 mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
473 if ((mark[k] >= database->vox[i]->nsamples+zone) ||
474 (mark[k] > 64534))
475 {
476 fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
477 (const char *)pitch_file);
478 k--; k1--;
479 }
480 if(mark[k] < zone)
481 {
482 fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
483 (const char *)pitch_file);
484 k--; k1--;
485 }
486 k++;
487 k1++;
488 }
489 else if((fnum>=database->indx[i]->mid) &&
490 (fnum<database->indx[i]->end))
491 {
492 mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
493 if ((mark[k] >= database->vox[i]->nsamples+zone) ||
494 (mark[k] > 64534))
495 {
496 fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
497 (const char *)pitch_file);
498 k--; k2--;
499 }
500 if(mark[k] < zone)
501 {
502 fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
503 (const char *)pitch_file);
504 k--; k2--;
505 }
506 k++;
507 k2++;
508 }
509 }
510 database->pm[i]->mark = walloc(unsigned short,k);
511 for(m=0;m<k;m++)
512 database->pm[i]->mark[m] = (unsigned short)mark[m];
513
514 database->pm[i]->nmark = (unsigned short)k;
515 database->pm[i]->lmark = (unsigned short)k1;
516 database->pm[i]->rmark = (unsigned short)k2;
517 if (database->pm[i]->rmark == 0)
518 {
519 *cdebug << "Diphone: modifying edge pms for "
520 << database->indx[i]->diph << endl;
521 database->pm[i]->rmark = 1;
522 database->pm[i]->lmark -= 1;
523 }
524 if (database->pm[i]->nmark <= 0)
525 {
526 cerr << "Diphone: diphone " << database->indx[i]->diph <<
527 " has 0 pitchmarks" << endl;
528 festival_error();
529 }
530
531 }
532
533 #if 0
534 void load_pitch_file(DIPHONE_DATABASE *database, int i, int mode)
535 {
536 char s[100];
537 int mark[5000];
538 EST_String pitch_file;
539 FILE *pfd;
540 float fnum;
541 int k,k1,k2,m,zone,beg_samp;
542
543 if ((database->pm[i]->mark != 0) ||
544 (mode != di_direct))
545 return;
546
547 pitch_file = EST_String(database->pitch_dir)+database->indx[i]->file+
548 database->pitch_ext;
549 if((pfd=fopen(pitch_file,"rb")) == NULL)
550 {
551 cerr << "Diphone: Can't open pitch file " << pitch_file << endl;
552 festival_error();
553 }
554 /* assumptions.. only those within the limits of the diphone */
555
556 beg_samp = (int)((database->indx[i]->beg)/1000.0*database->samp_freq);
557
558 zone = database->sig_band;
559
560 k = 0;
561 k1 = 0;
562 k2 = 0;
563 while(fgets(s,100,pfd) != NULL)
564 {
565 sscanf(s,"%f",&fnum);
566 if((fnum>database->indx[i]->beg) && (fnum<database->indx[i]->mid))
567 {
568 mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
569 if ((mark[k] >= database->vox[i]->nsamples+zone) ||
570 (mark[k] > 64534))
571 {
572 fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
573 (const char *)pitch_file);
574 k--; k1--;
575 }
576 if(mark[k] < zone)
577 {
578 fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
579 (const char *)pitch_file);
580 k--; k1--;
581 }
582 k++;
583 k1++;
584 }
585 else if((fnum>=database->indx[i]->mid) &&
586 (fnum<database->indx[i]->end))
587 {
588 mark[k] = (int)(fnum/1000.0*database->samp_freq - beg_samp + zone);
589 if ((mark[k] >= database->vox[i]->nsamples+zone) ||
590 (mark[k] > 64534))
591 {
592 fprintf(stderr,"Diphone: Mark out of range -- too large %s\n",
593 (const char *)pitch_file);
594 k--; k2--;
595 }
596 if(mark[k] < zone)
597 {
598 fprintf(stderr,"Diphone: Mark out of range -- too small %s\n",
599 (const char *)pitch_file);
600 k--; k2--;
601 }
602 k++;
603 k2++;
604 }
605 }
606 database->pm[i]->mark = walloc(unsigned short,k);
607 for(m=0;m<k;m++)
608 database->pm[i]->mark[m] = (unsigned short)mark[m];
609
610 database->pm[i]->nmark = (unsigned short)k;
611 database->pm[i]->lmark = (unsigned short)k1;
612 database->pm[i]->rmark = (unsigned short)k2;
613 if (database->pm[i]->rmark == 0)
614 {
615 *cdebug << "Diphone: modifying edge pms for "
616 << database->indx[i]->diph << endl;
617 database->pm[i]->rmark = 1;
618 database->pm[i]->lmark -= 1;
619 }
620 if (database->pm[i]->nmark <= 0)
621 {
622 cerr << "Diphone: diphone " << database->indx[i]->diph <<
623 " has 0 pitchmarks" << endl;
624 festival_error();
625 }
626
627 fclose(pfd);
628 }
629 #endif
630
631 /* Buffer to hold current diphone signal when using ondemand access */
632 /* method. It remembers the last phone accessed as typical access is */
633 /* the same one for a few times */
634 static short *diph_buffer = 0;
635 static int diph_max_size = 0;
636 static int last_diph = -1;
637 static DIPHONE_DATABASE *last_db = 0;
638
di_get_diph_signal(int diph,DIPHONE_DATABASE * db)639 short *di_get_diph_signal(int diph,DIPHONE_DATABASE *db)
640 {
641 // Get the diphone signal (or residual) from wherever
642
643 if (db->sig_access_type == di_direct) // all pre-loaded
644 return db->vox[diph]->signal;
645 else if (db->sig_access_type == di_dynamic) // Load and keep
646 {
647 if (db->vox[diph]->signal == 0)
648 load_signal_file(db,diph,di_direct);
649 return db->vox[diph]->signal;
650 }
651 else if (db->sig_access_type == di_ondemand) // Load and free afterwards
652 { // Loads it into a common buffer, over written each time
653 if ((diph == last_diph) &&
654 (db == last_db)) // ensure db hasn't changed
655 return diph_buffer;
656 load_signal_file(db,diph,di_direct);
657 if (diph_max_size < db->vox[diph]->nsamples)
658 {
659 wfree(diph_buffer);
660 diph_buffer = walloc(short,db->vox[diph]->nsamples);
661 diph_max_size = db->vox[diph]->nsamples;
662 }
663 memmove(diph_buffer,db->vox[diph]->signal,
664 db->vox[diph]->nsamples*sizeof(short));
665 wfree(db->vox[diph]->signal);
666 db->vox[diph]->signal = 0;
667 last_db = db; last_diph = diph;
668 return diph_buffer;
669 }
670 else
671 {
672 cerr << "Diphone: unknown diphone signal access strategy" << endl;
673 festival_error();
674 }
675 return NULL;
676 }
677
678 /* The buffer used to hold the requested frame */
679 static float frame_buff[128];
680
di_get_diph_lpc_mark(int diph,int mark,DIPHONE_DATABASE * db)681 float *di_get_diph_lpc_mark(int diph,int mark,DIPHONE_DATABASE *db)
682 {
683 // Get the coeff frame for diph at mark
684
685 load_lpc_file(db,diph,di_direct);
686
687 memmove(frame_buff,
688 db->lpc[diph]->f[mark],
689 sizeof(float)*db->lpc_order);
690
691 return frame_buff;
692 }
693
di_get_diph_res_mark(int diph,int mark,int size,DIPHONE_DATABASE * db)694 short *di_get_diph_res_mark(int diph,int mark,int size,DIPHONE_DATABASE *db)
695 {
696 // Get the residual for diph at mark, use the signal field
697 // to hold it as they are so similar.
698 short *residual;
699
700 residual = di_get_diph_signal(diph,db);
701
702 // Take residual around this midpoint
703
704 int pos_samp = db->pm[diph]->mark[mark] - size/2;
705
706 if (pos_samp < 0)
707 {
708 pos_samp = 0;
709 *cdebug << "DIPHONE: sig_band too short to the left" << endl;
710 }
711 if (pos_samp+size >= db->vox[diph]->nsamples)
712 {
713 pos_samp = db->vox[diph]->nsamples - size;
714 *cdebug << "DIPHONE: sig_band too short to the right" << endl;
715 }
716
717 return &residual[pos_samp];
718 }
719
di_load_grouped_db(const EST_Pathname & filename,DIPHONE_DATABASE * db,LISP global_params)720 void di_load_grouped_db(const EST_Pathname &filename, DIPHONE_DATABASE *db,
721 LISP global_params)
722 {
723 // Get index file and saved data from grouped file
724 int i,j;
725 unsigned int magic;
726 int strsize;
727 char *diphnames;
728 LISP params;
729
730 if ((db->gfd=fopen(filename,"rb")) == NULL)
731 {
732 cerr << "Diphone: cannot open group file " <<
733 filename << " for reading" << endl;
734 festival_error();
735 }
736
737 fread(&magic,sizeof(int),1,db->gfd);
738 if (magic == SWAPINT(DIPHONE_MAGIC))
739 db->swap = TRUE;
740 else if (magic != DIPHONE_MAGIC)
741 {
742 cerr << "Diphone: " << filename << " not a group file" << endl;
743 festival_error();
744 }
745
746 params = lreadf(db->gfd); // read the parameters in LISP form
747
748 di_general_parameters(db,params); // some may be reset later
749 di_fixed_parameters(db,params);
750 di_general_parameters(db,global_params); // reset some params
751
752 database_malloc(db->ndiphs,db);
753 db->nindex = db->ndiphs; // we can trust that number this time
754
755 fread(&strsize,sizeof(int),1,db->gfd); // number of chars in diph names
756 if (db->swap)
757 strsize = SWAPINT(strsize);
758 diphnames = walloc(char,strsize);
759 fread(diphnames,sizeof(char),strsize,db->gfd);
760 for (j=i=0;i<db->nindex;i++)
761 {
762 db->indx[i]->diph = &diphnames[j];
763 db->indx[i]->file = 0;
764 for ( ; diphnames[j] != '\0'; j++) // skip to next diphname
765 if (j > strsize)
766 {
767 cerr << "Diphone: group file diphone name table corrupted"
768 << endl;
769 festival_error();
770 }
771 j++;
772 }
773
774 // Diphone signals
775 di_group_load_signal(db);
776 // Diphone LPC parameters
777 if (db->type == di_lpc)
778 di_group_load_lpc_params(db);
779 // Diphone Pitch marks
780 di_group_load_pm(db);
781
782 if (db->sig_access_type == di_direct)
783 {
784 fclose(db->gfd); // read eveything
785 db->gfd = 0;
786 }
787
788 }
789
di_group_load_signal(DIPHONE_DATABASE * db)790 static void di_group_load_signal(DIPHONE_DATABASE *db)
791 {
792 int i;
793 unsigned short *samp_counts;
794 int sample_offset,totsamples;
795
796 samp_counts = walloc(unsigned short,db->nindex);
797 fread(samp_counts,sizeof(unsigned short),db->nindex,db->gfd);
798 if (db->swap) swap_bytes_ushort(samp_counts,db->nindex);
799 fread(&totsamples,sizeof(int),1,db->gfd);
800 if (db->swap)
801 totsamples = SWAPINT(totsamples);
802 if (db->sig_access_type == di_direct)
803 {
804 if (db->group_encoding == di_raw)
805 {
806 db->allsignal = walloc(short,totsamples);
807 fread(db->allsignal,sizeof(short),totsamples,db->gfd);
808 if (db->swap)
809 swap_bytes_short(db->allsignal,totsamples);
810 }
811 else if (db->group_encoding == di_ulaw)
812 {
813 db->allualawsignal = walloc(unsigned char,totsamples);
814 fread(db->allualawsignal,sizeof(unsigned char),totsamples,db->gfd);
815 }
816 else if (db->group_encoding == di_alaw)
817 {
818 db->allualawsignal = walloc(unsigned char,totsamples);
819 fread(db->allualawsignal,sizeof(unsigned char),totsamples,db->gfd);
820 }
821 }
822 else
823 {
824 db->gsignalbase = ftell(db->gfd);
825 db->offsets = walloc(int,db->nindex);
826 }
827
828 sample_offset = 0;
829 for (i=0; i < db->nindex; i++)
830 {
831 db->vox[i]->nsamples = samp_counts[i];
832 if (db->sig_access_type == di_direct)
833 {
834 if (db->group_encoding == di_raw)
835 db->vox[i]->signal = &db->allsignal[sample_offset];
836 else if (db->group_encoding == di_ulaw)
837 {
838 db->vox[i]->signal = walloc(short,samp_counts[i]);
839 ulaw_to_short(&db->allualawsignal[sample_offset],
840 db->vox[i]->signal,samp_counts[i]);
841 }
842 else if (db->group_encoding == di_alaw)
843 {
844 db->vox[i]->signal = walloc(short,samp_counts[i]);
845 alaw_to_short(&db->allualawsignal[sample_offset],
846 db->vox[i]->signal,samp_counts[i]);
847 }
848 else
849 {
850 cerr << "Diphone: unknown group type to unpack" << endl;
851 festival_error();
852 }
853 }
854 else
855 {
856 db->offsets[i] = sample_offset;
857 db->vox[i]->signal = 0;
858 }
859 sample_offset += samp_counts[i];
860 }
861 if (db->sig_access_type != di_direct)
862 if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw)
863 fseek(db->gfd,(long)sample_offset,SEEK_CUR);
864 else
865 fseek(db->gfd,(long)sample_offset*sizeof(short),SEEK_CUR);
866 wfree(samp_counts);
867 }
868
di_group_load_lpc_params(DIPHONE_DATABASE * db)869 static void di_group_load_lpc_params(DIPHONE_DATABASE *db)
870 {
871 // LPC params are always fully loaded
872 int totframes;
873 int i,j,k;
874 unsigned short *frame_counts;
875 int frame_offset;
876 int this_frame;
877
878 frame_counts = walloc(unsigned short, db->nindex);
879 fread(frame_counts,sizeof(unsigned short),db->nindex,db->gfd);
880 if (db->swap) swap_bytes_ushort(frame_counts,db->nindex);
881 fread(&totframes,sizeof(int),1,db->gfd);
882 if (db->swap) totframes = SWAPINT(totframes);
883 if (db->group_encoding == di_raw) // its as floats
884 {
885 db->allframes = walloc(float,totframes*db->lpc_order);
886 fread(db->allframes,sizeof(float),
887 totframes*db->lpc_order,db->gfd);
888 if (db->swap)
889 swap_bytes_float(db->allframes,totframes*db->lpc_order);
890 }
891 else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) // its as shorts
892 {
893 db->allframesshort = walloc(short,totframes*db->lpc_order);
894 fread(db->allframesshort,sizeof(short),
895 totframes*db->lpc_order,db->gfd);
896 if (db->swap)
897 swap_bytes_short(db->allframesshort,
898 totframes*db->lpc_order);
899 }
900 frame_offset = 0;
901 for (i=0; i < db->nindex; i++)
902 {
903 db->lpc[i]->nframes = frame_counts[i];
904 db->lpc[i]->f = walloc(float *,frame_counts[i]);
905 if (db->group_encoding == di_raw)
906 for (j=0;j<db->lpc[i]->nframes;j++)
907 db->lpc[i]->f[j] =
908 &db->allframes[(frame_offset+j)*db->lpc_order];
909 else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw)
910 {
911 int fixedpoint = FALSE;
912 if (siod_get_lval("lpc_fixedpoint",NULL) != NIL)
913 fixedpoint = TRUE;
914 for (j=0;j<db->lpc[i]->nframes;j++)
915 {
916 db->lpc[i]->f[j] = walloc(float,db->lpc_order);
917 this_frame = (frame_offset+j)*db->lpc_order;
918 if (fixedpoint)
919 for (k=0;k<db->lpc_order;k++)
920 db->lpc[i]->f[j][k] =
921 (float)db->allframesshort[this_frame+k];
922 else
923 for (k=0;k<db->lpc_order;k++)
924 db->lpc[i]->f[j][k] =
925 (float)db->allframesshort[this_frame+k]/32766.0;
926 }
927 }
928 else
929 {
930 cerr << "Diphone: unknown group type to unpack" << endl;
931 festival_error();
932 }
933 frame_offset += frame_counts[i];
934 }
935 wfree(db->allframesshort);
936 db->allframesshort = 0; // don't really need this any more
937 wfree(frame_counts);
938 }
939
di_group_load_pm(DIPHONE_DATABASE * db)940 static void di_group_load_pm(DIPHONE_DATABASE *db)
941 {
942 unsigned short *pm_info;
943 int i,j;
944
945 pm_info = walloc(unsigned short,db->nindex*3);
946 if (fread(pm_info,sizeof(unsigned short),db->nindex*3,db->gfd) !=
947 (unsigned int)(db->nindex*3))
948 {
949 cerr << "DIPHONE: short group file, can't read pm\n";
950 festival_error();
951 }
952 if (db->swap)
953 for (i=0; i < db->nindex*3; i++)
954 pm_info[i] = SWAPSHORT(pm_info[i]);
955 for (i=0; i < db->nindex; i++)
956 {
957 db->pm[i]->mark = walloc(unsigned short,pm_info[i*3]);
958 db->pm[i]->nmark = pm_info[i*3];
959 db->pm[i]->lmark = pm_info[(i*3)+1];
960 db->pm[i]->rmark = pm_info[(i*3)+2];
961 fread(db->pm[i]->mark,sizeof(unsigned short),db->pm[i]->nmark,db->gfd);
962 if (db->swap)
963 for (j=0; j < db->pm[i]->nmark; j++)
964 db->pm[i]->mark[j] = SWAPSHORT(db->pm[i]->mark[j]);
965 }
966 }
967
di_enlispen_params(DIPHONE_DATABASE * db)968 static LISP di_enlispen_params(DIPHONE_DATABASE *db)
969 {
970 // Return lisp representation of the parameters in db
971
972 return cons(make_param_str("name",db->name),
973 cons(make_param_str("type",db->type_str),
974 cons(make_param_str("index_file",db->index_file),
975 cons(make_param_str("signal_dir",db->signal_dir),
976 cons(make_param_str("signal_ext",db->signal_ext),
977 cons(make_param_str("signal_type",db->signal_type),
978 cons(make_param_str("pitch_dir",db->pitch_dir),
979 cons(make_param_str("pitch_ext",db->pitch_ext),
980 cons(make_param_str("lpc_dir",db->lpc_dir),
981 cons(make_param_str("lpc_ext",db->lpc_ext),
982 cons(make_param_str("lpc_res_ext",db->lpc_res_ext),
983 cons(make_param_str("lpc_type",db->lpc_type),
984 cons(make_param_str("lpc_res_type",db->lpc_res_type),
985 cons(make_param_float("lpc_res_offset",db->lpc_res_offset),
986 cons(make_param_int("lpc_frame_offset",db->lpc_frame_offset),
987 cons(make_param_int("lpc_order",db->lpc_order),
988 cons(make_param_float("lpc_frame_shift",db->lpc_frame_shift),
989 cons(make_param_int("samp_freq",db->samp_freq),
990 cons(make_param_str("phoneset",db->phoneset),
991 cons(make_param_str("access_type",db->sig_access_type_str),
992 cons(make_param_str("group_encoding",db->group_encoding_str),
993 cons(make_param_int("num_diphones",db->nindex),
994 cons(make_param_int("sig_band",db->sig_band),
995 cons(make_param_int("def_f0",db->def_f0),
996 cons(make_param_str("default_diphone",db->default_diphone),
997 cons(make_param_lisp("alternates_before",db->alternates_before),
998 cons(make_param_lisp("alternates_after",db->alternates_after),
999 NIL)))))))))))))))))))))))))));
1000 }
1001
di_save_grouped_db(const EST_Pathname & filename,DIPHONE_DATABASE * db)1002 void di_save_grouped_db(const EST_Pathname &filename, DIPHONE_DATABASE *db)
1003 {
1004 // Get index file and saved data from grouped file
1005 FILE *fd;
1006 LISP params;
1007 int strsize,totsamples,totframes;
1008 int i,j,k;
1009
1010 if ((fd=fopen(filename,"wb")) == NULL)
1011 {
1012 cerr << "Diphone: cannot open group file " <<
1013 filename << " for saving" << endl;
1014 festival_error();
1015 }
1016
1017 fwrite(&DIPHONE_MAGIC,sizeof(int),1,fd);
1018 params = di_enlispen_params(db); // get lisp representation of parameters
1019 lprin1f(params,fd);
1020
1021 // Only need to dump the diphone names, not the rest of the indx info
1022 strsize = 0;
1023 for (i=0;i<db->nindex;i++)
1024 strsize += strlen(db->indx[i]->diph)+1;
1025 fwrite(&strsize,sizeof(int),1,fd);
1026 for (i=0;i<db->nindex;i++)
1027 fwrite(db->indx[i]->diph,sizeof(char),strlen(db->indx[i]->diph)+1,fd);
1028
1029 // Diphone Signals
1030 // Dump the signal sizes first to make reading easier
1031 totsamples = 0;
1032 for (i=0;i<db->nindex;i++)
1033 {
1034 if (db->vox[i]->signal == 0) // in case it isn't loaded yet
1035 {
1036 load_pitch_file(db,i,di_direct);
1037 load_signal_file(db,i,di_direct);
1038 }
1039 fwrite(&db->vox[i]->nsamples,sizeof(unsigned short),1,fd);
1040 totsamples += db->vox[i]->nsamples;
1041 }
1042 fwrite(&totsamples,sizeof(int),1,fd);
1043 // Dump signals (compressed if necessary)
1044 for (i=0;i<db->nindex;i++)
1045 {
1046 if (db->group_encoding == di_raw)
1047 fwrite(db->vox[i]->signal,sizeof(short),db->vox[i]->nsamples,fd);
1048 else if (db->group_encoding == di_ulaw)
1049 {
1050 unsigned char *ulaw = walloc(unsigned char,db->vox[i]->nsamples);
1051 short_to_ulaw(db->vox[i]->signal,ulaw,db->vox[i]->nsamples);
1052 fwrite(ulaw,sizeof(unsigned char),db->vox[i]->nsamples,fd);
1053 wfree(ulaw);
1054 }
1055 else if (db->group_encoding == di_alaw)
1056 {
1057 unsigned char *alaw = walloc(unsigned char,db->vox[i]->nsamples);
1058 short_to_alaw(db->vox[i]->signal,alaw,db->vox[i]->nsamples);
1059 fwrite(alaw,sizeof(unsigned char),db->vox[i]->nsamples,fd);
1060 wfree(alaw);
1061 }
1062 else
1063 {
1064 cerr << "Diphone: unknown group type for dumping" << endl;
1065 festival_error();
1066 }
1067
1068 }
1069
1070 // Diphone LPC parameters
1071 if (db->type == di_lpc)
1072 {
1073 for (i=0;i<db->nindex;i++) // ensure they are all loaded
1074 load_lpc_file(db,i,di_direct);
1075 totframes = 0;
1076 for (i=0;i<db->nindex;i++)
1077 {
1078 fwrite(&db->lpc[i]->nframes,sizeof(unsigned short),1,fd);
1079 totframes += db->lpc[i]->nframes;
1080 }
1081 fwrite(&totframes,sizeof(int),1,fd);
1082 for (i=0;i<db->nindex;i++)
1083 {
1084 if (db->group_encoding == di_raw) // saved as floats
1085 {
1086 for (j=0; j<db->lpc[i]->nframes; j++)
1087 fwrite(db->lpc[i]->f[j],sizeof(float),db->lpc_order,fd);
1088 }
1089 else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) // saved as shorts
1090 {
1091 short *sh = new short[db->lpc_order];
1092
1093 for (j=0; j<db->lpc[i]->nframes; j++)
1094 {
1095 for (k=0; k<db->lpc_order; k++)
1096 sh[k] = (short)(db->lpc[i]->f[j][k]*32766.0);
1097 fwrite(sh,sizeof(short),db->lpc_order,fd);
1098 }
1099 delete sh;
1100 }
1101 else
1102 {
1103 cerr << "Diphone: unknown group type for dumping" << endl;
1104 festival_error();
1105 }
1106 }
1107 }
1108
1109 // Diphone Pitch Marks
1110 for (i=0;i<db->nindex;i++)
1111 {
1112 fwrite(&db->pm[i]->nmark,sizeof(unsigned short),1,fd);
1113 fwrite(&db->pm[i]->lmark,sizeof(unsigned short),1,fd);
1114 fwrite(&db->pm[i]->rmark,sizeof(unsigned short),1,fd);
1115 }
1116 for (i=0;i<db->nindex;i++)
1117 {
1118 fwrite(db->pm[i]->mark,sizeof(unsigned short),db->pm[i]->nmark,fd);
1119 i=i;
1120 }
1121
1122 fclose(fd);
1123
1124 }
1125
1126