1 /*
2  * Copyright (C) 2005 to 2014 by Jonathan Duddington
3  * email: jonsd@users.sourceforge.net
4  * Copyright (C) 2015-2017 Reece H. Dunn
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, see: <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "config.h"
21 
22 #include <ctype.h>
23 #include <errno.h>
24 #include <math.h>
25 //#include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "espeak_ng.h"
32 #include "encoding.h"
33 #include "speech.h"
34 #include "synthesize.h"
35 #include "translate.h"
36 
extern FILE *f_log;
static void SmoothSpect(void);

// list of phonemes in a clause
int n_phoneme_list = 0;
PHONEME_LIST phoneme_list[N_PHONEME_LIST+1];

int mbrola_delay;
// when mbrola_name[0] is non-zero, Generate() hands the work to MbrolaGenerate()
char mbrola_name[20];

// scaling factors and lower limits used when converting nominal lengths
// (see PauseLength(), DoSample2() and DoSpect2())
SPEED_FACTORS speed;

// index in wcmdq[] of the most recent WCMD_PITCH command (set by DoPitch());
// EndPitch() only fills in its length when this is >= 0
static int last_pitch_cmd;
// index in wcmdq[] of the most recent WCMD_AMPLITUDE command (set by DoAmplitude())
static int last_amp_cmd;
// end frame of the last spectrum command queued by DoSpect2(), or NULL after
// a pause, a sample, or a voice break
static frame_t  *last_frame;
// index in wcmdq[] of the last spectrum or wave command, -1 after a voice break
static int last_wcmdq;
// frame lengths accumulated by DoSpect2() since the last pitch command;
// written into that command by EndPitch()
static int pitch_length;
// frame lengths accumulated by DoSpect2() since the last amplitude command;
// written into that command by EndAmplitude()
static int amp_length;
// glottal-stop flags set by FormantTransition2() and merged into the
// modulation of the last frame of a sequence by DoSpect2()
static int modn_flags;
// the value most recently queued with WCMD_FMT_AMPLITUDE, 0 = no adjustment active
static int fmt_amplitude = 0;

// positions in wcmdq[] of the syllable which SmoothSpect() will process;
// syllable_centre is -1 when no vowel centre has been recorded
static int syllable_start;
static int syllable_end;
static int syllable_centre;

static voice_t *new_voice = NULL;

// DoEmbedded() only plays sound icons whose number is below n_soundicon_tab
int n_soundicon_tab = N_SOUNDICON_SLOTS;
SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];

#define RMS_GLOTTAL1 35   // vowel before glottal stop
#define RMS_START 28  // rms of the first frame of a vowel when no transition target is given

// length given to the first frame of a vowel when the transition data has no length
#define VOWEL_FRONT_LENGTH  50

// a dummy phoneme_list entry which looks like a pause
static PHONEME_LIST next_pause;
74 
const char *WordToString(unsigned int word)
{
	// Unpack a phoneme mnemonic word into a string. The word holds up to
	// four characters, the first one in the least significant byte.
	// The result is in a static buffer which the next call overwrites.
	static char text[5];
	char *out = text;
	int shift;

	for (shift = 0; shift < 32; shift += 8)
		*out++ = word >> shift;
	*out = 0;
	return text;
}
86 
SynthesizeInit()87 void SynthesizeInit()
88 {
89 	last_pitch_cmd = 0;
90 	last_amp_cmd = 0;
91 	last_frame = NULL;
92 	syllable_centre = -1;
93 
94 	// initialise next_pause, a dummy phoneme_list entry
95 	next_pause.type = phPAUSE;
96 	next_pause.newword = 0;
97 }
98 
EndAmplitude(void)99 static void EndAmplitude(void)
100 {
101 	if (amp_length > 0) {
102 		if (wcmdq[last_amp_cmd][1] == 0)
103 			wcmdq[last_amp_cmd][1] = amp_length;
104 		amp_length = 0;
105 	}
106 }
107 
EndPitch(int voice_break)108 static void EndPitch(int voice_break)
109 {
110 	// posssible end of pitch envelope, fill in the length
111 	if ((pitch_length > 0) && (last_pitch_cmd >= 0)) {
112 		if (wcmdq[last_pitch_cmd][1] == 0)
113 			wcmdq[last_pitch_cmd][1] = pitch_length;
114 		pitch_length = 0;
115 	}
116 
117 	if (voice_break) {
118 		last_wcmdq = -1;
119 		last_frame = NULL;
120 		syllable_end = wcmdq_tail;
121 		SmoothSpect();
122 		syllable_centre = -1;
123 		memset(vowel_transition, 0, sizeof(vowel_transition));
124 	}
125 }
126 
DoAmplitude(int amp,unsigned char * amp_env)127 static void DoAmplitude(int amp, unsigned char *amp_env)
128 {
129 	intptr_t *q;
130 
131 	last_amp_cmd = wcmdq_tail;
132 	amp_length = 0; // total length of vowel with this amplitude envelope
133 
134 	q = wcmdq[wcmdq_tail];
135 	q[0] = WCMD_AMPLITUDE;
136 	q[1] = 0; // fill in later from amp_length
137 	q[2] = (intptr_t)amp_env;
138 	q[3] = amp;
139 	WcmdqInc();
140 }
141 
DoPitch(unsigned char * env,int pitch1,int pitch2)142 static void DoPitch(unsigned char *env, int pitch1, int pitch2)
143 {
144 	intptr_t *q;
145 
146 	EndPitch(0);
147 
148 	if (pitch1 == 255) {
149 		// pitch was not set
150 		pitch1 = 55;
151 		pitch2 = 76;
152 		env = envelope_data[PITCHfall];
153 	}
154 	last_pitch_cmd = wcmdq_tail;
155 	pitch_length = 0; // total length of spect with this pitch envelope
156 
157 	if (pitch2 < 0)
158 		pitch2 = 0;
159 
160 	q = wcmdq[wcmdq_tail];
161 	q[0] = WCMD_PITCH;
162 	q[1] = 0; // length, fill in later from pitch_length
163 	q[2] = (intptr_t)env;
164 	q[3] = (pitch1 << 16) + pitch2;
165 	WcmdqInc();
166 }
167 
PauseLength(int pause,int control)168 int PauseLength(int pause, int control)
169 {
170 	unsigned int len;
171 
172 	if (control == 0) {
173 		if (pause >= 200)
174 			len = (pause * speed.clause_pause_factor)/256;
175 		else
176 			len = (pause * speed.pause_factor)/256;
177 	} else
178 		len = (pause * speed.wav_factor)/256;
179 
180 	if (len < speed.min_pause)
181 		len = speed.min_pause; // mS, limit the amount to which pauses can be shortened
182 	return len;
183 }
184 
DoPause(int length,int control)185 static void DoPause(int length, int control)
186 {
187 	// length in nominal mS
188 	// control = 1, less shortening at fast speeds
189 
190 	unsigned int len;
191 	int srate2;
192 
193 	if (length == 0)
194 		len = 0;
195 	else {
196 		len = PauseLength(length, control);
197 
198 		if (len < 90000)
199 			len = (len * samplerate) / 1000; // convert from mS to number of samples
200 		else {
201 			srate2 = samplerate / 25; // avoid overflow
202 			len = (len * srate2) / 40;
203 		}
204 	}
205 
206 	EndPitch(1);
207 	wcmdq[wcmdq_tail][0] = WCMD_PAUSE;
208 	wcmdq[wcmdq_tail][1] = len;
209 	WcmdqInc();
210 	last_frame = NULL;
211 
212 	if (fmt_amplitude != 0) {
213 		wcmdq[wcmdq_tail][0] = WCMD_FMT_AMPLITUDE;
214 		wcmdq[wcmdq_tail][1] = fmt_amplitude = 0;
215 		WcmdqInc();
216 	}
217 }
218 
219 extern int seq_len_adjust; // temporary fix to advance the start point for playing the wav sample
220 
DoSample2(int index,int which,int std_length,int control,int length_mod,int amp)221 static int DoSample2(int index, int which, int std_length, int control, int length_mod, int amp)
222 {
223 	int length;
224 	int wav_length;
225 	int wav_scale;
226 	int min_length;
227 	int x;
228 	int len4;
229 	intptr_t *q;
230 	unsigned char *p;
231 
232 	index = index & 0x7fffff;
233 	p = &wavefile_data[index];
234 	wav_scale = p[2];
235 	wav_length = (p[1] * 256);
236 	wav_length += p[0]; // length in bytes
237 
238 	if (wav_length == 0)
239 		return 0;
240 
241 	min_length = speed.min_sample_len;
242 
243 	if (wav_scale == 0)
244 		min_length *= 2; // 16 bit samples
245 
246 	if (std_length > 0) {
247 		std_length = (std_length * samplerate)/1000;
248 		if (wav_scale == 0)
249 			std_length *= 2;
250 
251 		x = (min_length * std_length)/wav_length;
252 		if (x > min_length)
253 			min_length = x;
254 	} else {
255 		// no length specified, use the length of the stored sound
256 		std_length = wav_length;
257 	}
258 
259 	if (length_mod > 0)
260 		std_length = (std_length * length_mod)/256;
261 
262 	length = (std_length * speed.wav_factor)/256;
263 
264 	if (control & pd_DONTLENGTHEN) {
265 		// this option is used for Stops, with short noise bursts.
266 		// Don't change their length much.
267 		if (length > std_length) {
268 			// don't let length exceed std_length
269 			length = std_length;
270 		}
271 	}
272 
273 	if (length < min_length)
274 		length = min_length;
275 
276 
277 	if (wav_scale == 0) {
278 		// 16 bit samples
279 		length /= 2;
280 		wav_length /= 2;
281 	}
282 
283 	if (amp < 0)
284 		return length;
285 
286 	len4 = wav_length / 4;
287 
288 	index += 4;
289 
290 	if (which & 0x100) {
291 		// mix this with synthesised wave
292 		last_wcmdq = wcmdq_tail;
293 		q = wcmdq[wcmdq_tail];
294 		q[0] = WCMD_WAVE2;
295 		q[1] = length | (wav_length << 16); // length in samples
296 		q[2] = (intptr_t)(&wavefile_data[index]);
297 		q[3] = wav_scale + (amp << 8);
298 		WcmdqInc();
299 		return length;
300 	}
301 
302 	if (length > wav_length) {
303 		x = len4*3;
304 		length -= x;
305 	} else {
306 		x = length;
307 		length = 0;
308 	}
309 
310 	last_wcmdq = wcmdq_tail;
311 	q = wcmdq[wcmdq_tail];
312 	q[0] = WCMD_WAVE;
313 	q[1] = x; // length in samples
314 	q[2] = (intptr_t)(&wavefile_data[index]);
315 	q[3] = wav_scale + (amp << 8);
316 	WcmdqInc();
317 
318 	while (length > len4*3) {
319 		x = len4;
320 		if (wav_scale == 0)
321 			x *= 2;
322 
323 		last_wcmdq = wcmdq_tail;
324 		q = wcmdq[wcmdq_tail];
325 		q[0] = WCMD_WAVE;
326 		q[1] = len4*2; // length in samples
327 		q[2] = (intptr_t)(&wavefile_data[index+x]);
328 		q[3] = wav_scale + (amp << 8);
329 		WcmdqInc();
330 
331 		length -= len4*2;
332 	}
333 
334 	if (length > 0) {
335 		x = wav_length - length;
336 		if (wav_scale == 0)
337 			x *= 2;
338 		last_wcmdq = wcmdq_tail;
339 		q = wcmdq[wcmdq_tail];
340 		q[0] = WCMD_WAVE;
341 		q[1] = length; // length in samples
342 		q[2] = (intptr_t)(&wavefile_data[index+x]);
343 		q[3] = wav_scale + (amp << 8);
344 		WcmdqInc();
345 	}
346 
347 	return length;
348 }
349 
DoSample3(PHONEME_DATA * phdata,int length_mod,int amp)350 int DoSample3(PHONEME_DATA *phdata, int length_mod, int amp)
351 {
352 	int amp2;
353 	int len;
354 	EndPitch(1);
355 
356 	if (amp == -1) {
357 		// just get the length, don't produce sound
358 		amp2 = amp;
359 	} else {
360 		amp2 = phdata->sound_param[pd_WAV];
361 		if (amp2 == 0)
362 			amp2 = 100;
363 		amp2 = (amp2 * 32)/100;
364 	}
365 
366 	seq_len_adjust = 0;
367 
368 	if (phdata->sound_addr[pd_WAV] == 0)
369 		len = 0;
370 	else
371 		len = DoSample2(phdata->sound_addr[pd_WAV], 2, phdata->pd_param[pd_LENGTHMOD]*2, phdata->pd_control, length_mod, amp2);
372 	last_frame = NULL;
373 	return len;
374 }
375 
AllocFrame()376 static frame_t *AllocFrame()
377 {
378 	// Allocate a temporary spectrum frame for the wavegen queue. Use a pool which is big
379 	// enough to use a round-robin without checks.
380 	// Only needed for modifying spectra for blending to consonants
381 
382 	#define N_FRAME_POOL N_WCMDQ
383 	static int ix = 0;
384 	static frame_t frame_pool[N_FRAME_POOL];
385 
386 	ix++;
387 	if (ix >= N_FRAME_POOL)
388 		ix = 0;
389 	return &frame_pool[ix];
390 }
391 
set_frame_rms(frame_t * fr,int new_rms)392 static void set_frame_rms(frame_t *fr, int new_rms)
393 {
394 	// Each frame includes its RMS amplitude value, so to set a new
395 	// RMS just adjust the formant amplitudes by the appropriate ratio
396 
397 	int x;
398 	int h;
399 	int ix;
400 
401 	static const short sqrt_tab[200] = {
402 		  0,  64,  90, 110, 128, 143, 156, 169, 181, 192, 202, 212, 221, 230, 239, 247,
403 		256, 263, 271, 278, 286, 293, 300, 306, 313, 320, 326, 332, 338, 344, 350, 356,
404 		362, 367, 373, 378, 384, 389, 394, 399, 404, 409, 414, 419, 424, 429, 434, 438,
405 		443, 448, 452, 457, 461, 465, 470, 474, 478, 483, 487, 491, 495, 499, 503, 507,
406 		512, 515, 519, 523, 527, 531, 535, 539, 543, 546, 550, 554, 557, 561, 565, 568,
407 		572, 576, 579, 583, 586, 590, 593, 596, 600, 603, 607, 610, 613, 617, 620, 623,
408 		627, 630, 633, 636, 640, 643, 646, 649, 652, 655, 658, 662, 665, 668, 671, 674,
409 		677, 680, 683, 686, 689, 692, 695, 698, 701, 704, 706, 709, 712, 715, 718, 721,
410 		724, 726, 729, 732, 735, 738, 740, 743, 746, 749, 751, 754, 757, 759, 762, 765,
411 		768, 770, 773, 775, 778, 781, 783, 786, 789, 791, 794, 796, 799, 801, 804, 807,
412 		809, 812, 814, 817, 819, 822, 824, 827, 829, 832, 834, 836, 839, 841, 844, 846,
413 		849, 851, 853, 856, 858, 861, 863, 865, 868, 870, 872, 875, 877, 879, 882, 884,
414 		886, 889, 891, 893, 896, 898, 900, 902
415 	};
416 
417 	if (voice->klattv[0]) {
418 		if (new_rms == -1)
419 			fr->klattp[KLATT_AV] = 50;
420 		return;
421 	}
422 
423 	if (fr->rms == 0) return; // check for divide by zero
424 	x = (new_rms * 64)/fr->rms;
425 	if (x >= 200) x = 199;
426 
427 	x = sqrt_tab[x]; // sqrt(new_rms/fr->rms)*0x200;
428 
429 	for (ix = 0; ix < 8; ix++) {
430 		h = fr->fheight[ix] * x;
431 		fr->fheight[ix] = h/0x200;
432 	}
433 }
434 
formants_reduce_hf(frame_t * fr,int level)435 static void formants_reduce_hf(frame_t *fr, int level)
436 {
437 	// change height of peaks 2 to 8, percentage
438 	int ix;
439 	int x;
440 
441 	if (voice->klattv[0])
442 		return;
443 
444 	for (ix = 2; ix < 8; ix++) {
445 		x = fr->fheight[ix] * level;
446 		fr->fheight[ix] = x/100;
447 	}
448 }
449 
CopyFrame(frame_t * frame1,int copy)450 static frame_t *CopyFrame(frame_t *frame1, int copy)
451 {
452 	// create a copy of the specified frame in temporary buffer
453 
454 	frame_t *frame2;
455 
456 	if ((copy == 0) && (frame1->frflags & FRFLAG_COPIED)) {
457 		// this frame has already been copied in temporary rw memory
458 		return frame1;
459 	}
460 
461 	frame2 = AllocFrame();
462 	if (frame2 != NULL) {
463 		memcpy(frame2, frame1, sizeof(frame_t));
464 		frame2->length = 0;
465 		frame2->frflags |= FRFLAG_COPIED;
466 	}
467 	return frame2;
468 }
469 
DuplicateLastFrame(frameref_t * seq,int n_frames,int length)470 static frame_t *DuplicateLastFrame(frameref_t *seq, int n_frames, int length)
471 {
472 	frame_t *fr;
473 
474 	seq[n_frames-1].length = length;
475 	fr = CopyFrame(seq[n_frames-1].frame, 1);
476 	seq[n_frames].frame = fr;
477 	seq[n_frames].length = 0;
478 	return fr;
479 }
480 
AdjustFormants(frame_t * fr,int target,int min,int max,int f1_adj,int f3_adj,int hf_reduce,int flags)481 static void AdjustFormants(frame_t *fr, int target, int min, int max, int f1_adj, int f3_adj, int hf_reduce, int flags)
482 {
483 	int x;
484 
485 	target = (target * voice->formant_factor)/256;
486 
487 	x = (target - fr->ffreq[2]) / 2;
488 	if (x > max) x = max;
489 	if (x < min) x = min;
490 	fr->ffreq[2] += x;
491 	fr->ffreq[3] += f3_adj;
492 
493 	if (flags & 0x20)
494 		f3_adj = -f3_adj; // reverse direction for f4,f5 change
495 	fr->ffreq[4] += f3_adj;
496 	fr->ffreq[5] += f3_adj;
497 
498 	if (f1_adj == 1) {
499 		x = (235 - fr->ffreq[1]);
500 		if (x < -100) x = -100;
501 		if (x > -60) x = -60;
502 		fr->ffreq[1] += x;
503 	}
504 	if (f1_adj == 2) {
505 		x = (235 - fr->ffreq[1]);
506 		if (x < -300) x = -300;
507 		if (x > -150) x = -150;
508 		fr->ffreq[1] += x;
509 		fr->ffreq[0] += x;
510 	}
511 	if (f1_adj == 3) {
512 		x = (100 - fr->ffreq[1]);
513 		if (x < -400) x = -400;
514 		if (x > -300) x = -400;
515 		fr->ffreq[1] += x;
516 		fr->ffreq[0] += x;
517 	}
518 	formants_reduce_hf(fr, hf_reduce);
519 }
520 
VowelCloseness(frame_t * fr)521 static int VowelCloseness(frame_t *fr)
522 {
523 	// return a value 0-3 depending on the vowel's f1
524 	int f1;
525 
526 	if ((f1 = fr->ffreq[1]) < 300)
527 		return 3;
528 	if (f1 < 400)
529 		return 2;
530 	if (f1 < 500)
531 		return 1;
532 	return 0;
533 }
534 
int FormantTransition2(frameref_t *seq, int *n_frames, unsigned int data1, unsigned int data2, PHONEME_TAB *other_ph, int which)
{
	// Modify the frames of a vowel for the transition from or to an adjacent phoneme.
	//
	// seq, n_frames: the vowel's frame sequence. Frames which are changed are
	//          first replaced by writable copies. On exit from a vowel an
	//          extra frame may be added, which increments *n_frames.
	// data1:   bits 0-5 = length/2, bits 6-11 = rms, bits 12 upwards = flags
	// data2:   bits 0-5 = f2/50, bits 6-10 = f2_min, bits 11-15 = f2_max,
	//          bits 16-20 = f3_adj (these three are (n-15)*50),
	//          bits 21-25 = f3_amp/8, bits 26-28 = f1, bits 29-31 = vowel colouring
	// other_ph: the adjacent phoneme, may be NULL. A '?' mnemonic sets flag 8.
	// which:   1 = entry to the vowel, otherwise exit from the vowel
	//
	// flags:   2 = don't merge the changed frame with the next frame
	//          4 = allow a greater rate of formant change
	//          8 = glottal stop
	//          16 = return the transition length
	//          0x20 = reverse the direction of the f4,f5 change (see AdjustFormants)
	//          0x40 = add a short pause
	//
	// Returns the transition length if flag 16 is set, otherwise 0.
	// Returns 0 without doing anything if there are fewer than 2 frames.
	int ix;
	int formant;
	int next_rms;

	int len;
	int rms;
	int f1;
	int f2;
	int f2_min;
	int f2_max;
	int f3_adj;
	int f3_amp;
	int flags;
	int vcolour;

	#define N_VCOLOUR 2
	// percentage change for each formant in 256ths
	static short vcolouring[N_VCOLOUR][5] = {
		{ 243, 272, 256, 256, 256 }, // palatal consonant follows
		{ 256, 256, 240, 240, 240 }, // retroflex
	};

	frame_t *fr = NULL;

	if (*n_frames < 2)
		return 0;

	// unpack the transition parameters
	len = (data1 & 0x3f) * 2;
	rms = (data1 >> 6) & 0x3f;
	flags = (data1 >> 12);

	f2 = (data2 & 0x3f) * 50;
	f2_min = (((data2 >> 6) & 0x1f) - 15) * 50;
	f2_max = (((data2 >> 11) & 0x1f) - 15) * 50;
	f3_adj = (((data2 >> 16) & 0x1f) - 15) * 50;
	f3_amp = ((data2 >> 21) & 0x1f) * 8;
	f1 = ((data2 >> 26) & 0x7);
	vcolour = (data2 >> 29);

	// treat an adjacent '?' phoneme as a glottal stop
	if ((other_ph != NULL) && (other_ph->mnemonic == '?'))
		flags |= 8;

	if (which == 1) {
		// entry to vowel
		// the first frame is changed, and is given the transition length
		fr = CopyFrame(seq[0].frame, 0);
		seq[0].frame = fr;
		seq[0].length = VOWEL_FRONT_LENGTH;
		if (len > 0)
			seq[0].length = len;
		seq[0].frflags |= FRFLAG_LEN_MOD2; // reduce length modification
		fr->frflags |= FRFLAG_LEN_MOD2;

		next_rms = seq[1].frame->rms;

		if (voice->klattv[0])
			fr->klattp[KLATT_AV] = seq[1].frame->klattp[KLATT_AV] - 4;
		if (f2 != 0) {
			// rms bit 0x20: the rms is relative to the next frame (in 30ths)
			// and is set before the formants are adjusted, otherwise it is
			// absolute (rms*2) and is set afterwards
			if (rms & 0x20)
				set_frame_rms(fr, (next_rms * (rms & 0x1f))/30);
			AdjustFormants(fr, f2, f2_min, f2_max, f1, f3_adj, f3_amp, flags);

			if ((rms & 0x20) == 0)
				set_frame_rms(fr, rms*2);
		} else {
			// no f2 target, only the amplitude of the first frame is changed
			if (flags & 8)
				set_frame_rms(fr, (next_rms*24)/32);
			else
				set_frame_rms(fr, RMS_START);
		}

		// after a glottal stop, picked up by DoSpect2() through modn_flags
		if (flags & 8)
			modn_flags = 0x800 + (VowelCloseness(fr) << 8);
	} else {
		// exit from vowel
		rms = rms*2;
		if ((f2 != 0) || (flags != 0)) {

			if (flags & 8) {
				// glottal stop, change the last frame itself
				fr = CopyFrame(seq[*n_frames-1].frame, 0);
				seq[*n_frames-1].frame = fr;
				rms = RMS_GLOTTAL1;

				// degree of glottal-stop effect depends on closeness of vowel (indicated by f1 freq)
				modn_flags = 0x400 + (VowelCloseness(fr) << 8);
			} else {
				// add a copy of the last frame to the sequence, and change the copy
				fr = DuplicateLastFrame(seq, (*n_frames)++, len);
				if (len > 36)
					seq_len_adjust += (len - 36);

				if (f2 != 0)
					AdjustFormants(fr, f2, f2_min, f2_max, f1, f3_adj, f3_amp, flags);
			}

			set_frame_rms(fr, rms);

			if ((vcolour > 0) && (vcolour <= N_VCOLOUR)) {
				// vowel colouring: scale formants 1 to 5 of every frame in the sequence
				for (ix = 0; ix < *n_frames; ix++) {
					fr = CopyFrame(seq[ix].frame, 0);
					seq[ix].frame = fr;

					for (formant = 1; formant <= 5; formant++) {
						int x;
						x = fr->ffreq[formant] * vcolouring[vcolour-1][formant-1];
						fr->ffreq[formant] = x / 256;
					}
				}
			}
		}
	}

	// fr is the last frame which was changed, if any
	if (fr != NULL) {
		if (flags & 4)
			fr->frflags |= FRFLAG_FORMANT_RATE;
		if (flags & 2)
			fr->frflags |= FRFLAG_BREAK; // don't merge with next frame
	}

	if (flags & 0x40)
		DoPause(20, 0); // add a short pause after the consonant

	if (flags & 16)
		return len;
	return 0;
}
661 
static void SmoothSpect(void)
{
	// Limit the rate of frequency change of formants, to reduce chirping
	//
	// Works on the spectrum commands in wcmdq[] between syllable_start and
	// syllable_end, going outwards in both directions from syllable_centre.
	// In each of these commands, word 2 is the start frame, word 3 is the
	// end frame, and the low 16 bits of word 1 are the length.
	// A frame which needs to be changed is replaced by a copy, and the
	// command which follows on from it is changed to use the copy too.
	// On return, syllable_start has been moved up to syllable_end.

	intptr_t *q;
	frame_t *frame;
	frame_t *frame2;
	frame_t *frame1;
	frame_t *frame_centre;
	int ix;
	int len;
	int pk;
	int modified;
	int allowed;
	int diff;

	// nothing has been queued for this syllable
	if (syllable_start == syllable_end)
		return;

	// no vowel centre is known, or nothing comes before it
	if ((syllable_centre < 0) || (syllable_centre == syllable_start)) {
		syllable_start = syllable_end;
		return;
	}

	q = wcmdq[syllable_centre];
	frame_centre = (frame_t *)q[2];

	// backwards
	// 'frame' is the original start frame of the command which was looked at
	// last, 'frame2' is what that frame has become (the same, or a changed copy)
	ix = syllable_centre -1;
	frame = frame2 = frame_centre;
	for (;;) {
		if (ix < 0) ix = N_WCMDQ-1; // the queue is circular
		q = wcmdq[ix];

		if (q[0] == WCMD_PAUSE || q[0] == WCMD_WAVE)
			break;

		if (q[0] <= WCMD_SPECT2) {
			len = q[1] & 0xffff;

			// the end frame of this command must be the start frame of the
			// command after it; make it follow any change to that frame
			frame1 = (frame_t *)q[3];
			if (frame1 == frame) {
				q[3] = (intptr_t)frame2;
				frame1 = frame2;
			} else
				break; // doesn't follow on from previous frame

			frame = frame2 = (frame_t *)q[2];
			modified = 0;

			if (frame->frflags & FRFLAG_BREAK)
				break;

			if (frame->frflags & FRFLAG_FORMANT_RATE)
				len = (len * 12)/10; // allow slightly greater rate of change for this frame (was 12/10)

			// limit how far each formant of the start frame is from the end frame
			for (pk = 0; pk < 6; pk++) {
				int f1, f2;

				if ((frame->frflags & FRFLAG_BREAK_LF) && (pk < 3))
					continue;

				f1 = frame1->ffreq[pk];
				f2 = frame->ffreq[pk];

				// backwards
				if ((diff = f2 - f1) > 0)
					allowed = f1*2 + f2;
				else
					allowed = f1 + f2*2;

				// the allowed change is specified as percentage (%*10) of the frequency
				// take "frequency" as 1/3 from the lower freq
				allowed = (allowed * formant_rate[pk])/3000;
				allowed = (allowed * len)/256;

				if (diff > allowed) {
					if (modified == 0) {
						frame2 = CopyFrame(frame, 0);
						modified = 1;
					}
					frame2->ffreq[pk] = frame1->ffreq[pk] + allowed;
					q[2] = (intptr_t)frame2;
				} else if (diff < -allowed) {
					if (modified == 0) {
						frame2 = CopyFrame(frame, 0);
						modified = 1;
					}
					frame2->ffreq[pk] = frame1->ffreq[pk] - allowed;
					q[2] = (intptr_t)frame2;
				}
			}
		}

		if (ix == syllable_start)
			break;
		ix--;
	}

	// forwards
	// 'frame' is the original end frame of the command which was looked at
	// last (NULL for the first command), 'frame2' is what it has become
	ix = syllable_centre;

	frame = NULL;
	for (;;) {
		q = wcmdq[ix];

		if (q[0] == WCMD_PAUSE || q[0] == WCMD_WAVE)
			break;

		if (q[0] <= WCMD_SPECT2) {
			len = q[1] & 0xffff;

			// the start frame of this command must be the end frame of the
			// command before it; make it follow any change to that frame
			frame1 = (frame_t *)q[2];
			if (frame != NULL) {
				if (frame1 == frame) {
					q[2] = (intptr_t)frame2;
					frame1 = frame2;
				} else
					break; // doesn't follow on from previous frame
			}

			frame = frame2 = (frame_t *)q[3];
			modified = 0;

			if (frame1->frflags & FRFLAG_BREAK)
				break;

			if (frame1->frflags & FRFLAG_FORMANT_RATE)
				len = (len *6)/5; // allow slightly greater rate of change for this frame

			// limit how far each formant of the end frame is from the start frame
			for (pk = 0; pk < 6; pk++) {
				int f1, f2;
				f1 = frame1->ffreq[pk];
				f2 = frame->ffreq[pk];

				// forwards
				if ((diff = f2 - f1) > 0)
					allowed = f1*2 + f2;
				else
					allowed = f1 + f2*2;
				allowed = (allowed * formant_rate[pk])/3000;
				allowed = (allowed * len)/256;

				if (diff > allowed) {
					if (modified == 0) {
						frame2 = CopyFrame(frame, 0);
						modified = 1;
					}
					frame2->ffreq[pk] = frame1->ffreq[pk] + allowed;
					q[3] = (intptr_t)frame2;
				} else if (diff < -allowed) {
					if (modified == 0) {
						frame2 = CopyFrame(frame, 0);
						modified = 1;
					}
					frame2->ffreq[pk] = frame1->ffreq[pk] - allowed;
					q[3] = (intptr_t)frame2;
				}
			}
		}

		ix++;
		if (ix >= N_WCMDQ) ix = 0; // the queue is circular
		if (ix == syllable_end)
			break;
	}

	// this syllable is done, the next one starts here
	syllable_start = syllable_end;
}
831 
StartSyllable(void)832 static void StartSyllable(void)
833 {
834 	// start of syllable, if not already started
835 	if (syllable_end == syllable_start)
836 		syllable_end = wcmdq_tail;
837 }
838 
int DoSpect2(PHONEME_TAB *this_ph, int which, FMT_PARAMS *fmt_params,  PHONEME_LIST *plist, int modulation)
{
	// Queue the spectrum commands for a sequence of formant frames.
	//
	// this_ph: the phoneme which is being produced
	// which:  0 not a vowel, 1  start of vowel,   2 body and end of vowel
	// fmt_params: identifies the frame sequence (fmt_addr) and gives the
	//   amplitude adjustment (fmt_amp) and any wave file to mix with it
	//   (wav_addr, wav_amp). wav_addr is set to zero once the wave is queued.
	// plist: the phoneme's entry in the phoneme list. Its length is the
	//   length modifier: 256 = 100%, 0 is taken as 256.
	//   plist[-1] is looked at when which == 1.
	// modulation: -1 = don't write to wcmdq
	//
	// Returns the total length in samples of the frames with non-zero length,
	// or 0 if there is no frame sequence.

	int n_frames;
	frameref_t *frames;
	int frameix;
	frame_t *frame1;
	frame_t *frame2;
	frame_t *fr;
	int ix;
	intptr_t *q;
	int len;
	int frame_length;
	int length_factor;
	int length_mod;
	int length_sum;
	int length_min;
	int total_len = 0;
	static int wave_flag = 0; // set when a wave file has been queued to mix with the spectra
	int wcmd_spect = WCMD_SPECT;
	int frame_lengths[N_SEQ_FRAMES];

	if (fmt_params->fmt_addr == 0)
		return 0;

	length_mod = plist->length;
	if (length_mod == 0) length_mod = 256;

	length_min = (samplerate/70); // greater than one cycle at low pitch (Hz)
	if (which == 2) {
		if ((translator->langopts.param[LOPT_LONG_VOWEL_THRESHOLD] > 0) && ((this_ph->std_length >= translator->langopts.param[LOPT_LONG_VOWEL_THRESHOLD]) || (plist->synthflags & SFLAG_LENGTHEN) || (this_ph->phflags & phLONG)))
			length_min *= 2; // ensure long vowels are longer
	}

	if (which == 1) {
		// limit the shortening of sonorants before shortened (eg. unstressed vowels)
		if ((this_ph->type == phLIQUID) || (plist[-1].type == phLIQUID) || (plist[-1].type == phNASAL)) {
			if (length_mod < (len = translator->langopts.param[LOPT_SONORANT_MIN]))
				length_mod = len;
		}
	}

	// modn_flags is cleared before the lookup and tested again when the
	// last frame is queued, below
	modn_flags = 0;
	frames = LookupSpect(this_ph, which, fmt_params, &n_frames, plist);
	if (frames == NULL)
		return 0; // not found

	if (fmt_params->fmt_amp != fmt_amplitude) {
		// an amplitude adjustment is specified for this sequence
		q = wcmdq[wcmdq_tail];
		q[0] = WCMD_FMT_AMPLITUDE;
		q[1] = fmt_amplitude = fmt_params->fmt_amp;
		WcmdqInc();
	}

	frame1 = frames[0].frame;
	if (voice->klattv[0])
		wcmd_spect = WCMD_KLATT;

	wavefile_ix = fmt_params->wav_addr;

	// wav_amp is a percentage where 100 gives 32, 0 means use the default
	if (fmt_params->wav_amp == 0)
		wavefile_amp = 32;
	else
		wavefile_amp = (fmt_params->wav_amp * 32)/100;

	if (wavefile_ix == 0) {
		if (wave_flag) {
			// cancel any wavefile that was playing previously
			wcmd_spect = WCMD_SPECT2;
			if (voice->klattv[0])
				wcmd_spect = WCMD_KLATT2;
			wave_flag = 0;
		} else {
			wcmd_spect = WCMD_SPECT;
			if (voice->klattv[0])
				wcmd_spect = WCMD_KLATT;
		}
	}

	if (last_frame != NULL) {
		if (((last_frame->length < 2) || (last_frame->frflags & FRFLAG_VOWEL_CENTRE))
		    && !(last_frame->frflags & FRFLAG_BREAK)) {
			// last frame of previous sequence was zero-length, replace with first of this sequence
			wcmdq[last_wcmdq][3] = (intptr_t)frame1;

			if (last_frame->frflags & FRFLAG_BREAK_LF) {
				// but flag indicates keep HF peaks in last segment
				fr = CopyFrame(frame1, 1);
				for (ix = 3; ix < 8; ix++) {
					if (ix < 7)
						fr->ffreq[ix] = last_frame->ffreq[ix];
					fr->fheight[ix] = last_frame->fheight[ix];
				}
				wcmdq[last_wcmdq][3] = (intptr_t)fr;
			}
		}
	}

	if ((this_ph->type == phVOWEL) && (which == 2)) {
		SmoothSpect(); // process previous syllable

		// remember the point in the output queue of the centre of the vowel
		syllable_centre = wcmdq_tail;
	}

	// Work out the length in samples of each interval between frames.
	// frame_lengths[n] is the length from frame n-1 to frame n.
	length_sum = 0;
	for (frameix = 1; frameix < n_frames; frameix++) {
		length_factor = length_mod;
		if (frames[frameix-1].frflags & FRFLAG_LEN_MOD) // reduce effect of length mod
			length_factor = (length_mod*(256-speed.lenmod_factor) + 256*speed.lenmod_factor)/256;
		else if (frames[frameix-1].frflags & FRFLAG_LEN_MOD2) // reduce effect of length mod, used for the start of a vowel
			length_factor = (length_mod*(256-speed.lenmod2_factor) + 256*speed.lenmod2_factor)/256;

		frame_length = frames[frameix-1].length;
		len = (frame_length * samplerate)/1000; // mS to samples
		len = (len * length_factor)/256;
		length_sum += len;
		frame_lengths[frameix] = len;
	}

	if ((length_sum > 0) && (length_sum < length_min)) {
		// lengthen, so that the sequence is greater than one cycle at low pitch
		for (frameix = 1; frameix < n_frames; frameix++)
			frame_lengths[frameix] = (frame_lengths[frameix] * length_min) / length_sum;
	}

	// queue one command for each interval, from frame1 to frame2
	for (frameix = 1; frameix < n_frames; frameix++) {
		frame2 = frames[frameix].frame;

		if ((fmt_params->wav_addr != 0) && ((frame1->frflags & FRFLAG_DEFER_WAV) == 0)) {
			// there is a wave file to play along with this synthesis
			seq_len_adjust = 0;
			DoSample2(fmt_params->wav_addr, which+0x100, 0, fmt_params->fmt_control, 0, wavefile_amp);
			wave_flag = 1;
			wavefile_ix = 0;
			fmt_params->wav_addr = 0;
		}

		if (modulation >= 0) {
			if (frame1->frflags & FRFLAG_MODULATE)
				modulation = 6;
			if ((frameix == n_frames-1) && (modn_flags & 0xf00))
				modulation |= modn_flags; // before or after a glottal stop
		}

		// the length also counts towards the current pitch and amplitude envelopes
		len = frame_lengths[frameix];
		pitch_length += len;
		amp_length += len;

		if (len == 0) {
			// nothing to queue for a zero length interval
			last_frame = NULL;
			frame1 = frame2;
		} else {
			last_wcmdq = wcmdq_tail;

			if (modulation >= 0) {
				q = wcmdq[wcmdq_tail];
				q[0] = wcmd_spect;
				q[1] = len + (modulation << 16);
				q[2] = (intptr_t)frame1;
				q[3] = (intptr_t)frame2;

				WcmdqInc();
			}
			last_frame = frame1 = frame2;
			total_len += len;
		}
	}

	// cancel the amplitude adjustment, except after the start of a vowel
	if ((which != 1) && (fmt_amplitude != 0)) {
		q = wcmdq[wcmdq_tail];
		q[0] = WCMD_FMT_AMPLITUDE;
		q[1] = fmt_amplitude = 0;
		WcmdqInc();
	}

	return total_len;
}
1021 
DoMarker(int type,int char_posn,int length,int value)1022 void DoMarker(int type, int char_posn, int length, int value)
1023 {
1024 	// This could be used to return an index to the word currently being spoken
1025 	// Type 1=word, 2=sentence, 3=named marker, 4=play audio, 5=end
1026 
1027 	if (WcmdqFree() > 5) {
1028 		wcmdq[wcmdq_tail][0] = WCMD_MARKER + (type << 8);
1029 		wcmdq[wcmdq_tail][1] = (char_posn & 0xffffff) | (length << 24);
1030 		wcmdq[wcmdq_tail][2] = value;
1031 		WcmdqInc();
1032 	}
1033 }
1034 
DoPhonemeMarker(int type,int char_posn,int length,char * name)1035 void DoPhonemeMarker(int type, int char_posn, int length, char *name)
1036 {
1037 	// This could be used to return an index to the word currently being spoken
1038 	// Type 7=phoneme
1039 
1040 	int *p;
1041 
1042 	if (WcmdqFree() > 5) {
1043 		wcmdq[wcmdq_tail][0] = WCMD_MARKER + (type << 8);
1044 		wcmdq[wcmdq_tail][1] = (char_posn & 0xffffff) | (length << 24);
1045 		p = (int *)name;
1046 		wcmdq[wcmdq_tail][2] = p[0]; // up to 8 bytes of UTF8 characters
1047 		wcmdq[wcmdq_tail][3] = p[1];
1048 		WcmdqInc();
1049 	}
1050 }
1051 
#if HAVE_SONIC_H
void DoSonicSpeed(int value)
{
	// Queue a WCMD_SONIC_SPEED command.
	// value, multiplier * 1024
	intptr_t *cmd = wcmdq[wcmdq_tail];

	cmd[0] = WCMD_SONIC_SPEED;
	cmd[1] = value;
	WcmdqInc();
}
#endif
1061 
DoVoiceChange(voice_t * v)1062 espeak_ng_STATUS DoVoiceChange(voice_t *v)
1063 {
1064 	// allocate memory for a copy of the voice data, and free it in wavegenfill()
1065 	voice_t *v2;
1066 	if ((v2 = (voice_t *)malloc(sizeof(voice_t))) == NULL)
1067 		return static_cast<espeak_ng_STATUS> (ENOMEM);
1068 	memcpy(v2, v, sizeof(voice_t));
1069 	wcmdq[wcmdq_tail][0] = WCMD_VOICE;
1070 	wcmdq[wcmdq_tail][2] = (intptr_t)v2;
1071 	WcmdqInc();
1072 	return static_cast<espeak_ng_STATUS> (ENS_OK);
1073 }
1074 
DoEmbedded(int * embix,int sourceix)1075 void DoEmbedded(int *embix, int sourceix)
1076 {
1077 	// There were embedded commands in the text at this point
1078 	unsigned int word; // bit 7=last command for this word, bits 5,6 sign, bits 0-4 command
1079 	unsigned int value;
1080 	int command;
1081 
1082 	do {
1083 		word = embedded_list[*embix];
1084 		value = word >> 8;
1085 		command = word & 0x7f;
1086 
1087 		if (command == 0)
1088 			return; // error
1089 
1090 		(*embix)++;
1091 
1092 		switch (command & 0x1f)
1093 		{
1094 		case EMBED_S: // speed
1095 			SetEmbedded((command & 0x60) + EMBED_S2, value); // adjusts embedded_value[EMBED_S2]
1096 			SetSpeed(2);
1097 			break;
1098 		case EMBED_I: // play dynamically loaded wav data (sound icon)
1099 			if ((int)value < n_soundicon_tab) {
1100 				if (soundicon_tab[value].length != 0) {
1101 					DoPause(10, 0); // ensure a break in the speech
1102 					wcmdq[wcmdq_tail][0] = WCMD_WAVE;
1103 					wcmdq[wcmdq_tail][1] = soundicon_tab[value].length;
1104 					wcmdq[wcmdq_tail][2] = (intptr_t)soundicon_tab[value].data + 44; // skip WAV header
1105 					wcmdq[wcmdq_tail][3] = 0x1500; // 16 bit data, amp=21
1106 					WcmdqInc();
1107 				}
1108 			}
1109 			break;
1110 		case EMBED_M: // named marker
1111 			DoMarker(espeakEVENT_MARK, (sourceix & 0x7ff) + clause_start_char, 0, value);
1112 			break;
1113 		case EMBED_U: // play sound
1114 			DoMarker(espeakEVENT_PLAY, count_characters+1, 0, value); // always occurs at end of clause
1115 			break;
1116 		default:
1117 			DoPause(10, 0); // ensure a break in the speech
1118 			wcmdq[wcmdq_tail][0] = WCMD_EMBEDDED;
1119 			wcmdq[wcmdq_tail][1] = command;
1120 			wcmdq[wcmdq_tail][2] = value;
1121 			WcmdqInc();
1122 			break;
1123 		}
1124 	} while ((word & 0x80) == 0);
1125 }
1126 
int Generate(PHONEME_LIST *phonemelist, int *n_ph, bool resume)
{
	// Convert the phoneme list of a clause into entries in the wavegen
	// command queue (pauses, markers, amplitude, pitch, samples, spectra).
	//
	// phonemelist: the phonemes of the clause; processing starts at index 1
	//              and reads the entries at ix-1, ix+1 and ix+2 as context.
	// n_ph:        number of entries in phonemelist. It is set to 0 once the
	//              whole list has been processed.
	// resume:      false to start a new clause (resets the static state below);
	//              otherwise processing continues from the saved position.
	//
	// Returns 1 when the command queue does not have enough free entries for
	// the next phoneme (the position is kept in the static variables, so a
	// later call can continue), or 0 when the phoneme list is finished.
	// If an mbrola voice is selected (mbrola_name is not empty) the work is
	// delegated to MbrolaGenerate() and its result is returned.

	// State which must survive between a "wait" return and the resuming call
	static int ix;
	static int embedded_ix;
	static int word_count;
	PHONEME_LIST *prev;
	PHONEME_LIST *next;
	PHONEME_LIST *next2;
	PHONEME_LIST *p;
	bool released;
	int stress;
	int modulation;
	bool  pre_voiced;
	int free_min;
	int value;
	unsigned char *pitch_env = NULL;
	unsigned char *amp_env;
	PHONEME_TAB *ph;
	int use_ipa = 0;
	int done_phoneme_marker;
	int vowelstart_prev;
	char phoneme_name[16];
	static int sourceix = 0;

	PHONEME_DATA phdata;
	PHONEME_DATA phdata_prev;
	PHONEME_DATA phdata_next;
	PHONEME_DATA phdata_tone;
	FMT_PARAMS fmtp;
	static WORD_PH_DATA worddata;

	if (option_phoneme_events & espeakINITIALIZE_PHONEME_IPA)
		use_ipa = 1;

	if (mbrola_name[0] != 0)
		return MbrolaGenerate(phonemelist, n_ph, resume);

	if (resume == false) {
		// Starting a new clause: reset the position and the per-clause state
		ix = 1;
		embedded_ix = 0;
		word_count = 0;
		pitch_length = 0;
		amp_length = 0;
		last_frame = NULL;
		last_wcmdq = -1;
		syllable_start = wcmdq_tail;
		syllable_end = wcmdq_tail;
		syllable_centre = -1;
		last_pitch_cmd = -1;
		memset(vowel_transition, 0, sizeof(vowel_transition));
		memset(&worddata, 0, sizeof(worddata));
		DoPause(0, 0); // isolate from the previous clause
	}

	// The upper bound N_PHONEME_LIST-2 keeps phonemelist[ix+2] (next2) inside the list
	while ((ix < (*n_ph)) && (ix < N_PHONEME_LIST-2)) {
		p = &phonemelist[ix];

		// free_min: the number of free command queue entries required before
		// this phoneme is processed
		if (p->type == phPAUSE)
			free_min = 10;
		else if (p->type != phVOWEL)
			free_min = 15; // we need less Q space for non-vowels, and we need to generate phonemes after a vowel so that the pitch_length is filled in
		else
			free_min = MIN_WCMDQ;

		if (WcmdqFree() <= free_min)
			return 1; // wait

		prev = &phonemelist[ix-1];
		next = &phonemelist[ix+1];
		next2 = &phonemelist[ix+2];

		if (p->synthflags & SFLAG_EMBEDDED)
			DoEmbedded(&embedded_ix, p->sourceix);

		if (p->newword) {
			// last_frame is kept only for a vowel when the language option
			// LOPT_WORD_MERGE has bit 0 set, or for a phoneme flagged phNOPAUSE;
			// otherwise it is cleared at the start of a word
			if (((p->type == phVOWEL) && (translator->langopts.param[LOPT_WORD_MERGE] & 1)) ||
			    (p->ph->phflags & phNOPAUSE)) {
			} else
				last_frame = NULL;

			// character position of this word: low 11 bits of sourceix, offset by the clause start
			sourceix = (p->sourceix & 0x7ff) + clause_start_char;

			if (p->newword & 4)
				DoMarker(espeakEVENT_SENTENCE, sourceix, 0, count_sentences); // start of sentence

			if (p->newword & 1)
				DoMarker(espeakEVENT_WORD, sourceix, p->sourceix >> 11, clause_start_word + word_count++); // NOTE, this count doesn't include multiple-word pronunciations in *_list. eg (of a)
		}

		EndAmplitude();

		if ((p->prepause > 0) && !(p->ph->phflags & phPREVOICE))
			DoPause(p->prepause, 1);

		// Phoneme event. done_phoneme_marker stays 0 when the event is
		// deferred; the phVOWEL case below then emits it.
		done_phoneme_marker = 0;
		if (option_phoneme_events && (p->ph->code != phonEND_WORD)) {
			if ((p->type == phVOWEL) && (prev->type == phLIQUID || prev->type == phNASAL)) {
				// For vowels following a liquid or nasal, do the phoneme event after the vowel-start
			} else {
				WritePhMnemonic(phoneme_name, p->ph, p, use_ipa, NULL);
				DoPhonemeMarker(espeakEVENT_PHONEME, sourceix, 0, phoneme_name);
				done_phoneme_marker = 1;
			}
		}

		switch (p->type)
		{
		case phPAUSE:
			DoPause(p->length, 0);
			p->std_length = p->ph->std_length;
			break;
		case phSTOP:
			// 'released' is true when the stop is followed by a vowel, or by a
			// liquid which does not start a new word
			released = false;
			ph = p->ph;
			if (next->type == phVOWEL)
				released = true;
			else if (!next->newword) {
				if (next->type == phLIQUID) released = true;
			}
			if (released == false)
				p->synthflags |= SFLAG_NEXT_PAUSE;

			if (ph->phflags & phPREVOICE) {
				// a period of voicing before the release
				memset(&fmtp, 0, sizeof(fmtp));
				// NOTE(review): the control value 0x01 is used only for these
				// pre-voicing passes - confirm its meaning in InterpretPhoneme()
				InterpretPhoneme(NULL, 0x01, p, &phdata, &worddata);
				fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
				fmtp.fmt_amp = phdata.sound_param[pd_FMT];

				if (last_pitch_cmd < 0) {
					// no pitch command yet: take amplitude and pitch range from the next phoneme
					DoAmplitude(next->amp, NULL);
					DoPitch(envelope_data[p->env], next->pitch1, next->pitch2);
				}

				DoSpect2(ph, 0, &fmtp, p, 0);
			}

			InterpretPhoneme(NULL, 0, p, &phdata, &worddata);
			phdata.pd_control |= pd_DONTLENGTHEN;
			DoSample3(&phdata, 0, 0);
			break;
		case phFRICATIVE:
			InterpretPhoneme(NULL, 0, p, &phdata, &worddata);

			if (p->synthflags & SFLAG_LENGTHEN)
				DoSample3(&phdata, p->length, 0); // play it twice for [s:] etc.
			DoSample3(&phdata, p->length, 0);
			break;
		case phVSTOP:
			ph = p->ph;
			memset(&fmtp, 0, sizeof(fmtp));
			fmtp.fmt_control = pd_DONTLENGTHEN;

			// pre_voiced: followed by a vowel, or by a liquid in the same word.
			// Amplitude and pitch are taken from this phoneme before a vowel and
			// from the liquid before a liquid.
			pre_voiced = false;
			if (next->type == phVOWEL) {
				DoAmplitude(p->amp, NULL);
				DoPitch(envelope_data[p->env], p->pitch1, p->pitch2);
				pre_voiced = true;
			} else if ((next->type == phLIQUID) && !next->newword) {
				DoAmplitude(next->amp, NULL);
				DoPitch(envelope_data[next->env], next->pitch1, next->pitch2);
				pre_voiced = true;
			} else {
				if (last_pitch_cmd < 0) {
					DoAmplitude(next->amp, NULL);
					DoPitch(envelope_data[p->env], p->pitch1, p->pitch2);
				}
			}

			if ((prev->type == phVOWEL) || (ph->phflags & phPREVOICE)) {
				// a period of voicing before the release
				InterpretPhoneme(NULL, 0x01, p, &phdata, &worddata);
				fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
				fmtp.fmt_amp = phdata.sound_param[pd_FMT];

				DoSpect2(ph, 0, &fmtp, p, 0);
				if (p->synthflags & SFLAG_LENGTHEN) {
					// lengthened: pause, then repeat the voicing
					DoPause(25, 1);
					DoSpect2(ph, 0, &fmtp, p, 0);
				}
			} else {
				if (p->synthflags & SFLAG_LENGTHEN)
					DoPause(50, 0);
			}

			if (pre_voiced) {
				// followed by a vowel, or liquid + vowel
				StartSyllable();
			} else
				p->synthflags |= SFLAG_NEXT_PAUSE;
			// the release itself: formant data plus the additional wav data
			InterpretPhoneme(NULL, 0, p, &phdata, &worddata);
			fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
			fmtp.fmt_amp = phdata.sound_param[pd_FMT];
			fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV];
			fmtp.wav_amp = phdata.sound_param[pd_ADDWAV];
			DoSpect2(ph, 0, &fmtp, p, 0);

			// within a word, add a short pause before a following fricative
			if ((p->newword == 0) && (next2->newword == 0)) {
				if (next->type == phVFRICATIVE)
					DoPause(20, 0);
				if (next->type == phFRICATIVE)
					DoPause(12, 0);
			}
			break;
		case phVFRICATIVE:
			// amplitude and pitch: from this phoneme before a vowel, from the
			// liquid before a liquid, otherwise only if no pitch command is active
			if (next->type == phVOWEL) {
				DoAmplitude(p->amp, NULL);
				DoPitch(envelope_data[p->env], p->pitch1, p->pitch2);
			} else if (next->type == phLIQUID) {
				DoAmplitude(next->amp, NULL);
				DoPitch(envelope_data[next->env], next->pitch1, next->pitch2);
			} else {
				if (last_pitch_cmd < 0) {
					DoAmplitude(p->amp, NULL);
					DoPitch(envelope_data[p->env], p->pitch1, p->pitch2);
				}
			}

			if ((next->type == phVOWEL) || ((next->type == phLIQUID) && (next->newword == 0))) // ?? test 14.Aug.2007
				StartSyllable();
			else
				p->synthflags |= SFLAG_NEXT_PAUSE;
			InterpretPhoneme(NULL, 0, p, &phdata, &worddata);
			memset(&fmtp, 0, sizeof(fmtp));
			fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2;
			fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
			fmtp.fmt_amp = phdata.sound_param[pd_FMT];
			fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV];
			fmtp.wav_amp = phdata.sound_param[pd_ADDWAV];

			// a lengthened phoneme is generated twice
			if (p->synthflags & SFLAG_LENGTHEN)
				DoSpect2(p->ph, 0, &fmtp, p, 0);
			DoSpect2(p->ph, 0, &fmtp, p, 0);
			break;
		case phNASAL:
			memset(&fmtp, 0, sizeof(fmtp));
			if (!(p->synthflags & SFLAG_SEQCONTINUE)) {
				DoAmplitude(p->amp, NULL);
				DoPitch(envelope_data[p->env], p->pitch1, p->pitch2);
			}

			if (prev->type == phNASAL)
				last_frame = NULL;

			InterpretPhoneme(NULL, 0, p, &phdata, &worddata);
			fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2;
			fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
			fmtp.fmt_amp = phdata.sound_param[pd_FMT];

			if (next->type == phVOWEL) {
				StartSyllable();
				DoSpect2(p->ph, 0, &fmtp, p, 0);
			} else if (prev->type == phVOWEL && (p->synthflags & SFLAG_SEQCONTINUE))
				DoSpect2(p->ph, 0, &fmtp, p, 0);
			else {
				// neither before a vowel nor continuing from one:
				// last_frame is cleared both before and after the spectrum
				last_frame = NULL; // only for nasal ?
				DoSpect2(p->ph, 0, &fmtp, p, 0);
				last_frame = NULL;
			}

			break;
		case phLIQUID:
			memset(&fmtp, 0, sizeof(fmtp));
			// a trill uses modulation 5, other liquids use 0
			modulation = 0;
			if (p->ph->phflags & phTRILL)
				modulation = 5;

			if (!(p->synthflags & SFLAG_SEQCONTINUE)) {
				DoAmplitude(p->amp, NULL);
				DoPitch(envelope_data[p->env], p->pitch1, p->pitch2);
			}

			if (prev->type == phNASAL)
				last_frame = NULL;

			if (next->type == phVOWEL)
				StartSyllable();
			InterpretPhoneme(NULL, 0, p, &phdata, &worddata);

			// add only the part of the phoneme's own "pause before" which
			// exceeds the prepause value of this list entry
			if ((value = (phdata.pd_param[i_PAUSE_BEFORE] - p->prepause)) > 0)
				DoPause(value, 1);
			fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2;
			fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
			fmtp.fmt_amp = phdata.sound_param[pd_FMT];
			fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV];
			fmtp.wav_amp = phdata.sound_param[pd_ADDWAV];
			DoSpect2(p->ph, 0, &fmtp, p, modulation);
			break;
		case phVOWEL:
			// A vowel is generated by two DoSpect2() calls: the first (second
			// argument 1) uses the vowel-start data, the second (second
			// argument 2) uses the vowel-end data.
			ph = p->ph;
			stress = p->stresslevel & 0xf;

			memset(&fmtp, 0, sizeof(fmtp));

			InterpretPhoneme(NULL, 0, p, &phdata, &worddata);
			fmtp.std_length = phdata.pd_param[i_SET_LENGTH] * 2;
			vowelstart_prev = 0;

			if (((fmtp.fmt_addr = phdata.sound_addr[pd_VWLSTART]) != 0) && ((phdata.pd_control & pd_FORNEXTPH) == 0)) {
				// a vowel start has been specified by the Vowel program
				fmtp.fmt_length = phdata.sound_param[pd_VWLSTART];
			} else if (prev->type != phPAUSE) {
				// check the previous phoneme
				InterpretPhoneme(NULL, 0, prev, &phdata_prev, NULL);
				if (((fmtp.fmt_addr = phdata_prev.sound_addr[pd_VWLSTART]) != 0) && (phdata_prev.pd_control & pd_FORNEXTPH)) {
					// a vowel start has been specified by the previous phoneme
					vowelstart_prev = 1;
					fmtp.fmt2_lenadj = phdata_prev.sound_param[pd_VWLSTART];
				}
				fmtp.transition0 = phdata_prev.vowel_transition[0];
				fmtp.transition1 = phdata_prev.vowel_transition[1];
			}

			if (fmtp.fmt_addr == 0) {
				// use the default start for this vowel
				fmtp.use_vowelin = 1;
				fmtp.fmt_control = 1;
				fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
			}

			fmtp.fmt_amp = phdata.sound_param[pd_FMT];

			// Pitch envelope comes from the list entry, unless a tone phoneme
			// is attached; the tone phoneme may also supply an amplitude envelope
			pitch_env = envelope_data[p->env];
			amp_env = NULL;
			if (p->tone_ph != 0) {
				InterpretPhoneme2(p->tone_ph, &phdata_tone);
				pitch_env = GetEnvelope(phdata_tone.pitch_env);
				if (phdata_tone.amp_env > 0)
					amp_env = GetEnvelope(phdata_tone.amp_env);
			}

			StartSyllable();

			// modulation is chosen from the stress level: 1 for stress <= 1,
			// 3 for stress >= 7, otherwise 2
			modulation = 2;
			if (stress <= 1)
				modulation = 1; // 16ths
			else if (stress >= 7)
				modulation = 3;

			// The order of the amplitude, pitch and spectrum commands depends
			// on the type of the previous phoneme
			if (prev->type == phVSTOP || prev->type == phVFRICATIVE) {
				DoAmplitude(p->amp, amp_env);
				DoPitch(pitch_env, p->pitch1, p->pitch2); // don't use prevocalic rising tone
				DoSpect2(ph, 1, &fmtp, p, modulation);
			} else if (prev->type == phLIQUID || prev->type == phNASAL) {
				DoAmplitude(p->amp, amp_env);
				DoSpect2(ph, 1, &fmtp, p, modulation); // continue with pre-vocalic rising tone
				DoPitch(pitch_env, p->pitch1, p->pitch2);
			} else if (vowelstart_prev) {
				// VowelStart from the previous phoneme, but not phLIQUID or phNASAL
				DoPitch(envelope_data[PITCHrise], p->pitch2 - 15, p->pitch2);
				DoAmplitude(p->amp-1, amp_env);
				DoSpect2(ph, 1, &fmtp, p, modulation); // continue with pre-vocalic rising tone
				DoPitch(pitch_env, p->pitch1, p->pitch2);
			} else {
				if (!(p->synthflags & SFLAG_SEQCONTINUE)) {
					DoAmplitude(p->amp, amp_env);
					DoPitch(pitch_env, p->pitch1, p->pitch2);
				}

				DoSpect2(ph, 1, &fmtp, p, modulation);
			}

			// the phoneme event which was deferred above (vowel after a liquid or nasal)
			if ((option_phoneme_events) && (done_phoneme_marker == 0)) {
				WritePhMnemonic(phoneme_name, p->ph, p, use_ipa, NULL);
				DoPhonemeMarker(espeakEVENT_PHONEME, sourceix, 0, phoneme_name);
			}

			// Set up the second part of the vowel
			fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
			fmtp.fmt_amp = phdata.sound_param[pd_FMT];
			fmtp.transition0 = 0;
			fmtp.transition1 = 0;

			// the vowel end comes from this vowel if it specifies one,
			// otherwise from the next phoneme (unless that is a pause)
			if ((fmtp.fmt2_addr = phdata.sound_addr[pd_VWLEND]) != 0)
				fmtp.fmt2_lenadj = phdata.sound_param[pd_VWLEND];
			else if (next->type != phPAUSE) {
				fmtp.fmt2_lenadj = 0;
				InterpretPhoneme(NULL, 0, next, &phdata_next, NULL);

				fmtp.use_vowelin = 1;
				fmtp.transition0 = phdata_next.vowel_transition[2]; // always do vowel_transition, even if ph_VWLEND ??  consider [N]
				fmtp.transition1 = phdata_next.vowel_transition[3];

				if ((fmtp.fmt2_addr = phdata_next.sound_addr[pd_VWLEND]) != 0)
					fmtp.fmt2_lenadj = phdata_next.sound_param[pd_VWLEND];
			}

			DoSpect2(ph, 2, &fmtp, p, modulation);
			break;
		}
		ix++;
	}
	EndPitch(1);
	if (*n_ph > 0) {
		DoMarker(espeakEVENT_END, count_characters, 0, count_sentences); // end of clause
		*n_ph = 0;
	}

	return 0; // finished the phoneme list
}
1526 
SpeakNextClause(int control)1527 int SpeakNextClause(int control)
1528 {
1529 	// Speak text from memory (text_in)
1530 	// control 0: start
1531 	//    text_in is set
1532 
1533 	// The other calls have text_in = NULL
1534 	// control 1: speak next text
1535 	//         2: stop
1536 
1537 	int clause_tone;
1538 	char *voice_change;
1539 	const char *phon_out;
1540 
1541 	if (control == 2) {
1542 		// stop speaking
1543 		n_phoneme_list = 0;
1544 		WcmdqStop();
1545 
1546 		return 0;
1547 	}
1548 
1549 	if (text_decoder_eof(p_decoder)) {
1550 		skipping_text = 0;
1551 		return 0;
1552 	}
1553 
1554 	if (current_phoneme_table != voice->phoneme_tab_ix)
1555 		SelectPhonemeTable(voice->phoneme_tab_ix);
1556 
1557 	// read the next clause from the input text file, translate it, and generate
1558 	// entries in the wavegen command queue
1559 	TranslateClause(translator, &clause_tone, &voice_change);
1560 
1561 	CalcPitches(translator, clause_tone);
1562 	CalcLengths(translator);
1563 
1564 	if ((option_phonemes & 0xf) || (phoneme_callback != NULL)) {
1565 		phon_out = GetTranslatedPhonemeString(option_phonemes);
1566 		if (option_phonemes & 0xf)
1567 			fprintf(f_trans, "%s\n", phon_out);
1568 		if (phoneme_callback != NULL)
1569 			phoneme_callback(phon_out);
1570 	}
1571 
1572 	if (skipping_text) {
1573 		n_phoneme_list = 0;
1574 		return 1;
1575 	}
1576 
1577 	Generate(phoneme_list, &n_phoneme_list, 0);
1578 
1579 	if (voice_change != NULL) {
1580 		// voice change at the end of the clause (i.e. clause was terminated by a voice change)
1581 		new_voice = LoadVoiceVariant(voice_change, 0); // add a Voice instruction to wavegen at the end of the clause
1582 	}
1583 
1584 	if (new_voice) {
1585 		// finished the current clause, now change the voice if there was an embedded
1586 		// change voice command at the end of it (i.e. clause was broken at the change voice command)
1587 		DoVoiceChange(voice);
1588 		new_voice = NULL;
1589 	}
1590 
1591 	return 1;
1592 }
1593