1 /* SpeechSynthesizer_and_TextGrid.cpp
2  *
3  * Copyright (C) 2011-2019 David Weenink
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20 	djmw 20111214
21 */
22 
23 #include "DTW.h"
24 #include "Sounds_to_DTW.h"
25 #include "Sound_extensions.h"
26 #include "SpeechSynthesizer_and_TextGrid.h"
27 #include "CCs_to_DTW.h"
28 #include "DTW_and_TextGrid.h"
29 #include "NUMmachar.h"
30 
31 // prototypes
32 static void IntervalTier_splitInterval (IntervalTier me, double time, conststring32 leftLabel, integer interval, double precision);
33 static autoIntervalTier IntervalTier_IntervalTier_cutPartsMatchingLabel (IntervalTier me, IntervalTier thee, conststring32 label, double precision);
34 static autoIntervalTier IntervalTiers_patch_noBoundaries (IntervalTier me, IntervalTier thee, conststring32 patchLabel, double precision);
35 static autoTable IntervalTiers_to_Table_textAlignmentment (IntervalTier target, IntervalTier source, EditCostsTable costs);
36 
37 
38 static void IntervalTier_checkRange (IntervalTier me, integer startInterval, integer endinterval) {
39 	Melder_require (startInterval <= endinterval,
40 		U"The interval range end number should not be smaller than the interval range start number.");
41 	Melder_require (startInterval > 0,
42 		U"The specified interval range start number is ", startInterval, U", but should be at least 1.");
43 	Melder_require (endinterval <= my intervals.size,
44 		U"The specified interval range end number (", endinterval, U") exceeds the number of intervals (", my intervals.size, U") in this tier.");
45 }
46 
47 autoSound SpeechSynthesizer_TextInterval_to_Sound (SpeechSynthesizer me, TextInterval thee, autoTextGrid *out_textgrid)
48 {
49 	try {
50 		Melder_require (thy text && thy text [0] != U'\0',
51 			U"TextInterval should not be empty.");
52 		autoSound him = SpeechSynthesizer_to_Sound (me, thy text.get(), out_textgrid, nullptr);
53 		return him;
54 	} catch (MelderError) {
EspeakVoice_create()55 		Melder_throw (U"Sound not created from TextInterval.");
56 	}
57 }
58 
59 autoSound SpeechSynthesizer_TextGrid_to_Sound (SpeechSynthesizer me, TextGrid thee, integer tierNumber, integer iinterval, autoTextGrid *out_textgrid) {
60 	try {
61 		TextGrid_checkSpecifiedTierNumberWithinRange (thee, tierNumber);
62 		const IntervalTier intervalTier = (IntervalTier) thy tiers->at [tierNumber];
63 		Melder_require (intervalTier -> classInfo == classIntervalTier,
64 			U"Tier ", tierNumber, U" is not an interval tier.");
65 		Melder_require (iinterval > 0 && iinterval <= intervalTier -> intervals.size,
66 			U"Interval ", iinterval, U" does not exist on tier ", tierNumber, U".");
67 		return SpeechSynthesizer_TextInterval_to_Sound (me, intervalTier -> intervals.at [iinterval], out_textgrid);
68 	} catch (MelderError) {
69 		Melder_throw (U"Sound not created from textGrid.");
70 	}
71 }
72 
#if 0
/*
	Disabled helper.
	Return the start time of the first interval on interval tier `tierNumber` whose text
	compares equal to `label`, or `undefined` if there is no such interval.
	Throws if the tier number is out of range or the tier is not an interval tier.
*/
static double TextGrid_getStartTimeOfFirstOccurrence (TextGrid thee, integer tierNumber, conststring32 label) {
	TextGrid_checkSpecifiedTierNumberWithinRange (thee, tierNumber);
	const IntervalTier intervalTier = (IntervalTier) thy tiers->at [tierNumber];
	Melder_require (intervalTier -> classInfo == classIntervalTier,
		U"Tier ", tierNumber, U" is not an interval tier.");
	double start = undefined;
	for (integer iint = 1; iint <= intervalTier -> intervals.size; iint ++) {
		const TextInterval ti = intervalTier -> intervals.at [iint];
		/*
			The interval text is accessed with .get(), as in the enabled code of this file.
		*/
		if (Melder_cmp (ti -> text.get(), label) == 0) {
			start = ti -> xmin;
			break;
		}
	}
	return start;
}

/*
	Disabled helper.
	Return the end time of the last interval on interval tier `tierNumber` whose text
	equals `label`, or `undefined` if there is no such interval.
	The tier is scanned from its last interval backwards.
	Throws if the tier number is out of range or the tier is not an interval tier.
*/
static double TextGrid_getEndTimeOfLastOccurrence (TextGrid thee, integer tierNumber, conststring32 label) {
	TextGrid_checkSpecifiedTierNumberWithinRange (thee, tierNumber);
	const IntervalTier intervalTier = (IntervalTier) thy tiers->at [tierNumber];
	Melder_require (intervalTier -> classInfo == classIntervalTier,
		U"Tier ", tierNumber, U" is not an interval tier.");
	double end = undefined;
	for (integer iint = intervalTier -> intervals.size; iint > 0; iint --) {
		const TextInterval ti = intervalTier -> intervals.at [iint];
		if (Melder_equ (ti -> text.get(), label)) {
			end = ti -> xmax;
			break;
		}
	}
	return end;
}
#endif
106 
107 static void IntervalTier_getLabelInfo (IntervalTier me, conststring32 label, double *labelDurations, integer *numberOfOccurences) {
108     *labelDurations = 0.0;
109     *numberOfOccurences = 0;
110     for (integer i = 1; i <= my intervals.size; i ++) {
111 		const TextInterval ti = my intervals.at [i];
112         if (Melder_equ (ti -> text.get(), label)) {
113             *labelDurations += ti -> xmax - ti -> xmin;
114             (*numberOfOccurences) ++;
115         }
116     }
117 }
118 
119 #define TIMES_ARE_CLOSE(x,y) (fabs((x)-(y)) < precision)
120 void IntervalTier_splitInterval (IntervalTier me, double time, conststring32 leftLabel, integer interval, double precision) {
121     try {
122 		Melder_assert (interval > 0);
123         TextInterval ti = nullptr;
124 		integer index = 0;
125         for (integer i = interval; i <= my intervals.size; i ++) {
126             ti = my intervals.at [i];
127             if (time < ti -> xmax + precision && time > ti -> xmin - precision) {
EspeakVoice_into_voice(EspeakVoice me,voice_t * voicet)128                 index = i;
129                 break;
130 			}
131         }
132         // if index == 0 then search left intervals??
133         if (index == 0 || TIMES_ARE_CLOSE(time, ti -> xmin) || TIMES_ARE_CLOSE(time, ti -> xmax))
134             return;
135         autoTextInterval newInterval = TextInterval_create (ti -> xmin, time, leftLabel);
136         /*
137 			Make start of current and begin of new interval equal
138 		*/
139         ti -> xmin = time;
140         my intervals. addItem_move (newInterval.move());
141     } catch (MelderError) {
142         Melder_throw (U"Boundary not inserted.");
143     }
144 
145 }
146 
147 static autoTextTier TextTier_IntervalTier_cutPartsMatchingLabel (TextTier me, IntervalTier thee, conststring32 label, double precision) {
148     try {
149        Melder_require (my xmin == thy xmin && my xmax == thy xmax,
150             U"Domains should be equal.");
151         integer myIndex = 1;
152 		double timeCut = 0.0;
153         autoTextTier him = TextTier_create (0.0, my xmax - my xmin);
154         for (integer j = 1; j <= thy intervals.size; j ++) {
155             const TextInterval cut = thy intervals.at [j];
156             if (Melder_equ (cut -> text.get(), label))
157                 timeCut += cut -> xmax - cut -> xmin;
158             else {
159                  while (myIndex <= my points.size) {
160                     const TextPoint tp = my points.at [myIndex];
161                     if (tp -> number < cut -> xmin - precision) {
162                         // point is left of cut
163                         myIndex ++;
164                     } else if (tp -> number < cut -> xmax + precision) {
165                         // point is in (no)cut
166                         const double time = tp -> number - my xmin - timeCut;
167                         TextTier_addPoint (him.get(), time, tp -> mark.get());
168                         myIndex ++;
169                     } else {
170                         break;
171                     }
172                  }
v_info()173             }
174         }
175         his xmax -= timeCut;
176         return him;
177     } catch (MelderError) {
178         Melder_throw (me, U": parts not cut.");
179     }
180 }
181 
182 // Cut parts from me marked by labels in thee
183 autoIntervalTier IntervalTier_IntervalTier_cutPartsMatchingLabel (IntervalTier me, IntervalTier thee, conststring32 label, double precision) {
184     try {
185         Melder_require (my xmin == thy xmin && my xmax != thy xmax,
186             U"Domains should be identical.");
187         autoVEC durations = raw_VEC (my intervals.size);
188         for (integer i = 1; i <= my intervals.size; i ++) {
189             const TextInterval ti = my intervals.at [i];
190             durations [i] = ti -> xmax - ti -> xmin;
191         }
192         integer myInterval = 1;
193         for (integer j = 1; j <= thy intervals.size; j ++) {
synthCallback(short * wav,int numsamples,espeak_EVENT * events)194             const TextInterval cut = thy intervals.at [j];
195             if (Melder_equ (cut -> text.get(), label)) { // trim
196                 while (myInterval <= my intervals.size) {
197                     const TextInterval ti = my intervals.at [myInterval];
198                     if (ti -> xmin > cut -> xmin - precision && ti -> xmax < cut -> xmax + precision) {
199 						/*
200 							1. Interval completely within cut
201                         */
202                         durations [myInterval] = 0.0;
203                         myInterval ++;
204                     } else if (ti -> xmin < cut -> xmin + precision && cut -> xmin < ti -> xmax + precision) {
205 						/*
206 							2. Cut start is within interval
207 						*/
208                         if (cut -> xmax > ti -> xmax - precision) {
209 							/*
210 								Interval end is in cut, interval start before
211 							*/
212                             durations [myInterval] -= ti -> xmax - cut -> xmin;
213                             myInterval ++;
214                         } else {
215 							/*
216 								3. cut completely within interval
217                             */
218                             durations [myInterval] -= cut -> xmax - cut -> xmin;
219                             break;
220                         }
221                     } else if (cut -> xmax > ti -> xmin - precision && cut -> xmin < ti -> xmax + precision) {
222 						/*
223 							1+2 : cut end is within interval, cut start before
224                         */
225                         durations [myInterval] -= cut -> xmax - ti -> xmin;
226                         break;
227                     } else if (ti -> xmax < cut -> xmin + precision) {
228                         myInterval ++;
229                     }
230                 }
231             }
232         }
233         longdouble totalDuration = 0.0;
234         for (integer i = 1; i <= my intervals.size; i ++) {
235             if (durations [i] < precision)
236                 durations [i] = 0.0;
237             totalDuration += durations [i];
238         }
239         autoIntervalTier him = IntervalTier_create (0, double (totalDuration));
240         double time = 0.0;
SpeechSynthesizer_getLanguageCode(SpeechSynthesizer me)241         integer hisInterval = 1;
242         for (integer i = 1; i <= my intervals.size; i ++) {
243             if (durations [i] <= 0.0)
244             	continue;
245             const TextInterval ti = my intervals.at [i];
246             time += durations [i];
247             if (fabs (time - totalDuration) > precision) {
248                 IntervalTier_splitInterval (him.get(), time, ti -> text.get(), hisInterval, precision);
249                 hisInterval ++;
250             } else { // last interval
251                 const TextInterval histi = his intervals.at [hisInterval];
SpeechSynthesizer_getPhonemeCode(SpeechSynthesizer me)252                 TextInterval_setText (histi, ti -> text.get());
253             }
254         }
255         return him;
256     } catch (MelderError) {
257         Melder_throw (me, U": parts not cut.");
258     }
259 }
260 
261 autoTextGrid TextGrid_IntervalTier_cutPartsMatchingLabel (TextGrid me, IntervalTier thee, conststring32 label, double precision) {
262     try {
SpeechSynthesizer_getVoiceCode(SpeechSynthesizer me)263         Melder_require (my xmin == thy xmin && my xmax == thy xmax,
264            U"Domains should be equal.");
265         double cutDurations = 0;
266         for (integer i = 1; i <= thy intervals.size; i ++) {
267             const TextInterval cut = thy intervals.at [i];
268             if (Melder_equ (cut -> text.get(), label))
269                 cutDurations += cut -> xmax - cut -> xmin;
270         }
271         if (cutDurations <= precision) // Nothing to patch
272             return Data_copy (me);
273         autoTextGrid him = TextGrid_createWithoutTiers (0, thy xmax - thy xmin - cutDurations);
SpeechSynthesizer_create(conststring32 languageName,conststring32 voiceName)274         for (integer itier = 1; itier <= my tiers->size; itier ++) {
275             const Function anyTier = my tiers->at [itier];
276             if (anyTier -> classInfo == classIntervalTier) {
277                 autoIntervalTier newTier = IntervalTier_IntervalTier_cutPartsMatchingLabel ((IntervalTier) anyTier, thee, label, precision);
278                 his tiers -> addItem_move (newTier.move());
279             } else {
280                 autoTextTier newTier = TextTier_IntervalTier_cutPartsMatchingLabel ((TextTier) anyTier, thee, label, precision);
281                 his tiers -> addItem_move (newTier.move());
282             }
283         }
284         return him;
285     } catch (MelderError) {
286         Melder_throw (me, U": no parts cut.");
287     }
288 }
289 
290 // Patch thy intervals that match patchLabel into my intervals
291 // The resulting IntervalTier has thy xmin as starting time and thy xmax as end time
SpeechSynthesizer_setTextInputSettings(SpeechSynthesizer me,int inputTextFormat,int inputPhonemeCoding)292 autoIntervalTier IntervalTiers_patch_noBoundaries (IntervalTier me, IntervalTier thee, conststring32 patchLabel, double precision) {
293     try {
294 		autoVEC durations = zero_VEC (my intervals.size + 1);
295 		for (integer i = 1; i <= my intervals.size; i ++) {
296 			const TextInterval myti = my intervals.at [i];
297 			durations [i] = myti -> xmax - myti -> xmin;
298 		}
299 		integer myInterval = 1;
300 		double xShift = thy xmin - my xmin, firstShift = 0.0;
301         for (integer interval = 1; interval <= thy intervals.size; interval ++) {
302             const TextInterval patch = thy intervals.at [interval];
303             if (Melder_equ (patch -> text.get(), patchLabel)) {
304 				if (interval == 1)
305 					xShift += firstShift = patch -> xmax - patch -> xmin;
306 				else if (interval == thy intervals.size)
307 					durations [my intervals.size + 1] = patch -> xmax - patch -> xmin;
308 				else
309 					while (myInterval <= my intervals.size) {
310 						const TextInterval ti = my intervals.at [myInterval];
311 						const double tixmin = ti -> xmin + xShift;
312 						const double tixmax = ti -> xmax + xShift;
313 						if ((patch -> xmin > tixmin - precision) && (patch -> xmin < tixmax + precision)) {
314 							durations [myInterval] += patch -> xmax - patch -> xmin;
315 							break;
316 						}
317 						myInterval++;
318 					}
319             } else
320 				while (myInterval <= my intervals.size) {
321 					TextInterval ti = my intervals.at [myInterval];
322 					double tixmax = ti -> xmax + xShift;
323 					if (tixmax < patch -> xmin + precision)
324 						myInterval ++;
325 					else
326 						break;
327 				}
328         }
329         autoIntervalTier him = IntervalTier_create (thy xmin, thy xmax);
330         // first interval
331 		double time = thy xmin + firstShift;
332 		integer hisInterval = 1;
333 		if (firstShift > 0.0) {
334 			IntervalTier_splitInterval (him.get(), time , U"", hisInterval, precision);
335 			hisInterval ++;
336 		}
337 		for (integer interval = 1; interval <= my intervals.size; interval ++) {
338 			const TextInterval ti = my intervals.at [interval];
339 			time += durations [interval];
340 			IntervalTier_splitInterval (him.get(), time, ti -> text.get(), hisInterval, precision);
341 			hisInterval ++;
342 		}
343 		if (durations [my intervals.size + 1] > 0.0) {
344 			time += durations [my intervals.size + 1];
345 			IntervalTier_splitInterval (him.get(), time , U"", hisInterval, precision);
346 		}
347         return him;
348     } catch (MelderError) {
349         Melder_throw (me, U": not patched.");
350     }
351 }
352 
#if 0
/*
	Disabled variant of the patcher that makes explicit boundaries around every patch.
	For each interval of thee whose text equals patchLabel, my intervals that end before
	the patch are copied, the interval in which the patch starts is split, an empty
	interval with the duration of the patch is inserted, and the remainder of the split
	interval follows. If thy last interval is not a patch, my remaining intervals are copied.
*/
static autoIntervalTier IntervalTiers_patch (IntervalTier me, IntervalTier thee, conststring32 patchLabel, double precision) {
	try {
		autoIntervalTier him = IntervalTier_create (thy xmin, thy xmax);
		integer myInterval = 1, hisInterval = 1;
		double xmax = thy xmin;
		for (integer i = 1; i <= thy intervals.size; i ++) {
			TextInterval myti, ti = thy intervals.at [i];
			if (Melder_equ (ti -> text.get(), patchLabel)) {
				bool splitInterval = false;
				double endtime, split = 0.0;
				/*
					NOTE(review): this line was marked as BUG in the original; `i` starts at 1,
					so this test is always true. Confirm the intended condition before
					enabling this code.
				*/
				if (i > 0) {
					while (myInterval <= my intervals.size) {
						myti = my intervals.at [myInterval];
						endtime = xmax + myti -> xmax - myti -> xmin;
						if (endtime <= ti -> xmin + precision) {
							xmax = endtime;
							IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
							hisInterval ++;
						} else {
							if (xmax < ti -> xmin - precision) { // split interval ???
								splitInterval = true;
								xmax = ti -> xmin;
								split = endtime - xmax;
								IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
								hisInterval ++;
								myInterval ++;
							}
							break;
						}
						myInterval ++;
					}
				}
				xmax += ti -> xmax - ti -> xmin;
				IntervalTier_splitInterval (him.get(), xmax, U"", hisInterval, precision);
				hisInterval ++;
				if (splitInterval) {
					xmax += split;
					IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
					hisInterval ++;
				}
			} else if (i == thy intervals.size) { // copy remaining if last interval doesn't match
				while (myInterval <= my intervals.size) {
					myti = my intervals.at [myInterval];
					xmax += myti -> xmax - myti -> xmin;
					IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
					hisInterval ++;
					myInterval ++;
				}
			}
		}
		return him;
	} catch (MelderError) {
		Melder_throw (me, U": not patched.");
	}
}
#endif
410 
411 static autoTextTier TextTier_IntervalTier_patch (TextTier me, IntervalTier thee, conststring32 patchLabel, double precision) {
412 	try {
413 		integer myIndex = 1;
414 		autoTextTier him = TextTier_create (thy xmin, thy xmax);
415 		double xShift = thy xmin - my xmin;
416 		for (integer i = 1; i <= thy intervals.size; i ++) {
417 			const TextInterval ti = thy intervals.at [i];
418 			if (Melder_equ (ti -> text.get(), patchLabel)) {
419 				if (i > 1) {
420 					while (myIndex <= my points.size) {
421 						const TextPoint tp = my points.at [myIndex];
422 						const double time = tp -> number + xShift;
423 						if (time < ti -> xmin + precision) {
424 							autoTextPoint newPoint = TextPoint_create (time, tp -> mark.get());
425 							his points. addItem_move (newPoint.move());
426 						} else {
427 							break;
428 						}
429 						myIndex ++;
430 					}
431 				}
almost_equal(double t1,double t2)432 				xShift += ti -> xmax - ti -> xmin;
433 			} else if (i == thy intervals.size) {
434 				while (myIndex <= my points.size) {
435 					const TextPoint tp = my points.at [myIndex];
436 					const  double time = tp -> number + xShift;
IntervalTier_insertEmptyIntervalsFromOtherTier(IntervalTier to,IntervalTier from)437 					if (time < ti -> xmin + precision) {
438 						autoTextPoint newPoint = TextPoint_create (time, tp -> mark.get());
439 						his points. addItem_move (newPoint.move());
440 					}
441 					myIndex ++;
442 				}
443 			}
444 		}
445 		return him;
446 	} catch (MelderError) {
447 		Melder_throw (me, U": cannot patch TextTier.");
448 	}
449 }
450 
451 autoTextGrid TextGrid_IntervalTier_patch (TextGrid me, IntervalTier thee, conststring32 patchLabel, double precision) {
452 	try {
453 		double patchDurations;
454 		integer numberOfPatches;
455 		IntervalTier_getLabelInfo (thee, patchLabel, & patchDurations, & numberOfPatches);
456 		if (patchDurations <= 0 || my xmax - my xmin >= thy xmax - thy xmin ) // Nothing to patch
457 			return Data_copy (me);
458 		autoTextGrid him = TextGrid_createWithoutTiers (thy xmin, thy xmax);
459 		for (integer itier = 1; itier <= my tiers->size; itier ++) {
460 			const Function anyTier = my tiers->at [itier];
461 			if (anyTier -> classInfo == classIntervalTier) {
462 				//autoIntervalTier ait = IntervalTiers_patch ((IntervalTier) anyTier, thee, patchLabel, precision);
463 				autoIntervalTier newTier = IntervalTiers_patch_noBoundaries ((IntervalTier) anyTier, thee, patchLabel, precision);
IntervalTier_removeVeryShortIntervals(IntervalTier me)464 				his tiers -> addItem_move (newTier.move());
465 			} else {
466 				autoTextTier newTier = TextTier_IntervalTier_patch ((TextTier) anyTier, thee, patchLabel, precision);
467 				his tiers -> addItem_move (newTier.move());
468 			}
469 		}
470 		return him;
471 	} catch (MelderError) {
472 		Melder_throw (me, U": not patched.");
473 	}
474 }
Table_to_TextGrid(Table me,conststring32 text,double xmin,double xmax)475 
476 // We assume that the Sound and the SpeechSynthesizer have the same samplingFrequency
477 autoTextGrid SpeechSynthesizer_Sound_TextInterval_align (SpeechSynthesizer me, Sound thee, TextInterval him, double silenceThreshold, double minSilenceDuration, double minSoundingDuration) {
478 	try {
479 		Melder_require (thy xmin == his xmin && thy xmax == his xmax,
480 			U"Domains of Sound and TextGrid should be equal.");
481 		Melder_require (fabs (1.0 / thy dx - my d_samplingFrequency) < 1e-9,
482 			U"The sampling frequencies of the SpeechSynthesizer and the Sound should be equal.");
483 
484 		autoSTRVEC tokens = splitByWhitespace_STRVEC (his text.get());
485 		const integer numberOfTokens = tokens.size;
486 		Melder_require (numberOfTokens > 0,
487 			U"The interval should contain text.");
488 		/*
489 			Remove silent intervals from start and end of sounds because
490 			1. it will improve the word rate guess
491 			2. it will improve the DTW matching.
492 		*/
493 		const double minPitch = 200.0, timeStep = 0.005, precision = thy dx;
494 		double startTimeOfSounding, endTimeOfSounding;
495 		autoSound soundTrimmed = Sound_trimSilencesAtStartAndEnd (thee, 0.0, minPitch, timeStep, silenceThreshold, minSilenceDuration, minSoundingDuration, & startTimeOfSounding, & endTimeOfSounding);
496 		const double duration_soundTrimmed = soundTrimmed -> xmax - soundTrimmed -> xmin;
497 		const bool hasSilence_sound = fabs (startTimeOfSounding - thy xmin) > precision || fabs (endTimeOfSounding - thy xmax) > precision;
498 
499 		if (my d_estimateSpeechRate) {
500 			/*
501 				Estimate speaking rate with the number of words per minute from the text
502 			*/
503 			const double wordsPerMinute_rawTokens = 60.0 * numberOfTokens / duration_soundTrimmed;
504 			/*
505 				Compensation for long words: 5 characters / word
506 			*/
507 			const double wordsPerMinute_rawText = 60.0 * (str32len (his text.get()) / 5.0) / duration_soundTrimmed;
508 			my d_wordsPerMinute = Melder_ifloor (0.5 * (wordsPerMinute_rawTokens + wordsPerMinute_rawText));
509 		}
510 
511 		autoTextGrid textgrid_synth, textgrid_synth_sounding;
512 		autoSound synth = SpeechSynthesizer_TextInterval_to_Sound (me, him, & textgrid_synth);
513 		/*
514 			For the synthesizer the silence threshold has to be < -30 dB, otherwise fricatives will not
515 			be found as sounding! This is ok since silences are almost at zero amplitudes for synthesized sounds.
516 			We also have to decrease the minimum silence and minimum sounding duration to catch, for example,
517 			the final plosive "t" from the synthesized sound "text".
518 		*/
519 		const double silenceThreshold_synth = -40.0, minSilenceDuration_synth = 0.05;
520 		const double minSoundingDuration_synth = 0.05;
521 		double startTimeOfSounding_synth, endTimeOfSounding_synth;
522 		autoSound synthTrimmed = Sound_trimSilencesAtStartAndEnd (synth.get(), 0.0, minPitch, timeStep, silenceThreshold_synth,
523 			minSilenceDuration_synth, minSoundingDuration_synth, & startTimeOfSounding_synth, & endTimeOfSounding_synth);
524 		const double synthTrimmed_duration = synthTrimmed -> xmax - synthTrimmed -> xmin;
525 		const bool hasSilence_synth = fabs (startTimeOfSounding_synth - synth -> xmin) > precision ||
526 								fabs (endTimeOfSounding_synth - synth -> xmax) > precision;
527 
528 		if (hasSilence_synth)
529 			textgrid_synth_sounding = TextGrid_extractPart (textgrid_synth.get(), startTimeOfSounding_synth, endTimeOfSounding_synth, true);
530 		/*
531 			Compare the durations of the two sounds to get an indication of the slope constraint needed for the DTW
532 		*/
533 		double slope = duration_soundTrimmed / synthTrimmed_duration;
534 		slope = ( slope > 1.0 ? slope : 1.0 / slope );
535         const int constraint = ( slope < 1.5 ? 4 : slope < 2.0 ? 3 : slope < 3.0 ? 2 : 1 ); // TODO enums
536 
537 		const double analysisWidth = 0.02, dt = 0.005, band = 0.0;
538         autoDTW dtw = Sounds_to_DTW ((hasSilence_sound ? soundTrimmed.get() : thee),
539 				(hasSilence_synth ? synthTrimmed.get() : synth.get()), analysisWidth, dt, band, constraint);
540 
541 		autoTextGrid result = DTW_TextGrid_to_TextGrid (dtw.get(), (hasSilence_synth ? textgrid_synth_sounding.get() : textgrid_synth.get()), precision);
542 		if (hasSilence_sound) {
543 			if (startTimeOfSounding > thy xmin)
544 				TextGrid_setEarlierStartTime (result.get(), thy xmin, U"", U"");
545 			if (endTimeOfSounding < thy xmax || result -> xmax < thy xmax)
546 					TextGrid_setLaterEndTime (result.get(), thy xmax, U"", U"");
547 		}
548 		return result;
549 	} catch (MelderError) {
550 		Melder_throw (U"Sound and TextInterval not aligned.");
551 	}
552 }
553 /*
554 typedef struct structAlignmentOfSoundAndTextStruct {
555 	double windowLength, timeStep; // analysis
556 	double f1_mel, fmax_mel, df_mel; // MelFilter
557 	integer numberOfMFCCCoefficients; // MFCC
558 	double dtw_cepstralWeight, dtw_logEnergyWeight; // MFCC -> DTW
559 	double dtw_regressionWeight, dtw_regressionlogEnergyWeight;
560 	double dtw_regressionWindowLength;
561 	double dtw_sakoeChibaBand, dtw_constraint;
562 	double silenceThreshold, minSilenceDuration, minSoundingDuration, trimDuration; // silence detection
563 	integer language, voicevariant, pitchAdjustment, pitchRange, wordsPerMinute; // synthesizer
564 	bool interpretPhonemeCodes, ipa, set_wordsPerMinute;
565 	double wordgap; // synthesizer
566 } *SpeechSynthesizer_alignmentStruct;*/
567 
568 static autoTextGrid SpeechSynthesizer_Sound_TextInterval_align2 (SpeechSynthesizer me, Sound thee, TextInterval him, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double trimDuration) {
569     try {
570 		Melder_require (thy xmin == his xmin && thy xmax == his xmax,
571 			U"Domains of Sound and TextGrid should be equal.");
572 		Melder_require (fabs (1.0 / thy dx - my d_samplingFrequency) < 1e-9,
573 			U"The sampling frequencies of the SpeechSynthesizer and the Sound should be equal.");
574 
575         const conststring32 trimLabel = U"trim";
576         /*
577 			1. Trim the silences of the sound
578 
579 			For the synthesizer the silence threshold has to be < -30 dB, otherwise fricatives
580 			will not be found as sounding! This is ok since silences are almost at zero amplitudes
581 			We also have to decrease the minimum silence and minimum sounding duration to catch,
582 			for example, the final plosive "t" from the word "text"
583          */
584         const double minPitch = 200, timeStep = 0.005, precision = thy dx;
585         autoTextGrid thee_trimmer;
586         autoSound thee_trimmed = Sound_trimSilences (thee, trimDuration, false, minPitch, timeStep, silenceThreshold,  minSilenceDuration, minSoundingDuration, &thee_trimmer, trimLabel);
587 		/*
588 			2. Synthesize the sound from the TextInterval
589 		*/
590         autoTextGrid tg_syn;
591         autoSound synth = SpeechSynthesizer_TextInterval_to_Sound (me, him, &tg_syn);
592 		/*
593 			3. There should be no silences in the synthesized sound except at the start and finish.
594 			Set the wordwarp parameter to a small value like 0.001 s.
595 
596 			4. Get DTW from the two sounds
597 		*/
598         const double analysisWidth = 0.02, dt = 0.005, band = 0.0;
599         const int constraint = 4;
600         autoDTW dtw = Sounds_to_DTW (thee_trimmed.get(), synth.get(), analysisWidth, dt, band, constraint);
601 		/*
602 			6. Warp the synthesis TextGrid
603 			First make domains equal, otherwsise the warper protests
604 		*/
SpeechSynthesizer_to_Sound(SpeechSynthesizer me,conststring32 text,autoTextGrid * tg,autoTable * events)605         autoTextGrid warp = DTW_TextGrid_to_TextGrid (dtw.get(), tg_syn.get(), precision);
606 		/*
607 			6. Patch the trimmed intervals back into the warped TextGrid
608 		*/
609         autoTextGrid result = TextGrid_IntervalTier_patch (warp.get(), (IntervalTier) thee_trimmer -> tiers->at [1], U"trim", 2 * thy dx);
610 
611         return result;
612     } catch (MelderError) {
613         Melder_throw (thee, U": sound and TextInterval not aligned.");
614     }
615 }
616 
617 autoTextGrid SpeechSynthesizer_Sound_IntervalTier_align (SpeechSynthesizer me, Sound thee, IntervalTier him, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration) {
618     try {
619 		IntervalTier_checkRange (him, istart, iend);
620         const TextInterval tib = his intervals.at [istart];
621         const TextInterval tie = his intervals.at [iend];
622 		Melder_require (tib -> xmin >= thy xmin && tie -> xmax <= thy xmax,
623 			U"The chosen interval(s) must lie within the sound.");
624         OrderedOf<structTextGrid> textgrids;
625         autoTextGrid result = TextGrid_create (tib -> xmin, tie -> xmax, U"sentence clause word phoneme", U"");
626         for (integer iint = istart; iint <= iend; iint ++) {
627 			const TextInterval ti = his intervals.at [iint];
628             if (ti -> text && ti -> text [0] != U'\0') {
629                 autoSound sound = Sound_extractPart (thee, ti -> xmin, ti -> xmax,  kSound_windowShape::RECTANGULAR, 1, true);
630                 autoTextGrid grid = SpeechSynthesizer_Sound_TextInterval_align (me, sound.get(), ti, silenceThreshold, minSilenceDuration, minSoundingDuration);
631                 textgrids. addItem_move (grid.move());
632             }
633         }
634         Melder_require (textgrids.size > 0,
635 			U"Nothing could be aligned. Was your IntervalTier empty?");
636         autoTextGrid aligned = TextGrids_to_TextGrid_appendContinuous (& textgrids, true);
637         return aligned;
638     } catch (MelderError) {
639         Melder_throw (U"No aligned TextGrid created.");
640     }
641 }
642 
643 static autoTextGrid SpeechSynthesizer_Sound_IntervalTier_align2 (SpeechSynthesizer me, Sound thee, IntervalTier him, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double trimDuration) {
644     try {
645 		IntervalTier_checkRange (him, istart, iend);
646         const TextInterval tb = his intervals.at [istart];
647         const TextInterval te = his intervals.at [iend];
648         autoTextGrid result = TextGrid_create (tb -> xmin, te -> xmax, U"sentence clause word phoneme", U"");
649         OrderedOf<structTextGrid> textgrids;
650         for (integer iint = istart; iint <= iend; iint ++) {
651             const TextInterval ti = his intervals.at [iint];
652             if (ti -> text && ti -> text [0] != U'\0') {
653                 autoSound sound = Sound_extractPart (thee, ti -> xmin, ti -> xmax,  kSound_windowShape::RECTANGULAR, 1, true);
654                 autoTextGrid grid = SpeechSynthesizer_Sound_TextInterval_align2 (me, sound.get(), ti, silenceThreshold, minSilenceDuration, minSoundingDuration, trimDuration);
655                 textgrids. addItem_move (grid.move());
656             }
657         }
658         Melder_require (textgrids.size > 0, U"Nothing could be aligned. Was your IntervalTier empty?");
659 
660         autoTextGrid aligned = TextGrids_to_TextGrid_appendContinuous (& textgrids, true);
661         return aligned;
662     } catch (MelderError) {
663         Melder_throw (U"No aligned TextGrid created.");
664     }
665 }
666 
667 autoTextGrid SpeechSynthesizer_Sound_TextGrid_align (SpeechSynthesizer me, Sound thee, TextGrid him, integer tierNumber, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration) {
668 	try {
669 		Melder_require (thy xmin == his xmin && thy xmax == his xmax,
670 			U"The domains of the Sound and the TextGrid must be equal.");
671 		const IntervalTier tier = TextGrid_checkSpecifiedTierIsIntervalTier (him, tierNumber);
672 		autoTextGrid grid = SpeechSynthesizer_Sound_IntervalTier_align (me, thee, tier, istart, iend, silenceThreshold, minSilenceDuration, minSoundingDuration);
673 		return grid;
674 	} catch (MelderError) {
675 		Melder_throw (me, U", ", thee, U", ", him, U": Cannot align.");
676 	}
677 }
678 
679 
680 autoTextGrid SpeechSynthesizer_Sound_TextGrid_align2 (SpeechSynthesizer me, Sound thee, TextGrid him, integer tierNumber, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double trimDuration) {
681     try {//TODO: check not empty tier
682     	const IntervalTier tier = TextGrid_checkSpecifiedTierIsIntervalTier (him, tierNumber);
683         autoTextGrid grid = SpeechSynthesizer_Sound_IntervalTier_align2 (me, thee, tier, istart, iend, silenceThreshold, minSilenceDuration, minSoundingDuration, trimDuration);
684         return grid;
685     } catch (MelderError) {
686         Melder_throw (U"");
687     }
688 }
689 
690 static autoStrings IntervalTier_to_Strings_withOriginData (IntervalTier me, INTVEC from) {
691 	try {
692 		autoStrings thee = Thing_new (Strings);
693 		thy strings = autoSTRVEC (my intervals.size);
694 		for (integer i = 1; i <= my intervals.size; i ++) {
695 			const TextInterval ti = my intervals.at [i];
696 			if (ti -> text && ti -> text [0] != U'\0') {
697 				thy strings [++ thy numberOfStrings] = Melder_dup (ti -> text.get());
698 				from [thy numberOfStrings] = i;
699 			}
700 		}
701 		return thee;
702 	} catch (MelderError) {
703 		Melder_throw (me, U": no Strings created.");
704 	}
705 }
706 
707 autoTable IntervalTiers_to_Table_textAlignmentment (IntervalTier target, IntervalTier source, EditCostsTable costs) {
708 	try {
709 		const integer numberOfTargetIntervals = target -> intervals.size;
710 		const integer numberOfSourceIntervals = source -> intervals.size;
711 		autoINTVEC targetOrigin = zero_INTVEC (numberOfTargetIntervals);
712 		autoINTVEC sourceOrigin = zero_INTVEC (numberOfSourceIntervals);
713 		autoStrings targets = IntervalTier_to_Strings_withOriginData (target, targetOrigin.get());
714 		autoStrings sources = IntervalTier_to_Strings_withOriginData (source, sourceOrigin.get());
715 		autoEditDistanceTable edit = EditDistanceTable_create (targets.get(), sources.get());
716 		if (costs) {
717 			EditDistanceTable_setEditCosts (edit.get(), costs);
718 			EditDistanceTable_findPath (edit.get(), nullptr);
719 		}
720 		const integer pathLength = edit -> warpingPath -> pathLength;
721 		const conststring32 columnNames [] = {
722 			U"targetInterval", U"targetText", U"targetStart", U"targetEnd",
723 			U"sourceInterval", U"sourceText", U"sourceStart", U"sourceEnd",
724 			U"operation"
725 		};
726 		autoTable thee = Table_createWithColumnNames (pathLength - 1, ARRAY_TO_STRVEC (columnNames));
727 		for (integer i = 2; i <= pathLength; i++) {
728 			const structPairOfInteger p = edit -> warpingPath -> path [i];
729 			const structPairOfInteger p1 = edit -> warpingPath -> path [i - 1];
730 			double targetStart = undefined, targetEnd = undefined;
731 			double sourceStart = undefined, sourceEnd = undefined;
732 			conststring32 targetText = U"", sourceText = U"";
733 			const integer targetInterval = ( p.y > 1 ? targetOrigin [p.y - 1] : 0 );
734 			const integer sourceInterval = ( p.x > 1 ? sourceOrigin [p.x - 1] : 0 );
735 			if (targetInterval > 0) {
736 				const TextInterval ti = target -> intervals.at [targetInterval];
737 				targetStart = ti -> xmin;
738 				targetEnd =  ti -> xmax;
739 				targetText = ti -> text.get();
740 			}
741 			if (sourceInterval > 0) {
742 				const TextInterval ti = source -> intervals.at [sourceInterval];
743 				sourceStart = ti -> xmin;
744 				sourceEnd =  ti -> xmax;
745 				sourceText = ti -> text.get();
746 			}
747 			const integer irow = i - 1;
748 			if (p.y == p1.y) { // deletion
749 				Table_setNumericValue (thee.get(), irow, 1, 0);
750 				Table_setStringValue  (thee.get(), irow, 2, U"");
751 				Table_setNumericValue (thee.get(), irow, 3, undefined);
752 				Table_setNumericValue (thee.get(), irow, 4, undefined);
753 				Table_setNumericValue (thee.get(), irow, 5, sourceInterval);
754 				Table_setStringValue  (thee.get(), irow, 6, sourceText);
755 				Table_setNumericValue (thee.get(), irow, 7, sourceStart);
756 				Table_setNumericValue (thee.get(), irow, 8, sourceEnd);
757 				Table_setStringValue  (thee.get(), irow, 9, U"d");
758 			} else if (p.x == p1.x) { // insertion
759 				Table_setNumericValue (thee.get(), irow, 1, targetInterval);
760 				Table_setStringValue  (thee.get(), irow, 2, targetText);
761 				Table_setNumericValue (thee.get(), irow, 3, targetStart);
762 				Table_setNumericValue (thee.get(), irow, 4, targetEnd);
763 				Table_setNumericValue (thee.get(), irow, 5, 0);
764 				Table_setStringValue  (thee.get(), irow, 6, U"");
765 				Table_setNumericValue (thee.get(), irow, 7, undefined);
766 				Table_setNumericValue (thee.get(), irow, 8, undefined);
767 				Table_setStringValue  (thee.get(), irow, 9, U"i");
768 			} else { // substitution ?
769 				Table_setNumericValue (thee.get(), irow, 1, targetInterval);
770 				Table_setStringValue  (thee.get(), irow, 2, targetText);
771 				Table_setNumericValue (thee.get(), irow, 3, targetStart);
772 				Table_setNumericValue (thee.get(), irow, 4, targetEnd);
773 				Table_setNumericValue (thee.get(), irow, 5, sourceInterval);
774 				Table_setStringValue  (thee.get(), irow, 6, sourceText);
775 				Table_setNumericValue (thee.get(), irow, 7, sourceStart);
776 				Table_setNumericValue (thee.get(), irow, 8, sourceEnd);
777 				Table_setStringValue  (thee.get(), irow, 9, Melder_equ (targetText, sourceText) ? U" " : U"s");
778 			}
779 		}
780 		return thee;
781 	} catch (MelderError) {
782 		Melder_throw (target, U" and ", source, U" not aligned.");
783 	}
784 }
785 
786 autoTable TextGrids_to_Table_textAlignment (TextGrid target, integer ttier, TextGrid source, integer stier, EditCostsTable costs) {
787 	try {
788 		const IntervalTier targetTier = TextGrid_checkSpecifiedTierIsIntervalTier (target, ttier);
789 		const IntervalTier sourceTier = TextGrid_checkSpecifiedTierIsIntervalTier (source, stier);
790 		return IntervalTiers_to_Table_textAlignmentment (targetTier, sourceTier, costs);
791 	} catch (MelderError) {
792 		Melder_throw (U"No text alignment table created from TextGrids ", target, U" and ", source, U".");
793 	}
794 }
795 
// End of file SpeechSynthesizer_and_TextGrid.cpp
797