1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*                                                                       */
34 /*                 Author: Paul Taylor                                   */
35 /*                   Date: February 1998                                 */
36 /* --------------------------------------------------------------------- */
37 /*             Waveform Generation Scheme Interface File                 */
38 /*                                                                       */
39 /*************************************************************************/
40 #include "siod.h"
41 #include "EST.h"
42 #include "UniSyn.h"
43 #include "us_synthesis.h"
44 #include "Phone.h"
45 
/* Register EST_IVector and EST_WaveVector as value types under the names
   "ivector" and "wavevector".  The ivector(...) and wavevector(...)
   accessors called later in this file presumably come from these
   registrations -- TODO confirm against the VAL_REGISTER_TYPE macro. */
VAL_REGISTER_TYPE(ivector,EST_IVector)
VAL_REGISTER_TYPE(wavevector,EST_WaveVector);

/* Register wavevector with SIOD as well (presumably what makes a
   wavevector usable as a Scheme-level value -- TODO confirm). */
SIOD_REGISTER_TYPE(wavevector, EST_WaveVector);

/* Forward declaration: called by FT_map_to_relation below; the definition
   is not in this file. */
void map_to_relation(EST_IVector &map, EST_Relation &r,
		     const EST_Track &source_pm,
		     const EST_Track &target_pm);
54 
scheme_param(const EST_String & param,const EST_String & path)55 EST_Features *scheme_param(const EST_String& param, const EST_String &path)
56 {
57     EST_Features *f, *p;
58 
59     f = feats(siod_get_lval(param, "Couldn't find scheme paramete named: "
60 			    + param));
61 
62     p = (path == "") ? f : &f->A(path);
63     return p;
64 }
65 
66 
FT_us_linear_smooth_amplitude(LISP lutt)67 LISP FT_us_linear_smooth_amplitude( LISP lutt )
68 {
69   EST_Utterance *utt = get_c_utt( lutt );
70 
71   us_linear_smooth_amplitude( utt );
72 
73   return lutt;
74 }
75 
76 
FT_wavevector_get_wave(LISP l_wavevector,LISP l_framenum)77 static LISP FT_wavevector_get_wave( LISP l_wavevector, LISP l_framenum )
78 {
79   EST_WaveVector *wv = wavevector( l_wavevector );
80   int i = get_c_int( l_framenum );
81 
82   if( i<0 || i>wv->length() )
83     EST_error( "index out of bounds" );
84 
85   return siod( &((*wv)[i]) );
86 }
87 
88 
FT_us_unit_concat(LISP lutt)89 LISP FT_us_unit_concat(LISP lutt)
90 {
91     EST_String window_name;
92     float window_factor;
93     bool window_symmetric;
94 
95     EST_Features *f = scheme_param("Param", "unisyn");
96 
97     window_name = f->S("window_name");
98     window_factor = f->F("window_factor");
99 
100     window_symmetric = (f->I("window_symmetric",1) == 0) ? false : true;
101 
102     us_unit_concat(*get_c_utt(lutt), window_factor, window_name, false, window_symmetric);
103     return lutt;
104 }
105 
FT_us_unit_raw_concat(LISP lutt)106 LISP FT_us_unit_raw_concat(LISP lutt)
107 {
108     us_unit_raw_concat(*get_c_utt(lutt));
109     return lutt;
110 }
111 
112 
FT_us_energy_normalise(LISP lutt,LISP lrname)113 LISP FT_us_energy_normalise(LISP lutt, LISP lrname)
114 {
115     EST_Utterance *utt = get_c_utt(lutt);
116     EST_String rname = get_c_string(lrname);
117 
118     us_energy_normalise(*utt->relation(rname));
119     return lutt;
120 }
121 
FT_us_generate_wave(LISP lutt,LISP l_f_method,LISP l_o_method)122 LISP FT_us_generate_wave(LISP lutt, LISP l_f_method, LISP l_o_method)
123 {
124     EST_String filter_method = get_c_string(l_f_method);
125     EST_String ola_method = get_c_string(l_o_method);
126     EST_Utterance *utt = get_c_utt(lutt);
127 
128     EST_Features *f = scheme_param("Param", "unisyn");
129     if(f->I("window_symmetric",1) == 0){
130       ola_method = "asymmetric_window";
131     }
132     us_generate_wave(*utt, filter_method, ola_method);
133 
134     return lutt;
135 }
136 
FT_us_mapping(LISP lutt,LISP method)137 LISP FT_us_mapping(LISP lutt, LISP method)
138 {
139     us_mapping(*get_c_utt(lutt), get_c_string(method));
140     return lutt;
141 }
142 
FT_us_get_copy_wave(LISP lutt,LISP l_sig_file,LISP l_pm_file,LISP l_seg_file)143 LISP FT_us_get_copy_wave(LISP lutt, LISP l_sig_file,  LISP l_pm_file,
144 			  LISP l_seg_file)
145 {
146     EST_Utterance *utt = get_c_utt(lutt);
147     EST_Relation seg;
148     EST_String sig_file = get_c_string(l_sig_file);
149     EST_String seg_file = get_c_string(l_seg_file);
150     EST_String pm_file = get_c_string(l_pm_file);
151 
152     EST_Track *pm = new EST_Track;
153     EST_Wave *sig = new EST_Wave;
154 
155     if (pm->load(pm_file) != format_ok)
156 	return NIL;
157 
158     if (sig->load(sig_file) != format_ok)
159 	return NIL;
160 
161     if (seg.load(seg_file) != format_ok)
162 	return NIL;
163 
164     if (!ph_is_silence(seg.tail()->f("name")))
165     {
166 	EST_Item *n = seg.tail()->insert_after();
167 	n->set("name", ph_silence());
168 	n->set("end", seg.tail()->prev()->F("end") + 0.1);
169     }
170 
171     us_get_copy_wave(*utt, *sig, *pm, seg);
172     return lutt;
173 }
174 
175 
FT_f0_to_pitchmarks(LISP lutt,LISP l_f0_name,LISP l_pm_name,LISP l_end_time)176 LISP FT_f0_to_pitchmarks(LISP lutt, LISP l_f0_name, LISP l_pm_name,
177 			 LISP l_end_time)
178 {
179     EST_Utterance *utt = get_c_utt(lutt);
180     int num_channels=0;
181     const float default_f0 = 100.0;
182     EST_Relation *f0_rel=0, *pm_rel=0;
183     EST_Track *f0=0, *pm=0;
184     EST_Item *a;
185 
186     float end_time = (l_end_time == NIL) ? -1 : get_c_float(l_end_time);
187 
188     f0_rel = utt->relation(get_c_string(l_f0_name), 1);
189     pm_rel = utt->create_relation(get_c_string(l_pm_name));
190 
191     f0 = track(f0_rel->head()->f("f0"));
192     pm = new EST_Track;
193 
194     a = pm_rel->append();
195     a->set_val("coefs", est_val(pm));
196     a = pm_rel->append();
197 
198     if (utt->relation_present("SourceCoef"))
199     {
200 	EST_Track *source_coef =
201 	    track(utt->relation("SourceCoef")->head()->f("coefs"));
202 	num_channels = source_coef->num_channels();
203     }
204 
205     f0_to_pitchmarks(*f0, *pm, num_channels, default_f0, end_time);
206 
207     return lutt;
208 }
209 
FT_map_to_relation(LISP lutt,LISP lsource_name,LISP ltarget_name,LISP lrel_name)210 LISP FT_map_to_relation(LISP lutt, LISP lsource_name, LISP ltarget_name,
211 			LISP lrel_name)
212 {
213     EST_Utterance *utt = get_c_utt(lutt);
214     EST_Track *source_pm = 0;
215     EST_Track *target_pm = 0;
216     EST_IVector *map = 0;
217     target_pm =
218 	track(utt->relation(get_c_string(ltarget_name))->head()->f("coefs"));
219     source_pm =
220 	track(utt->relation(get_c_string(lsource_name))->head()->f("coefs"));
221     map = ivector(utt->relation("US_map")->head()->f("map"));
222 
223     utt->create_relation(get_c_string(lrel_name));
224 
225     map_to_relation(*map, *utt->relation(get_c_string(lrel_name)),
226 		    *source_pm, *target_pm);
227 
228     return NIL;
229 }
230 
festival_UniSyn_init(void)231 void festival_UniSyn_init(void)
232 {
233     proclaim_module("UniSyn");
234 
235     register_unisyn_features();
236 
237     init_subr_2( "wavevector.getwave", FT_wavevector_get_wave,
238     "(wavevector.getwave WAVEVECTOR FRAMENUM)\n\
239     retrieves an EST_Wave frame (int FRAMENUM) from a wavevector.");
240 
241     init_subr_1("us_linear_smooth_amplitude", FT_us_linear_smooth_amplitude,
242     "(us_linear_smooth_amplitude UTT)\n\
243      Perform linear amplitute smoothing on diphone joins.");
244 
245     init_subr_1("us_unit_raw_concat", FT_us_unit_raw_concat,
246     "(us_init_raw_concat UTT).");
247 
248     init_subr_2("us_energy_normalise", FT_us_energy_normalise,
249     "(us_ps_synthesis UTT SIGPR)\n\
250     Synthesize utterance UTT using signal processing technique SIGPR \n\
251     for the UniSyn pitch-synchronous synthesizer.");
252 
253     init_subr_3("us_generate_wave", FT_us_generate_wave,
254     "(us_td_synthesis UTT FILTER_METHOD OLA_METHOD)\n\
255     Synthesize utterance UTT using signal processing technique SIGPR \n\
256     for the UniSyn pitch-synchronous synthesizer.");
257 
258     init_subr_2("us_mapping", FT_us_mapping,
259     "(us_mapping UTT method)\n\
260     Synthesize utterance UTT using signal processing technique SIGPR \n\
261     for the UniSyn pitch-synchronous synthesizer.");
262 
263     init_subr_1("us_unit_concat", FT_us_unit_concat,
264     "(us_unit_concat UTT)\n\
265      Concat coef and wave information in unit stream into a single \n\
266      Frames structure storing the result in the Frame relation");
267 
268     init_subr_4("us_f0_to_pitchmarks", FT_f0_to_pitchmarks,
269     "(us_f0_to_pitchmarks UTT F0_relation PM_relation END_TIME)\n\
270     From the F0 contour in F0_relation, create a set of pitchmarks\n\
271     in PM_relation. If END_TIME is not nil, Extra pitchmarks will be \n\
272     created at the default interval up to this point");
273 
274     init_subr_4("map_to_relation", FT_map_to_relation,
275     "(map_to_relation UTT Source_relation Target_relation new_relation)\n\
276     From the F0 contour in F0_relation, create a set of pitchmarks\n\
277     in PM_relation. If END_TIME is not nil, Extra pitchmarks will be \n\
278     created at the default interval up to this point");
279 
280     init_subr_4("us_get_copy_wave", FT_us_get_copy_wave,
281     "(warp_utterance UTT (Wavefile Pitchmark_file))\n\
282     Change waveform to match prosodic specification of utterance.");
283 
284 
285 #ifdef HAVE_US_TDPSOLA_TM
286     us_init_tdpsola();
287 #endif
288 
289 }
290 
291 /*
292 
293     init_subr_2("us_F0targets_to_pitchmarks", FT_us_F0targets_to_pitchmarks,
294     "(us_F0targets_to_pitchmarks UTT Segment_Relation)\n\
295      Make set of pitchmarks according to F0 target specification");
296 
297 LISP FT_merge_pitchmarks(LISP lutt, LISP l_pm1, LISP l_pm2,
298 			 LISP l_guide_name)
299 {
300     EST_Utterance *utt = get_c_utt(lutt);
301 
302     EST_Track *pm1 =
303 	track(utt->relation(get_c_string(l_pm1), 1)->head()->f("coefs", 1));
304     EST_Track *pm2 =
305 	track(utt->relation(get_c_string(l_pm2), 1)->head()->f("coefs", 1));
306 
307     EST_Relation *guide = utt->relation(get_c_string(l_guide_name), 1);
308 
309     EST_Relation *pm_rel = utt->create_relation("TargetCoefs");
310 
311     EST_Track *target_pm = new EST_Track;
312 
313     EST_Item *a = pm_rel->append();
314     a->fset_val("coefs", est_val(target_pm));
315 
316     merge_pitchmarks(*get_c_utt(lutt), *pm1, *pm2, *target_pm, *guide);
317 
318     return lutt;
319 }
320 LISP FT_warp_pitchmarks(LISP lutt, LISP l_pm_file, LISP l_seg_file)
321 {
322     EST_Utterance *utt = get_c_utt(lutt);
323 
324     EST_String pm_file = get_c_string(l_pm_file);
325     EST_String seg_file = get_c_string(l_seg_file);
326 
327     EST_Track *pm = new EST_Track;
328     EST_Relation seg;
329 
330     if (pm->load(pm_file) != format_ok)
331 	return NIL;
332 
333     if (seg.load(seg_file) != format_ok)
334 	return NIL;
335 
336     warp_pitchmarks(*utt, pm, seg, *utt->relation("Segment"));
337 
338     return lutt;
339 }
340 
341     init_subr_3("us_warp_pitchmarks", FT_warp_pitchmarks,
342     "(warp_utterance UTT (Wavefile Pitchmark_file))\n\
343     Change waveform to match prosodic specification of utterance.");
344 
345 LISP FT_us_load_utt_segments(LISP l_utt, LISP l_filename)
346 {
347     EST_String filename = get_c_string(l_filename);
348     EST_Utterance tu;
349     EST_Utterance *u = get_c_utt(l_utt);
350     EST_Item *s, *t;
351 
352     if (tu.load(filename) != format_ok)
353       festival_error();
354 
355     u->relation("Segment")->clear();
356 
357     for (s = tu.relation("Segment")->head(); s; s = s->next())
358       {
359 	t = u->relation("Segment")->append();
360 	t->fset("name", s->fS("name"));
361 	t->fset("end", s->fS("end"));
362       }
363 
364     return l_utt;
365 }
366 
367 void us_F0targets_to_pitchmarks(EST_Utterance &utt,
368 				const EST_String &seg_relation);
369 
370 LISP FT_us_F0targets_to_pitchmarks(LISP lutt, LISP lseg)
371 {
372     EST_String s = (lseg == NIL) ? "" :  get_c_string(lseg);
373     us_F0targets_to_pitchmarks(*get_c_utt(lutt), s);
374 
375     return lutt;
376 }
377 
378 
379 */
380