1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* */
34 /* Author: Paul Taylor */
35 /* Date: February 1998 */
36 /* --------------------------------------------------------------------- */
37 /* Waveform Generation Scheme Interface File */
38 /* */
39 /*************************************************************************/
40 #include "siod.h"
41 #include "EST.h"
42 #include "UniSyn.h"
43 #include "us_synthesis.h"
44 #include "Phone.h"
45
// Type registrations for EST_IVector and EST_WaveVector.
// NOTE(review): presumably these macros provide the ivector() and
// wavevector() accessors used further down in this file -- confirm
// against the macro definitions in the EST/SIOD headers.
VAL_REGISTER_TYPE(ivector,EST_IVector)
VAL_REGISTER_TYPE(wavevector,EST_WaveVector);

// Registration of EST_WaveVector with SIOD (the Scheme layer).
SIOD_REGISTER_TYPE(wavevector, EST_WaveVector);

// Forward declaration; the definition is not in this file.
// Called by FT_map_to_relation() below.
void map_to_relation(EST_IVector &map, EST_Relation &r,
                     const EST_Track &source_pm,
                     const EST_Track &target_pm);
54
scheme_param(const EST_String & param,const EST_String & path)55 EST_Features *scheme_param(const EST_String& param, const EST_String &path)
56 {
57 EST_Features *f, *p;
58
59 f = feats(siod_get_lval(param, "Couldn't find scheme paramete named: "
60 + param));
61
62 p = (path == "") ? f : &f->A(path);
63 return p;
64 }
65
66
FT_us_linear_smooth_amplitude(LISP lutt)67 LISP FT_us_linear_smooth_amplitude( LISP lutt )
68 {
69 EST_Utterance *utt = get_c_utt( lutt );
70
71 us_linear_smooth_amplitude( utt );
72
73 return lutt;
74 }
75
76
FT_wavevector_get_wave(LISP l_wavevector,LISP l_framenum)77 static LISP FT_wavevector_get_wave( LISP l_wavevector, LISP l_framenum )
78 {
79 EST_WaveVector *wv = wavevector( l_wavevector );
80 int i = get_c_int( l_framenum );
81
82 if( i<0 || i>wv->length() )
83 EST_error( "index out of bounds" );
84
85 return siod( &((*wv)[i]) );
86 }
87
88
FT_us_unit_concat(LISP lutt)89 LISP FT_us_unit_concat(LISP lutt)
90 {
91 EST_String window_name;
92 float window_factor;
93 bool window_symmetric;
94
95 EST_Features *f = scheme_param("Param", "unisyn");
96
97 window_name = f->S("window_name");
98 window_factor = f->F("window_factor");
99
100 window_symmetric = (f->I("window_symmetric",1) == 0) ? false : true;
101
102 us_unit_concat(*get_c_utt(lutt), window_factor, window_name, false, window_symmetric);
103 return lutt;
104 }
105
FT_us_unit_raw_concat(LISP lutt)106 LISP FT_us_unit_raw_concat(LISP lutt)
107 {
108 us_unit_raw_concat(*get_c_utt(lutt));
109 return lutt;
110 }
111
112
FT_us_energy_normalise(LISP lutt,LISP lrname)113 LISP FT_us_energy_normalise(LISP lutt, LISP lrname)
114 {
115 EST_Utterance *utt = get_c_utt(lutt);
116 EST_String rname = get_c_string(lrname);
117
118 us_energy_normalise(*utt->relation(rname));
119 return lutt;
120 }
121
FT_us_generate_wave(LISP lutt,LISP l_f_method,LISP l_o_method)122 LISP FT_us_generate_wave(LISP lutt, LISP l_f_method, LISP l_o_method)
123 {
124 EST_String filter_method = get_c_string(l_f_method);
125 EST_String ola_method = get_c_string(l_o_method);
126 EST_Utterance *utt = get_c_utt(lutt);
127
128 EST_Features *f = scheme_param("Param", "unisyn");
129 if(f->I("window_symmetric",1) == 0){
130 ola_method = "asymmetric_window";
131 }
132 us_generate_wave(*utt, filter_method, ola_method);
133
134 return lutt;
135 }
136
FT_us_mapping(LISP lutt,LISP method)137 LISP FT_us_mapping(LISP lutt, LISP method)
138 {
139 us_mapping(*get_c_utt(lutt), get_c_string(method));
140 return lutt;
141 }
142
FT_us_get_copy_wave(LISP lutt,LISP l_sig_file,LISP l_pm_file,LISP l_seg_file)143 LISP FT_us_get_copy_wave(LISP lutt, LISP l_sig_file, LISP l_pm_file,
144 LISP l_seg_file)
145 {
146 EST_Utterance *utt = get_c_utt(lutt);
147 EST_Relation seg;
148 EST_String sig_file = get_c_string(l_sig_file);
149 EST_String seg_file = get_c_string(l_seg_file);
150 EST_String pm_file = get_c_string(l_pm_file);
151
152 EST_Track *pm = new EST_Track;
153 EST_Wave *sig = new EST_Wave;
154
155 if (pm->load(pm_file) != format_ok)
156 return NIL;
157
158 if (sig->load(sig_file) != format_ok)
159 return NIL;
160
161 if (seg.load(seg_file) != format_ok)
162 return NIL;
163
164 if (!ph_is_silence(seg.tail()->f("name")))
165 {
166 EST_Item *n = seg.tail()->insert_after();
167 n->set("name", ph_silence());
168 n->set("end", seg.tail()->prev()->F("end") + 0.1);
169 }
170
171 us_get_copy_wave(*utt, *sig, *pm, seg);
172 return lutt;
173 }
174
175
FT_f0_to_pitchmarks(LISP lutt,LISP l_f0_name,LISP l_pm_name,LISP l_end_time)176 LISP FT_f0_to_pitchmarks(LISP lutt, LISP l_f0_name, LISP l_pm_name,
177 LISP l_end_time)
178 {
179 EST_Utterance *utt = get_c_utt(lutt);
180 int num_channels=0;
181 const float default_f0 = 100.0;
182 EST_Relation *f0_rel=0, *pm_rel=0;
183 EST_Track *f0=0, *pm=0;
184 EST_Item *a;
185
186 float end_time = (l_end_time == NIL) ? -1 : get_c_float(l_end_time);
187
188 f0_rel = utt->relation(get_c_string(l_f0_name), 1);
189 pm_rel = utt->create_relation(get_c_string(l_pm_name));
190
191 f0 = track(f0_rel->head()->f("f0"));
192 pm = new EST_Track;
193
194 a = pm_rel->append();
195 a->set_val("coefs", est_val(pm));
196 a = pm_rel->append();
197
198 if (utt->relation_present("SourceCoef"))
199 {
200 EST_Track *source_coef =
201 track(utt->relation("SourceCoef")->head()->f("coefs"));
202 num_channels = source_coef->num_channels();
203 }
204
205 f0_to_pitchmarks(*f0, *pm, num_channels, default_f0, end_time);
206
207 return lutt;
208 }
209
FT_map_to_relation(LISP lutt,LISP lsource_name,LISP ltarget_name,LISP lrel_name)210 LISP FT_map_to_relation(LISP lutt, LISP lsource_name, LISP ltarget_name,
211 LISP lrel_name)
212 {
213 EST_Utterance *utt = get_c_utt(lutt);
214 EST_Track *source_pm = 0;
215 EST_Track *target_pm = 0;
216 EST_IVector *map = 0;
217 target_pm =
218 track(utt->relation(get_c_string(ltarget_name))->head()->f("coefs"));
219 source_pm =
220 track(utt->relation(get_c_string(lsource_name))->head()->f("coefs"));
221 map = ivector(utt->relation("US_map")->head()->f("map"));
222
223 utt->create_relation(get_c_string(lrel_name));
224
225 map_to_relation(*map, *utt->relation(get_c_string(lrel_name)),
226 *source_pm, *target_pm);
227
228 return NIL;
229 }
230
festival_UniSyn_init(void)231 void festival_UniSyn_init(void)
232 {
233 proclaim_module("UniSyn");
234
235 register_unisyn_features();
236
237 init_subr_2( "wavevector.getwave", FT_wavevector_get_wave,
238 "(wavevector.getwave WAVEVECTOR FRAMENUM)\n\
239 retrieves an EST_Wave frame (int FRAMENUM) from a wavevector.");
240
241 init_subr_1("us_linear_smooth_amplitude", FT_us_linear_smooth_amplitude,
242 "(us_linear_smooth_amplitude UTT)\n\
243 Perform linear amplitute smoothing on diphone joins.");
244
245 init_subr_1("us_unit_raw_concat", FT_us_unit_raw_concat,
246 "(us_init_raw_concat UTT).");
247
248 init_subr_2("us_energy_normalise", FT_us_energy_normalise,
249 "(us_ps_synthesis UTT SIGPR)\n\
250 Synthesize utterance UTT using signal processing technique SIGPR \n\
251 for the UniSyn pitch-synchronous synthesizer.");
252
253 init_subr_3("us_generate_wave", FT_us_generate_wave,
254 "(us_td_synthesis UTT FILTER_METHOD OLA_METHOD)\n\
255 Synthesize utterance UTT using signal processing technique SIGPR \n\
256 for the UniSyn pitch-synchronous synthesizer.");
257
258 init_subr_2("us_mapping", FT_us_mapping,
259 "(us_mapping UTT method)\n\
260 Synthesize utterance UTT using signal processing technique SIGPR \n\
261 for the UniSyn pitch-synchronous synthesizer.");
262
263 init_subr_1("us_unit_concat", FT_us_unit_concat,
264 "(us_unit_concat UTT)\n\
265 Concat coef and wave information in unit stream into a single \n\
266 Frames structure storing the result in the Frame relation");
267
268 init_subr_4("us_f0_to_pitchmarks", FT_f0_to_pitchmarks,
269 "(us_f0_to_pitchmarks UTT F0_relation PM_relation END_TIME)\n\
270 From the F0 contour in F0_relation, create a set of pitchmarks\n\
271 in PM_relation. If END_TIME is not nil, Extra pitchmarks will be \n\
272 created at the default interval up to this point");
273
274 init_subr_4("map_to_relation", FT_map_to_relation,
275 "(map_to_relation UTT Source_relation Target_relation new_relation)\n\
276 From the F0 contour in F0_relation, create a set of pitchmarks\n\
277 in PM_relation. If END_TIME is not nil, Extra pitchmarks will be \n\
278 created at the default interval up to this point");
279
280 init_subr_4("us_get_copy_wave", FT_us_get_copy_wave,
281 "(warp_utterance UTT (Wavefile Pitchmark_file))\n\
282 Change waveform to match prosodic specification of utterance.");
283
284
285 #ifdef HAVE_US_TDPSOLA_TM
286 us_init_tdpsola();
287 #endif
288
289 }
290
291 /*
292
293 init_subr_2("us_F0targets_to_pitchmarks", FT_us_F0targets_to_pitchmarks,
294 "(us_F0targets_to_pitchmarks UTT Segment_Relation)\n\
295 Make set of pitchmarks according to F0 target specification");
296
297 LISP FT_merge_pitchmarks(LISP lutt, LISP l_pm1, LISP l_pm2,
298 LISP l_guide_name)
299 {
300 EST_Utterance *utt = get_c_utt(lutt);
301
302 EST_Track *pm1 =
303 track(utt->relation(get_c_string(l_pm1), 1)->head()->f("coefs", 1));
304 EST_Track *pm2 =
305 track(utt->relation(get_c_string(l_pm2), 1)->head()->f("coefs", 1));
306
307 EST_Relation *guide = utt->relation(get_c_string(l_guide_name), 1);
308
309 EST_Relation *pm_rel = utt->create_relation("TargetCoefs");
310
311 EST_Track *target_pm = new EST_Track;
312
313 EST_Item *a = pm_rel->append();
314 a->fset_val("coefs", est_val(target_pm));
315
316 merge_pitchmarks(*get_c_utt(lutt), *pm1, *pm2, *target_pm, *guide);
317
318 return lutt;
319 }
320 LISP FT_warp_pitchmarks(LISP lutt, LISP l_pm_file, LISP l_seg_file)
321 {
322 EST_Utterance *utt = get_c_utt(lutt);
323
324 EST_String pm_file = get_c_string(l_pm_file);
325 EST_String seg_file = get_c_string(l_seg_file);
326
327 EST_Track *pm = new EST_Track;
328 EST_Relation seg;
329
330 if (pm->load(pm_file) != format_ok)
331 return NIL;
332
333 if (seg.load(seg_file) != format_ok)
334 return NIL;
335
336 warp_pitchmarks(*utt, pm, seg, *utt->relation("Segment"));
337
338 return lutt;
339 }
340
341 init_subr_3("us_warp_pitchmarks", FT_warp_pitchmarks,
342 "(warp_utterance UTT (Wavefile Pitchmark_file))\n\
343 Change waveform to match prosodic specification of utterance.");
344
345 LISP FT_us_load_utt_segments(LISP l_utt, LISP l_filename)
346 {
347 EST_String filename = get_c_string(l_filename);
348 EST_Utterance tu;
349 EST_Utterance *u = get_c_utt(l_utt);
350 EST_Item *s, *t;
351
352 if (tu.load(filename) != format_ok)
353 festival_error();
354
355 u->relation("Segment")->clear();
356
357 for (s = tu.relation("Segment")->head(); s; s = s->next())
358 {
359 t = u->relation("Segment")->append();
360 t->fset("name", s->fS("name"));
361 t->fset("end", s->fS("end"));
362 }
363
364 return l_utt;
365 }
366
367 void us_F0targets_to_pitchmarks(EST_Utterance &utt,
368 const EST_String &seg_relation);
369
370 LISP FT_us_F0targets_to_pitchmarks(LISP lutt, LISP lseg)
371 {
372 EST_String s = (lseg == NIL) ? "" : get_c_string(lseg);
373 us_F0targets_to_pitchmarks(*get_c_utt(lutt), s);
374
375 return lutt;
376 }
377
378
379 */
380