1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author :  Paul Taylor                                     */
34 /*             Date   :  June 1998                                       */
35 /*-----------------------------------------------------------------------*/
36 /*                                                                       */
37 /*             Metrical Tree based Phonology system                      */
38 /*                                                                       */
39 /*=======================================================================*/
40 
41 #include <cstdio>
42 #include <cstdlib>
43 #include <cmath>
44 #include <fstream>
45 #include "festival.h"
46 
47 #include "../UniSyn/us_features.h"
48 
/* Prototypes used by this file.  They are declared here directly rather
   than through a header; those without a definition in the visible part
   of this file are presumably defined elsewhere in the module. */

/* Feature copying and segment editing helpers. */
void merge_features(EST_Item *from, EST_Item *to, int keep_id);
void insert_schwa(EST_Item *n);

/* Phone definition table.  add_trans_seg() fills it from the Lisp
   variable "darpa_fs"; it is consulted when label phones are validated
   and when the "df" feature of a phone is set. */
extern EST_Features phone_def;
void subword_metrical_tree(EST_Item *w, EST_Relation &syllable,
			   EST_Relation &metricaltree);

/* Two ways of producing the phones of a word: from the word's name and
   part of speech, or from a transcription relation. */
void lex_to_phones(const EST_String &name, const EST_String &pos,
		   EST_Relation &phone);
void trans_to_phones(EST_Item *w, EST_Relation &trans,
		     EST_Relation &phone);

void fix_syllables(EST_Item *nw, EST_Utterance &word);

/* Signature of the per-pair cost callback accepted by dp_match();
   local_cost() below has this signature. */
typedef
float (*local_cost_function)(const EST_Item *item1,
			     const EST_Item *item2);

void add_metrical_functions(EST_Utterance &utt);
bool dp_match(const EST_Relation &lexical,
	      const EST_Relation &surface,
	      EST_Relation &match,
	      local_cost_function lcf,
	      EST_Item *null_syl);
float local_cost(const EST_Item *s1, const EST_Item *s2);

void add_times(EST_Relation &lexical, EST_Relation &surface,
	       EST_Relation &match);

void add_initial_silence(EST_Relation &lexical, EST_Relation &surface,
	       EST_Relation &match);

void subword_metrical_tree(EST_Relation &syllable,
			   EST_Relation &metricaltree);

/* NOTE(review): repeats the add_metrical_functions() prototype already
   given above; harmless but redundant. */
void add_metrical_functions(EST_Utterance &utt);

int syllabify_word(EST_Item *nw, EST_Relation &phone,
		    EST_Relation &sylstructure, EST_Relation &syl, int flat);

void add_even_segment_times(EST_Item *w, EST_Relation &phone);
void lex_to_phones(EST_Utterance &u, const EST_String &relname);
void phonemic_trans(EST_Relation &trans);

void add_single_phrase(EST_Utterance &utt, EST_Item *t);

LISP FT_add_trans_metrical_tree(LISP l_utt, LISP lf_input, LISP lf_output);
/* Prototype for the add_trans_phrase() helper, which is itself disabled
   with #if 0 further down. */
#if 0
static void add_trans_phrase(EST_Utterance &utt, const EST_String &i_name,
			  const EST_String &s_name);
#endif
100 
local_cost(const EST_Item * s1,const EST_Item * s2)101 float local_cost(const EST_Item *s1, const EST_Item *s2)
102 {
103     float insertion_cost = get_c_int(siod_get_lval("met_insertion", NULL));
104     float deletion_cost = get_c_int(siod_get_lval("met_deletion", NULL));
105     float substitution_cost =
106 	get_c_int(siod_get_lval("met_substitution", NULL));
107 
108     EST_String null_sym = "nil";
109 
110     // otherwise cost is either insertion cost, or cost_matrix value
111     if (s1->name() == s2->name())
112 	return 0;
113     else
114     {
115 	if (s1->name() == null_sym)
116 	    return insertion_cost;
117 	else if (s2->name() == null_sym)
118 	    return deletion_cost;
119 	else
120 	    return substitution_cost;
121     }
122 }
123 
trans_to_phones(EST_Item * w,EST_Relation & trans,EST_Relation & phone)124 void trans_to_phones(EST_Item *w, EST_Relation &trans, EST_Relation &phone)
125 {
126     int prev_phone;
127     EST_Item *t, *p;
128     int r;
129 
130     prev_phone = w->prev() ? prev(w)->I("phon_ref") : -1;
131     r = w->I("phon_ref");
132 
133     for (t = trans.head(); t; t = t->next())
134     {
135 	if ((t->f("name") == "sil") || (t->f("name") == "pau"))
136 	    continue;
137 	if ((t->I("ref") > prev_phone) && (t->I("ref") <= r))
138 	{
139 	    p = phone.append();
140 	    p->set("name", t->S("name"));
141 	    p->set_val("end", t->f("end"));
142 	    p->set_val("start", t->f("start"));
143 	    p->set("df", phone_def.A(p->S("name")));
144 	}
145     }
146 }
147 
add_trans_intonation(EST_Utterance & utt,const EST_String & i_name,const EST_String & s_name,int add_words)148 void add_trans_intonation(EST_Utterance &utt, const EST_String &i_name,
149 			  const EST_String &s_name, int add_words)
150 {
151     EST_Item *s, *w, *t, *a, *b;
152     EST_String wref;
153     int s_num;
154     EST_String w_num;
155     EST_String is_name = i_name + s_name;
156 
157     utt.relation(i_name)->f.set("intonation_style", "tilt");
158 
159     utt.create_relation(is_name);
160     cout << "created : " << is_name << endl;
161 
162     // optional feature to add intonation events to words rather than syllables
163     if (add_words)
164 	utt.create_relation("IntonationWord");
165 
166     for (t = utt.relation(i_name, 1)->head(); t; t = t->next())
167 	{
168 	    t->f_remove("end");
169 	    if (!t->f_present("word_ref"))
170 		add_single_phrase(utt, t);
171 	    else
172 		{
173 		    w_num = t->S("word_ref");
174 		    s_num = t->I("syl_num");
175 
176 		    for (w = utt.relation("Word", 1)->head(); w; w = w->next())
177 		    {
178 			if (w->S("id") == w_num)
179 			    break;
180 		    }
181 		    if (w == 0)
182 			{
183 			    cerr << "Error: couldn't find word ref " << endl;
184 			    cerr << "For intonation event " << *t << endl;
185 			    festival_error();
186 			}
187 		    if (add_words)
188 		    {
189 			if (!w->in_relation("IntonationWord"))
190 			    b = utt.relation("IntonationWord")->append(w);
191 			else
192 			    b = w->as_relation("IntonationWord");
193 			b->append_daughter(t);
194 		    }
195 
196 		    //	cout << "matching word: " << w->name() << endl;
197 		    if ((b = w->as_relation("WordStructure")) == 0)
198 			EST_error("Item is not in WordStructure\n");
199 		    if ((s = nth_leaf(b, s_num)) == 0)
200 		    {
201 			cerr << "Intonation element " << *t <<
202 			    "\nis linked to syllable " << s_num  <<
203 			    " but word \"" << w->S("name") << "\""
204 			    " has only " << num_leaves(b) << " syllables\n";
205 		    }
206 		    //	cout << "here is s\n";
207 		    //	cout << "matching syllable: " << *s << endl;
208 
209 		    if (!s->in_relation(is_name))
210 			a = utt.relation(is_name)->append(s);
211 		    else
212 			a = s->as_relation(is_name);
213 		    a->append_daughter(t);
214 
215 //		    cout << "s1: " << s->S("id", "XX") << endl;
216 		    s = s->as_relation(s_name);
217 		    if (s == 0)
218 			cerr << "Syllable with id " << nth_leaf(b, s_num)->S("id") << "exists "
219 			    "but is not in syllable relation. Suspect corrupted "
220 			    "lexical conversion\n";
221 
222 //		    cout << "s2: " << s->S("id", "XX") << endl;
223 
224 		    // change to relative positions if not already specified
225 		    if (!t->f_present("rel_pos"))
226 			t->set("rel_pos", t->F("time") - s->F("vowel_start"));
227 
228 		    t->set("time_path", is_name);
229 		    t->set_function("time",
230 				    "standard+unisyn_tilt_event_position");
231 //		    cout << "end syl:" << endl;
232 		    t->f_remove("word_ref");
233 		    t->f_remove("syl_num");
234 		}
235 	}
236 //    add_trans_phrase(utt, i_name, s_name);
237 }
238 
syl_to_word_intonation(EST_Utterance & utt)239 void syl_to_word_intonation(EST_Utterance &utt)
240 {
241     EST_Item *s, *w, *t=0, *b;
242 
243     utt.create_relation("IntonationWord");
244 
245     for (s = utt.relation("Syllable", 1)->head(); s; s = s->next())
246     {
247 	if (!s->in_relation("IntonationSyllable"))
248 	    continue;
249 
250 	w = root(s, "WordStructure");
251 
252 	if (w == 0)
253 	{
254 	    cerr << "Error: couldn't find word ref " << endl;
255 	    cerr << "For intonation event " << *t << endl;
256 	    festival_error();
257 	}
258 	if (!w->in_relation("IntonationWord"))
259 	    b = utt.relation("IntonationWord")->append(w);
260 	else
261 	    b = w->as_relation("IntonationWord");
262 
263 	for (t = daughter1(s->as_relation("IntonationSyllable")); t; t = t->next())
264 	    b->append_daughter(t);
265     }
266 }
267 
legal_daughter(EST_Item * r,const EST_String & iname,const EST_StrList & valid)268 static bool legal_daughter(EST_Item *r, const EST_String &iname,
269 			   const EST_StrList &valid)
270 {
271     if (!r->in_relation(iname))
272 	return false;
273     if (strlist_member(valid, daughter1(r->as_relation(iname))->S("name", "")))
274 	return true;
275     return false;
276 }
277 
intonation_diagnostics(EST_Utterance & ref,EST_Utterance & test,const EST_String & rel,const EST_StrList & valid)278 void intonation_diagnostics(EST_Utterance &ref, EST_Utterance &test,
279 			    const EST_String &rel, const EST_StrList &valid)
280 {
281     EST_Item *r, *t;
282     EST_String iname = "Intonation" + rel;
283 
284     for (r = ref.relation(rel, 1)->head(), t = test.relation(rel, 1)->head(); r && t;
285 	 r = r->next(), t = t->next())
286     {
287 	if (legal_daughter(r, iname, valid) && legal_daughter(t, iname, valid))
288 	    t->set("i_status", "COR");
289 	else if (legal_daughter(r, iname, valid) && (!legal_daughter(t, iname, valid)))
290 	    t->set("i_status", "DEL");
291 	else if (!legal_daughter(r, iname, valid) && legal_daughter(t, iname, valid))
292 	    t->set("i_status", "INS");
293 //	else
294 //	    t->set("i_status", "0");
295     }
296 }
297 
/* add_trans_phrase() is disabled with #if 0 and is not compiled; its
   only call site in add_trans_intonation() is commented out as well. */
#if 0
/* For every item of relation i_name that is not yet in relation
   i_name + s_name, search relation s_name for the item closest in time
   to the event, then append the event to i_name + s_name.
   NOTE(review): the closest item `s' is computed but never used, and
   `a' is assigned but never read. */
static void add_trans_phrase(EST_Utterance &utt, const EST_String &i_name,
			  const EST_String &s_name)
{
    EST_Item *s, *t, *a, *p;
    float pos, max, d;
    EST_String is_name = i_name + s_name;

    for (t = utt.relation(i_name, 1)->head(); t; t = t->next())
	{
	    if (t->in_relation(is_name))
		continue;
	    pos = t->F("time");
	    max = 100000.0;   // smallest distance found so far

	    cout << "here 1\n";

	    for (p = utt.relation(s_name)->head(); p; p = p->next())
		{
		    // phrase_end events are measured against the end of
		    // the candidate, all others against its start
		    if (t->S("name","0") == "phrase_end")
			d = fabs(pos - p->end());
		    else
			d = fabs(pos - p->start());
		    if (d < max)
			{
			    max = d;
			    s = p;
			}
		}
	    a = utt.relation(is_name)->append(t);
	}
}
#endif
331 
FT_add_trans_intonation(LISP l_utt,LISP lf_int,LISP l_add_words)332 LISP FT_add_trans_intonation(LISP l_utt, LISP lf_int, LISP l_add_words)
333 {
334     EST_String int_file = get_c_string(lf_int);
335     EST_Utterance *u = get_c_utt(l_utt);
336     EST_Relation lab;
337     EST_Item *s, *n;
338     int add_words = (l_add_words == NIL) ? 0 : 1;
339 
340     u->create_relation("Intonation");
341 
342     if (lab.load(int_file) != format_ok)
343 	EST_error("Couldn't load file %s\n", (const char *) int_file);
344 
345     for (s = lab.head(); s; s = s->next())
346       {
347 	n = u->relation("Intonation")->append();
348 	merge_features(n, s, 1);
349 	if (n->S("name") =="afb")
350 	    n->set("name", "a");
351 	else if (n->S("name") == "m")
352 	{
353 	    n->set("name", "a");
354 	    n->set("minor", 1);
355 	}
356 	else if ((n->S("name") == "a") || (n->S("name") == "arb")
357 		 || (n->S("name") == "rb") || (n->S("name") == "phrase_end")
358 		 || (n->S("name") == "phrase_start")
359 		 || (n->S("name") == "fb") )  // tmp check (awb)
360 	    continue;
361 	else
362 	    EST_error("Illegal intonation name \"%s\"\n", (const char *) n->S("name"));
363       }
364 
365     add_trans_intonation(*u, "Intonation", "Syllable", add_words);
366     return l_utt;
367 }
368 
FT_add_trans_word(LISP l_utt,LISP lf_word,LISP keep_times)369 LISP FT_add_trans_word(LISP l_utt, LISP lf_word, LISP keep_times)
370 {
371     EST_String word_file = get_c_string(lf_word);
372     EST_Utterance *u = get_c_utt(l_utt);
373     EST_Relation lab;
374     EST_Item *s, *n;
375     float p_end = 0;
376 
377     u->create_relation("Word");
378 
379     if (lab.load(word_file) != format_ok)
380 	EST_error("Couldn't load file %s\n", (const char *) word_file);
381 
382     for (s = lab.head(); s; s = s->next())
383     {
384 	s->set("start", p_end);
385 	p_end = s->F("end");
386 	if ((s->S("name") == "pau") || (s->S("name") == "sil"))
387 	    continue;
388 	n = u->relation("Word")->append();
389 	merge_features(n, s, 0);
390 	if (keep_times == NIL)
391 	{
392 	    n->f_remove("end");
393 	    n->f_remove("start");
394 	}
395     }
396 
397     return l_utt;
398 }
399 
FT_add_f0_points(LISP l_utt,LISP lf_f0)400 LISP FT_add_f0_points(LISP l_utt, LISP lf_f0)
401 {
402     EST_String f0_file = get_c_string(lf_f0);
403     EST_Utterance *u = get_c_utt(l_utt);
404     EST_Track f0;
405     EST_Item *s;
406     float prev_mid, next_mid;
407 
408     if (f0.load(f0_file) != format_ok)
409 	EST_error("Couldn't load file %s\n", (const char *) f0_file);
410 
411     for (s = u->relation("Segment")->head(); s; s = s->next())
412     {
413 	prev_mid = s->prev() ?
414 	    (prev(s)->F("end") + prev(s)->F("start"))/2.0 : 0.0;
415 	next_mid = s->next() ?
416 	    (next(s)->F("end") + next(s)->F("start"))/2.0 : 0.0;
417 
418 	s->set("prev_mid_f0", f0.a(f0.index(prev_mid)));
419 	s->set("start_f0", f0.a(f0.index(s->F("start"))));
420 	s->set("mid_f0", f0.a(f0.index((s->F("end") + s->F("start"))/2.0)));
421 	s->set("end_f0", f0.a(f0.index(s->F("end"))));
422 	s->set("next_mid_f0", f0.a(f0.index(next_mid)));
423     }
424 
425     return l_utt;
426 }
427 
FT_add_coefs(LISP l_utt,LISP lf_coef)428 LISP FT_add_coefs(LISP l_utt, LISP lf_coef)
429 {
430     EST_String coef_file = get_c_string(lf_coef);
431 
432     EST_Utterance *u = get_c_utt(l_utt);
433     EST_Track coef;
434     EST_Item *s;
435     float prev_mid, next_mid;
436     EST_FVector *frame;
437 
438     cout << "loading\n";
439     if (coef.load(coef_file) != format_ok)
440 	EST_error("Couldn't load file %s\n", (const char *) coef_file);
441     cout << "done\n";
442 
443     frame = new EST_FVector;
444     frame->fill(0.0); // special case for first frame.
445 
446     for (s = u->relation("Segment")->head(); s; s = s->next())
447     {
448 	prev_mid = s->prev() ?
449 	    (prev(s)->F("end") + prev(s)->F("start"))/2.0 : 0.0;
450 	next_mid = s->next() ?
451 	    (next(s)->F("end") + next(s)->F("start"))/2.0 : 0.0;
452 
453 	frame = new EST_FVector;
454 	coef.copy_frame_out(coef.index((s->F("end") + s->F("start"))/2.0),
455 				       *frame);
456 	s->set_val("mid_coef", est_val(frame));
457 
458 	frame = new EST_FVector;
459 	coef.copy_frame_out(coef.index(s->F("end")), *frame);
460 	s->set_val("end_coef", est_val(frame));
461 
462 	frame = new EST_FVector;
463 	coef.copy_frame_out(coef.index(s->F("start")), *frame);
464 	s->set_val("start_coef", est_val(frame));
465 
466 	frame = new EST_FVector;
467 	coef.copy_frame_out(coef.index(prev_mid), *frame);
468 	s->set_val("prev_mid_coef", est_val(frame));
469 
470 	frame = new EST_FVector;
471 	coef.copy_frame_out(coef.index(next_mid), *frame);
472 	s->set_val("next_mid_coef", est_val(frame));
473     }
474 
475     return l_utt;
476 
477 //	imid = coef.index((s->F("end") + s->F("start"))/2.0);
478 //	iend = coef.index(s->F("end"));
479 
480 }
481 
FT_add_xml_relation(LISP l_utt,LISP xml_file)482 LISP FT_add_xml_relation(LISP l_utt, LISP xml_file)
483 {
484     EST_Utterance *u, tmp;
485 
486     u = get_c_utt(l_utt);
487 
488     tmp.clear();
489     tmp.load(get_c_string(xml_file));
490 
491     EST_Features::Entries p;
492 
493     for (p.begin(tmp.relations); p; ++p)
494     {
495 	relation(p->v)->remove_item_feature("actuate");
496 	relation(p->v)->remove_item_feature("estExpansion");
497 	relation(p->v)->remove_item_feature("xml:link");
498 	relation(p->v)->remove_item_feature("href");
499 	relation(p->v)->remove_item_feature("show");
500     }
501 
502     utterance_merge(*u, tmp, "id");
503 
504     return l_utt;
505 }
506 
fix_syllables(EST_Item * nw,EST_Utterance & word)507 void fix_syllables(EST_Item *nw, EST_Utterance &word)
508 {
509     EST_Item *t, *n, *s, *m;
510 
511     if (word.relation("Syllable")->length() == word.relation("SurfaceSyllable")->length())
512 	return;
513 
514     cout << "Word \"" << word.relation("Word")->head()->name() << "\" has "
515 	<< word.relation("Syllable")->length() <<
516 	    " lexical syllables and " <<
517 		word.relation("SurfaceSyllable")->length() <<
518 		" surface syllables\n";
519 
520     for (s = word.relation("Syllable")->head(); s; s = s->next())
521     {
522 	t = s->as_relation("SylStructure");
523 	n = syl_nucleus(t);
524 
525 	m = daughter1(n->as_relation("Match"));
526 	if (m == 0)
527 	    insert_schwa(n->as_relation("Segment"));
528     }
529 
530     word.relation("SylStructure")->clear();
531     word.relation("Syllable")->clear();
532     word.relation("Match")->clear();
533 
534     syllabify_word(nw, *word.relation("Segment"),
535 		       *word.relation("SylStructure"),
536 		       *word.relation("Syllable"), 0);
537 
538 //    syllabify_word(nw, *word.relation("SurfacePhone"),
539 //		       *word.relation("SurfaceSylStructure"),
540 //		       *word.relation("SurfaceSyllable"));
541 
542     EST_Item xx;
543 
544     dp_match(*word.relation("Segment"),
545 	      *word.relation("SurfacePhone"),
546 	      *word.relation("Match"), local_cost, &xx);
547 
548 }
549 
550 
551 /* Add segment durations from file */
add_trans_duration(EST_Utterance & utt,const EST_String & segfile)552 void add_trans_duration(EST_Utterance &utt, const EST_String &segfile)
553 {
554     EST_Utterance word;
555     EST_Relation phone, lab;
556     EST_Item *s, *n;
557     EST_StrList plist;
558     float phone_start;
559     EST_Item xx;
560 
561     if (lab.load(segfile) != format_ok)
562 	EST_error("Couldn't load file %s\n", (const char *) segfile);
563 
564     phone_start = 0.0;
565     utt.create_relation("LabelSegment");
566     utt.create_relation("Match");
567 
568     for (s = lab.head(); s; s = s->next())
569     {
570 	if (!phone_def.present(s->S("name")))
571 	    EST_error("Phone %s is not defined in phone set\n", (const char *)
572 		      s->S("name"));
573 	n = utt.relation("LabelSegment")->append();
574 	merge_features(n, s, 1);
575 	n->set("start", phone_start);
576 	phone_start = s->F("end");
577 	n->set("dur", n->F("end") -  n->F("start"));
578     }
579 
580     dp_match(*utt.relation("Segment"), *utt.relation("LabelSegment"),
581 	     *utt.relation("Match"), local_cost, &xx);
582 
583     add_times(*utt.relation("Segment"), *utt.relation("LabelSegment"),
584 	     *utt.relation("Match"));
585 
586     for (s = utt.relation("Segment")->head(); s; s = s->next())
587     {
588 	s->set("target_dur", (s->F("end") - s->F("start")));
589 	s->f_remove("end");
590 	s->f_remove("dur");
591 	s->f_remove("start");
592     }
593 }
594 
add_silences(EST_Utterance & utt,EST_Item * w)595 static void add_silences(EST_Utterance &utt,EST_Item *w)
596 {
597     EST_Item *s;
598     int r;
599 
600     if (w == 0)  // insert initial silence
601     {
602 	s = utt.relation("LabelSegment")->head();
603 	if (s->name() == "pau")
604 	{
605 	    EST_Item *sil = utt.relation("Segment")->append();
606 	    sil->set("name","pau");
607 	    sil->set("start",s->F("start"));
608 	    sil->set("end",s->F("end"));
609 	}
610 	return;
611     }
612 
613     cout << "Looking at inserting\n";
614     // Intermeditate silences
615     r = w->I("phon_ref");
616     for (s=utt.relation("LabelSegment")->head(); s; s=s->next())
617     {
618 	if (r == s->I("ref"))
619 	{
620 	    if (next(s)->name() == "pau")
621 	    {
622 		cout << "actually inserting\n";
623 		EST_Item *sil = utt.relation("Segment")->append();
624 		sil->set("name","pau");
625 		sil->set("start",s->F("end"));
626 		sil->set("end",next(s)->F("end"));
627 	    }
628 	    return;
629 	}
630     }
631 }
632 
add_trans_seg(EST_Utterance & utt,const EST_String & segfile)633 void add_trans_seg(EST_Utterance &utt, const EST_String &segfile)
634 {
635     EST_Utterance word;
636     EST_Relation phone, lab;
637     EST_Item *s, *w, *nw, *n;
638     EST_StrList plist;
639     float phone_start;
640     LISP lutt;
641     int i;
642     LISP l_pdef;
643 
644     l_pdef = siod_get_lval("darpa_fs", NULL);
645     lisp_to_features(l_pdef, phone_def);
646 
647     utt.create_relation("LabelSegment");
648     utt.create_relation("tmpSegment");
649     utt.create_relation("Syllable");
650     utt.create_relation("Segment");
651     utt.create_relation("WordStructure");
652 
653     if (lab.load(segfile) != format_ok)
654 	EST_error("Couldn't load file %s\n", (const char *) segfile);
655 
656     phone_start = 0.0;
657 
658     for (s = lab.head(); s; s = s->next())
659     {
660 	if (!phone_def.present(s->S("name")))
661 	    EST_error("Phone %s is not defined in phone set\n", (const char *)
662 		      s->S("name"));
663 	n = utt.relation("LabelSegment")->append();
664 //	cout << "append ls id " << n->S("id") << endl;
665 	merge_features(n, s, 1);
666 //	cout << "keep ls id " << n->S("id") << endl;
667 	n->set("start", phone_start);
668 	phone_start = s->F("end");
669     }
670 
671 //    phonemic_trans(*utt.relation("LabelSegment"));
672 /*    for (w = utt.relation("Word")->head(); w != 0; w = n)
673     {
674 	n = w->next();
675 	w->f_remove("end");
676 	if ((w->f("name") == "sil") || (w->f("name") == "pau"))
677 	    utt.relation("Word")->remove_item(w);
678     }
679 */
680     gc_protect(&lutt);
681 
682     word.create_relation("Word");
683     word.create_relation("Match");
684     word.create_relation("NewMatch");
685     word.create_relation("Segment");
686     word.create_relation("tmpSegment");
687     word.create_relation("SylStructure");
688     word.create_relation("Syllable");
689     word.create_relation("WordStructure");
690 
691     // Note starts are hardwired here because feature function thing
692     // isn't fully operational and because deleting silence messes
693     // it up.
694 
695 /*    s = utt.relation("LabelSegment")->head();
696     if ((s->f("name") == "pau") || (s->f("name") == "sil"))
697     {
698 	w = utt.relation("SurfacePhone")->append();
699 
700 	w->set("name", "pau");
701 	w->set("end",  s->F("end"));
702 	w->set("start",  s->F("start"));
703 	w->set("df", phone_def.A("pau"));
704     }
705 */
706 
707     add_silences(utt,0);
708 
709     for (i = 0, w = utt.relation("Word")->head(); w != 0; w = w->next(), ++i)
710     {
711 	word.clear_relations();
712 	word.f.set("max_id", 0);
713 	cout << "word: " << *w << endl;
714 	lex_to_phones(w->f("name"), w->f("pos", ""),
715 		      *word.relation("Segment"));
716 	trans_to_phones(w, *utt.relation("LabelSegment"),
717 			*word.relation("tmpSegment"));
718 
719 	nw = word.relation("Word")->append();
720 	nw->set("name", w->S("name"));
721 
722 	syllabify_word(nw, *word.relation("Segment"),
723 		       *word.relation("SylStructure"),
724 		       *word.relation("Syllable"), 0);
725 
726 //	subword_list(nw, *word.relation("Syllable"),
727 //			      *word.relation("MetricalTree"));
728 
729 	if (siod_get_lval("mettree_debug", NULL) != NIL)
730 	    word.save("word_lex.utt", "est");
731 
732 	EST_Item xx;
733 	dp_match(*word.relation("Segment"), *word.relation("tmpSegment"),
734 		 *word.relation("Match"), local_cost, &xx);
735 
736 
737 //	fix_syllables(nw, word);
738 
739 	subword_metrical_tree(nw, *word.relation("Syllable"),
740 			      *word.relation("WordStructure"));
741 //	cout << "C2\n";
742 
743 	if (siod_get_lval("mettree_debug_word", NULL) != NIL)
744 	    word.save("word_dp.utt", "est");
745 
746 	if (siod_get_lval("mettree_debug_word", NULL) != NIL)
747 	    if (get_c_int(siod_get_lval("mettree_debug_word", NULL)) == i)
748 		word.save("word_nth.utt", "est");
749 
750 	word.remove_relation("SurfaceSylStructure");
751 	word.remove_relation("SurfaceMetrcialTree");
752 	word.remove_relation("SurfaceSyllable");
753 	//cout << "32\n";
754 	EST_String wid = w->S("id");
755 	utterance_merge(utt, word, w, word.relation("Word")->head());
756 
757 	w->set("id", wid);
758 
759 	add_silences(utt,w);
760     }
761     cout << "time2\n";
762 
763 //    utt.save("test.utt");
764 
765 
766 /*    add_initial_silence(*utt.relation("Segment"),
767 			*utt.relation("SurfacePhone"),
768 			*utt.relation("Match"));
769 			*/
770 
771     add_times(*utt.relation("Segment"), *utt.relation("LabelSegment"),
772 	      *utt.relation("Match"));
773 
774     //    utt.relation("Word")->f.set("timing_style", "segment");
775     //    cout << "here d\n";
776 
777 //    add_feature_function(*utt.relation("SurfacePhone"), "dur",
778 //		 usf_duration);
779 
780     add_metrical_functions(utt);
781 
782     // if silences aren't wanted we still have to build with them so that
783     // start times before pauses are done properly.
784     if (!siod_get_lval("unisyn_build_with_silences",NULL))
785 	for (s = next(utt.relation("Segment")->head());s;s = s->next())
786 	    if ((prev(s)->S("name") != "pau") && (prev(s)->S("name") != "sil"))
787 	    	s->set_function("start", "standard+unisyn_start");
788 	    else
789 	    	utt.relation("Segment")->remove_item(prev(s));
790 
791 
792 
793     utt.relation("Segment")->remove_item_feature("stress_num");
794     utt.relation("Word")->remove_item_feature("phon_ref");
795 
796     utt.remove_relation("tmpSegment");
797 
798     if (siod_get_lval("mettree_debug", NULL) != NIL)
799         utt.save("met_data.utt", "est");
800 
801     gc_unprotect(&lutt);
802      cout << "here c\n";
803 }
804 
805 
FT_add_trans_seg(LISP l_utt,LISP lf_seg)806 LISP FT_add_trans_seg(LISP l_utt, LISP lf_seg)
807 {
808     add_trans_seg(*get_c_utt(l_utt), get_c_string(lf_seg));
809     return l_utt;
810 }
811 
FT_add_trans_duration(LISP l_utt,LISP lf_seg)812 LISP FT_add_trans_duration(LISP l_utt, LISP lf_seg)
813 {
814     add_trans_duration(*get_c_utt(l_utt), get_c_string(lf_seg));
815     return l_utt;
816 }
817 
FT_syl_to_word_intonation(LISP l_utt)818 LISP FT_syl_to_word_intonation(LISP l_utt)
819 {
820     syl_to_word_intonation(*get_c_utt(l_utt));
821     return l_utt;
822 }
823 
FT_intonation_diagnostics(LISP l_ref,LISP l_test,LISP l_rel_name,LISP l_valid)824 LISP FT_intonation_diagnostics(LISP l_ref, LISP l_test, LISP l_rel_name, LISP l_valid)
825 {
826     EST_StrList valid;
827 
828     siod_list_to_strlist(l_valid, valid);
829     intonation_diagnostics(*get_c_utt(l_ref), *get_c_utt(l_test),
830 			   get_c_string(l_rel_name), valid);
831 
832     return NIL;
833 }
834 
835 
836 /*LISP FT_metrical_data(LISP lf_word, LISP lf_seg, LISP lf_int, LISP lf_met)
837 {
838     EST_Utterance *u = new EST_Utterance;
839     LISP l_utt = siod_make_utt(u);
840     EST_String word_file = get_c_string(lf_word);
841 
842     u->f.set("fileroot", basename(word_file, "*"));
843 
844     if (lf_met)
845     {
846 	if (siod_get_lval("us_xml_metrical_trees", NULL) != NIL)
847 	    data_metrical_tree(*u, get_c_string(lf_met), "xml");
848 	else
849 	    data_metrical_tree(*u, get_c_string(lf_met), "");
850     }
851     else
852 	syntax_metrical_tree(*u, get_c_string(lf_word));
853 
854     if (lf_int)
855 	data_metrical_lex(*get_c_utt(l_utt), get_c_string(lf_seg),
856 			  get_c_string(lf_int));
857     else
858 	data_metrical_lex(*get_c_utt(l_utt), get_c_string(lf_seg), "");
859 
860     return l_utt;
861 }
862 */
863 
864 
865 
866 /* Specific to s-expression weather.
867    Should be replaced when metrical trees go into XML.
868 
869 void data_metrical_tree(LISP l, EST_Item *met_parent, EST_Relation &word)
870 {
871     EST_String mv, name;
872     int id, phon_ref;
873     LISP a;
874     EST_Item *m;
875 
876     //cout << "full entry\n";
877 //    lprint(l);
878     //cout << "now parsing\n";
879 
880     mv = get_c_string(car(l));
881     //cout << "adding node strength: " << mv << endl;
882     // root nodes are added in calling routine.
883     if (mv != "r")
884 	m = met_parent->append_daughter();
885     else
886 	m = met_parent;
887     m->set("MetricalValue", mv);
888 
889     if (siod_atomic_list(cdr(l)))
890 	{
891 	    // cout << "atomic cdr is: ";
892 //	    lprint(cdr(l));
893 	    a = cdr(l);
894 	    name = get_c_string(car(a));
895 	    phon_ref = get_c_int(car(cdr(a)));
896 	    id = get_c_int(car(cdr(cdr(a))));
897 	    m->set("name", name);
898 	    m->set("phon_ref", phon_ref);
899 	    m->set("id", id);
900 	    word.append(m);
901 
902 	    //cout << "adding " << name << " on id: " << id << endl;
903 	    return;
904 	}
905     //cout << "\ndoing left branch\n";
906     data_metrical_tree(car(cdr(l)), m, word);
907     //cout << "\ndoing right branch\n";
908     data_metrical_tree(car(cdr(cdr(l))), m, word);
909 }
910 */
911 
lisp_tree_to_xml(ofstream & outf,LISP l)912 int lisp_tree_to_xml(ofstream &outf, LISP l)
913 {
914     int id;
915     LISP a;
916     EST_String mv;
917 
918     //cout << "full entry\n";
919 //    lprint(l);
920     //cout << "now parsing\n";
921 
922     mv = get_c_string(car(l));
923 
924     outf << "<elem metrical_value=\"" << mv << "\"";
925 
926     if (siod_atomic_list(cdr(l)))
927 	{
928 	    // cout << "atomic cdr is: ";
929 //	    lprint(cdr(l));
930 	    a = cdr(l);
931 	    id = get_c_int(car(cdr(cdr(a))));
932 	    outf << " href=\"&w;#id(" << id << ")\" />\n";
933 	    return 0;
934 	}
935     else
936 	outf << ">\n";
937 
938     if (lisp_tree_to_xml(outf, car(cdr(l))))
939 	outf << "</elem>\n";
940     if (lisp_tree_to_xml(outf, car(cdr(cdr(l)))))
941 	outf << "</elem>\n";
942 
943     return 1;
944 }
945 
FT_add_trans_metrical_tree(LISP l_utt,LISP lf_input,LISP lf_output)946 LISP FT_add_trans_metrical_tree(LISP l_utt, LISP lf_input, LISP lf_output)
947 {
948     EST_Utterance *utt;
949     LISP lmet, l;
950     EST_Item *m;
951 
952     utt = get_c_utt(l_utt);
953 
954     utt->create_relation("Word");
955     utt->create_relation("Token");
956     utt->create_relation("MetricalTree");
957 
958     lmet = vload(get_c_string(lf_input), 1);
959 
960 
961     ofstream outf;
962     outf.open(get_c_string(lf_output));
963 
964     outf << "<?xml version='1.0'?>\n";
965     outf << "<!DOCTYPE relation SYSTEM \"relation.dtd\" [\n";
966     outf << "<!ATTLIST elem metrical_value (s | r | w) #REQUIRED>]>\n";
967 
968     outf << "<relation name=\"MetricalTree\" structure-type=\"tree\">n";
969 
970 //	lprint(lmet);
971 
972 	// have to ensure that next id of build nodes is greater than
973 	// any in the file. This should be done properly sometime.
974     utt->set_highest_id(10000);
975 
976     for (l = lmet; l ; l = cdr(l))
977     {
978 	m = utt->relation("MetricalTree")->append();
979 //	    cout << "\nNew Tree\n";
980 	if (lisp_tree_to_xml(outf, car(l)))
981 	    outf << "</elem>" << endl;
982 	}
983 
984     outf << "</relation>\n";
985 
986     return l_utt;
987 }
988