1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                         Copyright (c) 1998                            */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author :  Alan W Black and Paul Taylor                    */
34 /*             Date   :  February 1998                                   */
35 /*-----------------------------------------------------------------------*/
36 /*                                                                       */
37 /*  An implementation of Metrical Tree Phonology                         */
38 /*                                                                       */
39 /*=======================================================================*/
40 
41 #include <cmath>
42 #include "festival.h"
43 #include "lexicon.h"
44 #include "../UniSyn/us_features.h"
45 
46 EST_Features phone_def;
47 
48 float local_cost(const EST_Item *s1, const EST_Item *s2);
49 
50 static void mettree_add_words(EST_Utterance &u);
51 
52 void construct_metrical_tree(EST_Utterance &word);
53 void add_end_silences(EST_Relation &segment);
54 void StrListtoString(EST_StrList &l, EST_String &s, EST_String sep=" ");
55 
56 void parse_wsj_syntax(void);
57 static void apply_nsr(EST_Utterance &u, const EST_String &tree);
58 #if 0
59 static void remove_punctuation(EST_Utterance &u);
60 static void add_intonation(EST_Utterance &u, const EST_String &base_int,
61 			   float threshold);
62 #endif
63 
64 void subword_list(EST_Item *w, EST_Relation &syllable,
65 		  EST_Relation &metricaltree);
66 
67 void add_non_terminal_features(EST_Relation &r,
68 			       EST_Features &f);
69 
70 void stress_factor1(EST_Utterance &u, const EST_String &base_stream,
71 		    const EST_String &m);
72 void stress_factor2(EST_Utterance &u, const EST_String &base_stream,
73 		    const EST_String &m);
74 
75 void phrase_factor(EST_Utterance &u);
76 
77 void main_stress(EST_Item *s);
78 
79 void end_to_dur(EST_Relation &r);
80 
81 void footing(EST_Item *n1);
82 
83 LISP FT_Classic_Phrasify_Utt(LISP args);
84 LISP FT_Classic_POS_Utt(LISP args);
85 LISP FT_PParse_Utt(LISP args);
86 LISP FT_MultiParse_Utt(LISP utt);
87 void MultiParse(EST_Utterance &u);
88 
89 void add_feature_string(EST_Relation &r, const EST_String &fname,
90 			const EST_String &f);
91 
92 void add_monotone_targets(EST_Utterance &u, float start_f0,
93 				    float end_f0);
94 
95 
96 void clear_feature(EST_Relation &r, const EST_String &name);
97 
98 typedef
99 float (*local_cost_function)(const EST_Item *item1,
100 			     const EST_Item *item2);
101 
102 
103 
104 void subword_phonology(EST_Utterance &word);
105 void lex_to_phones(EST_Utterance &u, const EST_String &relname);
106 
107 bool dp_match(const EST_Relation &lexical,
108 	      const EST_Relation &surface,
109 	      EST_Relation &match,
110 	      local_cost_function lcf,
111 	      EST_Item *null_syl);
112 
113 //bool dp_match(EST_Relation &a, EST_Relation &b, EST_Relation &c,
114 //	      local_cost_function lcf, const EST_String &null_sym);
115 void lex_to_phones(const EST_String &name, const EST_String &pos,
116 		   EST_Relation &phone);
117 
118 void subword_metrical_tree(EST_Relation &syllable,
119 			   EST_Relation &metricaltree);
120 
121 
122 int syllabify_word(EST_Item *nw, EST_Relation &phone,
123 		    EST_Relation &sylstructure, EST_Relation &syl, int flat);
124 
125 void subword_metrical_tree(EST_Item *w, EST_Relation &syllable,
126 			   EST_Relation &metricaltree);
127 
128 
129 /*void phonemic_trans(EST_Relation &trans)
130 {
131     EST_Item *s, *n;
132     EST_String a;
133 
134 //    cout << "trans: " << trans << endl;
135 
136     for (s = trans.head(); s; s = s->next())
137     {
138 	n = s->next();
139 //	cout << *s;
140 	if (s->S("name").contains("cl"))
141 	{
142 	    a = s->S("name").before("cl");
143 	    if ((next(s) != 0) && (next(s)->S("name") == a))
144 		    trans.remove_item(s);
145 	    else if ((next(s) != 0) && (a == "dcl" )
146 		     && (next(s)->S("name") == "jh"))
147 		    trans.remove_item(s);
148 	    else if ((next(s) != 0) && (a == "tcl" )
149 		     && (next(s)->S("name") == "ch"))
150 		    trans.remove_item(s);
151 	    else
152 		s->set("name", a);
153 //	    cout << "here1: " << a << "\n";
154 //	    s->set("name", s->S("name").before("cl"));
155 
156 	}
157     }
158 }
159 */
160 
161 
prev_match(EST_Item * n)162 EST_Item *prev_match(EST_Item *n)
163 {
164     EST_Item *p = n->prev();
165     if (p == 0)
166 	return 0;
167 
168     if (daughter1(p->as_relation("Match")) == 0)
169 	prev_match(p);
170 
171     return daughter1(p->as_relation("Match"));
172 }
173 
insert_schwa(EST_Item * n)174 void insert_schwa(EST_Item *n)
175 {
176     EST_Item *p, *s;
177     float pp_end = 0;
178     float schwa_length = 0.01;
179 
180     if ((p = prev_match(n)) == 0)
181     {
182 	cout << "Couldn't insert dummy schwa after " << *n << endl;
183 	return;
184     }
185 
186     p = p->as_relation("SurfacePhone");
187     pp_end = (prev(p) != 0) ? prev(p)->F("end",0.0) : 0.0;
188 
189     s = p->insert_after();
190 
191     s->set("name", "ax");
192     s->set("stress_num", "0");
193 
194     if ((p->F("end",0) - pp_end) < schwa_length)
195 	schwa_length = p->F("dur") / 2.0;
196 
197     s->set("end", p->F("end",0));
198     p->set("end", p->F("end",0) - schwa_length);
199     s->set("start", p->F("end",0));
200 
201     s->set("df", phone_def.A("ax"));
202 
203 //    cout << "end 1:" << s->f("end") << endl;
204 //    cout << "end 2:" << p->f("end") << endl;
205 }
206 
add_initial_silence(EST_Relation & lexical,EST_Relation & surface,EST_Relation & match)207 void add_initial_silence(EST_Relation &lexical, EST_Relation &surface,
208 	       EST_Relation &match)
209 {
210     EST_Item *s, *p, *n, *m;
211 
212     s = lexical.head();
213     if ((s->f("name") != "pau") && (s->f("name") != "sil"))
214     {
215 	p = s->insert_before();
216 	p->set("name", "pau");
217 	p->set("df", phone_def.A("pau"));
218 
219 	n = surface.head();
220 	if ((n->f("name") == "pau") || (n->f("name") == "sil"))
221 	{
222 	    m = match.head()->insert_before(p);
223 	    m->append_daughter(n);
224 	}
225 
226     }
227 }
228 
add_even_segment_times(EST_Item * w,EST_Relation & phone)229 void add_even_segment_times(EST_Item *w, EST_Relation &phone)
230 {
231     EST_Item *s;
232     int i;
233     float start,dur=0,n,div;
234 
235     start = w->F("start");
236     dur = w->F("end") - start;
237     n = (float)phone.length();
238     div = dur/n;
239 
240     for (i = 0, s = phone.head(); s; s = s->next(), ++i)
241     {
242 	s->set("start", start + div * (float) i);
243 	s->set("end", start + div * (float) (i + 1));
244     }
245 }
246 
247 #if 0
248 static void add_trans_phrase_phrase(EST_Utterance &utt)
249 {
250     EST_Item *s, *t, *a, *r;
251     EST_Item *first_accent = 0, *last_accent = 0;
252     bool exist;
253 
    // This looks insanely complicated, but all it really does is
    // add phrase_start and phrase_end items to the root node of
    // each metrical tree and then place these in the right position
    // in the intonation relation.
258 
259     utt.create_relation("IntonationPhrase");
260 
261     for (s = utt.relation("MetricalTree", 1)->head(); s; s = s->next())
262 	{
263 	    for (r = first_leaf_in_tree(s);
264 		 r != next_leaf(last_leaf_in_tree(s)); r = next_leaf(r))
265 		if (r->in_relation("IntonationSyllable"))
266 		    {
267 			if (first_accent == 0)
268 			    first_accent =
269 				parent(r->as_relation("IntonationSyllable"))
270 				->as_relation("Intonation");
271 			last_accent =
272 			    parent(r->as_relation("IntonationSyllable"))
273 			    ->as_relation("Intonation");
274 		    }
275 
276 	    exist = false;
277 	    //	    cout << "\nroot node: " <<*s << endl;
278 
279 	    if (first_accent)
280 		{
281 		    cout << "first accent: " << *first_accent << endl;
282 		    a = first_accent->prev();
283 
284 		    if (a->S("name","") != "phrase_start")
285 			a = first_accent->insert_before();
286 		}
287 	    else
288 		{
289 		    if (a == 0)
290 			a = utt.relation("Intonation")->prepend();
291 		    else
292 			a = a->insert_after();
293 		}
294 
295 	    if (a->S("name","") != "phrase_start" ) // i.e. its a new one
296 		{
297 		    a->set("name", "phrase_start");
298 		    a->set("ev:f0", 100);
299 		}
300 	    // re-write position as relative to metrical tree
301 	    a->set("position", usf_int_start);
302 	    // add this as daughter to root node
303 	    t = utt.relation("IntonationPhrase")->append(s);
304 	    t->append_daughter(a);
305 	    exist = false;
306 	    //cout << "appended phrase end\n";
307 
308 	    if (last_accent)
309 		{
310 		    cout << "last accent: " << *last_accent << endl;
311 		    a = last_accent->next();
312 		    if (a->S("name","") != "phrase_end")
313 			a = last_accent->insert_after();
314 		}
315 	    else
316 		a = a->insert_after();
317 
318 	    if (a->S("name","") != "phrase_end")
319 		{
320 		    a->set("name", "phrase_end");
321 		    a->set("ev:f0", 100);
322 		}
323 	    // re-write position as relative to metrical tree
324 	    a->set("position", usf_int_end);
325 
326 	    // add this as daughter to root node
327 	    t->append_daughter(a);
328 	    //cout << "appended phrase start\n";
329 	    first_accent = 0; // trigger for first time operation of loop
330 	}
331 
332     // now join any other marked phrase_start/ends to intermediate
333     // nodes in metrical tree.
334 
335     /*    for (s = u.relation("Intonation", 1)->head(); s; s = s->next())
336 	{
337 	    if (!s->in_relation("IntonationPhrase") &&
338 		!s->in_relation("IntonationSyllable"))
339 		{
340 		    pos = s->F("position");
341 
342 
343 
344 	}
345     */
346 
347 }
348 #endif
349 
350 
add_single_phrase(EST_Utterance & utt,EST_Item * t)351 void add_single_phrase(EST_Utterance &utt, EST_Item *t)
352 {
353     EST_Item *s=0, *a, *p;
354     float pos, max, d = 0, start = 0.0;
355 
356     pos = t->F("time");
357     max = 100000.0;
358 
359     for (p = utt.relation("Syllable")->head(); p; p = p->next())
360     {
361 	if (t->S("name") == "phrase_end")
362 	    d = fabs(pos - p->F("end"));
363 	else
364 	    d = fabs(pos - start);
365 
366 	if (d < max)
367 	{
368 	    max = d;
369 	    s = p;
370 	}
371 	start = p->F("end");
372     }
373 
374 /*    if (s)
375 	 cout << "joining syllable " << *s << endl;
376     else
377 	 cout << "No legal syllable " << endl;
378      cout << "to " << *t << endl;
379 
380      cout << "d = " << d << endl;
381 */
382 
383     if (!s->in_relation("IntonationSyllable"))
384 	a = utt.relation("IntonationSyllable")->append(s);
385     else
386 	a = s->as_relation("IntonationSyllable");
387     a->append_daughter(t);
388     t->set("time_path", "IntonationSyllable");
389     t->set_function("position", "standard+unisyn_tilt_phrase_position");
390 }
391 
add_times(EST_Relation & lexical,EST_Relation & surface,EST_Relation & match)392 void add_times(EST_Relation &lexical, EST_Relation &surface,
393 	       EST_Relation &match)
394 {
395     (void) surface;
396     (void) match;
397     EST_Item *s, *t, *p;
398     float prev_end, inc, first_end, last_end;
399     int i;
400 
401     // first pass, copy times as appropriate, and find first
402     // and last defined ends
403     // This is hacky and certainly won't work for many cases
404 
405     first_end = -1.0;
406     prev_end = 0.0;
407     last_end = 0.0;
408 
409 //    cout << "surface: " << surface << endl;
410 
411     for (s = lexical.head(); s; s = s->next())
412     {
413 	if ((t = daughter1(s->as_relation("Match"))) != 0)
414 	{
415 	    s->set("end", t->F("end"));
416 	    s->set("start", t->F("start"));
417 
418 	    last_end = t->F("end");
419 	    if (first_end < 0.0)
420 		first_end = t->F("end");
421 	}
422     }
423 
424      if (!lexical.head()->f_present("end"))
425      {
426 	 lexical.head()->set("end", first_end / 2.0);
427 	 lexical.head()->set("start", 0.0);
428      }
429 
430      if (!lexical.tail()->f_present("end"))
431      {
432 	 lexical.tail()->set("end", last_end + 0.01);
433 	 lexical.tail()->set("start", last_end);
434      }
435 
436     for (s = lexical.head(); s; s = s->next())
437     {
438 	if (!s->f_present("end"))
439 	{
440 //	    cout << "missing end feature for " << *s << endl;
441 	    for (i = 1, p = s; p; p = p->next(), ++i)
442 		if (p->f_present("end"))
443 		    break;
444 	    inc = (p->F("end") - prev_end) / ((float) i);
445 //	    cout << "inc is : " << inc << endl;
446 
447 //	    cout << "stop phone is " << *p << endl;
448 
449 	    for (i = 1; s !=p ; s = s->next(), ++i)
450 	    {
451 		s->set("end", (prev_end + ((float) i * inc)));
452 		s->set("start", (prev_end + ((float) (i - 1 )* inc)));
453 	    }
454 	}
455 	prev_end = s->F("end");
456     }
457 }
458 
459 
met_error(EST_Item * s)460 static void met_error(EST_Item *s)
461 {
462     cerr << "Illegally named daughters of metrical node\n"
463 	 << "daughter1 : " << *daughter1(s) << endl
464 	 << "daughter2 : " << *daughter2(s) << endl;
465     EST_error("");
466 }
467 
legal_metrical_tree(EST_Item * s)468 void legal_metrical_tree(EST_Item *s)
469 {
470     if (s == 0)
471 	return;
472 
473     if ((daughter1(s) == 0) || (daughter2(s) == 0))
474 	return;
475 
476     if ((daughter1(s)->S("MetricalValue") == "s")
477 	&& (daughter2(s)->S("MetricalValue") != "w"))
478 	met_error(s);
479     else if ((daughter1(s)->S("MetricalValue") == "w")
480 	&& (daughter2(s)->S("MetricalValue") != "s"))
481 	met_error(s);
482     else if ((daughter1(s)->S("MetricalValue") != "w")
483 	     && (daughter1(s)->S("MetricalValue") != "s"))
484 	met_error(s);
485 
486     legal_metrical_tree(daughter1(s));
487     legal_metrical_tree(daughter2(s));
488 }
489 
parse_words(EST_Utterance & utt)490 void parse_words(EST_Utterance &utt)
491 {
492     utt.create_relation("Token");
493 
494     FT_Classic_POS_Utt(siod(&utt));
495     FT_Classic_Phrasify_Utt(siod(&utt));
496     MultiParse(utt);
497 
498     utt.relation("Syntax")->remove_item_feature("pos_index");
499     utt.relation("Syntax")->remove_item_feature("pos_index_score");
500     utt.relation("Syntax")->remove_item_feature("phr_pos");
501     utt.relation("Syntax")->remove_item_feature("pbreak_index");
502     utt.relation("Syntax")->remove_item_feature("pbreak_index_score");
503     utt.relation("Syntax")->remove_item_feature("pbreak");
504     utt.relation("Syntax")->remove_item_feature("blevel");
505     utt.relation("Syntax")->remove_item_feature("prob");
506 }
507 
binaryize_tree(EST_Item * t)508 void binaryize_tree(EST_Item *t)
509 {
510     // terminating condition
511     if (daughter1(t) == 0)
512 	return;
513 
514     // nodes with single children should be merged
515     if (daughter2(t) == 0)
516     {
517 //	cout << "Single daughter: " << *t << endl;
518 	EST_Item *d = daughter1(t);
519 	move_sub_tree(d, t);
520     }
521 
522     for (EST_Item *p = daughter1(t); p; p = p->next())
523 	binaryize_tree(p);
524 }
525 
binaryize_tree(EST_Utterance & utt,const EST_String & base_tree,const EST_String & new_tree)526 void binaryize_tree(EST_Utterance &utt, const EST_String &base_tree,
527 		    const EST_String &new_tree)
528 {
529     utt.create_relation(new_tree);
530     copy_relation(*utt.relation(base_tree), *utt.relation(new_tree));
531 
532     for (EST_Item *p = utt.relation(new_tree)->head(); p; p = p->next())
533 	binaryize_tree(p);
534 }
535 
syntax_to_metrical_words(EST_Utterance & utt)536 void syntax_to_metrical_words(EST_Utterance &utt)
537 {
538     utt.create_relation("MetricalWord");
539     // copy syntax tree while merging single daughter nodes
540     binaryize_tree(utt, "Syntax", "MetricalWord");
541     // add strong and weak values
542     apply_nsr(utt, "MetricalWord");
543 }
544 
add_metrical_functions(EST_Utterance & utt)545 void add_metrical_functions(EST_Utterance &utt)
546 {
547     // Note that we don't add "start" functions here as this depends on
548     // pause behaviour
549     add_feature_function(*utt.relation("Syllable"),
550 			 "vowel_start",
551 			 "unisyn_vowel_start");
552 
553     add_feature_function(*utt.relation("Syllable"),
554 			 "end", "standard+unisyn_leaf_end");
555     add_feature_function(*utt.relation("Syllable"),
556 			 "start", "standard+unisyn_leaf_start");
557 
558     for (EST_Item *s = utt.relation("Syllable")->head(); s; s = s->next())
559 	s->set("time_path", "SylStructure");
560 
561     EST_Features tf;
562     tf.set_function("end", "standard+unisyn_leaf_end");
563     tf.set_function("start","standard+unisyn_leaf_start");
564     tf.set_function("dur","standard+unisyn_duration");
565 
566     tf.set("time_path", "MetricalTree");
567     tf.set("time_path", "MetricalTree");
568 
569 //    add_non_terminal_features(*utt.relation("MetricalTree"), tf);
570 
571     tf.set("time_path", "SylStructure");
572     add_non_terminal_features(*utt.relation("SylStructure"), tf);
573 
574     add_feature_function(*utt.relation("Segment"),
575 			 "dur",
576 			 "standard+duration");
577 
578 
579 }
580 
auto_metrical_lex(EST_Utterance & utt)581 void auto_metrical_lex(EST_Utterance &utt)
582 {
583     LISP l_pdef;
584 
585     utt.create_relation("Syllable");
586     utt.create_relation("Segment");
587 
588     l_pdef = siod_get_lval("darpa_fs", NULL);
589     lisp_to_features(l_pdef, phone_def);
590 
591     mettree_add_words(utt);
592 
593     LISP lt = siod_get_lval("us_base_int", NULL);
594     EST_String base_int;
595     if (lt == NIL)
596 	base_int = "Syllable";
597     else
598     {
599 	const char *x = get_c_string(lt);
600 	base_int = x;
601     }
602 
603     // add_end_silences(*utt.relation("Segment"));
604 
605     add_metrical_functions(utt);
606 }
607 
extend_tree(EST_Item * m,EST_Item * p,const EST_String & terminal,const EST_String & second_tree)608 void extend_tree(EST_Item *m, EST_Item *p, const EST_String &terminal,
609 		 const EST_String &second_tree)
610 {
611     EST_Item *d, *e;
612 
613     if (!daughter1(m))
614     {
615 	if (m->in_relation(terminal)) // ie. really hit the bottom
616 	    return;
617 	m = m->as_relation(second_tree); // swap to a new tree
618     }
619 
620     for (d = daughter1(m); d; d = d->next())
621     {
622 	e = p->append_daughter(d);
623         extend_tree(d, e, terminal, second_tree);
624     }
625 }
626 
627 
nsr(EST_Item * n)628 static void nsr(EST_Item *n)
629 {
630     EST_Item *left, *right;
631     left = daughter1(n);
632     right = daughter2(n);
633     if (left == 0)
634 	return;
635     else
636     {
637 	nsr(left);
638 	left->set("MetricalValue","w");
639     }
640 
641     if (right == 0)
642 	return;
643     else
644     {
645 	nsr(right);
646 	right->set("MetricalValue","s");
647     }
648 }
649 
apply_nsr(EST_Utterance & u,const EST_String & tree)650 static void apply_nsr(EST_Utterance &u, const EST_String &tree)
651 {
652     EST_Item *n;
653 
654     for (n = u.relation(tree)->head(); n; n = n->next())
655 	nsr(n);
656 }
657 
other_daughter(EST_Item * parent,EST_Item * daughter)658 EST_Item *other_daughter(EST_Item *parent, EST_Item *daughter)
659 {
660     return (daughter1(parent) == daughter) ? daughter2(parent) :
661     daughter1(parent);
662 }
663 
stress_factor1(EST_Item * s,int max_depth)664 static void stress_factor1(EST_Item *s, int max_depth)
665 {
666     EST_Item *a;
667     EST_String val, pad;
668     char *str;
669     long n, i;
670     float max;
671 
672     val = "";
673 
674     for (a = s; parent(a); a = parent(a))
675 	if (a->f("MetricalValue") == "s")
676 	    val += "2";
677 	else
678 	    val += "0";
679 
680     //    cout << "\nSyllable " << s << " has value " << val << endl;
681 
682     if (val.length() < max_depth)
683 	for (pad = "", i = 0; i < (max_depth - val.length()); ++i)
684 	    pad += "2";
685 
686     val += pad;
687     //    cout << "Syllable " << s << " has padded value " << val << endl;
688 
689     str = strdup(val);
690     max = pow(3.0, (float)max_depth) - 1.0;
691     n = strtol(str, (char **)NULL, 3);
692     //    cout << "decimal value: " << n;
693     //    cout << " normalised: " << (float)n/max << endl;
694     s->set("StressFactor1", ((float)n/max));
695 }
696 
find_apex(EST_Item * n,int & num_nodes)697 EST_Item * find_apex(EST_Item *n, int &num_nodes)
698 {
699     EST_Item *p;
700     p = parent(n);
701     if (p == 0)
702 	return n;
703     if (daughter2(p) == n)
704 	return find_apex(p, ++num_nodes);
705 
706     return p;
707 }
708 
find_leaf(EST_Item * n,int & num_nodes)709 void find_leaf(EST_Item *n, int &num_nodes)
710 {
711     if (n == 0)
712 	return;
713     find_leaf(daughter1(n), ++num_nodes);
714 }
715 
phrase_factor(EST_Item & syl,const EST_String & met_name)716 static void phrase_factor(EST_Item &syl, const EST_String &met_name)
717 {
718     EST_Item *p;
719     EST_String val, pad;
720     int num_nodes = 1;
721 
722     //    cout << "Terminal Syl = " << syl << " f:" << syl.f << endl;
723 
724     p = find_apex(syl.as_relation(met_name), num_nodes);
725     //    cout << "up nodes: " << num_nodes;
726     //    cout << "Apex = " << *p << endl;
727     find_leaf(daughter2(p), num_nodes);
728     //    cout << " downp nodes: " << num_nodes << endl;
729 
730     syl.set("PhraseIndex", num_nodes);
731 }
732 
max_tree_depth(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)733 static int max_tree_depth(EST_Utterance &u, const EST_String &base_stream,
734 			  const EST_String &mettree)
735 {
736     EST_Item *s, *a;
737     int depth;
738     int max_depth = 0;
739 
740     for (s = u.relation(base_stream)->head(); s; s = s->next())
741     {
742 	depth = 0;
743 	for (a = s->as_relation(mettree); parent(a); a = parent(a))
744 	    ++depth;
745 	if (depth > max_depth)
746 	    max_depth = depth;
747     }
748     return max_depth;
749 }
750 
stress_factor1(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)751 void stress_factor1(EST_Utterance &u, const EST_String &base_stream,
752 		    const EST_String &mettree)
753 {
754     EST_Item *s;
755     int max_depth = max_tree_depth(u, base_stream, mettree);
756 
757     for (s = u.relation(base_stream)->head(); s; s = s->next())
758 	stress_factor1(s->as_relation(mettree), max_depth);
759 }
760 
761 
strong_daughter(EST_Item * n)762 EST_Item *strong_daughter(EST_Item *n)
763 {
764     if (daughter1(n) == 0)
765 	return 0;
766     return (daughter1(n)->f("MetricalValue") == "s")
767 	? daughter1(n) : daughter2(n);
768 }
769 
weak_daughter(EST_Item * n)770 EST_Item *weak_daughter(EST_Item *n)
771 {
772     if (daughter1(n) == 0)
773 	return 0;
774     return (daughter1(n)->f("MetricalValue") == "w")
775 	? daughter1(n) : daughter2(n);
776 }
777 
fill_mini_tree(EST_Item * s,int val)778 static void fill_mini_tree(EST_Item *s, int val)
779 {
780     if (s->f("MetricalValue") == "s")
781 	s->set("StressVal", val);
782     else
783 	s->set("StressVal", 0);
784     if (strong_daughter(s))
785 	fill_mini_tree(strong_daughter(s), val);
786 
787     if (weak_daughter(s))
788 	fill_mini_tree(weak_daughter(s), val - 1);
789 }
790 
stress_factor2(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)791 void stress_factor2(EST_Utterance &u, const EST_String &base_stream,
792 		    const EST_String &mettree)
793 {
794     EST_Item *s;
795     int sv = -1;
796     float b;
797     (void) base_stream;
798 
799     s = u.relation(mettree)->head();
800     fill_mini_tree(s, sv);
801 
802     // normalise values
803     sv = 0;
804     for (s = u.relation(base_stream)->head(); s; s = s->next())
805 	sv = Lof(s->I("StressVal"), sv);
806 
807     cout << "Max Stress: " << sv << endl;
808 
809     for (s = u.relation(base_stream)->head(); s; s = s->next())
810     {
811 	b = (float)(s->I("StressVal") - sv + 1);
812 	if (s->f("MetricalValue") == "s")
813 	    s->set("StressFactor2", (b / float(sv)) * -1.0);
814 	else
815 	    s->set("StressFactor2", 0);
816     }
817 }
818 
phrase_factor(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)819 void phrase_factor(EST_Utterance &u, const EST_String &base_stream,
820 		   const EST_String &mettree)
821 {
822     EST_Item *s;
823     float max_pf = 0;
824 
825     for (s = u.relation(base_stream)->head(); s; s = s->next())
826 	phrase_factor(*s, mettree);
827 
828     for (s = u.relation(base_stream)->head(); s; s = s->next())
829 	if (s->I("PhraseIndex") > max_pf)
830 	    max_pf = s->I("PhraseIndex");
831 
832     for (s = u.relation(base_stream)->head(); s; s = s->next())
833     {
834 	s->set("PhraseFactor",
835 		(float)s->I("PhraseIndex")/max_pf);
836 	//	cout << *s << " pf = " <<
837 	//	    s->F("PhraseFactor") << endl;
838     }
839 
840 }
841 
842 #if 0
843 static void remove_punctuation(EST_Utterance &u)
844 {
    // The syntactic grammar has unary rules for the preterminals;
    // these would make the metrical tree have an extra layer
    // at the word level.  So here we remove that extra layer.
848     EST_Item *w;
849     EST_Item *a, *b, *c, *od;
850 
851     for (w = u.relation("Word")->head(); w != 0; w = w->next())
852     {
853 	if (w->f("pos") == "punc")
854 	{
855 	    a = w->as_relation("Syntax");
856 	    b = parent(a);
857 	    c = parent(b);
858 	    od = other_daughter(c, b);
859 	    remove_item(b, "Syntax");
860 	    move_sub_tree(od, c);
861 	    remove_item(w, "Word");
862 	}
863     }
864 }
865 
866 static void add_intonation(EST_Utterance &u, const EST_String &base_stream,
867 			   float threshold)
868 {
869     EST_Item *e, *s;
870 
871     cout << "Threshold = " << threshold << endl;
872 
873     for (s = u.relation(base_stream)->head(); s; s = s->next())
874     {
875 	if (s->F("StressFactor") > threshold)
876 	{
877 	    //	    cout << *s <<" **stress factor:" << s->F("StressFactor") << endl;
878 	    e = u.relation("IntSyl")->append();
879 	    e->insert_below(s);
880 	    e->set_name("Accent");
881 	    e->set("prominence", s->F("StressFactor"));
882 	    u.relation("Intonation")->append(e);
883 	}
884     }
885 }
886 
887 #endif
888 
add_monotone_targets(EST_Utterance & u,float start_f0,float end_f0)889 void add_monotone_targets(EST_Utterance &u, float start_f0,
890 				 float end_f0)
891 {
892     EST_Item *t;
893     float end;
894 
895     end = u.relation("Segment")->tail()->f("end");
896 
897     cout << "Phone ends\n";
898     cout << *u.relation("Segment");
899 
900     cout << "last position is :" << end << endl;
901 
902     u.create_relation("Target");
903 
904     t = u.relation("Target")->append();
905     t->set("f0", start_f0);
906     t->set("pos", 0.0);
907 
908     // temporary - should disappear when awb changes code
909     //    t->set("name", ftoString(start_f0));
910     //    t->set("end", 0.0);
911 
912     t = u.relation("Target")->append();
913     t->set("f0", end_f0);
914     t->set("pos", end);
915 
916     // temporary - should disappear when awb changes code
917     //    t->set("name", ftoString(end_f0));
918     //    t->set("end", end);
919 }
920 
mettree_add_words(EST_Utterance & u)921 static void mettree_add_words(EST_Utterance &u)
922 {
923     EST_Utterance word;
924     EST_Item *w;
925 
926     word.create_relation("Word");
927     word.create_relation("Segment");
928     word.create_relation("SylStructure");
929     word.create_relation("Syllable");
930     word.create_relation("WordStructure");
931 
932     for (w = u.relation("Word")->head(); w != 0; w = w->next())
933     {
934 	word.clear_relations();
935 
936 	cout << "N:";
937 	cout << w->f("name") << " " << w->f("pos", "") << endl;
938 	lex_to_phones(w->f("name"), w->f("pos", "0"),
939 		      *word.relation("Segment"));
940 
941 	EST_Item *nw = word.relation("Word")->append();
942 	nw->set("name", w->S("name"));
943 
944 	syllabify_word(nw, *word.relation("Segment"),
945 		       *word.relation("SylStructure"),
946 		       *word.relation("Syllable"), 0);
947 
948 	subword_metrical_tree(nw, *word.relation("Syllable"),
949 			      *word.relation("WordStructure"));
950 
951 	utterance_merge(u, word, w, word.relation("Word")->head());
952     }
953 }
954 
955 void add_metrical_nodes(EST_Utterance &u, EST_Item *n, LISP lpos);
956 
strip_vowel_num(EST_String p)957 EST_String strip_vowel_num(EST_String p)
958 {
959     if (p.contains(RXint))
960 	p = p.before(RXint);
961     return p;
962 }
963 
964 
percolate(EST_Item * start)965 static void percolate(EST_Item *start)
966 {
967     EST_Item *n;
968 
969     for (n = start; n; n = parent(n))
970     {
971 	//	cout << "altering sister\n";
972 	if (prev(n) != 0)
973 	    prev(n)->set("MetricalValue", "w");
974 	else if (next(n) != 0)
975 	    next(n)->set("MetricalValue", "w");
976     }
977 }
978 
979 
main_stress(EST_Item * s)980 void main_stress(EST_Item *s)
981 {
982     EST_Item *n;
983 
984     for (n = s; parent(n); n = parent(n))
985   	n->set("MetricalValue", "s");
986 
987     n = s;
988     percolate(n);
989 }
990 
footing(EST_Item * n1)991 void footing(EST_Item *n1)
992 {
993     EST_Item *n2, *n3, *n4, *p1, *p3, *r;
994 
995     r = parent(n1);		// root node
996     p1 = daughter2(r);
997     n2 = daughter1(p1);
998     n3 = daughter2(p1);
999 
1000     if (p1 == 0)
1001     {
1002 	cerr << "Error: Empty 3rd node after " << *n1 << " in footing\n";
1003 	return;
1004     }
1005     if (n2 == 0)
1006     {
1007 	cerr << "Error: Empty 3rd node after " << *n1 << " in footing\n";
1008 	return;
1009     }
1010     if (n3 == 0)
1011     {
1012 	cerr << "Error: Empty 3rd node after " << *n1 << " in footing\n";
1013 	return;
1014     }
1015 
1016     cout << "n1: " << *n1 << endl << endl;
1017     cout << "n2: " << *n2 << endl << endl;
1018     cout << "n3: " << *n3 << endl << endl;
1019     cout << "p1: " << *p1 << endl << endl;
1020 
1021     p3 = n1->insert_parent();
1022     n1 = daughter1(p3);
1023     n4 = p3->append_daughter();
1024 
1025     move_sub_tree(n2, n4);
1026     move_sub_tree(n3, p1);
1027 
1028     p3->set("MetricalValue", "w");
1029     p3->set("Altered_a", "DONE_W");
1030 
1031     n1->set("MetricalValue", "s");
1032     n1->set("Altered_b", "DONE_S");
1033 }
1034 
1035 
// FT_metrical_data -- DISABLED: everything up to the matching #endif is
// compiled out by "#if 0".
//
// Builds a new utterance from label files and returns it as a LISP
// utterance object.
//
//   lf_word  name of the word label file, loaded into relation "Word".
//   lf_seg   name of the segment label file, loaded into "Segment";
//            the literal name "dummy" means no segment file is used.
//   lf_int   name of an intonation label file loaded into "Intonation",
//            or NIL for none.
//
// Steps, as written below:
//   1. create the relations of the utterance and load the label files
//      (festival_error() is called when a load fails);
//   2. strip the "end" feature from every word and remove words named
//      "sil" or "pau";
//   3. run FT_POS_Utt, FT_Phrasify_Utt and MultiParse, copy "Syntax"
//      into "MetricalTree", flatten preterminals, apply_nsr, and copy
//      the result into "LexicalMetricalTree";
//   4. for each word, build a scratch utterance `word` holding lexical
//      and surface phones, syllable structure and sub-word metrical
//      trees, then merge it into the main utterance;
//   5. add initial silence and timing information and install feature
//      functions on the non-terminal nodes;
//   6. return the LISP utterance.
//
// NOTE(review): the locals phone, p, plist, mid and fz are never used,
// and prev_end is only ever assigned.
// NOTE(review): relations "LexicalPhone", "Match", "LexicalSyllable" and
// "LexicalSylStructure" are looked up on *u, and "LexicalPhone" on
// `word`, but none of them is created in this function -- presumably
// stale names or created by a helper; verify before re-enabling.
// NOTE(review): syllabify_word() and utterance_merge() are called here
// with argument lists that differ from the live call sites earlier in
// this file, so this code is unlikely to compile as is -- confirm.
#if 0
LISP FT_metrical_data(LISP lf_word, LISP lf_seg, LISP lf_int)
   {

   EST_Utterance word, *u = new EST_Utterance;
   EST_Relation phone;
   EST_Item *s, *p, *w, *nw, *n;
   EST_StrList plist;
   float phone_start, mid;
   LISP lutt;
   EST_Track fz;
   int i;

   // Relations of the main utterance.
   u->create_relation("Word");
   u->create_relation("Segment");
   u->create_relation("Syllable");
   u->create_relation("MetricalTree");
   u->create_relation("LexicalMetricalTree");
   u->create_relation("SurfacePhone");
   u->create_relation("Surface");
   u->create_relation("Intonation");
   u->create_relation("IntonationSyllable");

   EST_String segfile = get_c_string(lf_seg);
   EST_String wordfile = get_c_string(lf_word);

   // Load the label files; any load failure is reported and raised
   // through festival_error().
   if (u->relation("Word")->load(wordfile) != format_ok)
   {
   cerr << "Couldn't load file " << get_c_string(lf_word) << endl;
   festival_error();
   }

   // The segment file is skipped entirely when its name is "dummy".
   if ((segfile != "dummy") &&(u->relation("Segment")->
   load(get_c_string(lf_seg)) != format_ok))
   {
   cerr << "Couldn't load file " << get_c_string(lf_seg) << endl;
   festival_error();
   }

   if (lf_int != NIL)
   if (u->relation("Intonation")->load(get_c_string(lf_int)) != format_ok)
   {
   cerr << "Couldn't load file " << get_c_string(lf_int) << endl;
   festival_error();
   }

   u->f.set("fileroot", basename(wordfile, "*"));

   //    cout << "Words: " << *u->relation("Word");

   if (segfile != "dummy")
   phonemic_trans(*u->relation("Segment"));
   //    u->relation("Intonation")->load(get_c_string(lf_int));

   // tmp hack
   float prev_end = 0.0;

   // Drop the "end" feature of every word and remove silence/pause
   // words.  The successor is fetched into n before w may be removed.
   for (w = u->relation("Word")->head(); w != 0; w = n)
   {
   n = w->next();
   //	w->set("start", prev_end);
   w->f_remove("end");
   //	prev_end = w->F("end");
   if ((w->f("name") == "sil") || (w->f("name") == "pau"))
   u->relation("Word")->remove_item(w);
   }

   // lutt stays GC-protected until just before the return.
   gc_protect(&lutt);
   lutt = siod_make_utt(u);

   cout << *u->relation("Word") << endl;

   FT_POS_Utt(lutt);
   FT_Phrasify_Utt(lutt);
   MultiParse(*u);

   //    remove_punctuation(*u);

   // Copy Syntax tree into a new Metrical Tree
   copy_relation(*u->relations.val("Syntax"),
   *u->relations.val("MetricalTree"));
   // flatten preterminal unary rules
   flatten_preterminals(*u);

   apply_nsr(*u);

   copy_relation(*u->relations.val("MetricalTree"),
   *u->relations.val("LexicalMetricalTree"));

   // Relations of the per-word scratch utterance.
   word.create_relation("Word");
   word.create_relation("Match");
   word.create_relation("NewMatch");
   word.create_relation("Segment");
   word.create_relation("SurfacePhone");
   word.create_relation("LexicalSylStructure");
   word.create_relation("SurfaceSylStructure");
   word.create_relation("LexicalSyllable");
   word.create_relation("SurfaceSyllable");

   word.create_relation("LexicalMetricalTree");
   word.create_relation("SurfaceMetricalTree");

   phone_start = 0.0;

   // Note starts are hardwired here because feature function thing
   // isn't fully operational and because deleting silence messes
   // it up.

   //    u->save("zz_parse.utt", "est");

   if (segfile != "dummy")
   {
   // Each segment starts where the previous one ended.
   for (s = u->relation("Segment")->head(); s; s = s->next())
   {
   s->set("start", phone_start);
   phone_start = s->F("end");
   }
   phone_start = 0.0;

   // If the first segment is a pause/silence, seed SurfacePhone with it.
   // NOTE(review): s is dereferenced without a null check; an empty
   // "Segment" relation would presumably crash here -- confirm.
   s = u->relation("Segment")->head();
   if ((s->f("name") == "pau") || (s->f("name") == "sil"))
   {
   w = u->relation("SurfacePhone")->append();
   w->set("name", "pau");
   w->set("end",  s->F("end"));
   w->set("start",  s->F("start"));
   }
   }

   //    cout <<"Surface 1:" << *u->relation("SurfacePhone") << endl;

   // Per-word processing; i is the word index, used only for the
   // "mettree_debug_word" dump below.
   for (i = 0, w = u->relation("Word")->head(); w != 0; w = w->next(), ++i)
   {
   word.clear_relations();

   lex_to_phones(w->f("name"), w->f("pos"),
   *word.relation("Segment"));

   // Without a segment file the surface phones are a copy of the
   // lexical ones.
   if (segfile == "dummy")
   *word.relation("SurfacePhone") = *word.relation("Segment");
   else
   trans_to_phones(w, *u->relation("Segment"),
   *word.relation("SurfacePhone"));

   //	cout << "lex phones: " << *word.relation("LexicalPhone") << endl;
   //	cout << "sur phones: " << *word.relation("SurfacePhone") << endl;

   if (siod_get_lval("mettree_phones_debug", NULL) != NIL)
   {
   cout << "phones for word" << *w << endl;
   cout << *word.relation("SurfacePhone") << endl;
   }

   nw = word.relation("Word")->append();
   nw->set("name", w->S("name"));

   // NOTE(review): lex_to_phones() above filled "Segment", but the
   // lexical steps below read "LexicalPhone" -- verify which is meant.
   syllabify_word(nw, *word.relation("LexicalPhone"),
   *word.relation("LexicalSylStructure"),
   *word.relation("LexicalSyllable"));

   subword_metrical_tree(nw, *word.relation("LexicalSyllable"),
   *word.relation("LexicalMetricalTree"));

   if (siod_get_lval("mettree_debug", NULL) != NIL)
   word.save("word_lex.utt", "est");

   //	copy_relation(*word.relation("LexicalMetricalTree"),
   //		      *word.relation("HackMT"));

   EST_Item xx;
   dp_match(*word.relation("LexicalPhone"),
   *word.relation("SurfacePhone"),
   *word.relation("Match"), local_cost, &xx);

   // A result below 1 is reported as a pronunciation without a vowel;
   // processing continues regardless (the error call is commented out).
   if (syllabify_word(nw, *word.relation("SurfacePhone"),
   *word.relation("SurfaceSylStructure"),
   *word.relation("SurfaceSyllable")) < 1)
   {
   cerr << "Pronuciation for \"" << w->S("name")
   << "\" doesn't contain a vowel: " <<
   *word.relation("SurfacePhone") << endl;
   //	    festival_error();
   }

   fix_syllables(nw, word);

   subword_metrical_tree(nw, *word.relation("SurfaceSyllable"),
   *word.relation("SurfaceMetricalTree"));


   if (siod_get_lval("mettree_debug_word", NULL) != NIL)
   word.save("word_dp.utt", "est");

   // Additionally dump the word whose index equals the value of
   // "mettree_debug_word".
   if (siod_get_lval("mettree_debug_word", NULL) != NIL)
   if (get_c_int(siod_get_lval("mettree_debug_word", NULL)) == i)
   word.save("word_nth.utt", "est");


   utterance_merge(*u, word, w, "LexicalMetricalTree");
   }

   //    u->save("zz_parse2.utt", "est");

   //    u->save("test.utt");

   //    cout <<"Surface 2:" << *u->relation("SurfacePhone") << endl;

   add_initial_silence(*u->relation("LexicalPhone"),
   *u->relation("SurfacePhone"),
   *u->relation("Match"));

   //    cout <<"Surface 3:" << *u->relation("SurfacePhone") << endl;

   add_times(*u->relation("LexicalPhone"), *u->relation("SurfacePhone"),
   *u->relation("Match"));

   // Tag the relations with timing features.
   u->relation("LexicalPhone")->f.set("timing_style", "segment");
   u->relation("SurfacePhone")->f.set("timing_style", "segment");
   //    u->relation("Word")->f.set("timing_style", "segment");

   u->relation("LexicalSyllable")->f.set("timing_style", "segment");
   u->relation("LexicalSyllable")->f.set("time_path",
   "LexicalSylStructure");

   //    u->relation("LexicalSylStructure")->f.set("timing_style", "segment");
   //    u->relation("LexicalSylStructure")->f.set("time_relation",
   //					 "LexicalPhone");


   u->relation("LexicalMetricalTree")->f.set("timing_style", "segment");


   //    if (lf_int != NIL)
   //	add_feature_function(*u->relation("LexicalSyllable"),"vowel_start",
   //			     vowel_start_time);

   //    add_feature_function(*u->relation("LexicalPhone"), "start",
   //			 ff_start_time);
   //    add_feature_function(*u->relation("SurfacePhone"), "start",
   //			 ff_start_time);
   //    add_feature_function(*u->relation("SurfacePhone"), "dur",
   //			 duration_time);

   //    add_feature_function(*u->relation("LexicalSyllable"),"end", leaf_end_time);

   // Feature set handed to add_non_terminal_features(); the same
   // object is reused for both relations with a different "time_path".
   EST_Features tf;
   tf.set("time_path", "LexicalMetricalTree");
   tf.set("end", leaf_end_time);

   //    add_feature_string(*u->relation("LexicalMetricalTree"), "time_path",
   //		       "LexicalMetricalTree");
   //    add_feature_string(*u->relation("LexicalSylStructure"), "time_path",
   //		       "LexicalSylStructure");
   //

   add_non_terminal_features(*u->relation("LexicalMetricalTree"),
   tf);

   tf.set("time_path", "LexicalSylStructure");

   add_non_terminal_features(*u->relation("LexicalSylStructure"),
   tf);


   //    add_feature_function(*u->relation("LexicalSyllable"),"start",
   //			 ff_start_time);
   //    add_feature_function(*u->relation("LexicalSyllable"),"dur", duration_time);


   //    cout << "ADDED Features to phone\n\n";
   //    cout << *(u->relation("LexicalPhone")) << endl << endl;

   //    cout << "ADDED Features\n\n";
   //    cout << *(u->relation("LexicalSyllable"));

   //    cout << "\nfinished\n\n";

   //    if (lf_int != NIL)
   //	add_trans_intonation(*u);

   //    cout <<"Lexical 3:" << *u->relation("LexicalPhone") << endl;

   //    end_to_dur(*u->relation("SurfacePhone"));
   //    end_to_dur(*u->relation("LexicalPhone"));




   //    cout <<"Lexical 3:" << *u->relation("LexicalPhone") << endl;

   //    clear_feature(*u->relation("SurfacePhone"), "end");
   //    clear_feature(*u->relation("LexicalPhone"), "end");

   if (siod_get_lval("mettree_debug", NULL) != NIL)
   u->save("met_data.utt", "est");

   gc_unprotect(&lutt);

   //    u->save("zz_parse3.utt", "est");

   return lutt;
   }
#endif
1339