1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1998 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black and Paul Taylor */
34 /* Date : February 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* An implementation of Metrical Tree Phonology */
38 /* */
39 /*=======================================================================*/
40
41 #include <cmath>
42 #include "festival.h"
43 #include "lexicon.h"
44 #include "../UniSyn/us_features.h"
45
46 EST_Features phone_def;
47
48 float local_cost(const EST_Item *s1, const EST_Item *s2);
49
50 static void mettree_add_words(EST_Utterance &u);
51
52 void construct_metrical_tree(EST_Utterance &word);
53 void add_end_silences(EST_Relation &segment);
54 void StrListtoString(EST_StrList &l, EST_String &s, EST_String sep=" ");
55
56 void parse_wsj_syntax(void);
57 static void apply_nsr(EST_Utterance &u, const EST_String &tree);
58 #if 0
59 static void remove_punctuation(EST_Utterance &u);
60 static void add_intonation(EST_Utterance &u, const EST_String &base_int,
61 float threshold);
62 #endif
63
64 void subword_list(EST_Item *w, EST_Relation &syllable,
65 EST_Relation &metricaltree);
66
67 void add_non_terminal_features(EST_Relation &r,
68 EST_Features &f);
69
70 void stress_factor1(EST_Utterance &u, const EST_String &base_stream,
71 const EST_String &m);
72 void stress_factor2(EST_Utterance &u, const EST_String &base_stream,
73 const EST_String &m);
74
75 void phrase_factor(EST_Utterance &u);
76
77 void main_stress(EST_Item *s);
78
79 void end_to_dur(EST_Relation &r);
80
81 void footing(EST_Item *n1);
82
83 LISP FT_Classic_Phrasify_Utt(LISP args);
84 LISP FT_Classic_POS_Utt(LISP args);
85 LISP FT_PParse_Utt(LISP args);
86 LISP FT_MultiParse_Utt(LISP utt);
87 void MultiParse(EST_Utterance &u);
88
89 void add_feature_string(EST_Relation &r, const EST_String &fname,
90 const EST_String &f);
91
92 void add_monotone_targets(EST_Utterance &u, float start_f0,
93 float end_f0);
94
95
96 void clear_feature(EST_Relation &r, const EST_String &name);
97
98 typedef
99 float (*local_cost_function)(const EST_Item *item1,
100 const EST_Item *item2);
101
102
103
104 void subword_phonology(EST_Utterance &word);
105 void lex_to_phones(EST_Utterance &u, const EST_String &relname);
106
107 bool dp_match(const EST_Relation &lexical,
108 const EST_Relation &surface,
109 EST_Relation &match,
110 local_cost_function lcf,
111 EST_Item *null_syl);
112
113 //bool dp_match(EST_Relation &a, EST_Relation &b, EST_Relation &c,
114 // local_cost_function lcf, const EST_String &null_sym);
115 void lex_to_phones(const EST_String &name, const EST_String &pos,
116 EST_Relation &phone);
117
118 void subword_metrical_tree(EST_Relation &syllable,
119 EST_Relation &metricaltree);
120
121
122 int syllabify_word(EST_Item *nw, EST_Relation &phone,
123 EST_Relation &sylstructure, EST_Relation &syl, int flat);
124
125 void subword_metrical_tree(EST_Item *w, EST_Relation &syllable,
126 EST_Relation &metricaltree);
127
128
129 /*void phonemic_trans(EST_Relation &trans)
130 {
131 EST_Item *s, *n;
132 EST_String a;
133
134 // cout << "trans: " << trans << endl;
135
136 for (s = trans.head(); s; s = s->next())
137 {
138 n = s->next();
139 // cout << *s;
140 if (s->S("name").contains("cl"))
141 {
142 a = s->S("name").before("cl");
143 if ((next(s) != 0) && (next(s)->S("name") == a))
144 trans.remove_item(s);
145 else if ((next(s) != 0) && (a == "dcl" )
146 && (next(s)->S("name") == "jh"))
147 trans.remove_item(s);
148 else if ((next(s) != 0) && (a == "tcl" )
149 && (next(s)->S("name") == "ch"))
150 trans.remove_item(s);
151 else
152 s->set("name", a);
153 // cout << "here1: " << a << "\n";
154 // s->set("name", s->S("name").before("cl"));
155
156 }
157 }
158 }
159 */
160
161
prev_match(EST_Item * n)162 EST_Item *prev_match(EST_Item *n)
163 {
164 EST_Item *p = n->prev();
165 if (p == 0)
166 return 0;
167
168 if (daughter1(p->as_relation("Match")) == 0)
169 prev_match(p);
170
171 return daughter1(p->as_relation("Match"));
172 }
173
insert_schwa(EST_Item * n)174 void insert_schwa(EST_Item *n)
175 {
176 EST_Item *p, *s;
177 float pp_end = 0;
178 float schwa_length = 0.01;
179
180 if ((p = prev_match(n)) == 0)
181 {
182 cout << "Couldn't insert dummy schwa after " << *n << endl;
183 return;
184 }
185
186 p = p->as_relation("SurfacePhone");
187 pp_end = (prev(p) != 0) ? prev(p)->F("end",0.0) : 0.0;
188
189 s = p->insert_after();
190
191 s->set("name", "ax");
192 s->set("stress_num", "0");
193
194 if ((p->F("end",0) - pp_end) < schwa_length)
195 schwa_length = p->F("dur") / 2.0;
196
197 s->set("end", p->F("end",0));
198 p->set("end", p->F("end",0) - schwa_length);
199 s->set("start", p->F("end",0));
200
201 s->set("df", phone_def.A("ax"));
202
203 // cout << "end 1:" << s->f("end") << endl;
204 // cout << "end 2:" << p->f("end") << endl;
205 }
206
add_initial_silence(EST_Relation & lexical,EST_Relation & surface,EST_Relation & match)207 void add_initial_silence(EST_Relation &lexical, EST_Relation &surface,
208 EST_Relation &match)
209 {
210 EST_Item *s, *p, *n, *m;
211
212 s = lexical.head();
213 if ((s->f("name") != "pau") && (s->f("name") != "sil"))
214 {
215 p = s->insert_before();
216 p->set("name", "pau");
217 p->set("df", phone_def.A("pau"));
218
219 n = surface.head();
220 if ((n->f("name") == "pau") || (n->f("name") == "sil"))
221 {
222 m = match.head()->insert_before(p);
223 m->append_daughter(n);
224 }
225
226 }
227 }
228
add_even_segment_times(EST_Item * w,EST_Relation & phone)229 void add_even_segment_times(EST_Item *w, EST_Relation &phone)
230 {
231 EST_Item *s;
232 int i;
233 float start,dur=0,n,div;
234
235 start = w->F("start");
236 dur = w->F("end") - start;
237 n = (float)phone.length();
238 div = dur/n;
239
240 for (i = 0, s = phone.head(); s; s = s->next(), ++i)
241 {
242 s->set("start", start + div * (float) i);
243 s->set("end", start + div * (float) (i + 1));
244 }
245 }
246
#if 0
// NOTE: this whole function is compiled out by the surrounding #if 0.
//
// For every root of the "MetricalTree" relation it finds the first and
// last leaves that are in "IntonationSyllable", takes their parents'
// "Intonation" items as the first/last accent, ensures there is a
// "phrase_start" item before and a "phrase_end" item after them, and
// attaches both as daughters of a new "IntonationPhrase" item for
// that root.
//
// NOTE(review), should this code ever be re-enabled:
//  - `a` is read uninitialised in the else-branch below when the first
//    root has no accent;
//  - `first_accent->prev()` / `last_accent->next()` are dereferenced
//    without a null check;
//  - `last_accent` is never reset between roots (only `first_accent`);
//  - `exist` is assigned but never read.
static void add_trans_phrase_phrase(EST_Utterance &utt)
{
    EST_Item *s, *t, *a, *r;
    EST_Item *first_accent = 0, *last_accent = 0;
    bool exist;

    // This looks insanely complicated, but all it really does is
    // add phrase_start and phrase_end items to the root node of
    // each metrical tree and then places these in the right position
    // in the intonation relation.

    utt.create_relation("IntonationPhrase");

    for (s = utt.relation("MetricalTree", 1)->head(); s; s = s->next())
    {
        // scan the leaves under this root for accented syllables
        for (r = first_leaf_in_tree(s);
             r != next_leaf(last_leaf_in_tree(s)); r = next_leaf(r))
            if (r->in_relation("IntonationSyllable"))
            {
                if (first_accent == 0)
                    first_accent =
                        parent(r->as_relation("IntonationSyllable"))
                        ->as_relation("Intonation");
                last_accent =
                    parent(r->as_relation("IntonationSyllable"))
                    ->as_relation("Intonation");
            }

        exist = false;
        // cout << "\nroot node: " <<*s << endl;

        if (first_accent)
        {
            cout << "first accent: " << *first_accent << endl;
            a = first_accent->prev();

            if (a->S("name","") != "phrase_start")
                a = first_accent->insert_before();
        }
        else
        {
            // no accent under this root: continue from the previous `a`
            if (a == 0)
                a = utt.relation("Intonation")->prepend();
            else
                a = a->insert_after();
        }

        if (a->S("name","") != "phrase_start" ) // i.e. its a new one
        {
            a->set("name", "phrase_start");
            a->set("ev:f0", 100);
        }
        // re-write position as relative to metrical tree
        a->set("position", usf_int_start);
        // add this as daughter to root node
        t = utt.relation("IntonationPhrase")->append(s);
        t->append_daughter(a);
        exist = false;
        //cout << "appended phrase end\n";

        if (last_accent)
        {
            cout << "last accent: " << *last_accent << endl;
            a = last_accent->next();
            if (a->S("name","") != "phrase_end")
                a = last_accent->insert_after();
        }
        else
            a = a->insert_after();

        if (a->S("name","") != "phrase_end")
        {
            a->set("name", "phrase_end");
            a->set("ev:f0", 100);
        }
        // re-write position as relative to metrical tree
        a->set("position", usf_int_end);

        // add this as daughter to root node
        t->append_daughter(a);
        //cout << "appended phrase start\n";
        first_accent = 0; // trigger for first time operation of loop
    }

    // now join any other marked phrase_start/ends to intermediate
    // nodes in metrical tree.

    /* for (s = u.relation("Intonation", 1)->head(); s; s = s->next())
    {
    if (!s->in_relation("IntonationPhrase") &&
    !s->in_relation("IntonationSyllable"))
    {
    pos = s->F("position");



    }
    */

}
#endif
349
350
add_single_phrase(EST_Utterance & utt,EST_Item * t)351 void add_single_phrase(EST_Utterance &utt, EST_Item *t)
352 {
353 EST_Item *s=0, *a, *p;
354 float pos, max, d = 0, start = 0.0;
355
356 pos = t->F("time");
357 max = 100000.0;
358
359 for (p = utt.relation("Syllable")->head(); p; p = p->next())
360 {
361 if (t->S("name") == "phrase_end")
362 d = fabs(pos - p->F("end"));
363 else
364 d = fabs(pos - start);
365
366 if (d < max)
367 {
368 max = d;
369 s = p;
370 }
371 start = p->F("end");
372 }
373
374 /* if (s)
375 cout << "joining syllable " << *s << endl;
376 else
377 cout << "No legal syllable " << endl;
378 cout << "to " << *t << endl;
379
380 cout << "d = " << d << endl;
381 */
382
383 if (!s->in_relation("IntonationSyllable"))
384 a = utt.relation("IntonationSyllable")->append(s);
385 else
386 a = s->as_relation("IntonationSyllable");
387 a->append_daughter(t);
388 t->set("time_path", "IntonationSyllable");
389 t->set_function("position", "standard+unisyn_tilt_phrase_position");
390 }
391
add_times(EST_Relation & lexical,EST_Relation & surface,EST_Relation & match)392 void add_times(EST_Relation &lexical, EST_Relation &surface,
393 EST_Relation &match)
394 {
395 (void) surface;
396 (void) match;
397 EST_Item *s, *t, *p;
398 float prev_end, inc, first_end, last_end;
399 int i;
400
401 // first pass, copy times as appropriate, and find first
402 // and last defined ends
403 // This is hacky and certainly won't work for many cases
404
405 first_end = -1.0;
406 prev_end = 0.0;
407 last_end = 0.0;
408
409 // cout << "surface: " << surface << endl;
410
411 for (s = lexical.head(); s; s = s->next())
412 {
413 if ((t = daughter1(s->as_relation("Match"))) != 0)
414 {
415 s->set("end", t->F("end"));
416 s->set("start", t->F("start"));
417
418 last_end = t->F("end");
419 if (first_end < 0.0)
420 first_end = t->F("end");
421 }
422 }
423
424 if (!lexical.head()->f_present("end"))
425 {
426 lexical.head()->set("end", first_end / 2.0);
427 lexical.head()->set("start", 0.0);
428 }
429
430 if (!lexical.tail()->f_present("end"))
431 {
432 lexical.tail()->set("end", last_end + 0.01);
433 lexical.tail()->set("start", last_end);
434 }
435
436 for (s = lexical.head(); s; s = s->next())
437 {
438 if (!s->f_present("end"))
439 {
440 // cout << "missing end feature for " << *s << endl;
441 for (i = 1, p = s; p; p = p->next(), ++i)
442 if (p->f_present("end"))
443 break;
444 inc = (p->F("end") - prev_end) / ((float) i);
445 // cout << "inc is : " << inc << endl;
446
447 // cout << "stop phone is " << *p << endl;
448
449 for (i = 1; s !=p ; s = s->next(), ++i)
450 {
451 s->set("end", (prev_end + ((float) i * inc)));
452 s->set("start", (prev_end + ((float) (i - 1 )* inc)));
453 }
454 }
455 prev_end = s->F("end");
456 }
457 }
458
459
met_error(EST_Item * s)460 static void met_error(EST_Item *s)
461 {
462 cerr << "Illegally named daughters of metrical node\n"
463 << "daughter1 : " << *daughter1(s) << endl
464 << "daughter2 : " << *daughter2(s) << endl;
465 EST_error("");
466 }
467
legal_metrical_tree(EST_Item * s)468 void legal_metrical_tree(EST_Item *s)
469 {
470 if (s == 0)
471 return;
472
473 if ((daughter1(s) == 0) || (daughter2(s) == 0))
474 return;
475
476 if ((daughter1(s)->S("MetricalValue") == "s")
477 && (daughter2(s)->S("MetricalValue") != "w"))
478 met_error(s);
479 else if ((daughter1(s)->S("MetricalValue") == "w")
480 && (daughter2(s)->S("MetricalValue") != "s"))
481 met_error(s);
482 else if ((daughter1(s)->S("MetricalValue") != "w")
483 && (daughter1(s)->S("MetricalValue") != "s"))
484 met_error(s);
485
486 legal_metrical_tree(daughter1(s));
487 legal_metrical_tree(daughter2(s));
488 }
489
parse_words(EST_Utterance & utt)490 void parse_words(EST_Utterance &utt)
491 {
492 utt.create_relation("Token");
493
494 FT_Classic_POS_Utt(siod(&utt));
495 FT_Classic_Phrasify_Utt(siod(&utt));
496 MultiParse(utt);
497
498 utt.relation("Syntax")->remove_item_feature("pos_index");
499 utt.relation("Syntax")->remove_item_feature("pos_index_score");
500 utt.relation("Syntax")->remove_item_feature("phr_pos");
501 utt.relation("Syntax")->remove_item_feature("pbreak_index");
502 utt.relation("Syntax")->remove_item_feature("pbreak_index_score");
503 utt.relation("Syntax")->remove_item_feature("pbreak");
504 utt.relation("Syntax")->remove_item_feature("blevel");
505 utt.relation("Syntax")->remove_item_feature("prob");
506 }
507
binaryize_tree(EST_Item * t)508 void binaryize_tree(EST_Item *t)
509 {
510 // terminating condition
511 if (daughter1(t) == 0)
512 return;
513
514 // nodes with single children should be merged
515 if (daughter2(t) == 0)
516 {
517 // cout << "Single daughter: " << *t << endl;
518 EST_Item *d = daughter1(t);
519 move_sub_tree(d, t);
520 }
521
522 for (EST_Item *p = daughter1(t); p; p = p->next())
523 binaryize_tree(p);
524 }
525
binaryize_tree(EST_Utterance & utt,const EST_String & base_tree,const EST_String & new_tree)526 void binaryize_tree(EST_Utterance &utt, const EST_String &base_tree,
527 const EST_String &new_tree)
528 {
529 utt.create_relation(new_tree);
530 copy_relation(*utt.relation(base_tree), *utt.relation(new_tree));
531
532 for (EST_Item *p = utt.relation(new_tree)->head(); p; p = p->next())
533 binaryize_tree(p);
534 }
535
syntax_to_metrical_words(EST_Utterance & utt)536 void syntax_to_metrical_words(EST_Utterance &utt)
537 {
538 utt.create_relation("MetricalWord");
539 // copy syntax tree while merging single daughter nodes
540 binaryize_tree(utt, "Syntax", "MetricalWord");
541 // add strong and weak values
542 apply_nsr(utt, "MetricalWord");
543 }
544
add_metrical_functions(EST_Utterance & utt)545 void add_metrical_functions(EST_Utterance &utt)
546 {
547 // Note that we don't add "start" functions here as this depends on
548 // pause behaviour
549 add_feature_function(*utt.relation("Syllable"),
550 "vowel_start",
551 "unisyn_vowel_start");
552
553 add_feature_function(*utt.relation("Syllable"),
554 "end", "standard+unisyn_leaf_end");
555 add_feature_function(*utt.relation("Syllable"),
556 "start", "standard+unisyn_leaf_start");
557
558 for (EST_Item *s = utt.relation("Syllable")->head(); s; s = s->next())
559 s->set("time_path", "SylStructure");
560
561 EST_Features tf;
562 tf.set_function("end", "standard+unisyn_leaf_end");
563 tf.set_function("start","standard+unisyn_leaf_start");
564 tf.set_function("dur","standard+unisyn_duration");
565
566 tf.set("time_path", "MetricalTree");
567 tf.set("time_path", "MetricalTree");
568
569 // add_non_terminal_features(*utt.relation("MetricalTree"), tf);
570
571 tf.set("time_path", "SylStructure");
572 add_non_terminal_features(*utt.relation("SylStructure"), tf);
573
574 add_feature_function(*utt.relation("Segment"),
575 "dur",
576 "standard+duration");
577
578
579 }
580
auto_metrical_lex(EST_Utterance & utt)581 void auto_metrical_lex(EST_Utterance &utt)
582 {
583 LISP l_pdef;
584
585 utt.create_relation("Syllable");
586 utt.create_relation("Segment");
587
588 l_pdef = siod_get_lval("darpa_fs", NULL);
589 lisp_to_features(l_pdef, phone_def);
590
591 mettree_add_words(utt);
592
593 LISP lt = siod_get_lval("us_base_int", NULL);
594 EST_String base_int;
595 if (lt == NIL)
596 base_int = "Syllable";
597 else
598 {
599 const char *x = get_c_string(lt);
600 base_int = x;
601 }
602
603 // add_end_silences(*utt.relation("Segment"));
604
605 add_metrical_functions(utt);
606 }
607
extend_tree(EST_Item * m,EST_Item * p,const EST_String & terminal,const EST_String & second_tree)608 void extend_tree(EST_Item *m, EST_Item *p, const EST_String &terminal,
609 const EST_String &second_tree)
610 {
611 EST_Item *d, *e;
612
613 if (!daughter1(m))
614 {
615 if (m->in_relation(terminal)) // ie. really hit the bottom
616 return;
617 m = m->as_relation(second_tree); // swap to a new tree
618 }
619
620 for (d = daughter1(m); d; d = d->next())
621 {
622 e = p->append_daughter(d);
623 extend_tree(d, e, terminal, second_tree);
624 }
625 }
626
627
nsr(EST_Item * n)628 static void nsr(EST_Item *n)
629 {
630 EST_Item *left, *right;
631 left = daughter1(n);
632 right = daughter2(n);
633 if (left == 0)
634 return;
635 else
636 {
637 nsr(left);
638 left->set("MetricalValue","w");
639 }
640
641 if (right == 0)
642 return;
643 else
644 {
645 nsr(right);
646 right->set("MetricalValue","s");
647 }
648 }
649
apply_nsr(EST_Utterance & u,const EST_String & tree)650 static void apply_nsr(EST_Utterance &u, const EST_String &tree)
651 {
652 EST_Item *n;
653
654 for (n = u.relation(tree)->head(); n; n = n->next())
655 nsr(n);
656 }
657
other_daughter(EST_Item * parent,EST_Item * daughter)658 EST_Item *other_daughter(EST_Item *parent, EST_Item *daughter)
659 {
660 return (daughter1(parent) == daughter) ? daughter2(parent) :
661 daughter1(parent);
662 }
663
stress_factor1(EST_Item * s,int max_depth)664 static void stress_factor1(EST_Item *s, int max_depth)
665 {
666 EST_Item *a;
667 EST_String val, pad;
668 char *str;
669 long n, i;
670 float max;
671
672 val = "";
673
674 for (a = s; parent(a); a = parent(a))
675 if (a->f("MetricalValue") == "s")
676 val += "2";
677 else
678 val += "0";
679
680 // cout << "\nSyllable " << s << " has value " << val << endl;
681
682 if (val.length() < max_depth)
683 for (pad = "", i = 0; i < (max_depth - val.length()); ++i)
684 pad += "2";
685
686 val += pad;
687 // cout << "Syllable " << s << " has padded value " << val << endl;
688
689 str = strdup(val);
690 max = pow(3.0, (float)max_depth) - 1.0;
691 n = strtol(str, (char **)NULL, 3);
692 // cout << "decimal value: " << n;
693 // cout << " normalised: " << (float)n/max << endl;
694 s->set("StressFactor1", ((float)n/max));
695 }
696
find_apex(EST_Item * n,int & num_nodes)697 EST_Item * find_apex(EST_Item *n, int &num_nodes)
698 {
699 EST_Item *p;
700 p = parent(n);
701 if (p == 0)
702 return n;
703 if (daughter2(p) == n)
704 return find_apex(p, ++num_nodes);
705
706 return p;
707 }
708
find_leaf(EST_Item * n,int & num_nodes)709 void find_leaf(EST_Item *n, int &num_nodes)
710 {
711 if (n == 0)
712 return;
713 find_leaf(daughter1(n), ++num_nodes);
714 }
715
phrase_factor(EST_Item & syl,const EST_String & met_name)716 static void phrase_factor(EST_Item &syl, const EST_String &met_name)
717 {
718 EST_Item *p;
719 EST_String val, pad;
720 int num_nodes = 1;
721
722 // cout << "Terminal Syl = " << syl << " f:" << syl.f << endl;
723
724 p = find_apex(syl.as_relation(met_name), num_nodes);
725 // cout << "up nodes: " << num_nodes;
726 // cout << "Apex = " << *p << endl;
727 find_leaf(daughter2(p), num_nodes);
728 // cout << " downp nodes: " << num_nodes << endl;
729
730 syl.set("PhraseIndex", num_nodes);
731 }
732
max_tree_depth(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)733 static int max_tree_depth(EST_Utterance &u, const EST_String &base_stream,
734 const EST_String &mettree)
735 {
736 EST_Item *s, *a;
737 int depth;
738 int max_depth = 0;
739
740 for (s = u.relation(base_stream)->head(); s; s = s->next())
741 {
742 depth = 0;
743 for (a = s->as_relation(mettree); parent(a); a = parent(a))
744 ++depth;
745 if (depth > max_depth)
746 max_depth = depth;
747 }
748 return max_depth;
749 }
750
stress_factor1(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)751 void stress_factor1(EST_Utterance &u, const EST_String &base_stream,
752 const EST_String &mettree)
753 {
754 EST_Item *s;
755 int max_depth = max_tree_depth(u, base_stream, mettree);
756
757 for (s = u.relation(base_stream)->head(); s; s = s->next())
758 stress_factor1(s->as_relation(mettree), max_depth);
759 }
760
761
strong_daughter(EST_Item * n)762 EST_Item *strong_daughter(EST_Item *n)
763 {
764 if (daughter1(n) == 0)
765 return 0;
766 return (daughter1(n)->f("MetricalValue") == "s")
767 ? daughter1(n) : daughter2(n);
768 }
769
weak_daughter(EST_Item * n)770 EST_Item *weak_daughter(EST_Item *n)
771 {
772 if (daughter1(n) == 0)
773 return 0;
774 return (daughter1(n)->f("MetricalValue") == "w")
775 ? daughter1(n) : daughter2(n);
776 }
777
fill_mini_tree(EST_Item * s,int val)778 static void fill_mini_tree(EST_Item *s, int val)
779 {
780 if (s->f("MetricalValue") == "s")
781 s->set("StressVal", val);
782 else
783 s->set("StressVal", 0);
784 if (strong_daughter(s))
785 fill_mini_tree(strong_daughter(s), val);
786
787 if (weak_daughter(s))
788 fill_mini_tree(weak_daughter(s), val - 1);
789 }
790
stress_factor2(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)791 void stress_factor2(EST_Utterance &u, const EST_String &base_stream,
792 const EST_String &mettree)
793 {
794 EST_Item *s;
795 int sv = -1;
796 float b;
797 (void) base_stream;
798
799 s = u.relation(mettree)->head();
800 fill_mini_tree(s, sv);
801
802 // normalise values
803 sv = 0;
804 for (s = u.relation(base_stream)->head(); s; s = s->next())
805 sv = Lof(s->I("StressVal"), sv);
806
807 cout << "Max Stress: " << sv << endl;
808
809 for (s = u.relation(base_stream)->head(); s; s = s->next())
810 {
811 b = (float)(s->I("StressVal") - sv + 1);
812 if (s->f("MetricalValue") == "s")
813 s->set("StressFactor2", (b / float(sv)) * -1.0);
814 else
815 s->set("StressFactor2", 0);
816 }
817 }
818
phrase_factor(EST_Utterance & u,const EST_String & base_stream,const EST_String & mettree)819 void phrase_factor(EST_Utterance &u, const EST_String &base_stream,
820 const EST_String &mettree)
821 {
822 EST_Item *s;
823 float max_pf = 0;
824
825 for (s = u.relation(base_stream)->head(); s; s = s->next())
826 phrase_factor(*s, mettree);
827
828 for (s = u.relation(base_stream)->head(); s; s = s->next())
829 if (s->I("PhraseIndex") > max_pf)
830 max_pf = s->I("PhraseIndex");
831
832 for (s = u.relation(base_stream)->head(); s; s = s->next())
833 {
834 s->set("PhraseFactor",
835 (float)s->I("PhraseIndex")/max_pf);
836 // cout << *s << " pf = " <<
837 // s->F("PhraseFactor") << endl;
838 }
839
840 }
841
#if 0
// NOTE: both functions in this region are compiled out by #if 0.

// For every Word item whose "pos" is "punc": remove its parent node
// from the Syntax tree, move the sibling sub-tree (od) up onto the
// grandparent, and remove the word from the Word relation.
// NOTE(review): `w` is removed inside the loop and then advanced with
// w->next(); confirm that this is safe before re-enabling.
static void remove_punctuation(EST_Utterance &u)
{
    // The syntactic grammar has unary rules for the preterminals
    // these would make the metrical tree have an extra layer
    // at the word level. So here we remove that extra layer
    EST_Item *w;
    EST_Item *a, *b, *c, *od;

    for (w = u.relation("Word")->head(); w != 0; w = w->next())
    {
        if (w->f("pos") == "punc")
        {
            a = w->as_relation("Syntax");
            b = parent(a);
            c = parent(b);
            od = other_daughter(c, b);
            remove_item(b, "Syntax");
            move_sub_tree(od, c);
            remove_item(w, "Word");
        }
    }
}

// For every item of base_stream whose "StressFactor" exceeds threshold,
// append an "Accent" item to "IntSyl" above that item, record the
// stress factor as its "prominence" and append it to "Intonation".
static void add_intonation(EST_Utterance &u, const EST_String &base_stream,
                           float threshold)
{
    EST_Item *e, *s;

    cout << "Threshold = " << threshold << endl;

    for (s = u.relation(base_stream)->head(); s; s = s->next())
    {
        if (s->F("StressFactor") > threshold)
        {
            // cout << *s <<" **stress factor:" << s->F("StressFactor") << endl;
            e = u.relation("IntSyl")->append();
            e->insert_below(s);
            e->set_name("Accent");
            e->set("prominence", s->F("StressFactor"));
            u.relation("Intonation")->append(e);
        }
    }
}

#endif
888
add_monotone_targets(EST_Utterance & u,float start_f0,float end_f0)889 void add_monotone_targets(EST_Utterance &u, float start_f0,
890 float end_f0)
891 {
892 EST_Item *t;
893 float end;
894
895 end = u.relation("Segment")->tail()->f("end");
896
897 cout << "Phone ends\n";
898 cout << *u.relation("Segment");
899
900 cout << "last position is :" << end << endl;
901
902 u.create_relation("Target");
903
904 t = u.relation("Target")->append();
905 t->set("f0", start_f0);
906 t->set("pos", 0.0);
907
908 // temporary - should disappear when awb changes code
909 // t->set("name", ftoString(start_f0));
910 // t->set("end", 0.0);
911
912 t = u.relation("Target")->append();
913 t->set("f0", end_f0);
914 t->set("pos", end);
915
916 // temporary - should disappear when awb changes code
917 // t->set("name", ftoString(end_f0));
918 // t->set("end", end);
919 }
920
mettree_add_words(EST_Utterance & u)921 static void mettree_add_words(EST_Utterance &u)
922 {
923 EST_Utterance word;
924 EST_Item *w;
925
926 word.create_relation("Word");
927 word.create_relation("Segment");
928 word.create_relation("SylStructure");
929 word.create_relation("Syllable");
930 word.create_relation("WordStructure");
931
932 for (w = u.relation("Word")->head(); w != 0; w = w->next())
933 {
934 word.clear_relations();
935
936 cout << "N:";
937 cout << w->f("name") << " " << w->f("pos", "") << endl;
938 lex_to_phones(w->f("name"), w->f("pos", "0"),
939 *word.relation("Segment"));
940
941 EST_Item *nw = word.relation("Word")->append();
942 nw->set("name", w->S("name"));
943
944 syllabify_word(nw, *word.relation("Segment"),
945 *word.relation("SylStructure"),
946 *word.relation("Syllable"), 0);
947
948 subword_metrical_tree(nw, *word.relation("Syllable"),
949 *word.relation("WordStructure"));
950
951 utterance_merge(u, word, w, word.relation("Word")->head());
952 }
953 }
954
955 void add_metrical_nodes(EST_Utterance &u, EST_Item *n, LISP lpos);
956
strip_vowel_num(EST_String p)957 EST_String strip_vowel_num(EST_String p)
958 {
959 if (p.contains(RXint))
960 p = p.before(RXint);
961 return p;
962 }
963
964
percolate(EST_Item * start)965 static void percolate(EST_Item *start)
966 {
967 EST_Item *n;
968
969 for (n = start; n; n = parent(n))
970 {
971 // cout << "altering sister\n";
972 if (prev(n) != 0)
973 prev(n)->set("MetricalValue", "w");
974 else if (next(n) != 0)
975 next(n)->set("MetricalValue", "w");
976 }
977 }
978
979
main_stress(EST_Item * s)980 void main_stress(EST_Item *s)
981 {
982 EST_Item *n;
983
984 for (n = s; parent(n); n = parent(n))
985 n->set("MetricalValue", "s");
986
987 n = s;
988 percolate(n);
989 }
990
footing(EST_Item * n1)991 void footing(EST_Item *n1)
992 {
993 EST_Item *n2, *n3, *n4, *p1, *p3, *r;
994
995 r = parent(n1); // root node
996 p1 = daughter2(r);
997 n2 = daughter1(p1);
998 n3 = daughter2(p1);
999
1000 if (p1 == 0)
1001 {
1002 cerr << "Error: Empty 3rd node after " << *n1 << " in footing\n";
1003 return;
1004 }
1005 if (n2 == 0)
1006 {
1007 cerr << "Error: Empty 3rd node after " << *n1 << " in footing\n";
1008 return;
1009 }
1010 if (n3 == 0)
1011 {
1012 cerr << "Error: Empty 3rd node after " << *n1 << " in footing\n";
1013 return;
1014 }
1015
1016 cout << "n1: " << *n1 << endl << endl;
1017 cout << "n2: " << *n2 << endl << endl;
1018 cout << "n3: " << *n3 << endl << endl;
1019 cout << "p1: " << *p1 << endl << endl;
1020
1021 p3 = n1->insert_parent();
1022 n1 = daughter1(p3);
1023 n4 = p3->append_daughter();
1024
1025 move_sub_tree(n2, n4);
1026 move_sub_tree(n3, p1);
1027
1028 p3->set("MetricalValue", "w");
1029 p3->set("Altered_a", "DONE_W");
1030
1031 n1->set("MetricalValue", "s");
1032 n1->set("Altered_b", "DONE_S");
1033 }
1034
1035
1036 #if 0
1037 LISP FT_metrical_data(LISP lf_word, LISP lf_seg, LISP lf_int)
1038 {
1039
1040 EST_Utterance word, *u = new EST_Utterance;
1041 EST_Relation phone;
1042 EST_Item *s, *p, *w, *nw, *n;
1043 EST_StrList plist;
1044 float phone_start, mid;
1045 LISP lutt;
1046 EST_Track fz;
1047 int i;
1048
1049 u->create_relation("Word");
1050 u->create_relation("Segment");
1051 u->create_relation("Syllable");
1052 u->create_relation("MetricalTree");
1053 u->create_relation("LexicalMetricalTree");
1054 u->create_relation("SurfacePhone");
1055 u->create_relation("Surface");
1056 u->create_relation("Intonation");
1057 u->create_relation("IntonationSyllable");
1058
1059 EST_String segfile = get_c_string(lf_seg);
1060 EST_String wordfile = get_c_string(lf_word);
1061
1062 if (u->relation("Word")->load(wordfile) != format_ok)
1063 {
1064 cerr << "Couldn't load file " << get_c_string(lf_word) << endl;
1065 festival_error();
1066 }
1067
1068 if ((segfile != "dummy") &&(u->relation("Segment")->
1069 load(get_c_string(lf_seg)) != format_ok))
1070 {
1071 cerr << "Couldn't load file " << get_c_string(lf_seg) << endl;
1072 festival_error();
1073 }
1074
1075 if (lf_int != NIL)
1076 if (u->relation("Intonation")->load(get_c_string(lf_int)) != format_ok)
1077 {
1078 cerr << "Couldn't load file " << get_c_string(lf_int) << endl;
1079 festival_error();
1080 }
1081
1082 u->f.set("fileroot", basename(wordfile, "*"));
1083
1084 // cout << "Words: " << *u->relation("Word");
1085
1086 if (segfile != "dummy")
1087 phonemic_trans(*u->relation("Segment"));
1088 // u->relation("Intonation")->load(get_c_string(lf_int));
1089
1090 // tmp hack
1091 float prev_end = 0.0;
1092
1093 for (w = u->relation("Word")->head(); w != 0; w = n)
1094 {
1095 n = w->next();
1096 // w->set("start", prev_end);
1097 w->f_remove("end");
1098 // prev_end = w->F("end");
1099 if ((w->f("name") == "sil") || (w->f("name") == "pau"))
1100 u->relation("Word")->remove_item(w);
1101 }
1102
1103 gc_protect(&lutt);
1104 lutt = siod_make_utt(u);
1105
1106 cout << *u->relation("Word") << endl;
1107
1108 FT_POS_Utt(lutt);
1109 FT_Phrasify_Utt(lutt);
1110 MultiParse(*u);
1111
1112 // remove_punctuation(*u);
1113
1114 // Copy Syntax tree into a new Metrical Tree
1115 copy_relation(*u->relations.val("Syntax"),
1116 *u->relations.val("MetricalTree"));
1117 // flatten preterminal unary rules
1118 flatten_preterminals(*u);
1119
1120 apply_nsr(*u);
1121
1122 copy_relation(*u->relations.val("MetricalTree"),
1123 *u->relations.val("LexicalMetricalTree"));
1124
1125 word.create_relation("Word");
1126 word.create_relation("Match");
1127 word.create_relation("NewMatch");
1128 word.create_relation("Segment");
1129 word.create_relation("SurfacePhone");
1130 word.create_relation("LexicalSylStructure");
1131 word.create_relation("SurfaceSylStructure");
1132 word.create_relation("LexicalSyllable");
1133 word.create_relation("SurfaceSyllable");
1134
1135 word.create_relation("LexicalMetricalTree");
1136 word.create_relation("SurfaceMetricalTree");
1137
1138 phone_start = 0.0;
1139
1140 // Note starts are hardwired here because feature function thing
1141 // isn't fully operational and because deleting silence messes
1142 // it up.
1143
1144 // u->save("zz_parse.utt", "est");
1145
1146 if (segfile != "dummy")
1147 {
1148 for (s = u->relation("Segment")->head(); s; s = s->next())
1149 {
1150 s->set("start", phone_start);
1151 phone_start = s->F("end");
1152 }
1153 phone_start = 0.0;
1154
1155 s = u->relation("Segment")->head();
1156 if ((s->f("name") == "pau") || (s->f("name") == "sil"))
1157 {
1158 w = u->relation("SurfacePhone")->append();
1159 w->set("name", "pau");
1160 w->set("end", s->F("end"));
1161 w->set("start", s->F("start"));
1162 }
1163 }
1164
1165 // cout <<"Surface 1:" << *u->relation("SurfacePhone") << endl;
1166
1167 for (i = 0, w = u->relation("Word")->head(); w != 0; w = w->next(), ++i)
1168 {
1169 word.clear_relations();
1170
1171 lex_to_phones(w->f("name"), w->f("pos"),
1172 *word.relation("Segment"));
1173
1174 if (segfile == "dummy")
1175 *word.relation("SurfacePhone") = *word.relation("Segment");
1176 else
1177 trans_to_phones(w, *u->relation("Segment"),
1178 *word.relation("SurfacePhone"));
1179
1180 // cout << "lex phones: " << *word.relation("LexicalPhone") << endl;
1181 // cout << "sur phones: " << *word.relation("SurfacePhone") << endl;
1182
1183 if (siod_get_lval("mettree_phones_debug", NULL) != NIL)
1184 {
1185 cout << "phones for word" << *w << endl;
1186 cout << *word.relation("SurfacePhone") << endl;
1187 }
1188
1189 nw = word.relation("Word")->append();
1190 nw->set("name", w->S("name"));
1191
1192 syllabify_word(nw, *word.relation("LexicalPhone"),
1193 *word.relation("LexicalSylStructure"),
1194 *word.relation("LexicalSyllable"));
1195
1196 subword_metrical_tree(nw, *word.relation("LexicalSyllable"),
1197 *word.relation("LexicalMetricalTree"));
1198
1199 if (siod_get_lval("mettree_debug", NULL) != NIL)
1200 word.save("word_lex.utt", "est");
1201
1202 // copy_relation(*word.relation("LexicalMetricalTree"),
1203 // *word.relation("HackMT"));
1204
1205 EST_Item xx;
1206 dp_match(*word.relation("LexicalPhone"),
1207 *word.relation("SurfacePhone"),
1208 *word.relation("Match"), local_cost, &xx);
1209
1210 if (syllabify_word(nw, *word.relation("SurfacePhone"),
1211 *word.relation("SurfaceSylStructure"),
1212 *word.relation("SurfaceSyllable")) < 1)
1213 {
1214 cerr << "Pronuciation for \"" << w->S("name")
1215 << "\" doesn't contain a vowel: " <<
1216 *word.relation("SurfacePhone") << endl;
1217 // festival_error();
1218 }
1219
1220 fix_syllables(nw, word);
1221
1222 subword_metrical_tree(nw, *word.relation("SurfaceSyllable"),
1223 *word.relation("SurfaceMetricalTree"));
1224
1225
1226 if (siod_get_lval("mettree_debug_word", NULL) != NIL)
1227 word.save("word_dp.utt", "est");
1228
1229 if (siod_get_lval("mettree_debug_word", NULL) != NIL)
1230 if (get_c_int(siod_get_lval("mettree_debug_word", NULL)) == i)
1231 word.save("word_nth.utt", "est");
1232
1233
1234 utterance_merge(*u, word, w, "LexicalMetricalTree");
1235 }
1236
1237 // u->save("zz_parse2.utt", "est");
1238
1239 // u->save("test.utt");
1240
1241 // cout <<"Surface 2:" << *u->relation("SurfacePhone") << endl;
1242
1243 add_initial_silence(*u->relation("LexicalPhone"),
1244 *u->relation("SurfacePhone"),
1245 *u->relation("Match"));
1246
1247 // cout <<"Surface 3:" << *u->relation("SurfacePhone") << endl;
1248
1249 add_times(*u->relation("LexicalPhone"), *u->relation("SurfacePhone"),
1250 *u->relation("Match"));
1251
1252 u->relation("LexicalPhone")->f.set("timing_style", "segment");
1253 u->relation("SurfacePhone")->f.set("timing_style", "segment");
1254 // u->relation("Word")->f.set("timing_style", "segment");
1255
1256 u->relation("LexicalSyllable")->f.set("timing_style", "segment");
1257 u->relation("LexicalSyllable")->f.set("time_path",
1258 "LexicalSylStructure");
1259
1260 // u->relation("LexicalSylStructure")->f.set("timing_style", "segment");
1261 // u->relation("LexicalSylStructure")->f.set("time_relation",
1262 // "LexicalPhone");
1263
1264
1265 u->relation("LexicalMetricalTree")->f.set("timing_style", "segment");
1266
1267
1268 // if (lf_int != NIL)
1269 // add_feature_function(*u->relation("LexicalSyllable"),"vowel_start",
1270 // vowel_start_time);
1271
1272 // add_feature_function(*u->relation("LexicalPhone"), "start",
1273 // ff_start_time);
1274 // add_feature_function(*u->relation("SurfacePhone"), "start",
1275 // ff_start_time);
1276 // add_feature_function(*u->relation("SurfacePhone"), "dur",
1277 // duration_time);
1278
1279 // add_feature_function(*u->relation("LexicalSyllable"),"end", leaf_end_time);
1280
1281 EST_Features tf;
1282 tf.set("time_path", "LexicalMetricalTree");
1283 tf.set("end", leaf_end_time);
1284
1285 // add_feature_string(*u->relation("LexicalMetricalTree"), "time_path",
1286 // "LexicalMetricalTree");
1287 // add_feature_string(*u->relation("LexicalSylStructure"), "time_path",
1288 // "LexicalSylStructure");
1289 //
1290
1291 add_non_terminal_features(*u->relation("LexicalMetricalTree"),
1292 tf);
1293
1294 tf.set("time_path", "LexicalSylStructure");
1295
1296 add_non_terminal_features(*u->relation("LexicalSylStructure"),
1297 tf);
1298
1299
1300 // add_feature_function(*u->relation("LexicalSyllable"),"start",
1301 // ff_start_time);
1302 // add_feature_function(*u->relation("LexicalSyllable"),"dur", duration_time);
1303
1304
1305 // cout << "ADDED Features to phone\n\n";
1306 // cout << *(u->relation("LexicalPhone")) << endl << endl;
1307
1308 // cout << "ADDED Features\n\n";
1309 // cout << *(u->relation("LexicalSyllable"));
1310
1311 // cout << "\nfinished\n\n";
1312
1313 // if (lf_int != NIL)
1314 // add_trans_intonation(*u);
1315
1316 // cout <<"Lexical 3:" << *u->relation("LexicalPhone") << endl;
1317
1318 // end_to_dur(*u->relation("SurfacePhone"));
1319 // end_to_dur(*u->relation("LexicalPhone"));
1320
1321
1322
1323
1324 // cout <<"Lexical 3:" << *u->relation("LexicalPhone") << endl;
1325
1326 // clear_feature(*u->relation("SurfacePhone"), "end");
1327 // clear_feature(*u->relation("LexicalPhone"), "end");
1328
1329 if (siod_get_lval("mettree_debug", NULL) != NIL)
1330 u->save("met_data.utt", "est");
1331
1332 gc_unprotect(&lutt);
1333
1334 // u->save("zz_parse3.utt", "est");
1335
1336 return lutt;
1337 }
1338 #endif
1339