1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*                      Author :  Alan W Black                           */
34 /*                      Date   :  April 1996                             */
35 /*-----------------------------------------------------------------------*/
36 /*                                                                       */
37 /* Duration averages and default and tree                                */
38 /*                                                                       */
39 /*=======================================================================*/
40 #include <cstdio>
41 #include "festival.h"
42 #include "durationP.h"
43 
FT_Duration_Ave_Utt(LISP utt)44 LISP FT_Duration_Ave_Utt(LISP utt)
45 {
46     // Predict average duration on segments
47     EST_Utterance *u = get_c_utt(utt);
48     EST_Item *s;
49     float end=0.0, dur;
50     LISP ph_durs,ldur;
51     float stretch;
52 
53     *cdebug << "Duration Average module\n";
54 
55     ph_durs = siod_get_lval("phoneme_durations","no phoneme durations");
56 
57     for (s=u->relation("Segment")->first(); s != 0; s = s->next())
58     {
59 	ldur = siod_assoc_str(s->name(),ph_durs);
60 	stretch = dur_get_stretch_at_seg(s);
61 	if (ldur == NIL)
62 	{
63 	    cerr << "Phoneme: " << s->name() << " have no default duration "
64 		<< endl;
65 	    dur = 0.100;
66 	}
67 	else
68 	    dur = get_c_float(car(cdr(ldur)));
69 	end += (dur*stretch);
70 	s->set("end",end);
71     }
72 
73     return utt;
74 }
75 
FT_Duration_Def_Utt(LISP utt)76 LISP FT_Duration_Def_Utt(LISP utt)
77 {
78     // Predict fixed duration on segments
79     EST_Utterance *u = get_c_utt(utt);
80     EST_Item *s;
81     float end=0.0;
82     float stretch;
83 
84     *cdebug << "Duration Default module\n";
85 
86     for (s=u->relation("Segment")->first(); s != 0; s = s->next())
87     {
88 	stretch = dur_get_stretch_at_seg(s);
89 	end += 0.100*stretch;
90 	s->set("end",end);
91     }
92 
93     return utt;
94 }
95 
FT_Duration_Tree_Utt(LISP utt)96 LISP FT_Duration_Tree_Utt(LISP utt)
97 {
98     // Predict duration on segments using CART tree
99     EST_Utterance *u = get_c_utt(utt);
100     EST_Item *s;
101     float end=0.0, dur,stretch;
102     LISP tree;
103     EST_Val pdur;
104 
105     *cdebug << "Duration Tree module\n";
106 
107     tree = siod_get_lval("duration_cart_tree","no duration cart tree");
108 
109     for (s=u->relation("Segment")->first(); s != 0; s = s->next())
110     {
111 	pdur = wagon_predict(s,tree);
112 	stretch = dur_get_stretch_at_seg(s);
113 	if (pdur == 0.0)
114 	{
115 	    cerr << "Phoneme: " << s->name() << " tree predicted 0.0 changing it"
116 		<< endl;
117 	    dur = 0.050;
118 	}
119 	else
120 	    dur = (float)pdur;
121 	dur *= stretch;
122 	end += dur;
123 	s->set("end",end);
124     }
125 
126     return utt;
127 }
128 
// Accessors for one phone entry as returned by siod_assoc_str():
// PH_AVE reads the entry's second element and PH_STD its third, both as
// floats.  FT_Duration_Tree_ZScores_Utt uses them as the phone's average
// duration and standard deviation.
#define PH_AVE(PH_ENTRY) (get_c_float(car(cdr(PH_ENTRY))))
#define PH_STD(PH_ENTRY) (get_c_float(car(cdr(cdr(PH_ENTRY)))))
131 
FT_Duration_Tree_ZScores_Utt(LISP utt)132 LISP FT_Duration_Tree_ZScores_Utt(LISP utt)
133 {
134     // Predict duration on segments using CART tree
135     EST_Utterance *u = get_c_utt(utt);
136     EST_Item *s;
137     float end=0.0, dur,stretch;
138     LISP tree,dur_info,ph_info;
139     float pdur;
140     float ave, std;
141 
142     *cdebug << "Duration Tree ZScores module\n";
143 
144     tree = siod_get_lval("duration_cart_tree","no duration cart tree");
145     dur_info = siod_get_lval("duration_ph_info","no duration phone info");
146 
147     for (s=u->relation("Segment")->first(); s != 0; s = s->next())
148     {
149 	pdur = wagon_predict(s,tree);
150 	ph_info = siod_assoc_str(s->name(),dur_info);
151 	stretch = dur_get_stretch_at_seg(s);
152 	if (ph_info == NIL)
153 	{
154 	    cerr << "Phoneme: " << s->name() << " has no duration info\n";
155             ave = 0.080;
156             std = 0.020;
157 	}
158         else
159         {
160             ave = PH_AVE(ph_info);
161             std = PH_STD(ph_info);
162         }
163 	if ((pdur > 3) || (pdur < -3))
164 	{
165             //	    cerr << "Duration tree extreme for " << s->name() <<
166             //		" " << pdur << endl;
167 	    pdur = ((pdur < 0) ? -3 : 3);
168 	}
169 	s->set("dur_factor",pdur);
170 	dur = ave + (pdur*std);
171 	dur *= stretch;
172 	if (dur < 0.010)
173 	    dur = 0.010;  // just in case it goes wrong
174 	end += dur;
175 	s->set("end",end);
176     }
177 
178     return utt;
179 }
180