1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*                                                                       */
34 /*                 Author: Paul Taylor                                   */
35 /*                   Date: 1998, 1999                                    */
36 /* --------------------------------------------------------------------- */
37 /*            LPC residual synthesis alternative version                 */
38 /*                                                                       */
39 /*************************************************************************/
40 
41 #include "siod.h"
42 #include "EST.h"
43 #include "us_diphone.h"
44 #include "Phone.h"
45 
46 extern USDiphIndex *diph_index;
47 
dur_to_end(EST_Relation & r)48 void dur_to_end(EST_Relation &r)
49 {
50     float prev_end = 0;
51 
52     for (EST_Item *p = r.head(); p ; p = p->next())
53     {
54 	p->set("end", p->F("dur") + prev_end);
55 	prev_end = p->F("end");
56     }
57 }
58 
add_end_silences(EST_Relation & segment,EST_Relation & target)59 void add_end_silences(EST_Relation &segment, EST_Relation &target)
60 {
61     EST_Item *t, *n;
62     float shift = 0.0;
63     const float pause_duration = 0.1;
64 
65     t = segment.head();
66     if (!ph_is_silence(t->f("name")))
67     {
68 	n = t->insert_before();
69 	n->set("name", ph_silence());
70 	n->set("dur", pause_duration);
71 	shift += pause_duration;
72     }
73 
74     t = segment.tail();
75     if (!ph_is_silence(t->S("name")))
76     {
77 	n = t->insert_after();
78 	n->set("name", ph_silence());
79 	n->set("dur", pause_duration);
80 	shift += pause_duration;
81     }
82     dur_to_end(segment);
83 
84     target.tail()->set("pos", (target.tail()->F("pos") + shift));
85 }
86 
add_end_silences(EST_Relation & segment)87 void add_end_silences(EST_Relation &segment)
88 {
89   EST_Item *t, *n;
90 
91   t = segment.head();
92   if (!ph_is_silence(t->S("name")))
93     {
94       n = t->insert_before();
95       n->set("name", ph_silence());
96     }
97 
98   t = segment.tail();
99   if (!ph_is_silence(t->S("name")))
100     {
101       n = t->insert_after();
102       n->set("name", ph_silence());
103     }
104 }
105 
parse_diphone_times(EST_Relation & diphone_stream,EST_Relation & source_lab)106 void parse_diphone_times(EST_Relation &diphone_stream,
107 			 EST_Relation &source_lab)
108 {
109     EST_Item *s, *u;
110     EST_Track *pm;
111     int e_frame, m_frame = 0;
112     float dur_1 = 0.0, dur_2 = 0.0, p_time;
113     float t_time = 0.0, end;
114     p_time = 0.0;
115 
116     for (s = source_lab.head(), u = diphone_stream.head(); u; u = u->next(),
117 	 s = s->next())
118     {
119 	pm = track(u->f("coefs"));
120 
121 	e_frame = pm->num_frames() - 1;
122 	m_frame = u->I("middle_frame");
123 
124         if (m_frame < 0) m_frame=0;
125 	dur_1 = pm->t(m_frame);
126         if (e_frame < m_frame) e_frame=m_frame;
127 	dur_2 = pm->t(e_frame) - dur_1;
128 
129 	s->set("source_end", (dur_1 + p_time));
130 
131 	p_time = s->F("source_end") + dur_2;
132 
133 	end = dur_1 + dur_2 + t_time;
134 	t_time = end;
135 	u->set("end", t_time);
136     }
137     if (s)
138 	s->set("source_end", (dur_2 + p_time));
139 }
140 
load_separate_diphone(int unit,bool keep_full,const EST_String & cut_type)141 void load_separate_diphone(int unit, bool keep_full,
142 			   const EST_String &cut_type)
143 {
144     // Load in the coefficients and signame for this diphone
145     // It caches the results in the diphone index entry, though
146     // someone else may clear them.  Note the full file is loaded
147     // each time which isn't optimal if there are multiple diphones
148     // is the same file
149     int samp_start, samp_end;
150     int pm_start, pm_end, pm_middle;
151     EST_Track full_coefs, dcoefs, *coefs;
152 //    float q_start, q_middle, q_end;
153 
154     if (full_coefs.load(diph_index->coef_dir + "/"
155 			+ diph_index->diphone[unit].S("filename")
156 			+ diph_index->coef_ext) != format_ok)
157     {
158 	cerr << "US DB: failed to read coefs file from " <<
159 	    diph_index->coef_dir + "/"
160 		+ diph_index->diphone[unit].S("filename")
161 		    + diph_index->coef_ext << endl;
162 	EST_error("");
163     }
164 
165     pm_start = full_coefs.index(diph_index->diphone[unit].f("start"));
166     pm_middle = full_coefs.index(diph_index->diphone[unit].f("middle"));
167     pm_end = full_coefs.index(diph_index->diphone[unit].f("end"));
168 
169     // option for taking half a diphone only
170     if (cut_type == "first_half")
171 	pm_end = pm_middle;
172     else if (cut_type == "second_half")
173 	pm_start = pm_middle;
174 
175     // find time of mid-point, i.e. boundary between phones
176     full_coefs.sub_track(dcoefs, pm_start, pm_end - pm_start + 1, 0, EST_ALL);
177     // Copy coefficients so the full coeffs can be safely deleted
178     coefs = new EST_Track(dcoefs);
179     for (int j = 0; j < dcoefs.num_frames(); ++j)
180 	coefs->t(j) = dcoefs.t(j) - full_coefs.t(Gof((pm_start - 1), 0));
181 
182     diph_index->diphone[unit].set("first_dur",
183 				   full_coefs.t(pm_middle) -
184 				   full_coefs.t(pm_start));
185 
186     diph_index->diphone[unit].set("second_dur",
187 				   full_coefs.t(pm_end) -
188 				   full_coefs.t(pm_middle));
189 
190     if (keep_full)
191     {
192 	EST_Track *f = new EST_Track;
193 	*f = full_coefs;
194 	diph_index->diphone[unit].set_val("full_coefs",est_val(f));
195     }
196 
197     diph_index->diphone[unit].set_val("coefs", est_val(coefs));
198     diph_index->diphone[unit].set("middle_frame", pm_middle - pm_start -1);
199 
200     EST_Wave full_sig, sub_sig;
201 
202     if (diph_index->sig_dir == "none")
203 	return;
204 
205     if (full_sig.load(diph_index->sig_dir + "/"
206 		      + diph_index->diphone[unit].f("filename")
207 		      + diph_index->sig_ext) != format_ok)
208     {
209 	cerr << "US DB: failed to read signal file from " <<
210 	    diph_index->sig_dir + "/"
211 		+ diph_index->diphone[unit].f("filename")
212 		    + diph_index->sig_ext << endl;
213 	EST_error("");
214     }
215 
216     // go to the periods before and after
217     samp_start = (int)(full_coefs.t(Gof((pm_start - 1), 0))
218 		       * (float)full_sig.sample_rate());
219     if (pm_end+1 < full_coefs.num_frames())
220 	pm_end++;
221 
222     samp_end = (int)(full_coefs.t(pm_end) * (float)full_sig.sample_rate());
223     full_sig.sub_wave(sub_sig, samp_start, samp_end - samp_start + 1);
224     EST_Wave *sig = new EST_Wave(sub_sig);
225 
226     diph_index->diphone[unit].set_val("sig", est_val(sig));
227 
228     if (keep_full)
229     {
230 	EST_Wave *s = new EST_Wave;
231 	*s = full_sig;
232 	diph_index->diphone[unit].set_val("full_sig", est_val(s));
233     }
234 }
235 
load_full_diphone(int unit)236 void load_full_diphone(int unit)
237 {
238     // Load in the coefficients and signame for this diphone
239     // It caches the results in the diphone index entry, though
240     // someone else may clear them.  Note the full file is loaded
241     // each time which isn't optimal if there are multiple diphones
242     // is the same file
243     int pm_start, pm_end, pm_middle;
244     EST_Track *full_coefs;
245 
246     full_coefs = new EST_Track;
247 
248     if (full_coefs->load(diph_index->coef_dir + "/"
249 			+ diph_index->diphone[unit].f("filename")
250 			+ diph_index->coef_ext) != format_ok)
251     {
252 	cerr << "US DB: failed to read coefs file from " <<
253 	    diph_index->coef_dir + "/"
254 		+ diph_index->diphone[unit].f("filename")
255 		    + diph_index->coef_ext << endl;
256 	EST_error("");
257     }
258 
259     pm_start = full_coefs->index(diph_index->diphone[unit].f("start"));
260     pm_middle = full_coefs->index(diph_index->diphone[unit].f("middle"));
261     pm_end = full_coefs->index(diph_index->diphone[unit].f("end"));
262 
263     diph_index->diphone[unit].set_val("full_coefs", est_val(full_coefs));
264 
265     EST_Wave *full_sig = new EST_Wave;
266 
267     if (full_sig->load(diph_index->sig_dir + "/"
268 		      + diph_index->diphone[unit].f("filename")
269 		      + diph_index->sig_ext) != format_ok)
270     {
271 	cerr << "US DB: failed to read signal file from " <<
272 	    diph_index->sig_dir + "/"
273 		+ diph_index->diphone[unit].f("filename")
274 		    + diph_index->sig_ext << endl;
275 	EST_error("");
276     }
277     diph_index->diphone[unit].set_val("full_sig", est_val(full_sig));
278 }
279