1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* */
34 /* Author: Paul Taylor */
35 /* Date: 1998, 1999 */
36 /* --------------------------------------------------------------------- */
37 /* LPC residual synthesis alternative version */
38 /* */
39 /*************************************************************************/
40
41 #include "siod.h"
42 #include "EST.h"
43 #include "us_diphone.h"
44 #include "Phone.h"
45
46 extern USDiphIndex *diph_index;
47
dur_to_end(EST_Relation & r)48 void dur_to_end(EST_Relation &r)
49 {
50 float prev_end = 0;
51
52 for (EST_Item *p = r.head(); p ; p = p->next())
53 {
54 p->set("end", p->F("dur") + prev_end);
55 prev_end = p->F("end");
56 }
57 }
58
add_end_silences(EST_Relation & segment,EST_Relation & target)59 void add_end_silences(EST_Relation &segment, EST_Relation &target)
60 {
61 EST_Item *t, *n;
62 float shift = 0.0;
63 const float pause_duration = 0.1;
64
65 t = segment.head();
66 if (!ph_is_silence(t->f("name")))
67 {
68 n = t->insert_before();
69 n->set("name", ph_silence());
70 n->set("dur", pause_duration);
71 shift += pause_duration;
72 }
73
74 t = segment.tail();
75 if (!ph_is_silence(t->S("name")))
76 {
77 n = t->insert_after();
78 n->set("name", ph_silence());
79 n->set("dur", pause_duration);
80 shift += pause_duration;
81 }
82 dur_to_end(segment);
83
84 target.tail()->set("pos", (target.tail()->F("pos") + shift));
85 }
86
add_end_silences(EST_Relation & segment)87 void add_end_silences(EST_Relation &segment)
88 {
89 EST_Item *t, *n;
90
91 t = segment.head();
92 if (!ph_is_silence(t->S("name")))
93 {
94 n = t->insert_before();
95 n->set("name", ph_silence());
96 }
97
98 t = segment.tail();
99 if (!ph_is_silence(t->S("name")))
100 {
101 n = t->insert_after();
102 n->set("name", ph_silence());
103 }
104 }
105
parse_diphone_times(EST_Relation & diphone_stream,EST_Relation & source_lab)106 void parse_diphone_times(EST_Relation &diphone_stream,
107 EST_Relation &source_lab)
108 {
109 EST_Item *s, *u;
110 EST_Track *pm;
111 int e_frame, m_frame = 0;
112 float dur_1 = 0.0, dur_2 = 0.0, p_time;
113 float t_time = 0.0, end;
114 p_time = 0.0;
115
116 for (s = source_lab.head(), u = diphone_stream.head(); u; u = u->next(),
117 s = s->next())
118 {
119 pm = track(u->f("coefs"));
120
121 e_frame = pm->num_frames() - 1;
122 m_frame = u->I("middle_frame");
123
124 if (m_frame < 0) m_frame=0;
125 dur_1 = pm->t(m_frame);
126 if (e_frame < m_frame) e_frame=m_frame;
127 dur_2 = pm->t(e_frame) - dur_1;
128
129 s->set("source_end", (dur_1 + p_time));
130
131 p_time = s->F("source_end") + dur_2;
132
133 end = dur_1 + dur_2 + t_time;
134 t_time = end;
135 u->set("end", t_time);
136 }
137 if (s)
138 s->set("source_end", (dur_2 + p_time));
139 }
140
load_separate_diphone(int unit,bool keep_full,const EST_String & cut_type)141 void load_separate_diphone(int unit, bool keep_full,
142 const EST_String &cut_type)
143 {
144 // Load in the coefficients and signame for this diphone
145 // It caches the results in the diphone index entry, though
146 // someone else may clear them. Note the full file is loaded
147 // each time which isn't optimal if there are multiple diphones
148 // is the same file
149 int samp_start, samp_end;
150 int pm_start, pm_end, pm_middle;
151 EST_Track full_coefs, dcoefs, *coefs;
152 // float q_start, q_middle, q_end;
153
154 if (full_coefs.load(diph_index->coef_dir + "/"
155 + diph_index->diphone[unit].S("filename")
156 + diph_index->coef_ext) != format_ok)
157 {
158 cerr << "US DB: failed to read coefs file from " <<
159 diph_index->coef_dir + "/"
160 + diph_index->diphone[unit].S("filename")
161 + diph_index->coef_ext << endl;
162 EST_error("");
163 }
164
165 pm_start = full_coefs.index(diph_index->diphone[unit].f("start"));
166 pm_middle = full_coefs.index(diph_index->diphone[unit].f("middle"));
167 pm_end = full_coefs.index(diph_index->diphone[unit].f("end"));
168
169 // option for taking half a diphone only
170 if (cut_type == "first_half")
171 pm_end = pm_middle;
172 else if (cut_type == "second_half")
173 pm_start = pm_middle;
174
175 // find time of mid-point, i.e. boundary between phones
176 full_coefs.sub_track(dcoefs, pm_start, pm_end - pm_start + 1, 0, EST_ALL);
177 // Copy coefficients so the full coeffs can be safely deleted
178 coefs = new EST_Track(dcoefs);
179 for (int j = 0; j < dcoefs.num_frames(); ++j)
180 coefs->t(j) = dcoefs.t(j) - full_coefs.t(Gof((pm_start - 1), 0));
181
182 diph_index->diphone[unit].set("first_dur",
183 full_coefs.t(pm_middle) -
184 full_coefs.t(pm_start));
185
186 diph_index->diphone[unit].set("second_dur",
187 full_coefs.t(pm_end) -
188 full_coefs.t(pm_middle));
189
190 if (keep_full)
191 {
192 EST_Track *f = new EST_Track;
193 *f = full_coefs;
194 diph_index->diphone[unit].set_val("full_coefs",est_val(f));
195 }
196
197 diph_index->diphone[unit].set_val("coefs", est_val(coefs));
198 diph_index->diphone[unit].set("middle_frame", pm_middle - pm_start -1);
199
200 EST_Wave full_sig, sub_sig;
201
202 if (diph_index->sig_dir == "none")
203 return;
204
205 if (full_sig.load(diph_index->sig_dir + "/"
206 + diph_index->diphone[unit].f("filename")
207 + diph_index->sig_ext) != format_ok)
208 {
209 cerr << "US DB: failed to read signal file from " <<
210 diph_index->sig_dir + "/"
211 + diph_index->diphone[unit].f("filename")
212 + diph_index->sig_ext << endl;
213 EST_error("");
214 }
215
216 // go to the periods before and after
217 samp_start = (int)(full_coefs.t(Gof((pm_start - 1), 0))
218 * (float)full_sig.sample_rate());
219 if (pm_end+1 < full_coefs.num_frames())
220 pm_end++;
221
222 samp_end = (int)(full_coefs.t(pm_end) * (float)full_sig.sample_rate());
223 full_sig.sub_wave(sub_sig, samp_start, samp_end - samp_start + 1);
224 EST_Wave *sig = new EST_Wave(sub_sig);
225
226 diph_index->diphone[unit].set_val("sig", est_val(sig));
227
228 if (keep_full)
229 {
230 EST_Wave *s = new EST_Wave;
231 *s = full_sig;
232 diph_index->diphone[unit].set_val("full_sig", est_val(s));
233 }
234 }
235
load_full_diphone(int unit)236 void load_full_diphone(int unit)
237 {
238 // Load in the coefficients and signame for this diphone
239 // It caches the results in the diphone index entry, though
240 // someone else may clear them. Note the full file is loaded
241 // each time which isn't optimal if there are multiple diphones
242 // is the same file
243 int pm_start, pm_end, pm_middle;
244 EST_Track *full_coefs;
245
246 full_coefs = new EST_Track;
247
248 if (full_coefs->load(diph_index->coef_dir + "/"
249 + diph_index->diphone[unit].f("filename")
250 + diph_index->coef_ext) != format_ok)
251 {
252 cerr << "US DB: failed to read coefs file from " <<
253 diph_index->coef_dir + "/"
254 + diph_index->diphone[unit].f("filename")
255 + diph_index->coef_ext << endl;
256 EST_error("");
257 }
258
259 pm_start = full_coefs->index(diph_index->diphone[unit].f("start"));
260 pm_middle = full_coefs->index(diph_index->diphone[unit].f("middle"));
261 pm_end = full_coefs->index(diph_index->diphone[unit].f("end"));
262
263 diph_index->diphone[unit].set_val("full_coefs", est_val(full_coefs));
264
265 EST_Wave *full_sig = new EST_Wave;
266
267 if (full_sig->load(diph_index->sig_dir + "/"
268 + diph_index->diphone[unit].f("filename")
269 + diph_index->sig_ext) != format_ok)
270 {
271 cerr << "US DB: failed to read signal file from " <<
272 diph_index->sig_dir + "/"
273 + diph_index->diphone[unit].f("filename")
274 + diph_index->sig_ext << endl;
275 EST_error("");
276 }
277 diph_index->diphone[unit].set_val("full_sig", est_val(full_sig));
278 }
279