1 /* ----------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 /* developed by HTS Working Group */
4 /* http://hts-engine.sourceforge.net/ */
5 /* ----------------------------------------------------------------- */
6 /* */
7 /* Copyright (c) 2001-2011 Nagoya Institute of Technology */
8 /* Department of Computer Science */
9 /* */
10 /* 2001-2008 Tokyo Institute of Technology */
11 /* Interdisciplinary Graduate School of */
12 /* Science and Engineering */
13 /* */
14 /* All rights reserved. */
15 /* */
16 /* Redistribution and use in source and binary forms, with or */
17 /* without modification, are permitted provided that the following */
18 /* conditions are met: */
19 /* */
20 /* - Redistributions of source code must retain the above copyright */
21 /* notice, this list of conditions and the following disclaimer. */
22 /* - Redistributions in binary form must reproduce the above */
23 /* copyright notice, this list of conditions and the following */
24 /* disclaimer in the documentation and/or other materials provided */
25 /* with the distribution. */
26 /* - Neither the name of the HTS working group nor the names of its */
27 /* contributors may be used to endorse or promote products derived */
28 /* from this software without specific prior written permission. */
29 /* */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 /* POSSIBILITY OF SUCH DAMAGE. */
43 /* ----------------------------------------------------------------- */
44
45 #ifndef HTS106_SSTREAM_C
46 #define HTS106_SSTREAM_C
47
48 #ifdef __cplusplus
49 #define HTS106_SSTREAM_C_START extern "C" {
50 #define HTS106_SSTREAM_C_END }
51 #else
52 #define HTS106_SSTREAM_C_START
53 #define HTS106_SSTREAM_C_END
54 #endif /* __CPLUSPLUS */
55
56 HTS106_SSTREAM_C_START;
57
58 #include <stdlib.h>
59
60 /* hts_engine libraries */
61 #include "HTS106_hidden.h"
62
/* HTS106_set_duration: set duration from state duration probability distribution
 *
 * duration     : output array, receives `size` state durations (each >= 1)
 * mean, vari   : per-state duration mean and variance, `size` entries each
 * size         : number of states to fill
 * frame_length : requested total number of frames; 0.0 means "not specified"
 *
 * Returns the total number of frames distributed over the `size` states.
 */
static double HTS106_set_duration(int *duration, double *mean, double *vari, int size, double frame_length)
{
   int i, j;
   double best = 0.0;           /* smallest deviation found in the current search */
   double diff;
   double mean_sum, vari_sum;
   double rho;
   int sum = 0;
   int target_length;

   /* if the frame length is not specified, only the mean vector is used */
   if (frame_length == 0.0) {
      for (i = 0; i < size; i++) {
         duration[i] = (int) (mean[i] + 0.5);
         if (duration[i] < 1)
            duration[i] = 1;
         sum += duration[i];
      }
      return (double) sum;
   }

   /* get the target frame length */
   target_length = (int) (frame_length + 0.5);

   /* check the specified duration: every state needs at least one frame */
   if (target_length <= size) {
      if (target_length < size)
         HTS106_error(-1, "HTS106_set_duration: Specified frame length is too short.\n");
      for (i = 0; i < size; i++)
         duration[i] = 1;
      return (double) size;
   }

   /* RHO calculation */
   mean_sum = 0.0;
   vari_sum = 0.0;
   for (i = 0; i < size; i++) {
      mean_sum += mean[i];
      vari_sum += vari[i];
   }
   /* avoid dividing by a zero variance sum; fall back to the plain means */
   if (vari_sum != 0.0)
      rho = ((double) target_length - mean_sum) / vari_sum;
   else
      rho = 0.0;

   /* first estimation */
   for (i = 0; i < size; i++) {
      duration[i] = (int) (mean[i] + rho * vari[i] + 0.5);
      if (duration[i] < 1)
         duration[i] = 1;
      sum += duration[i];
   }

   /* loop estimation: move one frame at a time until the total matches */
   while (target_length != sum) {
      /* search the most flexible state and modify its duration */
      j = -1;
      if (target_length > sum) {
         for (i = 0; i < size; i++) {
            /* deviation from rho if this state were one frame longer */
            diff = rho - ((double) duration[i] + 1 - mean[i]) / vari[i];
            /* absolute value taken in floating point; abs() would truncate to int */
            if (diff < 0.0)
               diff = -diff;
            /* keep the state whose adjusted duration stays closest to rho */
            if (j < 0 || best > diff) {
               j = i;
               best = diff;
            }
         }
         sum++;
         duration[j]++;
      } else {
         /* sum > target_length > size here, so at least one duration exceeds 1 */
         for (i = 0; i < size; i++) {
            if (duration[i] > 1) {
               /* deviation from rho if this state were one frame shorter */
               diff = rho - ((double) duration[i] - 1 - mean[i]) / vari[i];
               if (diff < 0.0)
                  diff = -diff;
               if (j < 0 || best > diff) {
                  j = i;
                  best = diff;
               }
            }
         }
         sum--;
         duration[j]--;
      }
   }

   return (double) target_length;
}
144
145 /* HTS106_SStreamSet_initialize: initialize state stream set */
146 void HTS106_SStreamSet_initialize(HTS106_SStreamSet * sss)
147 {
148 sss->nstream = 0;
149 sss->nstate = 0;
150 sss->sstream = NULL;
151 sss->duration = NULL;
152 sss->total_state = 0;
153 sss->total_frame = 0;
154 }
155
156 /* HTS106_SStreamSet_create: parse label and determine state duration */
157 HTS106_Boolean HTS106_SStreamSet_create(HTS106_SStreamSet * sss, HTS106_ModelSet * ms, HTS106_Label * label, double *duration_iw, double **parameter_iw, double **gv_iw)
158 {
159 int i, j, k;
160 double temp;
161 int state;
162 HTS106_SStream *sst;
163 double *duration_mean, *duration_vari;
164 double frame_length;
165 int next_time;
166 int next_state;
167
168 /* check interpolation weights */
169 for (i = 0, temp = 0.0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
170 temp += duration_iw[i];
171 if (temp == 0.0)
172 return FALSE;
173 for (i = 0; i < sss->nstream; i++) {
174 for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
175 temp += parameter_iw[i][j];
176 if (temp == 0.0)
177 return FALSE;
178 if (HTS106_ModelSet_use_gv(ms, i)) {
179 for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
180 temp += gv_iw[i][j];
181 return FALSE;
182 }
183 }
184
185 /* initialize state sequence */
186 sss->nstate = HTS106_ModelSet_get_nstate(ms);
187 sss->nstream = HTS106_ModelSet_get_nstream(ms);
188 sss->total_frame = 0;
189 sss->total_state = HTS106_Label_get_size(label) * sss->nstate;
190 sss->duration = (int *) HTS106_calloc(sss->total_state, sizeof(int));
191 sss->sstream = (HTS106_SStream *) HTS106_calloc(sss->nstream, sizeof(HTS106_SStream));
192 for (i = 0; i < sss->nstream; i++) {
193 sst = &sss->sstream[i];
194 sst->vector_length = HTS106_ModelSet_get_vector_length(ms, i);
195 sst->mean = (double **) HTS106_calloc(sss->total_state, sizeof(double *));
196 sst->vari = (double **) HTS106_calloc(sss->total_state, sizeof(double *));
197 if (HTS106_ModelSet_is_msd(ms, i))
198 sst->msd = (double *) HTS106_calloc(sss->total_state, sizeof(double));
199 else
200 sst->msd = NULL;
201 for (j = 0; j < sss->total_state; j++) {
202 sst->mean[j] = (double *) HTS106_calloc(sst->vector_length, sizeof(double));
203 sst->vari[j] = (double *) HTS106_calloc(sst->vector_length, sizeof(double));
204 }
205 sst->gv_switch = (HTS106_Boolean *) HTS106_calloc(sss->total_state, sizeof(HTS106_Boolean));
206 for (j = 0; j < sss->total_state; j++)
207 sst->gv_switch[j] = TRUE;
208 }
209
210 /* check interpolation weights */
211 for (i = 0, temp = 0.0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
212 temp += duration_iw[i];
213 for (i = 0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
214 if (duration_iw[i] != 0.0)
215 duration_iw[i] /= temp;
216 for (i = 0; i < sss->nstream; i++) {
217 for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
218 temp += parameter_iw[i][j];
219 for (j = 0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
220 if (parameter_iw[i][j] != 0.0)
221 parameter_iw[i][j] /= temp;
222 if (HTS106_ModelSet_use_gv(ms, i)) {
223 for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
224 temp += gv_iw[i][j];
225 for (j = 0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
226 if (gv_iw[i][j] != 0.0)
227 gv_iw[i][j] /= temp;
228 }
229 }
230
231 /* determine state duration */
232 duration_mean = (double *) HTS106_calloc(sss->nstate * HTS106_Label_get_size(label), sizeof(double));
233 duration_vari = (double *) HTS106_calloc(sss->nstate * HTS106_Label_get_size(label), sizeof(double));
234 for (i = 0; i < HTS106_Label_get_size(label); i++)
235 HTS106_ModelSet_get_duration(ms, HTS106_Label_get_string(label, i), NULL, &duration_mean[i * sss->nstate], &duration_vari[i * sss->nstate], duration_iw);
236 if (HTS106_Label_get_frame_specified_flag(label)) {
237 /* use duration set by user */
238 next_time = 0;
239 next_state = 0;
240 state = 0;
241 for (i = 0; i < HTS106_Label_get_size(label); i++) {
242 temp = HTS106_Label_get_end_frame(label, i);
243 if (temp >= 0) {
244 next_time += HTS106_set_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, temp - next_time);
245 next_state = state + sss->nstate;
246 } else if (i + 1 == HTS106_Label_get_size(label)) {
247 HTS106_error(-1, "HTS106_SStreamSet_create: The time of final label is not specified.\n");
248 HTS106_set_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, 0.0);
249 }
250 state += sss->nstate;
251 }
252 } else {
253 /* determine frame length */
254 if (HTS106_Label_get_speech_speed(label) != 1.0) {
255 temp = 0.0;
256 for (i = 0; i < HTS106_Label_get_size(label) * sss->nstate; i++) {
257 temp += duration_mean[i];
258 }
259 frame_length = temp / HTS106_Label_get_speech_speed(label);
260 } else {
261 frame_length = 0.0;
262 }
263 /* set state duration */
264 HTS106_set_duration(sss->duration, duration_mean, duration_vari, HTS106_Label_get_size(label) * sss->nstate, frame_length);
265 }
266 HTS106_free(duration_mean);
267 HTS106_free(duration_vari);
268
269 /* get parameter */
270 for (i = 0, state = 0; i < HTS106_Label_get_size(label); i++) {
271 for (j = 2; j <= sss->nstate + 1; j++) {
272 sss->total_frame += sss->duration[state];
273 for (k = 0; k < sss->nstream; k++) {
274 sst = &sss->sstream[k];
275 if (sst->msd)
276 HTS106_ModelSet_get_parameter(ms, HTS106_Label_get_string(label, i), NULL, sst->mean[state], sst->vari[state], &sst->msd[state], k, j, parameter_iw[k]);
277 else
278 HTS106_ModelSet_get_parameter(ms, HTS106_Label_get_string(label, i), NULL, sst->mean[state], sst->vari[state], NULL, k, j, parameter_iw[k]);
279 }
280 state++;
281 }
282 }
283
284 /* copy dynamic window */
285 for (i = 0; i < sss->nstream; i++) {
286 sst = &sss->sstream[i];
287 sst->win_size = HTS106_ModelSet_get_window_size(ms, i);
288 sst->win_max_width = HTS106_ModelSet_get_window_max_width(ms, i);
289 sst->win_l_width = (int *) HTS106_calloc(sst->win_size, sizeof(int));
290 sst->win_r_width = (int *) HTS106_calloc(sst->win_size, sizeof(int));
291 sst->win_coefficient = (double **) HTS106_calloc(sst->win_size, sizeof(double));
292 for (j = 0; j < sst->win_size; j++) {
293 sst->win_l_width[j] = HTS106_ModelSet_get_window_left_width(ms, i, j);
294 sst->win_r_width[j] = HTS106_ModelSet_get_window_right_width(ms, i, j);
295 if (sst->win_l_width[j] + sst->win_r_width[j] == 0)
296 sst->win_coefficient[j] = (double *) HTS106_calloc(-2 * sst->win_l_width[j] + 1, sizeof(double));
297 else
298 sst->win_coefficient[j] = (double *) HTS106_calloc(-2 * sst->win_l_width[j], sizeof(double));
299 sst->win_coefficient[j] -= sst->win_l_width[j];
300 for (k = sst->win_l_width[j]; k <= sst->win_r_width[j]; k++)
301 sst->win_coefficient[j][k] = HTS106_ModelSet_get_window_coefficient(ms, i, j, k);
302 }
303 }
304
305 /* determine GV */
306 for (i = 0; i < sss->nstream; i++) {
307 sst = &sss->sstream[i];
308 if (HTS106_ModelSet_use_gv(ms, i)) {
309 sst->gv_mean = (double *) HTS106_calloc(sst->vector_length / sst->win_size, sizeof(double));
310 sst->gv_vari = (double *) HTS106_calloc(sst->vector_length / sst->win_size, sizeof(double));
311 HTS106_ModelSet_get_gv(ms, HTS106_Label_get_string(label, 0), sst->gv_mean, sst->gv_vari, i, gv_iw[i]);
312 } else {
313 sst->gv_mean = NULL;
314 sst->gv_vari = NULL;
315 }
316 }
317
318 if (HTS106_ModelSet_have_gv_switch(ms) == TRUE)
319 for (i = 0; i < HTS106_Label_get_size(label); i++)
320 if (HTS106_ModelSet_get_gv_switch(ms, HTS106_Label_get_string(label, i)) == FALSE)
321 for (j = 0; j < sss->nstream; j++)
322 for (k = 0; k < sss->nstate; k++)
323 sss->sstream[j].gv_switch[i * sss->nstate + k] = FALSE;
324
325 return TRUE;
326 }
327
328 /* HTS106_SStreamSet_get_nstream: get number of stream */
329 int HTS106_SStreamSet_get_nstream(HTS106_SStreamSet * sss)
330 {
331 return sss->nstream;
332 }
333
334 /* HTS106_SStreamSet_get_vector_length: get vector length */
335 int HTS106_SStreamSet_get_vector_length(HTS106_SStreamSet * sss, int stream_index)
336 {
337 return sss->sstream[stream_index].vector_length;
338 }
339
340 /* HTS106_SStreamSet_is_msd: get MSD flag */
341 HTS106_Boolean HTS106_SStreamSet_is_msd(HTS106_SStreamSet * sss, int stream_index)
342 {
343 return sss->sstream[stream_index].msd ? TRUE : FALSE;
344 }
345
346 /* HTS106_SStreamSet_get_total_state: get total number of state */
347 int HTS106_SStreamSet_get_total_state(HTS106_SStreamSet * sss)
348 {
349 return sss->total_state;
350 }
351
352 /* HTS106_SStreamSet_get_total_frame: get total number of frame */
353 int HTS106_SStreamSet_get_total_frame(HTS106_SStreamSet * sss)
354 {
355 return sss->total_frame;
356 }
357
358 /* HTS106_SStreamSet_get_msd: get MSD parameter */
359 double HTS106_SStreamSet_get_msd(HTS106_SStreamSet * sss, int stream_index, int state_index)
360 {
361 return sss->sstream[stream_index].msd[state_index];
362 }
363
364 /* HTS106_SStreamSet_window_size: get dynamic window size */
365 int HTS106_SStreamSet_get_window_size(HTS106_SStreamSet * sss, int stream_index)
366 {
367 return sss->sstream[stream_index].win_size;
368 }
369
370 /* HTS106_SStreamSet_get_window_left_width: get left width of dynamic window */
371 int HTS106_SStreamSet_get_window_left_width(HTS106_SStreamSet * sss, int stream_index, int window_index)
372 {
373 return sss->sstream[stream_index].win_l_width[window_index];
374 }
375
376 /* HTS106_SStreamSet_get_winodow_right_width: get right width of dynamic window */
377 int HTS106_SStreamSet_get_window_right_width(HTS106_SStreamSet * sss, int stream_index, int window_index)
378 {
379 return sss->sstream[stream_index].win_r_width[window_index];
380 }
381
382 /* HTS106_SStreamSet_get_window_coefficient: get coefficient of dynamic window */
383 double HTS106_SStreamSet_get_window_coefficient(HTS106_SStreamSet * sss, int stream_index, int window_index, int coefficient_index)
384 {
385 return sss->sstream[stream_index].win_coefficient[window_index][coefficient_index];
386 }
387
388 /* HTS106_SStreamSet_get_window_max_width: get max width of dynamic window */
389 int HTS106_SStreamSet_get_window_max_width(HTS106_SStreamSet * sss, int stream_index)
390 {
391 return sss->sstream[stream_index].win_max_width;
392 }
393
394 /* HTS106_SStreamSet_use_gv: get GV flag */
395 HTS106_Boolean HTS106_SStreamSet_use_gv(HTS106_SStreamSet * sss, int stream_index)
396 {
397 return sss->sstream[stream_index].gv_mean ? TRUE : FALSE;
398 }
399
400 /* HTS106_SStreamSet_get_duration: get state duration */
401 int HTS106_SStreamSet_get_duration(HTS106_SStreamSet * sss, int state_index)
402 {
403 return sss->duration[state_index];
404 }
405
406 /* HTS106_SStreamSet_get_mean: get mean parameter */
407 double HTS106_SStreamSet_get_mean(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index)
408 {
409 return sss->sstream[stream_index].mean[state_index][vector_index];
410 }
411
412 /* HTS106_SStreamSet_set_mean: set mean parameter */
413 void HTS106_SStreamSet_set_mean(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index, double f)
414 {
415 sss->sstream[stream_index].mean[state_index][vector_index] = f;
416 }
417
418 /* HTS106_SStreamSet_get_vari: get variance parameter */
419 double HTS106_SStreamSet_get_vari(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index)
420 {
421 return sss->sstream[stream_index].vari[state_index][vector_index];
422 }
423
424 /* HTS106_SStreamSet_set_vari: set variance parameter */
425 void HTS106_SStreamSet_set_vari(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index, double f)
426 {
427 sss->sstream[stream_index].vari[state_index][vector_index] = f;
428 }
429
430 /* HTS106_SStreamSet_get_gv_mean: get GV mean parameter */
431 double HTS106_SStreamSet_get_gv_mean(HTS106_SStreamSet * sss, int stream_index, int vector_index)
432 {
433 return sss->sstream[stream_index].gv_mean[vector_index];
434 }
435
436 /* HTS106_SStreamSet_get_gv_mean: get GV variance parameter */
437 double HTS106_SStreamSet_get_gv_vari(HTS106_SStreamSet * sss, int stream_index, int vector_index)
438 {
439 return sss->sstream[stream_index].gv_vari[vector_index];
440 }
441
442 /* HTS106_SStreamSet_set_gv_switch: set GV switch */
443 void HTS106_SStreamSet_set_gv_switch(HTS106_SStreamSet * sss, int stream_index, int state_index, HTS106_Boolean i)
444 {
445 sss->sstream[stream_index].gv_switch[state_index] = i;
446 }
447
448 /* HTS106_SStreamSet_get_gv_switch: get GV switch */
449 HTS106_Boolean HTS106_SStreamSet_get_gv_switch(HTS106_SStreamSet * sss, int stream_index, int state_index)
450 {
451 return sss->sstream[stream_index].gv_switch[state_index];
452 }
453
454 /* HTS106_SStreamSet_clear: free state stream set */
455 void HTS106_SStreamSet_clear(HTS106_SStreamSet * sss)
456 {
457 int i, j;
458 HTS106_SStream *sst;
459
460 if (sss->sstream) {
461 for (i = 0; i < sss->nstream; i++) {
462 sst = &sss->sstream[i];
463 for (j = 0; j < sss->total_state; j++) {
464 HTS106_free(sst->mean[j]);
465 HTS106_free(sst->vari[j]);
466 }
467 if (sst->msd)
468 HTS106_free(sst->msd);
469 HTS106_free(sst->mean);
470 HTS106_free(sst->vari);
471 for (j = sst->win_size - 1; j >= 0; j--) {
472 sst->win_coefficient[j] += sst->win_l_width[j];
473 HTS106_free(sst->win_coefficient[j]);
474 }
475 HTS106_free(sst->win_coefficient);
476 HTS106_free(sst->win_l_width);
477 HTS106_free(sst->win_r_width);
478 if (sst->gv_mean)
479 HTS106_free(sst->gv_mean);
480 if (sst->gv_vari)
481 HTS106_free(sst->gv_vari);
482 HTS106_free(sst->gv_switch);
483 }
484 HTS106_free(sss->sstream);
485 }
486 if (sss->duration)
487 HTS106_free(sss->duration);
488
489 HTS106_SStreamSet_initialize(sss);
490 }
491
492 HTS106_SSTREAM_C_END;
493
494 #endif /* !HTS106_SSTREAM_C */
495