1 /* ----------------------------------------------------------------- */
2 /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
3 /*           developed by HTS Working Group                          */
4 /*           http://hts-engine.sourceforge.net/                      */
5 /* ----------------------------------------------------------------- */
6 /*                                                                   */
7 /*  Copyright (c) 2001-2011  Nagoya Institute of Technology          */
8 /*                           Department of Computer Science          */
9 /*                                                                   */
10 /*                2001-2008  Tokyo Institute of Technology           */
11 /*                           Interdisciplinary Graduate School of    */
12 /*                           Science and Engineering                 */
13 /*                                                                   */
14 /* All rights reserved.                                              */
15 /*                                                                   */
16 /* Redistribution and use in source and binary forms, with or        */
17 /* without modification, are permitted provided that the following   */
18 /* conditions are met:                                               */
19 /*                                                                   */
20 /* - Redistributions of source code must retain the above copyright  */
21 /*   notice, this list of conditions and the following disclaimer.   */
22 /* - Redistributions in binary form must reproduce the above         */
23 /*   copyright notice, this list of conditions and the following     */
24 /*   disclaimer in the documentation and/or other materials provided */
25 /*   with the distribution.                                          */
26 /* - Neither the name of the HTS working group nor the names of its  */
27 /*   contributors may be used to endorse or promote products derived */
28 /*   from this software without specific prior written permission.   */
29 /*                                                                   */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
42 /* POSSIBILITY OF SUCH DAMAGE.                                       */
43 /* ----------------------------------------------------------------- */
44 
45 #ifndef HTS106_SSTREAM_C
46 #define HTS106_SSTREAM_C
47 
48 #ifdef __cplusplus
49 #define HTS106_SSTREAM_C_START extern "C" {
50 #define HTS106_SSTREAM_C_END   }
51 #else
52 #define HTS106_SSTREAM_C_START
53 #define HTS106_SSTREAM_C_END
54 #endif                          /* __CPLUSPLUS */
55 
56 HTS106_SSTREAM_C_START;
57 
#include <math.h>
#include <stdlib.h>

/* hts_engine libraries */
#include "HTS106_hidden.h"
62 
/* HTS106_set_duration: set duration from state duration probability distribution */
/*
 * duration     : output array, one frame count per state (every entry >= 1)
 * mean, vari   : per-state duration mean and variance, `size` entries each
 * size         : number of states to fill
 * frame_length : requested total number of frames; exactly 0.0 means
 *                "not specified", in which case only the means are used
 *
 * Returns the total number of frames written to `duration`, as a double.
 * NOTE(review): assumes the summed variance is non-zero when a frame length
 * is specified (it is used as a divisor for rho) - confirm against callers.
 */
static double HTS106_set_duration(int *duration, double *mean, double *vari, int size, double frame_length)
{
   int i, j;
   double temp1, temp2;
   double rho = 0.0;
   int sum = 0;
   int target_length;

   /* if the frame length is not specified, only the mean vector is used */
   if (frame_length == 0.0) {
      for (i = 0; i < size; i++) {
         duration[i] = (int) (mean[i] + 0.5);
         if (duration[i] < 1)
            duration[i] = 1;
         sum += duration[i];
      }
      return (double) sum;
   }

   /* get the target frame length (rounded to the nearest frame) */
   target_length = (int) (frame_length + 0.5);

   /* check the specified duration: every state needs at least one frame */
   if (target_length <= size) {
      if (target_length < size)
         HTS106_error(-1, "HTS106_set_duration: Specified frame length is too short.\n");
      for (i = 0; i < size; i++)
         duration[i] = 1;
      return (double) size;
   }

   /* no states to distribute frames over; also keeps the search below from indexing duration[-1] */
   if (size <= 0)
      return 0.0;

   /* RHO calculation: common scale such that sum(mean + rho * vari) equals the target */
   temp1 = 0.0;
   temp2 = 0.0;
   for (i = 0; i < size; i++) {
      temp1 += mean[i];
      temp2 += vari[i];
   }
   rho = ((double) target_length - temp1) / temp2;

   /* first estimation */
   for (i = 0; i < size; i++) {
      duration[i] = (int) (mean[i] + rho * vari[i] + 0.5);
      if (duration[i] < 1)
         duration[i] = 1;
      sum += duration[i];
   }

   /* loop estimation: rounding/clamping may leave sum off target; fix one frame at a time */
   while (target_length != sum) {
      /* search flexible state and modify its duration */
      if (target_length > sum) {
         j = -1;
         for (i = 0; i < size; i++) {
            /* fabs, not abs: the deviation is a double and must not be truncated to int */
            temp2 = fabs(rho - ((double) duration[i] + 1 - mean[i]) / vari[i]);
            /* keep the state whose lengthened duration deviates least from rho */
            if (j < 0 || temp1 > temp2) {
               j = i;
               temp1 = temp2;
            }
         }
         sum++;
         duration[j]++;
      } else {
         j = -1;
         for (i = 0; i < size; i++) {
            /* only states that can still give up a frame are candidates */
            if (duration[i] > 1) {
               temp2 = fabs(rho - ((double) duration[i] - 1 - mean[i]) / vari[i]);
               /* keep the state whose shortened duration deviates least from rho */
               if (j < 0 || temp1 > temp2) {
                  j = i;
                  temp1 = temp2;
               }
            }
         }
         sum--;
         duration[j]--;
      }
   }

   return (double) target_length;
}
144 
145 /* HTS106_SStreamSet_initialize: initialize state stream set */
146 void HTS106_SStreamSet_initialize(HTS106_SStreamSet * sss)
147 {
148    sss->nstream = 0;
149    sss->nstate = 0;
150    sss->sstream = NULL;
151    sss->duration = NULL;
152    sss->total_state = 0;
153    sss->total_frame = 0;
154 }
155 
156 /* HTS106_SStreamSet_create: parse label and determine state duration */
157 HTS106_Boolean HTS106_SStreamSet_create(HTS106_SStreamSet * sss, HTS106_ModelSet * ms, HTS106_Label * label, double *duration_iw, double **parameter_iw, double **gv_iw)
158 {
159    int i, j, k;
160    double temp;
161    int state;
162    HTS106_SStream *sst;
163    double *duration_mean, *duration_vari;
164    double frame_length;
165    int next_time;
166    int next_state;
167 
168    /* check interpolation weights */
169    for (i = 0, temp = 0.0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
170       temp += duration_iw[i];
171    if (temp == 0.0)
172       return FALSE;
173    for (i = 0; i < sss->nstream; i++) {
174       for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
175          temp += parameter_iw[i][j];
176       if (temp == 0.0)
177          return FALSE;
178       if (HTS106_ModelSet_use_gv(ms, i)) {
179          for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
180             temp += gv_iw[i][j];
181          return FALSE;
182       }
183    }
184 
185    /* initialize state sequence */
186    sss->nstate = HTS106_ModelSet_get_nstate(ms);
187    sss->nstream = HTS106_ModelSet_get_nstream(ms);
188    sss->total_frame = 0;
189    sss->total_state = HTS106_Label_get_size(label) * sss->nstate;
190    sss->duration = (int *) HTS106_calloc(sss->total_state, sizeof(int));
191    sss->sstream = (HTS106_SStream *) HTS106_calloc(sss->nstream, sizeof(HTS106_SStream));
192    for (i = 0; i < sss->nstream; i++) {
193       sst = &sss->sstream[i];
194       sst->vector_length = HTS106_ModelSet_get_vector_length(ms, i);
195       sst->mean = (double **) HTS106_calloc(sss->total_state, sizeof(double *));
196       sst->vari = (double **) HTS106_calloc(sss->total_state, sizeof(double *));
197       if (HTS106_ModelSet_is_msd(ms, i))
198          sst->msd = (double *) HTS106_calloc(sss->total_state, sizeof(double));
199       else
200          sst->msd = NULL;
201       for (j = 0; j < sss->total_state; j++) {
202          sst->mean[j] = (double *) HTS106_calloc(sst->vector_length, sizeof(double));
203          sst->vari[j] = (double *) HTS106_calloc(sst->vector_length, sizeof(double));
204       }
205       sst->gv_switch = (HTS106_Boolean *) HTS106_calloc(sss->total_state, sizeof(HTS106_Boolean));
206       for (j = 0; j < sss->total_state; j++)
207          sst->gv_switch[j] = TRUE;
208    }
209 
210    /* check interpolation weights */
211    for (i = 0, temp = 0.0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
212       temp += duration_iw[i];
213    for (i = 0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
214       if (duration_iw[i] != 0.0)
215          duration_iw[i] /= temp;
216    for (i = 0; i < sss->nstream; i++) {
217       for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
218          temp += parameter_iw[i][j];
219       for (j = 0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
220          if (parameter_iw[i][j] != 0.0)
221             parameter_iw[i][j] /= temp;
222       if (HTS106_ModelSet_use_gv(ms, i)) {
223          for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
224             temp += gv_iw[i][j];
225          for (j = 0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
226             if (gv_iw[i][j] != 0.0)
227                gv_iw[i][j] /= temp;
228       }
229    }
230 
231    /* determine state duration */
232    duration_mean = (double *) HTS106_calloc(sss->nstate * HTS106_Label_get_size(label), sizeof(double));
233    duration_vari = (double *) HTS106_calloc(sss->nstate * HTS106_Label_get_size(label), sizeof(double));
234    for (i = 0; i < HTS106_Label_get_size(label); i++)
235      HTS106_ModelSet_get_duration(ms, HTS106_Label_get_string(label, i), NULL, &duration_mean[i * sss->nstate], &duration_vari[i * sss->nstate], duration_iw);
236    if (HTS106_Label_get_frame_specified_flag(label)) {
237       /* use duration set by user */
238       next_time = 0;
239       next_state = 0;
240       state = 0;
241       for (i = 0; i < HTS106_Label_get_size(label); i++) {
242          temp = HTS106_Label_get_end_frame(label, i);
243          if (temp >= 0) {
244             next_time += HTS106_set_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, temp - next_time);
245             next_state = state + sss->nstate;
246          } else if (i + 1 == HTS106_Label_get_size(label)) {
247             HTS106_error(-1, "HTS106_SStreamSet_create: The time of final label is not specified.\n");
248             HTS106_set_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, 0.0);
249          }
250          state += sss->nstate;
251       }
252    } else {
253       /* determine frame length */
254       if (HTS106_Label_get_speech_speed(label) != 1.0) {
255          temp = 0.0;
256          for (i = 0; i < HTS106_Label_get_size(label) * sss->nstate; i++) {
257             temp += duration_mean[i];
258          }
259          frame_length = temp / HTS106_Label_get_speech_speed(label);
260       } else {
261          frame_length = 0.0;
262       }
263       /* set state duration */
264       HTS106_set_duration(sss->duration, duration_mean, duration_vari, HTS106_Label_get_size(label) * sss->nstate, frame_length);
265    }
266    HTS106_free(duration_mean);
267    HTS106_free(duration_vari);
268 
269    /* get parameter */
270    for (i = 0, state = 0; i < HTS106_Label_get_size(label); i++) {
271       for (j = 2; j <= sss->nstate + 1; j++) {
272          sss->total_frame += sss->duration[state];
273          for (k = 0; k < sss->nstream; k++) {
274             sst = &sss->sstream[k];
275             if (sst->msd)
276               HTS106_ModelSet_get_parameter(ms, HTS106_Label_get_string(label, i), NULL, sst->mean[state], sst->vari[state], &sst->msd[state], k, j, parameter_iw[k]);
277             else
278               HTS106_ModelSet_get_parameter(ms, HTS106_Label_get_string(label, i), NULL, sst->mean[state], sst->vari[state], NULL, k, j, parameter_iw[k]);
279          }
280          state++;
281       }
282    }
283 
284    /* copy dynamic window */
285    for (i = 0; i < sss->nstream; i++) {
286       sst = &sss->sstream[i];
287       sst->win_size = HTS106_ModelSet_get_window_size(ms, i);
288       sst->win_max_width = HTS106_ModelSet_get_window_max_width(ms, i);
289       sst->win_l_width = (int *) HTS106_calloc(sst->win_size, sizeof(int));
290       sst->win_r_width = (int *) HTS106_calloc(sst->win_size, sizeof(int));
291       sst->win_coefficient = (double **) HTS106_calloc(sst->win_size, sizeof(double));
292       for (j = 0; j < sst->win_size; j++) {
293          sst->win_l_width[j] = HTS106_ModelSet_get_window_left_width(ms, i, j);
294          sst->win_r_width[j] = HTS106_ModelSet_get_window_right_width(ms, i, j);
295          if (sst->win_l_width[j] + sst->win_r_width[j] == 0)
296             sst->win_coefficient[j] = (double *) HTS106_calloc(-2 * sst->win_l_width[j] + 1, sizeof(double));
297          else
298             sst->win_coefficient[j] = (double *) HTS106_calloc(-2 * sst->win_l_width[j], sizeof(double));
299          sst->win_coefficient[j] -= sst->win_l_width[j];
300          for (k = sst->win_l_width[j]; k <= sst->win_r_width[j]; k++)
301             sst->win_coefficient[j][k] = HTS106_ModelSet_get_window_coefficient(ms, i, j, k);
302       }
303    }
304 
305    /* determine GV */
306    for (i = 0; i < sss->nstream; i++) {
307       sst = &sss->sstream[i];
308       if (HTS106_ModelSet_use_gv(ms, i)) {
309          sst->gv_mean = (double *) HTS106_calloc(sst->vector_length / sst->win_size, sizeof(double));
310          sst->gv_vari = (double *) HTS106_calloc(sst->vector_length / sst->win_size, sizeof(double));
311          HTS106_ModelSet_get_gv(ms, HTS106_Label_get_string(label, 0), sst->gv_mean, sst->gv_vari, i, gv_iw[i]);
312       } else {
313          sst->gv_mean = NULL;
314          sst->gv_vari = NULL;
315       }
316    }
317 
318    if (HTS106_ModelSet_have_gv_switch(ms) == TRUE)
319       for (i = 0; i < HTS106_Label_get_size(label); i++)
320          if (HTS106_ModelSet_get_gv_switch(ms, HTS106_Label_get_string(label, i)) == FALSE)
321             for (j = 0; j < sss->nstream; j++)
322                for (k = 0; k < sss->nstate; k++)
323                   sss->sstream[j].gv_switch[i * sss->nstate + k] = FALSE;
324 
325    return TRUE;
326 }
327 
328 /* HTS106_SStreamSet_get_nstream: get number of stream */
329 int HTS106_SStreamSet_get_nstream(HTS106_SStreamSet * sss)
330 {
331    return sss->nstream;
332 }
333 
334 /* HTS106_SStreamSet_get_vector_length: get vector length */
335 int HTS106_SStreamSet_get_vector_length(HTS106_SStreamSet * sss, int stream_index)
336 {
337    return sss->sstream[stream_index].vector_length;
338 }
339 
340 /* HTS106_SStreamSet_is_msd: get MSD flag */
341 HTS106_Boolean HTS106_SStreamSet_is_msd(HTS106_SStreamSet * sss, int stream_index)
342 {
343    return sss->sstream[stream_index].msd ? TRUE : FALSE;
344 }
345 
346 /* HTS106_SStreamSet_get_total_state: get total number of state */
347 int HTS106_SStreamSet_get_total_state(HTS106_SStreamSet * sss)
348 {
349    return sss->total_state;
350 }
351 
352 /* HTS106_SStreamSet_get_total_frame: get total number of frame */
353 int HTS106_SStreamSet_get_total_frame(HTS106_SStreamSet * sss)
354 {
355    return sss->total_frame;
356 }
357 
358 /* HTS106_SStreamSet_get_msd: get MSD parameter */
359 double HTS106_SStreamSet_get_msd(HTS106_SStreamSet * sss, int stream_index, int state_index)
360 {
361    return sss->sstream[stream_index].msd[state_index];
362 }
363 
364 /* HTS106_SStreamSet_window_size: get dynamic window size */
365 int HTS106_SStreamSet_get_window_size(HTS106_SStreamSet * sss, int stream_index)
366 {
367    return sss->sstream[stream_index].win_size;
368 }
369 
370 /* HTS106_SStreamSet_get_window_left_width: get left width of dynamic window */
371 int HTS106_SStreamSet_get_window_left_width(HTS106_SStreamSet * sss, int stream_index, int window_index)
372 {
373    return sss->sstream[stream_index].win_l_width[window_index];
374 }
375 
376 /* HTS106_SStreamSet_get_winodow_right_width: get right width of dynamic window */
377 int HTS106_SStreamSet_get_window_right_width(HTS106_SStreamSet * sss, int stream_index, int window_index)
378 {
379    return sss->sstream[stream_index].win_r_width[window_index];
380 }
381 
382 /* HTS106_SStreamSet_get_window_coefficient: get coefficient of dynamic window */
383 double HTS106_SStreamSet_get_window_coefficient(HTS106_SStreamSet * sss, int stream_index, int window_index, int coefficient_index)
384 {
385    return sss->sstream[stream_index].win_coefficient[window_index][coefficient_index];
386 }
387 
388 /* HTS106_SStreamSet_get_window_max_width: get max width of dynamic window */
389 int HTS106_SStreamSet_get_window_max_width(HTS106_SStreamSet * sss, int stream_index)
390 {
391    return sss->sstream[stream_index].win_max_width;
392 }
393 
394 /* HTS106_SStreamSet_use_gv: get GV flag */
395 HTS106_Boolean HTS106_SStreamSet_use_gv(HTS106_SStreamSet * sss, int stream_index)
396 {
397    return sss->sstream[stream_index].gv_mean ? TRUE : FALSE;
398 }
399 
400 /* HTS106_SStreamSet_get_duration: get state duration */
401 int HTS106_SStreamSet_get_duration(HTS106_SStreamSet * sss, int state_index)
402 {
403    return sss->duration[state_index];
404 }
405 
406 /* HTS106_SStreamSet_get_mean: get mean parameter */
407 double HTS106_SStreamSet_get_mean(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index)
408 {
409    return sss->sstream[stream_index].mean[state_index][vector_index];
410 }
411 
412 /* HTS106_SStreamSet_set_mean: set mean parameter */
413 void HTS106_SStreamSet_set_mean(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index, double f)
414 {
415    sss->sstream[stream_index].mean[state_index][vector_index] = f;
416 }
417 
418 /* HTS106_SStreamSet_get_vari: get variance parameter */
419 double HTS106_SStreamSet_get_vari(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index)
420 {
421    return sss->sstream[stream_index].vari[state_index][vector_index];
422 }
423 
424 /* HTS106_SStreamSet_set_vari: set variance parameter */
425 void HTS106_SStreamSet_set_vari(HTS106_SStreamSet * sss, int stream_index, int state_index, int vector_index, double f)
426 {
427    sss->sstream[stream_index].vari[state_index][vector_index] = f;
428 }
429 
430 /* HTS106_SStreamSet_get_gv_mean: get GV mean parameter */
431 double HTS106_SStreamSet_get_gv_mean(HTS106_SStreamSet * sss, int stream_index, int vector_index)
432 {
433    return sss->sstream[stream_index].gv_mean[vector_index];
434 }
435 
436 /* HTS106_SStreamSet_get_gv_mean: get GV variance parameter */
437 double HTS106_SStreamSet_get_gv_vari(HTS106_SStreamSet * sss, int stream_index, int vector_index)
438 {
439    return sss->sstream[stream_index].gv_vari[vector_index];
440 }
441 
442 /* HTS106_SStreamSet_set_gv_switch: set GV switch */
443 void HTS106_SStreamSet_set_gv_switch(HTS106_SStreamSet * sss, int stream_index, int state_index, HTS106_Boolean i)
444 {
445    sss->sstream[stream_index].gv_switch[state_index] = i;
446 }
447 
448 /* HTS106_SStreamSet_get_gv_switch: get GV switch */
449 HTS106_Boolean HTS106_SStreamSet_get_gv_switch(HTS106_SStreamSet * sss, int stream_index, int state_index)
450 {
451    return sss->sstream[stream_index].gv_switch[state_index];
452 }
453 
454 /* HTS106_SStreamSet_clear: free state stream set */
455 void HTS106_SStreamSet_clear(HTS106_SStreamSet * sss)
456 {
457    int i, j;
458    HTS106_SStream *sst;
459 
460    if (sss->sstream) {
461       for (i = 0; i < sss->nstream; i++) {
462          sst = &sss->sstream[i];
463          for (j = 0; j < sss->total_state; j++) {
464             HTS106_free(sst->mean[j]);
465             HTS106_free(sst->vari[j]);
466          }
467          if (sst->msd)
468             HTS106_free(sst->msd);
469          HTS106_free(sst->mean);
470          HTS106_free(sst->vari);
471          for (j = sst->win_size - 1; j >= 0; j--) {
472             sst->win_coefficient[j] += sst->win_l_width[j];
473             HTS106_free(sst->win_coefficient[j]);
474          }
475          HTS106_free(sst->win_coefficient);
476          HTS106_free(sst->win_l_width);
477          HTS106_free(sst->win_r_width);
478          if (sst->gv_mean)
479             HTS106_free(sst->gv_mean);
480          if (sst->gv_vari)
481             HTS106_free(sst->gv_vari);
482          HTS106_free(sst->gv_switch);
483       }
484       HTS106_free(sss->sstream);
485    }
486    if (sss->duration)
487       HTS106_free(sss->duration);
488 
489    HTS106_SStreamSet_initialize(sss);
490 }
491 
492 HTS106_SSTREAM_C_END;
493 
494 #endif                          /* !HTS106_SSTREAM_C */
495