1 /* ----------------------------------------------------------------- */
2 /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
3 /*           developed by HTS Working Group                          */
4 /*           http://hts-engine.sourceforge.net/                      */
5 /* ----------------------------------------------------------------- */
6 /*                                                                   */
7 /*  Copyright (c) 2001-2011  Nagoya Institute of Technology          */
8 /*                           Department of Computer Science          */
9 /*                                                                   */
10 /*                2001-2008  Tokyo Institute of Technology           */
11 /*                           Interdisciplinary Graduate School of    */
12 /*                           Science and Engineering                 */
13 /*                                                                   */
14 /* All rights reserved.                                              */
15 /*                                                                   */
16 /* Redistribution and use in source and binary forms, with or        */
17 /* without modification, are permitted provided that the following   */
18 /* conditions are met:                                               */
19 /*                                                                   */
20 /* - Redistributions of source code must retain the above copyright  */
21 /*   notice, this list of conditions and the following disclaimer.   */
22 /* - Redistributions in binary form must reproduce the above         */
23 /*   copyright notice, this list of conditions and the following     */
24 /*   disclaimer in the documentation and/or other materials provided */
25 /*   with the distribution.                                          */
26 /* - Neither the name of the HTS working group nor the names of its  */
27 /*   contributors may be used to endorse or promote products derived */
28 /*   from this software without specific prior written permission.   */
29 /*                                                                   */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
42 /* POSSIBILITY OF SUCH DAMAGE.                                       */
43 /* ----------------------------------------------------------------- */
44 
45 #ifndef HTS106_ENGINE_C
46 #define HTS106_ENGINE_C
47 
48 #ifdef __cplusplus
49 #define HTS106_ENGINE_C_START extern "C" {
50 #define HTS106_ENGINE_C_END   }
51 #else
52 #define HTS106_ENGINE_C_START
53 #define HTS106_ENGINE_C_END
54 #endif                          /* __CPLUSPLUS */
55 
56 HTS106_ENGINE_C_START;
57 
58 #include <string.h>             /* for strcpy() */
59 
60 /* hts_engine libraries */
61 #include "HTS106_hidden.h"
62 
63 /* HTS106_Engine_initialize: initialize engine */
HTS106_Engine_initialize(HTS106_Engine * engine,int nstream)64 void HTS106_Engine_initialize(HTS106_Engine * engine, int nstream)
65 {
66    int i;
67 
68    /* default value for control parameter */
69    engine->global.stage = 0;
70    engine->global.use_log_gain = FALSE;
71    engine->global.sampling_rate = 16000;
72    engine->global.fperiod = 80;
73    engine->global.alpha = 0.42;
74    engine->global.beta = 0.0;
75    engine->global.audio_buff_size = 0;
76    engine->global.msd_threshold = (double *) HTS106_calloc(nstream, sizeof(double));
77    for (i = 0; i < nstream; i++)
78       engine->global.msd_threshold[i] = 0.5;
79 
80    /* interpolation weight */
81    engine->global.parameter_iw = (double **) HTS106_calloc(nstream, sizeof(double *));
82    engine->global.gv_iw = (double **) HTS106_calloc(nstream, sizeof(double *));
83    engine->global.duration_iw = NULL;
84    for (i = 0; i < nstream; i++)
85       engine->global.parameter_iw[i] = NULL;
86    for (i = 0; i < nstream; i++)
87       engine->global.gv_iw[i] = NULL;
88 
89    /* GV weight */
90    engine->global.gv_weight = (double *) HTS106_calloc(nstream, sizeof(double));
91    for (i = 0; i < nstream; i++)
92       engine->global.gv_weight[i] = 1.0;
93 
94    /* stop flag */
95    engine->global.stop = FALSE;
96    /* volume */
97    engine->global.volume = 1.0;
98 
99    /* initialize audio */
100    HTS106_Audio_initialize(&engine->audio, engine->global.sampling_rate, engine->global.audio_buff_size);
101    /* initialize model set */
102    HTS106_ModelSet_initialize(&engine->ms, nstream);
103    /* initialize label list */
104    HTS106_Label_initialize(&engine->label);
105    /* initialize state sequence set */
106    HTS106_SStreamSet_initialize(&engine->sss);
107    /* initialize pstream set */
108    HTS106_PStreamSet_initialize(&engine->pss);
109    /* initialize gstream set */
110    HTS106_GStreamSet_initialize(&engine->gss);
111 }
112 
113 /* HTS106_Engine_load_duratin_from_fn: load duration pdfs, trees and number of state from file names */
HTS106_Engine_load_duration_from_fn(HTS106_Engine * engine,char ** pdf_fn,char ** tree_fn,int interpolation_size)114 HTS106_Boolean HTS106_Engine_load_duration_from_fn(HTS106_Engine * engine, char **pdf_fn, char **tree_fn, int interpolation_size)
115 {
116    int i;
117    HTS106_File **pdf_fp, **tree_fp;
118    HTS106_Boolean result;
119 
120    pdf_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
121    tree_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
122    for (i = 0; i < interpolation_size; i++) {
123       pdf_fp[i] = HTS106_fopen(pdf_fn[i], "rb");
124       tree_fp[i] = HTS106_fopen(tree_fn[i], "r");
125    }
126    result = HTS106_Engine_load_duration_from_fp(engine, pdf_fp, tree_fp, interpolation_size);
127    for (i = 0; i < interpolation_size; i++) {
128       HTS106_fclose(pdf_fp[i]);
129       HTS106_fclose(tree_fp[i]);
130    }
131    HTS106_free(pdf_fp);
132    HTS106_free(tree_fp);
133 
134    return result;
135 }
136 
137 /* HTS106_Engine_load_duration_from_fp: load duration pdfs, trees and number of state from file pointers */
HTS106_Engine_load_duration_from_fp(HTS106_Engine * engine,HTS106_File ** pdf_fp,HTS106_File ** tree_fp,int interpolation_size)138 HTS106_Boolean HTS106_Engine_load_duration_from_fp(HTS106_Engine * engine, HTS106_File ** pdf_fp, HTS106_File ** tree_fp, int interpolation_size)
139 {
140    int i;
141 
142    if (HTS106_ModelSet_load_duration(&engine->ms, pdf_fp, tree_fp, interpolation_size) == FALSE) {
143       return FALSE;
144    }
145    engine->global.duration_iw = (double *) HTS106_calloc(interpolation_size, sizeof(double));
146    for (i = 0; i < interpolation_size; i++)
147       engine->global.duration_iw[i] = 1.0 / interpolation_size;
148 
149    return TRUE;
150 }
151 
152 /* HTS106_Engine_load_parameter_from_fn: load parameter pdfs, trees and windows from file names */
HTS106_Engine_load_parameter_from_fn(HTS106_Engine * engine,char ** pdf_fn,char ** tree_fn,char ** win_fn,int stream_index,HTS106_Boolean msd_flag,int window_size,int interpolation_size)153 HTS106_Boolean HTS106_Engine_load_parameter_from_fn(HTS106_Engine * engine, char **pdf_fn, char **tree_fn, char **win_fn, int stream_index, HTS106_Boolean msd_flag, int window_size, int interpolation_size)
154 {
155    int i;
156    HTS106_File **pdf_fp, **tree_fp, **win_fp;
157    HTS106_Boolean result;
158 
159    pdf_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
160    tree_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
161    win_fp = (HTS106_File **) HTS106_calloc(window_size, sizeof(HTS106_File *));
162    for (i = 0; i < interpolation_size; i++) {
163       pdf_fp[i] = HTS106_fopen(pdf_fn[i], "rb");
164       tree_fp[i] = HTS106_fopen(tree_fn[i], "r");
165    }
166    for (i = 0; i < window_size; i++)
167       win_fp[i] = HTS106_fopen(win_fn[i], "r");
168    result = HTS106_Engine_load_parameter_from_fp(engine, pdf_fp, tree_fp, win_fp, stream_index, msd_flag, window_size, interpolation_size);
169    for (i = 0; i < interpolation_size; i++) {
170       HTS106_fclose(pdf_fp[i]);
171       HTS106_fclose(tree_fp[i]);
172    }
173    for (i = 0; i < window_size; i++)
174       HTS106_fclose(win_fp[i]);
175    HTS106_free(pdf_fp);
176    HTS106_free(tree_fp);
177    HTS106_free(win_fp);
178 
179    return result;
180 }
181 
182 /* HTS106_Engine_load_parameter_from_fp: load parameter pdfs, trees and windows from file pointers */
HTS106_Engine_load_parameter_from_fp(HTS106_Engine * engine,HTS106_File ** pdf_fp,HTS106_File ** tree_fp,HTS106_File ** win_fp,int stream_index,HTS106_Boolean msd_flag,int window_size,int interpolation_size)183 HTS106_Boolean HTS106_Engine_load_parameter_from_fp(HTS106_Engine * engine, HTS106_File ** pdf_fp, HTS106_File ** tree_fp, HTS106_File ** win_fp, int stream_index, HTS106_Boolean msd_flag, int window_size, int interpolation_size)
184 {
185    int i;
186 
187    if (HTS106_ModelSet_load_parameter(&engine->ms, pdf_fp, tree_fp, win_fp, stream_index, msd_flag, window_size, interpolation_size) == FALSE) {
188       return FALSE;
189    }
190    engine->global.parameter_iw[stream_index] = (double *) HTS106_calloc(interpolation_size, sizeof(double));
191    for (i = 0; i < interpolation_size; i++)
192       engine->global.parameter_iw[stream_index][i] = 1.0 / interpolation_size;
193 
194    return TRUE;
195 }
196 
197 /* HTS106_Engine_load_gv_from_fn: load GV pdfs and trees from file names */
HTS106_Engine_load_gv_from_fn(HTS106_Engine * engine,char ** pdf_fn,char ** tree_fn,int stream_index,int interpolation_size)198 HTS106_Boolean HTS106_Engine_load_gv_from_fn(HTS106_Engine * engine, char **pdf_fn, char **tree_fn, int stream_index, int interpolation_size)
199 {
200    int i;
201    HTS106_File **pdf_fp, **tree_fp;
202    HTS106_Boolean result;
203 
204    pdf_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
205    if (tree_fn)
206       tree_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
207    else
208       tree_fp = NULL;
209    for (i = 0; i < interpolation_size; i++) {
210       pdf_fp[i] = HTS106_fopen(pdf_fn[i], "rb");
211       if (tree_fn) {
212          if (tree_fn[i])
213             tree_fp[i] = HTS106_fopen(tree_fn[i], "r");
214          else
215             tree_fp[i] = NULL;
216       }
217    }
218    result = HTS106_Engine_load_gv_from_fp(engine, pdf_fp, tree_fp, stream_index, interpolation_size);
219    for (i = 0; i < interpolation_size; i++) {
220       HTS106_fclose(pdf_fp[i]);
221       if (tree_fp && tree_fp[i])
222          HTS106_fclose(tree_fp[i]);
223    }
224    HTS106_free(pdf_fp);
225    if (tree_fp)
226       HTS106_free(tree_fp);
227 
228    return result;
229 }
230 
231 /* HTS106_Engine_load_gv_from_fp: load GV pdfs and trees from file pointers */
HTS106_Engine_load_gv_from_fp(HTS106_Engine * engine,HTS106_File ** pdf_fp,HTS106_File ** tree_fp,int stream_index,int interpolation_size)232 HTS106_Boolean HTS106_Engine_load_gv_from_fp(HTS106_Engine * engine, HTS106_File ** pdf_fp, HTS106_File ** tree_fp, int stream_index, int interpolation_size)
233 {
234    int i;
235 
236    if (HTS106_ModelSet_load_gv(&engine->ms, pdf_fp, tree_fp, stream_index, interpolation_size) == FALSE) {
237       return FALSE;
238    }
239    engine->global.gv_iw[stream_index] = (double *) HTS106_calloc(interpolation_size, sizeof(double));
240    for (i = 0; i < interpolation_size; i++)
241       engine->global.gv_iw[stream_index][i] = 1.0 / interpolation_size;
242 
243    return TRUE;
244 }
245 
246 /* HTS106_Engine_load_gv_switch_from_fn: load GV switch from file name */
HTS106_Engine_load_gv_switch_from_fn(HTS106_Engine * engine,char * fn)247 HTS106_Boolean HTS106_Engine_load_gv_switch_from_fn(HTS106_Engine * engine, char *fn)
248 {
249    HTS106_File *fp = HTS106_fopen(fn, "r");
250    HTS106_Boolean result;
251 
252    result = HTS106_Engine_load_gv_switch_from_fp(engine, fp);
253    HTS106_fclose(fp);
254 
255    return result;
256 }
257 
258 /* HTS106_Engine_load_gv_switch_from_fp: load GV switch from file pointer */
HTS106_Engine_load_gv_switch_from_fp(HTS106_Engine * engine,HTS106_File * fp)259 HTS106_Boolean HTS106_Engine_load_gv_switch_from_fp(HTS106_Engine * engine, HTS106_File * fp)
260 {
261    return HTS106_ModelSet_load_gv_switch(&engine->ms, fp);
262 }
263 
264 /* HTS106_Engine_set_sampling_rate: set sampling rate */
HTS106_Engine_set_sampling_rate(HTS106_Engine * engine,int i)265 void HTS106_Engine_set_sampling_rate(HTS106_Engine * engine, int i)
266 {
267    if (i < 1)
268       i = 1;
269    if (i > 48000)
270       i = 48000;
271    engine->global.sampling_rate = i;
272    HTS106_Audio_set_parameter(&engine->audio, engine->global.sampling_rate, engine->global.audio_buff_size);
273 }
274 
275 /* HTS106_Engine_get_sampling_rate: get sampling rate */
HTS106_Engine_get_sampling_rate(HTS106_Engine * engine)276 int HTS106_Engine_get_sampling_rate(HTS106_Engine * engine)
277 {
278    return engine->global.sampling_rate;
279 }
280 
281 /* HTS106_Engine_set_fperiod: set frame shift */
HTS106_Engine_set_fperiod(HTS106_Engine * engine,int i)282 void HTS106_Engine_set_fperiod(HTS106_Engine * engine, int i)
283 {
284    if (i < 1)
285       i = 1;
286    if (i > 48000)
287       i = 48000;
288    engine->global.fperiod = i;
289 }
290 
291 /* HTS106_Engine_get_fperiod: get frame shift */
HTS106_Engine_get_fperiod(HTS106_Engine * engine)292 int HTS106_Engine_get_fperiod(HTS106_Engine * engine)
293 {
294    return engine->global.fperiod;
295 }
296 
297 /* HTS106_Engine_set_alpha: set alpha */
HTS106_Engine_set_alpha(HTS106_Engine * engine,double f)298 void HTS106_Engine_set_alpha(HTS106_Engine * engine, double f)
299 {
300    if (f < 0.0)
301       f = 0.0;
302    if (f > 1.0)
303       f = 1.0;
304    engine->global.alpha = f;
305 }
306 
307 /* HTS106_Engine_set_gamma: set gamma (Gamma = -1/i: if i=0 then Gamma=0) */
HTS106_Engine_set_gamma(HTS106_Engine * engine,int i)308 void HTS106_Engine_set_gamma(HTS106_Engine * engine, int i)
309 {
310    if (i < 0)
311       i = 0;
312    engine->global.stage = i;
313 }
314 
315 /* HTS106_Engine_set_log_gain: set log gain flag (for LSP) */
HTS106_Engine_set_log_gain(HTS106_Engine * engine,HTS106_Boolean i)316 void HTS106_Engine_set_log_gain(HTS106_Engine * engine, HTS106_Boolean i)
317 {
318    engine->global.use_log_gain = i;
319 }
320 
321 /* HTS106_Engine_set_beta: set beta */
HTS106_Engine_set_beta(HTS106_Engine * engine,double f)322 void HTS106_Engine_set_beta(HTS106_Engine * engine, double f)
323 {
324    if (f < -0.8)
325       f = -0.8;
326    if (f > 0.8)
327       f = 0.8;
328    engine->global.beta = f;
329 }
330 
331 /* HTS106_Engine_set_audio_buff_size: set audio buffer size */
HTS106_Engine_set_audio_buff_size(HTS106_Engine * engine,int i)332 void HTS106_Engine_set_audio_buff_size(HTS106_Engine * engine, int i)
333 {
334    if (i < 0)
335       i = 0;
336    if (i > 48000)
337       i = 48000;
338    engine->global.audio_buff_size = i;
339    HTS106_Audio_set_parameter(&engine->audio, engine->global.sampling_rate, engine->global.audio_buff_size);
340 }
341 
342 /* HTS106_Engine_get_audio_buff_size: get audio buffer size */
HTS106_Engine_get_audio_buff_size(HTS106_Engine * engine)343 int HTS106_Engine_get_audio_buff_size(HTS106_Engine * engine)
344 {
345    return engine->global.audio_buff_size;
346 }
347 
348 /* HTS106_Egnine_set_msd_threshold: set MSD threshold */
HTS106_Engine_set_msd_threshold(HTS106_Engine * engine,int stream_index,double f)349 void HTS106_Engine_set_msd_threshold(HTS106_Engine * engine, int stream_index, double f)
350 {
351    if (f < 0.0)
352       f = 0.0;
353    if (f > 1.0)
354       f = 1.0;
355    engine->global.msd_threshold[stream_index] = f;
356 }
357 
358 /* HTS106_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
HTS106_Engine_set_duration_interpolation_weight(HTS106_Engine * engine,int interpolation_index,double f)359 void HTS106_Engine_set_duration_interpolation_weight(HTS106_Engine * engine, int interpolation_index, double f)
360 {
361    engine->global.duration_iw[interpolation_index] = f;
362 }
363 
364 /* HTS106_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
HTS106_Engine_set_parameter_interpolation_weight(HTS106_Engine * engine,int stream_index,int interpolation_index,double f)365 void HTS106_Engine_set_parameter_interpolation_weight(HTS106_Engine * engine, int stream_index, int interpolation_index, double f)
366 {
367    engine->global.parameter_iw[stream_index][interpolation_index] = f;
368 }
369 
370 /* HTS106_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
HTS106_Engine_set_gv_interpolation_weight(HTS106_Engine * engine,int stream_index,int interpolation_index,double f)371 void HTS106_Engine_set_gv_interpolation_weight(HTS106_Engine * engine, int stream_index, int interpolation_index, double f)
372 {
373    engine->global.gv_iw[stream_index][interpolation_index] = f;
374 }
375 
376 /* HTS106_Engine_set_gv_weight: set GV weight */
HTS106_Engine_set_gv_weight(HTS106_Engine * engine,int stream_index,double f)377 void HTS106_Engine_set_gv_weight(HTS106_Engine * engine, int stream_index, double f)
378 {
379    if (f < 0.0)
380       f = 0.0;
381    if (f > 2.0)
382       f = 2.0;
383    engine->global.gv_weight[stream_index] = f;
384 }
385 
386 /* HTS106_Engine_set_stop_flag: set stop flag */
HTS106_Engine_set_stop_flag(HTS106_Engine * engine,HTS106_Boolean b)387 void HTS106_Engine_set_stop_flag(HTS106_Engine * engine, HTS106_Boolean b)
388 {
389    engine->global.stop = b;
390 }
391 
392 /* HTS106_Engine_set_volume: set volume */
HTS106_Engine_set_volume(HTS106_Engine * engine,double f)393 void HTS106_Engine_set_volume(HTS106_Engine * engine, double f)
394 {
395    if (f < 0.0)
396       f = 0.0;
397    engine->global.volume = f;
398 }
399 
400 /* HTS106_Engine_get_total_state: get total number of state */
HTS106_Engine_get_total_state(HTS106_Engine * engine)401 int HTS106_Engine_get_total_state(HTS106_Engine * engine)
402 {
403    return HTS106_SStreamSet_get_total_state(&engine->sss);
404 }
405 
406 /* HTS106_Engine_set_state_mean: set mean value of state */
HTS106_Engine_set_state_mean(HTS106_Engine * engine,int stream_index,int state_index,int vector_index,double f)407 void HTS106_Engine_set_state_mean(HTS106_Engine * engine, int stream_index, int state_index, int vector_index, double f)
408 {
409    HTS106_SStreamSet_set_mean(&engine->sss, stream_index, state_index, vector_index, f);
410 }
411 
412 /* HTS106_Engine_get_state_mean: get mean value of state */
HTS106_Engine_get_state_mean(HTS106_Engine * engine,int stream_index,int state_index,int vector_index)413 double HTS106_Engine_get_state_mean(HTS106_Engine * engine, int stream_index, int state_index, int vector_index)
414 {
415    return HTS106_SStreamSet_get_mean(&engine->sss, stream_index, state_index, vector_index);
416 }
417 
418 /* HTS106_Engine_get_state_duration: get state duration */
HTS106_Engine_get_state_duration(HTS106_Engine * engine,int state_index)419 int HTS106_Engine_get_state_duration(HTS106_Engine * engine, int state_index)
420 {
421    return HTS106_SStreamSet_get_duration(&engine->sss, state_index);
422 }
423 
424 /* HTS106_Engine_get_nstream: get number of stream */
HTS106_Engine_get_nstream(HTS106_Engine * engine)425 int HTS106_Engine_get_nstream(HTS106_Engine * engine)
426 {
427    return HTS106_ModelSet_get_nstream(&engine->ms);
428 }
429 
430 /* HTS106_Engine_get_nstate: get number of state */
HTS106_Engine_get_nstate(HTS106_Engine * engine)431 int HTS106_Engine_get_nstate(HTS106_Engine * engine)
432 {
433    return HTS106_ModelSet_get_nstate(&engine->ms);
434 }
435 
436 /* HTS106_Engine_load_label_from_fn: load label from file name */
HTS106_Engine_load_label_from_fn(HTS106_Engine * engine,char * fn)437 void HTS106_Engine_load_label_from_fn(HTS106_Engine * engine, char *fn)
438 {
439    HTS106_Label_load_from_fn(&engine->label, engine->global.sampling_rate, engine->global.fperiod, fn);
440 }
441 
442 /* HTS106_Engine_load_label_from_fp: load label from file pointer */
HTS106_Engine_load_label_from_fp(HTS106_Engine * engine,HTS106_File * fp)443 void HTS106_Engine_load_label_from_fp(HTS106_Engine * engine, HTS106_File * fp)
444 {
445    HTS106_Label_load_from_fp(&engine->label, engine->global.sampling_rate, engine->global.fperiod, fp);
446 }
447 
448 /* HTS106_Engine_load_label_from_string: load label from string */
HTS106_Engine_load_label_from_string(HTS106_Engine * engine,char * data)449 void HTS106_Engine_load_label_from_string(HTS106_Engine * engine, char *data)
450 {
451    HTS106_Label_load_from_string(&engine->label, engine->global.sampling_rate, engine->global.fperiod, data);
452 }
453 
454 /* HTS106_Engine_load_label_from_string_list: load label from string list */
HTS106_Engine_load_label_from_string_list(HTS106_Engine * engine,char ** data,int size)455 void HTS106_Engine_load_label_from_string_list(HTS106_Engine * engine, char **data, int size)
456 {
457    HTS106_Label_load_from_string_list(&engine->label, engine->global.sampling_rate, engine->global.fperiod, data, size);
458 }
459 
460 /* HTS106_Engine_create_sstream: parse label and determine state duration */
HTS106_Engine_create_sstream(HTS106_Engine * engine)461 HTS106_Boolean HTS106_Engine_create_sstream(HTS106_Engine * engine)
462 {
463    return HTS106_SStreamSet_create(&engine->sss, &engine->ms, &engine->label, engine->global.duration_iw, engine->global.parameter_iw, engine->global.gv_iw);
464 }
465 
466 /* HTS106_Engine_create_pstream: generate speech parameter vector sequence */
HTS106_Engine_create_pstream(HTS106_Engine * engine)467 HTS106_Boolean HTS106_Engine_create_pstream(HTS106_Engine * engine)
468 {
469    return HTS106_PStreamSet_create(&engine->pss, &engine->sss, engine->global.msd_threshold, engine->global.gv_weight);
470 }
471 
472 /* HTS106_Engine_create_gstream: synthesis speech */
HTS106_Engine_create_gstream(HTS106_Engine * engine)473 HTS106_Boolean HTS106_Engine_create_gstream(HTS106_Engine * engine)
474 {
475    return HTS106_GStreamSet_create(&engine->gss, &engine->pss, engine->global.stage, engine->global.use_log_gain, engine->global.sampling_rate, engine->global.fperiod, engine->global.alpha, engine->global.beta, &engine->global.stop, engine->global.volume, engine->global.audio_buff_size > 0 ? &engine->audio : NULL);
476 }
477 
478 /* HTS106_Engine_save_information: output trace information */
HTS106_Engine_save_information(HTS106_Engine * engine,HTS106_File * fp)479 void HTS106_Engine_save_information(HTS106_Engine * engine, HTS106_File * fp)
480 {
481    int i, j, k, l, m, n;
482    double temp;
483    HTS106_Global *global = &engine->global;
484    HTS106_ModelSet *ms = &engine->ms;
485    HTS106_Label *label = &engine->label;
486    HTS106_SStreamSet *sss = &engine->sss;
487    HTS106_PStreamSet *pss = &engine->pss;
488 
489    /* global parameter */
490    fprintf(fp, "[Global parameter]\n");
491    fprintf(fp, "Sampring frequency                     -> %8d(Hz)\n", global->sampling_rate);
492    fprintf(fp, "Frame period                           -> %8d(point)\n", global->fperiod);
493    fprintf(fp, "                                          %8.5f(msec)\n", 1e+3 * global->fperiod / global->sampling_rate);
494    fprintf(fp, "All-pass constant                      -> %8.5f\n", (float) global->alpha);
495    fprintf(fp, "Gamma                                  -> %8.5f\n", (float) (global->stage == 0 ? 0.0 : -1.0 / global->stage));
496    if (global->stage != 0)
497       fprintf(fp, "Log gain flag                          -> %s\n", global->use_log_gain ? "TRUE" : "FALSE");
498    fprintf(fp, "Postfiltering coefficient              -> %8.5f\n", (float) global->beta);
499    fprintf(fp, "Audio buffer size                      -> %8d(sample)\n", global->audio_buff_size);
500    fprintf(fp, "\n");
501 
502    /* duration parameter */
503    fprintf(fp, "[Duration parameter]\n");
504    fprintf(fp, "Number of states                       -> %8d\n", HTS106_ModelSet_get_nstate(ms));
505    fprintf(fp, "         Interpolation                 -> %8d\n", HTS106_ModelSet_get_duration_interpolation_size(ms));
506    /* check interpolation */
507    for (i = 0, temp = 0.0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
508       temp += global->duration_iw[i];
509    for (i = 0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
510       if (global->duration_iw[i] != 0.0)
511          global->duration_iw[i] /= temp;
512    for (i = 0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
513       fprintf(fp, "         Interpolation weight[%2d]      -> %8.0f(%%)\n", i, (float) (100 * global->duration_iw[i]));
514    fprintf(fp, "\n");
515 
516    fprintf(fp, "[Stream parameter]\n");
517    for (i = 0; i < HTS106_ModelSet_get_nstream(ms); i++) {
518       /* stream parameter */
519       fprintf(fp, "Stream[%2d] vector length               -> %8d\n", i, HTS106_ModelSet_get_vector_length(ms, i));
520       fprintf(fp, "           Dynamic window size         -> %8d\n", HTS106_ModelSet_get_window_size(ms, i));
521       /* interpolation */
522       fprintf(fp, "           Interpolation               -> %8d\n", HTS106_ModelSet_get_parameter_interpolation_size(ms, i));
523       for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
524          temp += global->parameter_iw[i][j];
525       for (j = 0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
526          if (global->parameter_iw[i][j] != 0.0)
527             global->parameter_iw[i][j] /= temp;
528       for (j = 0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
529          fprintf(fp, "           Interpolation weight[%2d]    -> %8.0f(%%)\n", j, (float) (100 * global->parameter_iw[i][j]));
530       /* MSD */
531       if (HTS106_ModelSet_is_msd(ms, i)) { /* for MSD */
532          fprintf(fp, "           MSD flag                    ->     TRUE\n");
533          fprintf(fp, "           MSD threshold               -> %8.5f\n", global->msd_threshold[i]);
534       } else {                  /* for non MSD */
535          fprintf(fp, "           MSD flag                    ->    FALSE\n");
536       }
537       /* GV */
538       if (HTS106_ModelSet_use_gv(ms, i)) {
539          fprintf(fp, "           GV flag                     ->     TRUE\n");
540          if (HTS106_ModelSet_have_gv_switch(ms)) {
541             if (HTS106_ModelSet_have_gv_tree(ms, i)) {
542                fprintf(fp, "           GV type                     ->     CDGV\n");
543                fprintf(fp, "                                       ->  +SWITCH\n");
544             } else
545                fprintf(fp, "           GV type                     ->   SWITCH\n");
546          } else {
547             if (HTS106_ModelSet_have_gv_tree(ms, i))
548                fprintf(fp, "           GV type                     ->     CDGV\n");
549             else
550                fprintf(fp, "           GV type                     ->   NORMAL\n");
551          }
552          fprintf(fp, "           GV weight                   -> %8.0f(%%)\n", (float) (100 * global->gv_weight[i]));
553          fprintf(fp, "           GV interpolation size       -> %8d\n", HTS106_ModelSet_get_gv_interpolation_size(ms, i));
554          /* interpolation */
555          for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
556             temp += global->gv_iw[i][j];
557          for (j = 0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
558             if (global->gv_iw[i][j] != 0.0)
559                global->gv_iw[i][j] /= temp;
560          for (j = 0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
561             fprintf(fp, "           GV interpolation weight[%2d] -> %8.0f(%%)\n", j, (float) (100 * global->gv_iw[i][j]));
562       } else {
563          fprintf(fp, "           GV flag                     ->    FALSE\n");
564       }
565    }
566    fprintf(fp, "\n");
567 
568    /* generated sequence */
569    fprintf(fp, "[Generated sequence]\n");
570    fprintf(fp, "Number of HMMs                         -> %8d\n", HTS106_Label_get_size(label));
571    fprintf(fp, "Number of stats                        -> %8d\n", HTS106_Label_get_size(label) * HTS106_ModelSet_get_nstate(ms));
572    fprintf(fp, "Length of this speech                  -> %8.3f(sec)\n", (float) ((double) HTS106_PStreamSet_get_total_frame(pss) * global->fperiod / global->sampling_rate));
573    fprintf(fp, "                                       -> %8.3d(frames)\n", HTS106_PStreamSet_get_total_frame(pss) * global->fperiod);
574 
575    for (i = 0; i < HTS106_Label_get_size(label); i++) {
576       fprintf(fp, "HMM[%2d]\n", i);
577       fprintf(fp, "  Name                                 -> %s\n", HTS106_Label_get_string(label, i));
578       fprintf(fp, "  Duration\n");
579       for (j = 0; j < HTS106_ModelSet_get_duration_interpolation_size(ms); j++) {
580          fprintf(fp, "    Interpolation[%2d]\n", j);
581          HTS106_ModelSet_get_duration_index(ms, HTS106_Label_get_string(label, i), NULL, &k, &l, j);
582          fprintf(fp, "      Tree index                       -> %8d\n", k);
583          fprintf(fp, "      PDF index                        -> %8d\n", l);
584       }
585       for (j = 0; j < HTS106_ModelSet_get_nstate(ms); j++) {
586          fprintf(fp, "  State[%2d]\n", j + 2);
587          fprintf(fp, "    Length                             -> %8d(frames)\n", HTS106_SStreamSet_get_duration(sss, i * HTS106_ModelSet_get_nstate(ms) + j));
588          for (k = 0; k < HTS106_ModelSet_get_nstream(ms); k++) {
589             fprintf(fp, "    Stream[%2d]\n", k);
590             if (HTS106_ModelSet_is_msd(ms, k)) {
591                if (HTS106_SStreamSet_get_msd(sss, k, i * HTS106_ModelSet_get_nstate(ms) + j) > global->msd_threshold[k])
592                   fprintf(fp, "      MSD flag                         ->     TRUE\n");
593                else
594                   fprintf(fp, "      MSD flag                         ->    FALSE\n");
595             }
596             for (l = 0; l < HTS106_ModelSet_get_parameter_interpolation_size(ms, k); l++) {
597                fprintf(fp, "      Interpolation[%2d]\n", l);
598                HTS106_ModelSet_get_parameter_index(ms, HTS106_Label_get_string(label, i), NULL, &m, &n, k, j + 2, l);
599                fprintf(fp, "        Tree index                     -> %8d\n", m);
600                fprintf(fp, "        PDF index                      -> %8d\n", n);
601             }
602          }
603       }
604    }
605 }
606 
607 /* HTS106_Engine_save_label: output label with time */
HTS106_Engine_save_label(HTS106_Engine * engine,HTS106_File * fp)608 void HTS106_Engine_save_label(HTS106_Engine * engine, HTS106_File * fp)
609 {
610    int i, j;
611    int frame, state, duration;
612 
613    HTS106_Label *label = &engine->label;
614    HTS106_SStreamSet *sss = &engine->sss;
615    const int nstate = HTS106_ModelSet_get_nstate(&engine->ms);
616    const double rate = engine->global.fperiod * 1e+7 / engine->global.sampling_rate;
617 
618    for (i = 0, state = 0, frame = 0; i < HTS106_Label_get_size(label); i++) {
619       for (j = 0, duration = 0; j < nstate; j++)
620          duration += HTS106_SStreamSet_get_duration(sss, state++);
621       /* in HTK & HTS format */
622       fprintf(fp, "%lu %lu %s\n", (unsigned long) (frame * rate), (unsigned long) ((frame + duration) * rate), HTS106_Label_get_string(label, i));
623       frame += duration;
624    }
625 }
626 
627 /* HTS106_Engine_save_generated_parameter: output generated parameter */
HTS106_Engine_save_generated_parameter(HTS106_Engine * engine,HTS106_File * fp,int stream_index)628 void HTS106_Engine_save_generated_parameter(HTS106_Engine * engine, HTS106_File * fp, int stream_index)
629 {
630    int i, j;
631    float temp;
632    HTS106_GStreamSet *gss = &engine->gss;
633 
634    for (i = 0; i < HTS106_GStreamSet_get_total_frame(gss); i++)
635       for (j = 0; j < HTS106_GStreamSet_get_static_length(gss, stream_index); j++) {
636          temp = (float) HTS106_GStreamSet_get_parameter(gss, stream_index, i, j);
637          fwrite(&temp, sizeof(float), 1, fp);
638       }
639 }
640 
641 /* HTS106_Engine_save_generated_speech: output generated speech */
HTS106_Engine_save_generated_speech(HTS106_Engine * engine,HTS106_File * fp)642 void HTS106_Engine_save_generated_speech(HTS106_Engine * engine, HTS106_File * fp)
643 {
644    int i;
645    short temp;
646    HTS106_GStreamSet *gss = &engine->gss;
647 
648    for (i = 0; i < HTS106_GStreamSet_get_total_nsample(gss); i++) {
649       temp = HTS106_GStreamSet_get_speech(gss, i);
650       fwrite(&temp, sizeof(short), 1, fp);
651    }
652 }
653 
654 /* HTS106_Engine_save_riff: output RIFF format file */
HTS106_Engine_save_riff(HTS106_Engine * engine,HTS106_File * fp)655 void HTS106_Engine_save_riff(HTS106_Engine * engine, HTS106_File * fp)
656 {
657    int i;
658    short temp;
659 
660    HTS106_GStreamSet *gss = &engine->gss;
661    char data_01_04[] = { 'R', 'I', 'F', 'F' };
662    int data_05_08 = HTS106_GStreamSet_get_total_nsample(gss) * sizeof(short) + 36;
663    char data_09_12[] = { 'W', 'A', 'V', 'E' };
664    char data_13_16[] = { 'f', 'm', 't', ' ' };
665    int data_17_20 = 16;
666    short data_21_22 = 1;        /* PCM */
667    short data_23_24 = 1;        /* monoral */
668    int data_25_28 = engine->global.sampling_rate;
669    int data_29_32 = engine->global.sampling_rate * sizeof(short);
670    short data_33_34 = sizeof(short);
671    short data_35_36 = (short) (sizeof(short) * 8);
672    char data_37_40[] = { 'd', 'a', 't', 'a' };
673    int data_41_44 = HTS106_GStreamSet_get_total_nsample(gss) * sizeof(short);
674 
675    /* write header */
676    HTS106_fwrite_little_endian(data_01_04, sizeof(char), 4, fp);
677    HTS106_fwrite_little_endian(&data_05_08, sizeof(int), 1, fp);
678    HTS106_fwrite_little_endian(data_09_12, sizeof(char), 4, fp);
679    HTS106_fwrite_little_endian(data_13_16, sizeof(char), 4, fp);
680    HTS106_fwrite_little_endian(&data_17_20, sizeof(int), 1, fp);
681    HTS106_fwrite_little_endian(&data_21_22, sizeof(short), 1, fp);
682    HTS106_fwrite_little_endian(&data_23_24, sizeof(short), 1, fp);
683    HTS106_fwrite_little_endian(&data_25_28, sizeof(int), 1, fp);
684    HTS106_fwrite_little_endian(&data_29_32, sizeof(int), 1, fp);
685    HTS106_fwrite_little_endian(&data_33_34, sizeof(short), 1, fp);
686    HTS106_fwrite_little_endian(&data_35_36, sizeof(short), 1, fp);
687    HTS106_fwrite_little_endian(data_37_40, sizeof(char), 4, fp);
688    HTS106_fwrite_little_endian(&data_41_44, sizeof(int), 1, fp);
689    /* write data */
690    for (i = 0; i < HTS106_GStreamSet_get_total_nsample(gss); i++) {
691       temp = HTS106_GStreamSet_get_speech(gss, i);
692       HTS106_fwrite_little_endian(&temp, sizeof(short), 1, fp);
693    }
694 }
695 
696 /* HTS106_Engine_refresh: free model per one time synthesis */
HTS106_Engine_refresh(HTS106_Engine * engine)697 void HTS106_Engine_refresh(HTS106_Engine * engine)
698 {
699    /* free generated parameter stream set */
700    HTS106_GStreamSet_clear(&engine->gss);
701    /* free parameter stream set */
702    HTS106_PStreamSet_clear(&engine->pss);
703    /* free state stream set */
704    HTS106_SStreamSet_clear(&engine->sss);
705    /* free label list */
706    HTS106_Label_clear(&engine->label);
707    /* stop flag */
708    engine->global.stop = FALSE;
709 }
710 
711 /* HTS106_Engine_clear: free engine */
HTS106_Engine_clear(HTS106_Engine * engine)712 void HTS106_Engine_clear(HTS106_Engine * engine)
713 {
714    int i;
715 
716    HTS106_free(engine->global.msd_threshold);
717    HTS106_free(engine->global.duration_iw);
718    for (i = 0; i < HTS106_ModelSet_get_nstream(&engine->ms); i++) {
719       HTS106_free(engine->global.parameter_iw[i]);
720       if (engine->global.gv_iw[i])
721          HTS106_free(engine->global.gv_iw[i]);
722    }
723    HTS106_free(engine->global.parameter_iw);
724    HTS106_free(engine->global.gv_iw);
725    HTS106_free(engine->global.gv_weight);
726 
727    HTS106_ModelSet_clear(&engine->ms);
728    HTS106_Audio_clear(&engine->audio);
729 }
730 
731 /* HTS106_get_copyright: write copyright to string */
HTS106_get_copyright(char * str)732 void HTS106_get_copyright(char *str)
733 {
734    int i, nCopyright = HTS106_NCOPYRIGHT;
735    char url[] = HTS106_URL, version[] = HTS106_VERSION;
736    char *copyright[] = { HTS106_COPYRIGHT };
737 
738    sprintf(str, "\nThe HMM-Based Speech Synthesis Engine \"hts_engine API\"\n");
739    sprintf(str, "%shts_engine API version %s (%s)\n", str, version, url);
740    for (i = 0; i < nCopyright; i++) {
741       if (i == 0)
742          sprintf(str, "%sCopyright (C) %s\n", str, copyright[i]);
743       else
744          sprintf(str, "%s              %s\n", str, copyright[i]);
745    }
746    sprintf(str, "%sAll rights reserved.\n", str);
747 
748    return;
749 }
750 
751 /* HTS106_show_copyright: write copyright to file pointer */
HTS106_show_copyright(HTS106_File * fp)752 void HTS106_show_copyright(HTS106_File * fp)
753 {
754    char buf[HTS106_MAXBUFLEN];
755 
756    HTS106_get_copyright(buf);
757    fprintf(fp, "%s", buf);
758 
759    return;
760 }
761 
762 HTS106_ENGINE_C_END;
763 
764 #endif                          /* !HTS106_ENGINE_C */
765