1 /* ----------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 /* developed by HTS Working Group */
4 /* http://hts-engine.sourceforge.net/ */
5 /* ----------------------------------------------------------------- */
6 /* */
7 /* Copyright (c) 2001-2011 Nagoya Institute of Technology */
8 /* Department of Computer Science */
9 /* */
10 /* 2001-2008 Tokyo Institute of Technology */
11 /* Interdisciplinary Graduate School of */
12 /* Science and Engineering */
13 /* */
14 /* All rights reserved. */
15 /* */
16 /* Redistribution and use in source and binary forms, with or */
17 /* without modification, are permitted provided that the following */
18 /* conditions are met: */
19 /* */
20 /* - Redistributions of source code must retain the above copyright */
21 /* notice, this list of conditions and the following disclaimer. */
22 /* - Redistributions in binary form must reproduce the above */
23 /* copyright notice, this list of conditions and the following */
24 /* disclaimer in the documentation and/or other materials provided */
25 /* with the distribution. */
26 /* - Neither the name of the HTS working group nor the names of its */
27 /* contributors may be used to endorse or promote products derived */
28 /* from this software without specific prior written permission. */
29 /* */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 /* POSSIBILITY OF SUCH DAMAGE. */
43 /* ----------------------------------------------------------------- */
44
45 #ifndef HTS106_ENGINE_C
46 #define HTS106_ENGINE_C
47
48 #ifdef __cplusplus
49 #define HTS106_ENGINE_C_START extern "C" {
50 #define HTS106_ENGINE_C_END }
51 #else
52 #define HTS106_ENGINE_C_START
53 #define HTS106_ENGINE_C_END
#endif                          /* __cplusplus */
55
56 HTS106_ENGINE_C_START;
57
58 #include <string.h> /* for strcpy() */
59
60 /* hts_engine libraries */
61 #include "HTS106_hidden.h"
62
63 /* HTS106_Engine_initialize: initialize engine */
HTS106_Engine_initialize(HTS106_Engine * engine,int nstream)64 void HTS106_Engine_initialize(HTS106_Engine * engine, int nstream)
65 {
66 int i;
67
68 /* default value for control parameter */
69 engine->global.stage = 0;
70 engine->global.use_log_gain = FALSE;
71 engine->global.sampling_rate = 16000;
72 engine->global.fperiod = 80;
73 engine->global.alpha = 0.42;
74 engine->global.beta = 0.0;
75 engine->global.audio_buff_size = 0;
76 engine->global.msd_threshold = (double *) HTS106_calloc(nstream, sizeof(double));
77 for (i = 0; i < nstream; i++)
78 engine->global.msd_threshold[i] = 0.5;
79
80 /* interpolation weight */
81 engine->global.parameter_iw = (double **) HTS106_calloc(nstream, sizeof(double *));
82 engine->global.gv_iw = (double **) HTS106_calloc(nstream, sizeof(double *));
83 engine->global.duration_iw = NULL;
84 for (i = 0; i < nstream; i++)
85 engine->global.parameter_iw[i] = NULL;
86 for (i = 0; i < nstream; i++)
87 engine->global.gv_iw[i] = NULL;
88
89 /* GV weight */
90 engine->global.gv_weight = (double *) HTS106_calloc(nstream, sizeof(double));
91 for (i = 0; i < nstream; i++)
92 engine->global.gv_weight[i] = 1.0;
93
94 /* stop flag */
95 engine->global.stop = FALSE;
96 /* volume */
97 engine->global.volume = 1.0;
98
99 /* initialize audio */
100 HTS106_Audio_initialize(&engine->audio, engine->global.sampling_rate, engine->global.audio_buff_size);
101 /* initialize model set */
102 HTS106_ModelSet_initialize(&engine->ms, nstream);
103 /* initialize label list */
104 HTS106_Label_initialize(&engine->label);
105 /* initialize state sequence set */
106 HTS106_SStreamSet_initialize(&engine->sss);
107 /* initialize pstream set */
108 HTS106_PStreamSet_initialize(&engine->pss);
109 /* initialize gstream set */
110 HTS106_GStreamSet_initialize(&engine->gss);
111 }
112
113 /* HTS106_Engine_load_duratin_from_fn: load duration pdfs, trees and number of state from file names */
HTS106_Engine_load_duration_from_fn(HTS106_Engine * engine,char ** pdf_fn,char ** tree_fn,int interpolation_size)114 HTS106_Boolean HTS106_Engine_load_duration_from_fn(HTS106_Engine * engine, char **pdf_fn, char **tree_fn, int interpolation_size)
115 {
116 int i;
117 HTS106_File **pdf_fp, **tree_fp;
118 HTS106_Boolean result;
119
120 pdf_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
121 tree_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
122 for (i = 0; i < interpolation_size; i++) {
123 pdf_fp[i] = HTS106_fopen(pdf_fn[i], "rb");
124 tree_fp[i] = HTS106_fopen(tree_fn[i], "r");
125 }
126 result = HTS106_Engine_load_duration_from_fp(engine, pdf_fp, tree_fp, interpolation_size);
127 for (i = 0; i < interpolation_size; i++) {
128 HTS106_fclose(pdf_fp[i]);
129 HTS106_fclose(tree_fp[i]);
130 }
131 HTS106_free(pdf_fp);
132 HTS106_free(tree_fp);
133
134 return result;
135 }
136
137 /* HTS106_Engine_load_duration_from_fp: load duration pdfs, trees and number of state from file pointers */
HTS106_Engine_load_duration_from_fp(HTS106_Engine * engine,HTS106_File ** pdf_fp,HTS106_File ** tree_fp,int interpolation_size)138 HTS106_Boolean HTS106_Engine_load_duration_from_fp(HTS106_Engine * engine, HTS106_File ** pdf_fp, HTS106_File ** tree_fp, int interpolation_size)
139 {
140 int i;
141
142 if (HTS106_ModelSet_load_duration(&engine->ms, pdf_fp, tree_fp, interpolation_size) == FALSE) {
143 return FALSE;
144 }
145 engine->global.duration_iw = (double *) HTS106_calloc(interpolation_size, sizeof(double));
146 for (i = 0; i < interpolation_size; i++)
147 engine->global.duration_iw[i] = 1.0 / interpolation_size;
148
149 return TRUE;
150 }
151
152 /* HTS106_Engine_load_parameter_from_fn: load parameter pdfs, trees and windows from file names */
HTS106_Engine_load_parameter_from_fn(HTS106_Engine * engine,char ** pdf_fn,char ** tree_fn,char ** win_fn,int stream_index,HTS106_Boolean msd_flag,int window_size,int interpolation_size)153 HTS106_Boolean HTS106_Engine_load_parameter_from_fn(HTS106_Engine * engine, char **pdf_fn, char **tree_fn, char **win_fn, int stream_index, HTS106_Boolean msd_flag, int window_size, int interpolation_size)
154 {
155 int i;
156 HTS106_File **pdf_fp, **tree_fp, **win_fp;
157 HTS106_Boolean result;
158
159 pdf_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
160 tree_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
161 win_fp = (HTS106_File **) HTS106_calloc(window_size, sizeof(HTS106_File *));
162 for (i = 0; i < interpolation_size; i++) {
163 pdf_fp[i] = HTS106_fopen(pdf_fn[i], "rb");
164 tree_fp[i] = HTS106_fopen(tree_fn[i], "r");
165 }
166 for (i = 0; i < window_size; i++)
167 win_fp[i] = HTS106_fopen(win_fn[i], "r");
168 result = HTS106_Engine_load_parameter_from_fp(engine, pdf_fp, tree_fp, win_fp, stream_index, msd_flag, window_size, interpolation_size);
169 for (i = 0; i < interpolation_size; i++) {
170 HTS106_fclose(pdf_fp[i]);
171 HTS106_fclose(tree_fp[i]);
172 }
173 for (i = 0; i < window_size; i++)
174 HTS106_fclose(win_fp[i]);
175 HTS106_free(pdf_fp);
176 HTS106_free(tree_fp);
177 HTS106_free(win_fp);
178
179 return result;
180 }
181
182 /* HTS106_Engine_load_parameter_from_fp: load parameter pdfs, trees and windows from file pointers */
HTS106_Engine_load_parameter_from_fp(HTS106_Engine * engine,HTS106_File ** pdf_fp,HTS106_File ** tree_fp,HTS106_File ** win_fp,int stream_index,HTS106_Boolean msd_flag,int window_size,int interpolation_size)183 HTS106_Boolean HTS106_Engine_load_parameter_from_fp(HTS106_Engine * engine, HTS106_File ** pdf_fp, HTS106_File ** tree_fp, HTS106_File ** win_fp, int stream_index, HTS106_Boolean msd_flag, int window_size, int interpolation_size)
184 {
185 int i;
186
187 if (HTS106_ModelSet_load_parameter(&engine->ms, pdf_fp, tree_fp, win_fp, stream_index, msd_flag, window_size, interpolation_size) == FALSE) {
188 return FALSE;
189 }
190 engine->global.parameter_iw[stream_index] = (double *) HTS106_calloc(interpolation_size, sizeof(double));
191 for (i = 0; i < interpolation_size; i++)
192 engine->global.parameter_iw[stream_index][i] = 1.0 / interpolation_size;
193
194 return TRUE;
195 }
196
197 /* HTS106_Engine_load_gv_from_fn: load GV pdfs and trees from file names */
HTS106_Engine_load_gv_from_fn(HTS106_Engine * engine,char ** pdf_fn,char ** tree_fn,int stream_index,int interpolation_size)198 HTS106_Boolean HTS106_Engine_load_gv_from_fn(HTS106_Engine * engine, char **pdf_fn, char **tree_fn, int stream_index, int interpolation_size)
199 {
200 int i;
201 HTS106_File **pdf_fp, **tree_fp;
202 HTS106_Boolean result;
203
204 pdf_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
205 if (tree_fn)
206 tree_fp = (HTS106_File **) HTS106_calloc(interpolation_size, sizeof(HTS106_File *));
207 else
208 tree_fp = NULL;
209 for (i = 0; i < interpolation_size; i++) {
210 pdf_fp[i] = HTS106_fopen(pdf_fn[i], "rb");
211 if (tree_fn) {
212 if (tree_fn[i])
213 tree_fp[i] = HTS106_fopen(tree_fn[i], "r");
214 else
215 tree_fp[i] = NULL;
216 }
217 }
218 result = HTS106_Engine_load_gv_from_fp(engine, pdf_fp, tree_fp, stream_index, interpolation_size);
219 for (i = 0; i < interpolation_size; i++) {
220 HTS106_fclose(pdf_fp[i]);
221 if (tree_fp && tree_fp[i])
222 HTS106_fclose(tree_fp[i]);
223 }
224 HTS106_free(pdf_fp);
225 if (tree_fp)
226 HTS106_free(tree_fp);
227
228 return result;
229 }
230
231 /* HTS106_Engine_load_gv_from_fp: load GV pdfs and trees from file pointers */
HTS106_Engine_load_gv_from_fp(HTS106_Engine * engine,HTS106_File ** pdf_fp,HTS106_File ** tree_fp,int stream_index,int interpolation_size)232 HTS106_Boolean HTS106_Engine_load_gv_from_fp(HTS106_Engine * engine, HTS106_File ** pdf_fp, HTS106_File ** tree_fp, int stream_index, int interpolation_size)
233 {
234 int i;
235
236 if (HTS106_ModelSet_load_gv(&engine->ms, pdf_fp, tree_fp, stream_index, interpolation_size) == FALSE) {
237 return FALSE;
238 }
239 engine->global.gv_iw[stream_index] = (double *) HTS106_calloc(interpolation_size, sizeof(double));
240 for (i = 0; i < interpolation_size; i++)
241 engine->global.gv_iw[stream_index][i] = 1.0 / interpolation_size;
242
243 return TRUE;
244 }
245
246 /* HTS106_Engine_load_gv_switch_from_fn: load GV switch from file name */
HTS106_Engine_load_gv_switch_from_fn(HTS106_Engine * engine,char * fn)247 HTS106_Boolean HTS106_Engine_load_gv_switch_from_fn(HTS106_Engine * engine, char *fn)
248 {
249 HTS106_File *fp = HTS106_fopen(fn, "r");
250 HTS106_Boolean result;
251
252 result = HTS106_Engine_load_gv_switch_from_fp(engine, fp);
253 HTS106_fclose(fp);
254
255 return result;
256 }
257
258 /* HTS106_Engine_load_gv_switch_from_fp: load GV switch from file pointer */
HTS106_Engine_load_gv_switch_from_fp(HTS106_Engine * engine,HTS106_File * fp)259 HTS106_Boolean HTS106_Engine_load_gv_switch_from_fp(HTS106_Engine * engine, HTS106_File * fp)
260 {
261 return HTS106_ModelSet_load_gv_switch(&engine->ms, fp);
262 }
263
264 /* HTS106_Engine_set_sampling_rate: set sampling rate */
HTS106_Engine_set_sampling_rate(HTS106_Engine * engine,int i)265 void HTS106_Engine_set_sampling_rate(HTS106_Engine * engine, int i)
266 {
267 if (i < 1)
268 i = 1;
269 if (i > 48000)
270 i = 48000;
271 engine->global.sampling_rate = i;
272 HTS106_Audio_set_parameter(&engine->audio, engine->global.sampling_rate, engine->global.audio_buff_size);
273 }
274
275 /* HTS106_Engine_get_sampling_rate: get sampling rate */
HTS106_Engine_get_sampling_rate(HTS106_Engine * engine)276 int HTS106_Engine_get_sampling_rate(HTS106_Engine * engine)
277 {
278 return engine->global.sampling_rate;
279 }
280
281 /* HTS106_Engine_set_fperiod: set frame shift */
HTS106_Engine_set_fperiod(HTS106_Engine * engine,int i)282 void HTS106_Engine_set_fperiod(HTS106_Engine * engine, int i)
283 {
284 if (i < 1)
285 i = 1;
286 if (i > 48000)
287 i = 48000;
288 engine->global.fperiod = i;
289 }
290
291 /* HTS106_Engine_get_fperiod: get frame shift */
HTS106_Engine_get_fperiod(HTS106_Engine * engine)292 int HTS106_Engine_get_fperiod(HTS106_Engine * engine)
293 {
294 return engine->global.fperiod;
295 }
296
297 /* HTS106_Engine_set_alpha: set alpha */
HTS106_Engine_set_alpha(HTS106_Engine * engine,double f)298 void HTS106_Engine_set_alpha(HTS106_Engine * engine, double f)
299 {
300 if (f < 0.0)
301 f = 0.0;
302 if (f > 1.0)
303 f = 1.0;
304 engine->global.alpha = f;
305 }
306
307 /* HTS106_Engine_set_gamma: set gamma (Gamma = -1/i: if i=0 then Gamma=0) */
HTS106_Engine_set_gamma(HTS106_Engine * engine,int i)308 void HTS106_Engine_set_gamma(HTS106_Engine * engine, int i)
309 {
310 if (i < 0)
311 i = 0;
312 engine->global.stage = i;
313 }
314
315 /* HTS106_Engine_set_log_gain: set log gain flag (for LSP) */
HTS106_Engine_set_log_gain(HTS106_Engine * engine,HTS106_Boolean i)316 void HTS106_Engine_set_log_gain(HTS106_Engine * engine, HTS106_Boolean i)
317 {
318 engine->global.use_log_gain = i;
319 }
320
321 /* HTS106_Engine_set_beta: set beta */
HTS106_Engine_set_beta(HTS106_Engine * engine,double f)322 void HTS106_Engine_set_beta(HTS106_Engine * engine, double f)
323 {
324 if (f < -0.8)
325 f = -0.8;
326 if (f > 0.8)
327 f = 0.8;
328 engine->global.beta = f;
329 }
330
331 /* HTS106_Engine_set_audio_buff_size: set audio buffer size */
HTS106_Engine_set_audio_buff_size(HTS106_Engine * engine,int i)332 void HTS106_Engine_set_audio_buff_size(HTS106_Engine * engine, int i)
333 {
334 if (i < 0)
335 i = 0;
336 if (i > 48000)
337 i = 48000;
338 engine->global.audio_buff_size = i;
339 HTS106_Audio_set_parameter(&engine->audio, engine->global.sampling_rate, engine->global.audio_buff_size);
340 }
341
342 /* HTS106_Engine_get_audio_buff_size: get audio buffer size */
HTS106_Engine_get_audio_buff_size(HTS106_Engine * engine)343 int HTS106_Engine_get_audio_buff_size(HTS106_Engine * engine)
344 {
345 return engine->global.audio_buff_size;
346 }
347
348 /* HTS106_Egnine_set_msd_threshold: set MSD threshold */
HTS106_Engine_set_msd_threshold(HTS106_Engine * engine,int stream_index,double f)349 void HTS106_Engine_set_msd_threshold(HTS106_Engine * engine, int stream_index, double f)
350 {
351 if (f < 0.0)
352 f = 0.0;
353 if (f > 1.0)
354 f = 1.0;
355 engine->global.msd_threshold[stream_index] = f;
356 }
357
358 /* HTS106_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
HTS106_Engine_set_duration_interpolation_weight(HTS106_Engine * engine,int interpolation_index,double f)359 void HTS106_Engine_set_duration_interpolation_weight(HTS106_Engine * engine, int interpolation_index, double f)
360 {
361 engine->global.duration_iw[interpolation_index] = f;
362 }
363
364 /* HTS106_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
HTS106_Engine_set_parameter_interpolation_weight(HTS106_Engine * engine,int stream_index,int interpolation_index,double f)365 void HTS106_Engine_set_parameter_interpolation_weight(HTS106_Engine * engine, int stream_index, int interpolation_index, double f)
366 {
367 engine->global.parameter_iw[stream_index][interpolation_index] = f;
368 }
369
370 /* HTS106_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
HTS106_Engine_set_gv_interpolation_weight(HTS106_Engine * engine,int stream_index,int interpolation_index,double f)371 void HTS106_Engine_set_gv_interpolation_weight(HTS106_Engine * engine, int stream_index, int interpolation_index, double f)
372 {
373 engine->global.gv_iw[stream_index][interpolation_index] = f;
374 }
375
376 /* HTS106_Engine_set_gv_weight: set GV weight */
HTS106_Engine_set_gv_weight(HTS106_Engine * engine,int stream_index,double f)377 void HTS106_Engine_set_gv_weight(HTS106_Engine * engine, int stream_index, double f)
378 {
379 if (f < 0.0)
380 f = 0.0;
381 if (f > 2.0)
382 f = 2.0;
383 engine->global.gv_weight[stream_index] = f;
384 }
385
386 /* HTS106_Engine_set_stop_flag: set stop flag */
HTS106_Engine_set_stop_flag(HTS106_Engine * engine,HTS106_Boolean b)387 void HTS106_Engine_set_stop_flag(HTS106_Engine * engine, HTS106_Boolean b)
388 {
389 engine->global.stop = b;
390 }
391
392 /* HTS106_Engine_set_volume: set volume */
HTS106_Engine_set_volume(HTS106_Engine * engine,double f)393 void HTS106_Engine_set_volume(HTS106_Engine * engine, double f)
394 {
395 if (f < 0.0)
396 f = 0.0;
397 engine->global.volume = f;
398 }
399
400 /* HTS106_Engine_get_total_state: get total number of state */
HTS106_Engine_get_total_state(HTS106_Engine * engine)401 int HTS106_Engine_get_total_state(HTS106_Engine * engine)
402 {
403 return HTS106_SStreamSet_get_total_state(&engine->sss);
404 }
405
406 /* HTS106_Engine_set_state_mean: set mean value of state */
HTS106_Engine_set_state_mean(HTS106_Engine * engine,int stream_index,int state_index,int vector_index,double f)407 void HTS106_Engine_set_state_mean(HTS106_Engine * engine, int stream_index, int state_index, int vector_index, double f)
408 {
409 HTS106_SStreamSet_set_mean(&engine->sss, stream_index, state_index, vector_index, f);
410 }
411
412 /* HTS106_Engine_get_state_mean: get mean value of state */
HTS106_Engine_get_state_mean(HTS106_Engine * engine,int stream_index,int state_index,int vector_index)413 double HTS106_Engine_get_state_mean(HTS106_Engine * engine, int stream_index, int state_index, int vector_index)
414 {
415 return HTS106_SStreamSet_get_mean(&engine->sss, stream_index, state_index, vector_index);
416 }
417
418 /* HTS106_Engine_get_state_duration: get state duration */
HTS106_Engine_get_state_duration(HTS106_Engine * engine,int state_index)419 int HTS106_Engine_get_state_duration(HTS106_Engine * engine, int state_index)
420 {
421 return HTS106_SStreamSet_get_duration(&engine->sss, state_index);
422 }
423
424 /* HTS106_Engine_get_nstream: get number of stream */
HTS106_Engine_get_nstream(HTS106_Engine * engine)425 int HTS106_Engine_get_nstream(HTS106_Engine * engine)
426 {
427 return HTS106_ModelSet_get_nstream(&engine->ms);
428 }
429
430 /* HTS106_Engine_get_nstate: get number of state */
HTS106_Engine_get_nstate(HTS106_Engine * engine)431 int HTS106_Engine_get_nstate(HTS106_Engine * engine)
432 {
433 return HTS106_ModelSet_get_nstate(&engine->ms);
434 }
435
436 /* HTS106_Engine_load_label_from_fn: load label from file name */
HTS106_Engine_load_label_from_fn(HTS106_Engine * engine,char * fn)437 void HTS106_Engine_load_label_from_fn(HTS106_Engine * engine, char *fn)
438 {
439 HTS106_Label_load_from_fn(&engine->label, engine->global.sampling_rate, engine->global.fperiod, fn);
440 }
441
442 /* HTS106_Engine_load_label_from_fp: load label from file pointer */
HTS106_Engine_load_label_from_fp(HTS106_Engine * engine,HTS106_File * fp)443 void HTS106_Engine_load_label_from_fp(HTS106_Engine * engine, HTS106_File * fp)
444 {
445 HTS106_Label_load_from_fp(&engine->label, engine->global.sampling_rate, engine->global.fperiod, fp);
446 }
447
448 /* HTS106_Engine_load_label_from_string: load label from string */
HTS106_Engine_load_label_from_string(HTS106_Engine * engine,char * data)449 void HTS106_Engine_load_label_from_string(HTS106_Engine * engine, char *data)
450 {
451 HTS106_Label_load_from_string(&engine->label, engine->global.sampling_rate, engine->global.fperiod, data);
452 }
453
454 /* HTS106_Engine_load_label_from_string_list: load label from string list */
HTS106_Engine_load_label_from_string_list(HTS106_Engine * engine,char ** data,int size)455 void HTS106_Engine_load_label_from_string_list(HTS106_Engine * engine, char **data, int size)
456 {
457 HTS106_Label_load_from_string_list(&engine->label, engine->global.sampling_rate, engine->global.fperiod, data, size);
458 }
459
460 /* HTS106_Engine_create_sstream: parse label and determine state duration */
HTS106_Engine_create_sstream(HTS106_Engine * engine)461 HTS106_Boolean HTS106_Engine_create_sstream(HTS106_Engine * engine)
462 {
463 return HTS106_SStreamSet_create(&engine->sss, &engine->ms, &engine->label, engine->global.duration_iw, engine->global.parameter_iw, engine->global.gv_iw);
464 }
465
466 /* HTS106_Engine_create_pstream: generate speech parameter vector sequence */
HTS106_Engine_create_pstream(HTS106_Engine * engine)467 HTS106_Boolean HTS106_Engine_create_pstream(HTS106_Engine * engine)
468 {
469 return HTS106_PStreamSet_create(&engine->pss, &engine->sss, engine->global.msd_threshold, engine->global.gv_weight);
470 }
471
472 /* HTS106_Engine_create_gstream: synthesis speech */
HTS106_Engine_create_gstream(HTS106_Engine * engine)473 HTS106_Boolean HTS106_Engine_create_gstream(HTS106_Engine * engine)
474 {
475 return HTS106_GStreamSet_create(&engine->gss, &engine->pss, engine->global.stage, engine->global.use_log_gain, engine->global.sampling_rate, engine->global.fperiod, engine->global.alpha, engine->global.beta, &engine->global.stop, engine->global.volume, engine->global.audio_buff_size > 0 ? &engine->audio : NULL);
476 }
477
478 /* HTS106_Engine_save_information: output trace information */
HTS106_Engine_save_information(HTS106_Engine * engine,HTS106_File * fp)479 void HTS106_Engine_save_information(HTS106_Engine * engine, HTS106_File * fp)
480 {
481 int i, j, k, l, m, n;
482 double temp;
483 HTS106_Global *global = &engine->global;
484 HTS106_ModelSet *ms = &engine->ms;
485 HTS106_Label *label = &engine->label;
486 HTS106_SStreamSet *sss = &engine->sss;
487 HTS106_PStreamSet *pss = &engine->pss;
488
489 /* global parameter */
490 fprintf(fp, "[Global parameter]\n");
491 fprintf(fp, "Sampring frequency -> %8d(Hz)\n", global->sampling_rate);
492 fprintf(fp, "Frame period -> %8d(point)\n", global->fperiod);
493 fprintf(fp, " %8.5f(msec)\n", 1e+3 * global->fperiod / global->sampling_rate);
494 fprintf(fp, "All-pass constant -> %8.5f\n", (float) global->alpha);
495 fprintf(fp, "Gamma -> %8.5f\n", (float) (global->stage == 0 ? 0.0 : -1.0 / global->stage));
496 if (global->stage != 0)
497 fprintf(fp, "Log gain flag -> %s\n", global->use_log_gain ? "TRUE" : "FALSE");
498 fprintf(fp, "Postfiltering coefficient -> %8.5f\n", (float) global->beta);
499 fprintf(fp, "Audio buffer size -> %8d(sample)\n", global->audio_buff_size);
500 fprintf(fp, "\n");
501
502 /* duration parameter */
503 fprintf(fp, "[Duration parameter]\n");
504 fprintf(fp, "Number of states -> %8d\n", HTS106_ModelSet_get_nstate(ms));
505 fprintf(fp, " Interpolation -> %8d\n", HTS106_ModelSet_get_duration_interpolation_size(ms));
506 /* check interpolation */
507 for (i = 0, temp = 0.0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
508 temp += global->duration_iw[i];
509 for (i = 0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
510 if (global->duration_iw[i] != 0.0)
511 global->duration_iw[i] /= temp;
512 for (i = 0; i < HTS106_ModelSet_get_duration_interpolation_size(ms); i++)
513 fprintf(fp, " Interpolation weight[%2d] -> %8.0f(%%)\n", i, (float) (100 * global->duration_iw[i]));
514 fprintf(fp, "\n");
515
516 fprintf(fp, "[Stream parameter]\n");
517 for (i = 0; i < HTS106_ModelSet_get_nstream(ms); i++) {
518 /* stream parameter */
519 fprintf(fp, "Stream[%2d] vector length -> %8d\n", i, HTS106_ModelSet_get_vector_length(ms, i));
520 fprintf(fp, " Dynamic window size -> %8d\n", HTS106_ModelSet_get_window_size(ms, i));
521 /* interpolation */
522 fprintf(fp, " Interpolation -> %8d\n", HTS106_ModelSet_get_parameter_interpolation_size(ms, i));
523 for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
524 temp += global->parameter_iw[i][j];
525 for (j = 0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
526 if (global->parameter_iw[i][j] != 0.0)
527 global->parameter_iw[i][j] /= temp;
528 for (j = 0; j < HTS106_ModelSet_get_parameter_interpolation_size(ms, i); j++)
529 fprintf(fp, " Interpolation weight[%2d] -> %8.0f(%%)\n", j, (float) (100 * global->parameter_iw[i][j]));
530 /* MSD */
531 if (HTS106_ModelSet_is_msd(ms, i)) { /* for MSD */
532 fprintf(fp, " MSD flag -> TRUE\n");
533 fprintf(fp, " MSD threshold -> %8.5f\n", global->msd_threshold[i]);
534 } else { /* for non MSD */
535 fprintf(fp, " MSD flag -> FALSE\n");
536 }
537 /* GV */
538 if (HTS106_ModelSet_use_gv(ms, i)) {
539 fprintf(fp, " GV flag -> TRUE\n");
540 if (HTS106_ModelSet_have_gv_switch(ms)) {
541 if (HTS106_ModelSet_have_gv_tree(ms, i)) {
542 fprintf(fp, " GV type -> CDGV\n");
543 fprintf(fp, " -> +SWITCH\n");
544 } else
545 fprintf(fp, " GV type -> SWITCH\n");
546 } else {
547 if (HTS106_ModelSet_have_gv_tree(ms, i))
548 fprintf(fp, " GV type -> CDGV\n");
549 else
550 fprintf(fp, " GV type -> NORMAL\n");
551 }
552 fprintf(fp, " GV weight -> %8.0f(%%)\n", (float) (100 * global->gv_weight[i]));
553 fprintf(fp, " GV interpolation size -> %8d\n", HTS106_ModelSet_get_gv_interpolation_size(ms, i));
554 /* interpolation */
555 for (j = 0, temp = 0.0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
556 temp += global->gv_iw[i][j];
557 for (j = 0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
558 if (global->gv_iw[i][j] != 0.0)
559 global->gv_iw[i][j] /= temp;
560 for (j = 0; j < HTS106_ModelSet_get_gv_interpolation_size(ms, i); j++)
561 fprintf(fp, " GV interpolation weight[%2d] -> %8.0f(%%)\n", j, (float) (100 * global->gv_iw[i][j]));
562 } else {
563 fprintf(fp, " GV flag -> FALSE\n");
564 }
565 }
566 fprintf(fp, "\n");
567
568 /* generated sequence */
569 fprintf(fp, "[Generated sequence]\n");
570 fprintf(fp, "Number of HMMs -> %8d\n", HTS106_Label_get_size(label));
571 fprintf(fp, "Number of stats -> %8d\n", HTS106_Label_get_size(label) * HTS106_ModelSet_get_nstate(ms));
572 fprintf(fp, "Length of this speech -> %8.3f(sec)\n", (float) ((double) HTS106_PStreamSet_get_total_frame(pss) * global->fperiod / global->sampling_rate));
573 fprintf(fp, " -> %8.3d(frames)\n", HTS106_PStreamSet_get_total_frame(pss) * global->fperiod);
574
575 for (i = 0; i < HTS106_Label_get_size(label); i++) {
576 fprintf(fp, "HMM[%2d]\n", i);
577 fprintf(fp, " Name -> %s\n", HTS106_Label_get_string(label, i));
578 fprintf(fp, " Duration\n");
579 for (j = 0; j < HTS106_ModelSet_get_duration_interpolation_size(ms); j++) {
580 fprintf(fp, " Interpolation[%2d]\n", j);
581 HTS106_ModelSet_get_duration_index(ms, HTS106_Label_get_string(label, i), NULL, &k, &l, j);
582 fprintf(fp, " Tree index -> %8d\n", k);
583 fprintf(fp, " PDF index -> %8d\n", l);
584 }
585 for (j = 0; j < HTS106_ModelSet_get_nstate(ms); j++) {
586 fprintf(fp, " State[%2d]\n", j + 2);
587 fprintf(fp, " Length -> %8d(frames)\n", HTS106_SStreamSet_get_duration(sss, i * HTS106_ModelSet_get_nstate(ms) + j));
588 for (k = 0; k < HTS106_ModelSet_get_nstream(ms); k++) {
589 fprintf(fp, " Stream[%2d]\n", k);
590 if (HTS106_ModelSet_is_msd(ms, k)) {
591 if (HTS106_SStreamSet_get_msd(sss, k, i * HTS106_ModelSet_get_nstate(ms) + j) > global->msd_threshold[k])
592 fprintf(fp, " MSD flag -> TRUE\n");
593 else
594 fprintf(fp, " MSD flag -> FALSE\n");
595 }
596 for (l = 0; l < HTS106_ModelSet_get_parameter_interpolation_size(ms, k); l++) {
597 fprintf(fp, " Interpolation[%2d]\n", l);
598 HTS106_ModelSet_get_parameter_index(ms, HTS106_Label_get_string(label, i), NULL, &m, &n, k, j + 2, l);
599 fprintf(fp, " Tree index -> %8d\n", m);
600 fprintf(fp, " PDF index -> %8d\n", n);
601 }
602 }
603 }
604 }
605 }
606
607 /* HTS106_Engine_save_label: output label with time */
HTS106_Engine_save_label(HTS106_Engine * engine,HTS106_File * fp)608 void HTS106_Engine_save_label(HTS106_Engine * engine, HTS106_File * fp)
609 {
610 int i, j;
611 int frame, state, duration;
612
613 HTS106_Label *label = &engine->label;
614 HTS106_SStreamSet *sss = &engine->sss;
615 const int nstate = HTS106_ModelSet_get_nstate(&engine->ms);
616 const double rate = engine->global.fperiod * 1e+7 / engine->global.sampling_rate;
617
618 for (i = 0, state = 0, frame = 0; i < HTS106_Label_get_size(label); i++) {
619 for (j = 0, duration = 0; j < nstate; j++)
620 duration += HTS106_SStreamSet_get_duration(sss, state++);
621 /* in HTK & HTS format */
622 fprintf(fp, "%lu %lu %s\n", (unsigned long) (frame * rate), (unsigned long) ((frame + duration) * rate), HTS106_Label_get_string(label, i));
623 frame += duration;
624 }
625 }
626
627 /* HTS106_Engine_save_generated_parameter: output generated parameter */
HTS106_Engine_save_generated_parameter(HTS106_Engine * engine,HTS106_File * fp,int stream_index)628 void HTS106_Engine_save_generated_parameter(HTS106_Engine * engine, HTS106_File * fp, int stream_index)
629 {
630 int i, j;
631 float temp;
632 HTS106_GStreamSet *gss = &engine->gss;
633
634 for (i = 0; i < HTS106_GStreamSet_get_total_frame(gss); i++)
635 for (j = 0; j < HTS106_GStreamSet_get_static_length(gss, stream_index); j++) {
636 temp = (float) HTS106_GStreamSet_get_parameter(gss, stream_index, i, j);
637 fwrite(&temp, sizeof(float), 1, fp);
638 }
639 }
640
641 /* HTS106_Engine_save_generated_speech: output generated speech */
HTS106_Engine_save_generated_speech(HTS106_Engine * engine,HTS106_File * fp)642 void HTS106_Engine_save_generated_speech(HTS106_Engine * engine, HTS106_File * fp)
643 {
644 int i;
645 short temp;
646 HTS106_GStreamSet *gss = &engine->gss;
647
648 for (i = 0; i < HTS106_GStreamSet_get_total_nsample(gss); i++) {
649 temp = HTS106_GStreamSet_get_speech(gss, i);
650 fwrite(&temp, sizeof(short), 1, fp);
651 }
652 }
653
654 /* HTS106_Engine_save_riff: output RIFF format file */
HTS106_Engine_save_riff(HTS106_Engine * engine,HTS106_File * fp)655 void HTS106_Engine_save_riff(HTS106_Engine * engine, HTS106_File * fp)
656 {
657 int i;
658 short temp;
659
660 HTS106_GStreamSet *gss = &engine->gss;
661 char data_01_04[] = { 'R', 'I', 'F', 'F' };
662 int data_05_08 = HTS106_GStreamSet_get_total_nsample(gss) * sizeof(short) + 36;
663 char data_09_12[] = { 'W', 'A', 'V', 'E' };
664 char data_13_16[] = { 'f', 'm', 't', ' ' };
665 int data_17_20 = 16;
666 short data_21_22 = 1; /* PCM */
667 short data_23_24 = 1; /* monoral */
668 int data_25_28 = engine->global.sampling_rate;
669 int data_29_32 = engine->global.sampling_rate * sizeof(short);
670 short data_33_34 = sizeof(short);
671 short data_35_36 = (short) (sizeof(short) * 8);
672 char data_37_40[] = { 'd', 'a', 't', 'a' };
673 int data_41_44 = HTS106_GStreamSet_get_total_nsample(gss) * sizeof(short);
674
675 /* write header */
676 HTS106_fwrite_little_endian(data_01_04, sizeof(char), 4, fp);
677 HTS106_fwrite_little_endian(&data_05_08, sizeof(int), 1, fp);
678 HTS106_fwrite_little_endian(data_09_12, sizeof(char), 4, fp);
679 HTS106_fwrite_little_endian(data_13_16, sizeof(char), 4, fp);
680 HTS106_fwrite_little_endian(&data_17_20, sizeof(int), 1, fp);
681 HTS106_fwrite_little_endian(&data_21_22, sizeof(short), 1, fp);
682 HTS106_fwrite_little_endian(&data_23_24, sizeof(short), 1, fp);
683 HTS106_fwrite_little_endian(&data_25_28, sizeof(int), 1, fp);
684 HTS106_fwrite_little_endian(&data_29_32, sizeof(int), 1, fp);
685 HTS106_fwrite_little_endian(&data_33_34, sizeof(short), 1, fp);
686 HTS106_fwrite_little_endian(&data_35_36, sizeof(short), 1, fp);
687 HTS106_fwrite_little_endian(data_37_40, sizeof(char), 4, fp);
688 HTS106_fwrite_little_endian(&data_41_44, sizeof(int), 1, fp);
689 /* write data */
690 for (i = 0; i < HTS106_GStreamSet_get_total_nsample(gss); i++) {
691 temp = HTS106_GStreamSet_get_speech(gss, i);
692 HTS106_fwrite_little_endian(&temp, sizeof(short), 1, fp);
693 }
694 }
695
696 /* HTS106_Engine_refresh: free model per one time synthesis */
HTS106_Engine_refresh(HTS106_Engine * engine)697 void HTS106_Engine_refresh(HTS106_Engine * engine)
698 {
699 /* free generated parameter stream set */
700 HTS106_GStreamSet_clear(&engine->gss);
701 /* free parameter stream set */
702 HTS106_PStreamSet_clear(&engine->pss);
703 /* free state stream set */
704 HTS106_SStreamSet_clear(&engine->sss);
705 /* free label list */
706 HTS106_Label_clear(&engine->label);
707 /* stop flag */
708 engine->global.stop = FALSE;
709 }
710
711 /* HTS106_Engine_clear: free engine */
HTS106_Engine_clear(HTS106_Engine * engine)712 void HTS106_Engine_clear(HTS106_Engine * engine)
713 {
714 int i;
715
716 HTS106_free(engine->global.msd_threshold);
717 HTS106_free(engine->global.duration_iw);
718 for (i = 0; i < HTS106_ModelSet_get_nstream(&engine->ms); i++) {
719 HTS106_free(engine->global.parameter_iw[i]);
720 if (engine->global.gv_iw[i])
721 HTS106_free(engine->global.gv_iw[i]);
722 }
723 HTS106_free(engine->global.parameter_iw);
724 HTS106_free(engine->global.gv_iw);
725 HTS106_free(engine->global.gv_weight);
726
727 HTS106_ModelSet_clear(&engine->ms);
728 HTS106_Audio_clear(&engine->audio);
729 }
730
731 /* HTS106_get_copyright: write copyright to string */
HTS106_get_copyright(char * str)732 void HTS106_get_copyright(char *str)
733 {
734 int i, nCopyright = HTS106_NCOPYRIGHT;
735 char url[] = HTS106_URL, version[] = HTS106_VERSION;
736 char *copyright[] = { HTS106_COPYRIGHT };
737
738 sprintf(str, "\nThe HMM-Based Speech Synthesis Engine \"hts_engine API\"\n");
739 sprintf(str, "%shts_engine API version %s (%s)\n", str, version, url);
740 for (i = 0; i < nCopyright; i++) {
741 if (i == 0)
742 sprintf(str, "%sCopyright (C) %s\n", str, copyright[i]);
743 else
744 sprintf(str, "%s %s\n", str, copyright[i]);
745 }
746 sprintf(str, "%sAll rights reserved.\n", str);
747
748 return;
749 }
750
751 /* HTS106_show_copyright: write copyright to file pointer */
HTS106_show_copyright(HTS106_File * fp)752 void HTS106_show_copyright(HTS106_File * fp)
753 {
754 char buf[HTS106_MAXBUFLEN];
755
756 HTS106_get_copyright(buf);
757 fprintf(fp, "%s", buf);
758
759 return;
760 }
761
762 HTS106_ENGINE_C_END;
763
764 #endif /* !HTS106_ENGINE_C */
765