1 /* ----------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 /* developed by HTS Working Group */
4 /* http://hts-engine.sourceforge.net/ */
5 /* ----------------------------------------------------------------- */
6 /* */
7 /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 /* Department of Computer Science */
9 /* */
10 /* 2001-2008 Tokyo Institute of Technology */
11 /* Interdisciplinary Graduate School of */
12 /* Science and Engineering */
13 /* */
14 /* All rights reserved. */
15 /* */
16 /* Redistribution and use in source and binary forms, with or */
17 /* without modification, are permitted provided that the following */
18 /* conditions are met: */
19 /* */
20 /* - Redistributions of source code must retain the above copyright */
21 /* notice, this list of conditions and the following disclaimer. */
22 /* - Redistributions in binary form must reproduce the above */
23 /* copyright notice, this list of conditions and the following */
24 /* disclaimer in the documentation and/or other materials provided */
25 /* with the distribution. */
26 /* - Neither the name of the HTS working group nor the names of its */
27 /* contributors may be used to endorse or promote products derived */
28 /* from this software without specific prior written permission. */
29 /* */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 /* POSSIBILITY OF SUCH DAMAGE. */
43 /* ----------------------------------------------------------------- */
44
45 #ifndef HTS_ENGINE_C
46 #define HTS_ENGINE_C
47
48 #ifdef __cplusplus
49 #define HTS_ENGINE_C_START extern "C" {
50 #define HTS_ENGINE_C_END }
51 #else
52 #define HTS_ENGINE_C_START
53 #define HTS_ENGINE_C_END
54 #endif /* __CPLUSPLUS */
55
56 HTS_ENGINE_C_START;
57
58 #include <stdlib.h>
59
60 #include "HTS_engine.h"
61
62 /* usage: output usage */
usage(void)63 void usage(void)
64 {
65 fprintf(stderr, "%s\n", HTS_COPYRIGHT);
66 fprintf(stderr, "hts_engine - The HMM-based speech synthesis engine \"hts_engine API\"\n");
67 fprintf(stderr, "\n");
68 fprintf(stderr, " usage:\n");
69 fprintf(stderr, " hts_engine [ options ] [ infile ]\n");
70 fprintf(stderr, " options: [ def][ min-- max]\n");
71 fprintf(stderr, " -m htsvoice : HTS voice files [ N/A]\n");
72 fprintf(stderr, " -od s : filename of output label with duration [ N/A]\n");
73 fprintf(stderr, " -om s : filename of output spectrum [ N/A]\n");
74 fprintf(stderr, " -of s : filename of output log F0 [ N/A]\n");
75 fprintf(stderr, " -ol s : filename of output low-pass filter [ N/A]\n");
76 fprintf(stderr, " -or s : filename of output raw audio (generated speech) [ N/A]\n");
77 fprintf(stderr, " -ow s : filename of output wav audio (generated speech) [ N/A]\n");
78 fprintf(stderr, " -ot s : filename of output trace information [ N/A]\n");
79 fprintf(stderr, " -vp : use phoneme alignment for duration [ N/A]\n");
80 fprintf(stderr, " -i i f1 .. fi : enable interpolation & specify number(i),coefficient(f) [ N/A]\n");
81 fprintf(stderr, " -s i : sampling frequency [ auto][ 1-- ]\n");
82 fprintf(stderr, " -p i : frame period (point) [ auto][ 1-- ]\n");
83 fprintf(stderr, " -a f : all-pass constant [ auto][ 0.0-- 1.0]\n");
84 fprintf(stderr, " -b f : postfiltering coefficient [ 0.0][ 0.0-- 1.0]\n");
85 fprintf(stderr, " -r f : speech speed rate [ 1.0][ 0.0-- ]\n");
86 fprintf(stderr, " -fm f : additional half-tone [ 0.0][ -- ]\n");
87 fprintf(stderr, " -u f : voiced/unvoiced threshold [ 0.5][ 0.0-- 1.0]\n");
88 fprintf(stderr, " -jm f : weight of GV for spectrum [ 1.0][ 0.0-- ]\n");
89 fprintf(stderr, " -jf f : weight of GV for log F0 [ 1.0][ 0.0-- ]\n");
90 fprintf(stderr, " -g f : volume (dB) [ 0.0][ -- ]\n");
91 fprintf(stderr, " -z i : audio buffer size (if i==0, turn off) [ 0][ 0-- ]\n");
92 fprintf(stderr, " infile:\n");
93 fprintf(stderr, " label file\n");
94 fprintf(stderr, " note:\n");
95 fprintf(stderr, " generated spectrum, log F0, and low-pass filter coefficient\n");
96 fprintf(stderr, " sequences are saved in natural endian, binary (float) format.\n");
97 fprintf(stderr, "\n");
98
99 exit(0);
100 }
101
main(int argc,char ** argv)102 int main(int argc, char **argv)
103 {
104 int i;
105 double f;
106
107 /* hts_engine API */
108 HTS_Engine engine;
109
110 /* HTS voices */
111 size_t num_voices;
112 char **fn_voices;
113
114 /* input label file name */
115 char *labfn = NULL;
116
117 /* output file pointers */
118 FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL;
119
120 /* interpolation weights */
121 size_t num_interpolation_weights;
122
123 /* output usage */
124 if (argc <= 1)
125 usage();
126
127 /* initialize hts_engine API */
128 HTS_Engine_initialize(&engine);
129
130 /* get HTS voice file names */
131 num_voices = 0;
132 fn_voices = (char **) malloc(argc * sizeof(char *));
133 for (i = 0; i < argc; i++) {
134 if (argv[i][0] == '-' && argv[i][1] == 'm')
135 fn_voices[num_voices++] = argv[++i];
136 if (argv[i][0] == '-' && argv[i][1] == 'h')
137 usage();
138 }
139 if (num_voices == 0) {
140 fprintf(stderr, "Error: HTS voice must be specified.\n");
141 free(fn_voices);
142 exit(1);
143 }
144
145 /* load HTS voices */
146 if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) {
147 fprintf(stderr, "Error: HTS voices cannot be loaded.\n");
148 free(fn_voices);
149 HTS_Engine_clear(&engine);
150 exit(1);
151 }
152 free(fn_voices);
153
154 /* get options */
155 while (--argc) {
156 if (**++argv == '-') {
157 switch (*(*argv + 1)) {
158 case 'v':
159 switch (*(*argv + 2)) {
160 case 'p':
161 HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE);
162 break;
163 default:
164 fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2));
165 HTS_Engine_clear(&engine);
166 exit(1);
167 }
168 break;
169 case 'o':
170 switch (*(*argv + 2)) {
171 case 'w':
172 wavfp = fopen(*++argv, "wb");
173 break;
174 case 'r':
175 rawfp = fopen(*++argv, "wb");
176 break;
177 case 'd':
178 durfp = fopen(*++argv, "wt");
179 break;
180 case 'm':
181 mgcfp = fopen(*++argv, "wb");
182 break;
183 case 'f':
184 case 'p':
185 lf0fp = fopen(*++argv, "wb");
186 break;
187 case 'l':
188 lpffp = fopen(*++argv, "wb");
189 break;
190 case 't':
191 tracefp = fopen(*++argv, "wt");
192 break;
193 default:
194 fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2));
195 HTS_Engine_clear(&engine);
196 exit(1);
197 }
198 --argc;
199 break;
200 case 'h':
201 usage();
202 break;
203 case 'm':
204 argv++; /* HTS voices were already loaded */
205 --argc;
206 break;
207 case 's':
208 HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv));
209 --argc;
210 break;
211 case 'p':
212 HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv));
213 --argc;
214 break;
215 case 'a':
216 HTS_Engine_set_alpha(&engine, atof(*++argv));
217 --argc;
218 break;
219 case 'b':
220 HTS_Engine_set_beta(&engine, atof(*++argv));
221 --argc;
222 break;
223 case 'r':
224 HTS_Engine_set_speed(&engine, atof(*++argv));
225 --argc;
226 break;
227 case 'f':
228 switch (*(*argv + 2)) {
229 case 'm':
230 HTS_Engine_add_half_tone(&engine, atof(*++argv));
231 break;
232 default:
233 fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2));
234 HTS_Engine_clear(&engine);
235 exit(1);
236 }
237 --argc;
238 break;
239 case 'u':
240 HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv));
241 --argc;
242 break;
243 case 'i':
244 num_interpolation_weights = atoi(*++argv);
245 argc--;
246 if (num_interpolation_weights != num_voices) {
247 HTS_Engine_clear(&engine);
248 exit(1);
249 }
250 for (i = 0; i < num_interpolation_weights; i++) {
251 f = atof(*++argv);
252 argc--;
253 HTS_Engine_set_duration_interpolation_weight(&engine, i, f);
254 HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f);
255 HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f);
256 HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f);
257 HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f);
258 }
259 break;
260 case 'j':
261 switch (*(*argv + 2)) {
262 case 'm':
263 HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv));
264 break;
265 case 'f':
266 case 'p':
267 HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv));
268 break;
269 default:
270 fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2));
271 HTS_Engine_clear(&engine);
272 exit(1);
273 }
274 --argc;
275 break;
276 case 'g':
277 HTS_Engine_set_volume(&engine, atof(*++argv));
278 --argc;
279 break;
280 case 'z':
281 HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv));
282 --argc;
283 break;
284 default:
285 fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1));
286 HTS_Engine_clear(&engine);
287 exit(1);
288 }
289 } else {
290 labfn = *argv;
291 }
292 }
293
294 /* synthesize */
295 if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) {
296 fprintf(stderr, "Error: waveform cannot be synthesized.\n");
297 HTS_Engine_clear(&engine);
298 exit(1);
299 }
300
301 /* output */
302 if (tracefp != NULL)
303 HTS_Engine_save_information(&engine, tracefp);
304 if (durfp != NULL)
305 HTS_Engine_save_label(&engine, durfp);
306 if (rawfp)
307 HTS_Engine_save_generated_speech(&engine, rawfp);
308 if (wavfp)
309 HTS_Engine_save_riff(&engine, wavfp);
310 if (mgcfp)
311 HTS_Engine_save_generated_parameter(&engine, 0, mgcfp);
312 if (lf0fp)
313 HTS_Engine_save_generated_parameter(&engine, 1, lf0fp);
314 if (lpffp)
315 HTS_Engine_save_generated_parameter(&engine, 2, lpffp);
316
317 /* reset */
318 HTS_Engine_refresh(&engine);
319
320 /* free memory */
321 HTS_Engine_clear(&engine);
322
323 /* close files */
324 if (durfp != NULL)
325 fclose(durfp);
326 if (mgcfp != NULL)
327 fclose(mgcfp);
328 if (lf0fp != NULL)
329 fclose(lf0fp);
330 if (lpffp != NULL)
331 fclose(lpffp);
332 if (wavfp != NULL)
333 fclose(wavfp);
334 if (rawfp != NULL)
335 fclose(rawfp);
336 if (tracefp != NULL)
337 fclose(tracefp);
338
339 return 0;
340 }
341
342 HTS_ENGINE_C_END;
343
344 #endif /* !HTS_ENGINE_C */
345