1 /* ----------------------------------------------------------------- */
2 /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
3 /*           developed by HTS Working Group                          */
4 /*           http://hts-engine.sourceforge.net/                      */
5 /* ----------------------------------------------------------------- */
6 /*                                                                   */
7 /*  Copyright (c) 2001-2015  Nagoya Institute of Technology          */
8 /*                           Department of Computer Science          */
9 /*                                                                   */
10 /*                2001-2008  Tokyo Institute of Technology           */
11 /*                           Interdisciplinary Graduate School of    */
12 /*                           Science and Engineering                 */
13 /*                                                                   */
14 /* All rights reserved.                                              */
15 /*                                                                   */
16 /* Redistribution and use in source and binary forms, with or        */
17 /* without modification, are permitted provided that the following   */
18 /* conditions are met:                                               */
19 /*                                                                   */
20 /* - Redistributions of source code must retain the above copyright  */
21 /*   notice, this list of conditions and the following disclaimer.   */
22 /* - Redistributions in binary form must reproduce the above         */
23 /*   copyright notice, this list of conditions and the following     */
24 /*   disclaimer in the documentation and/or other materials provided */
25 /*   with the distribution.                                          */
26 /* - Neither the name of the HTS working group nor the names of its  */
27 /*   contributors may be used to endorse or promote products derived */
28 /*   from this software without specific prior written permission.   */
29 /*                                                                   */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
42 /* POSSIBILITY OF SUCH DAMAGE.                                       */
43 /* ----------------------------------------------------------------- */
44 
45 #ifndef HTS_ENGINE_C
46 #define HTS_ENGINE_C
47 
48 #ifdef __cplusplus
49 #define HTS_ENGINE_C_START extern "C" {
50 #define HTS_ENGINE_C_END   }
51 #else
52 #define HTS_ENGINE_C_START
53 #define HTS_ENGINE_C_END
54 #endif                          /* __CPLUSPLUS */
55 
56 HTS_ENGINE_C_START;
57 
58 #include <stdlib.h>
59 
60 #include "HTS_engine.h"
61 
62 /* usage: output usage */
usage(void)63 void usage(void)
64 {
65    fprintf(stderr, "%s\n", HTS_COPYRIGHT);
66    fprintf(stderr, "hts_engine - The HMM-based speech synthesis engine \"hts_engine API\"\n");
67    fprintf(stderr, "\n");
68    fprintf(stderr, "  usage:\n");
69    fprintf(stderr, "    hts_engine [ options ] [ infile ]\n");
70    fprintf(stderr, "  options:                                                                   [  def][ min-- max]\n");
71    fprintf(stderr, "    -m  htsvoice   : HTS voice files                                         [  N/A]\n");
72    fprintf(stderr, "    -od s          : filename of output label with duration                  [  N/A]\n");
73    fprintf(stderr, "    -om s          : filename of output spectrum                             [  N/A]\n");
74    fprintf(stderr, "    -of s          : filename of output log F0                               [  N/A]\n");
75    fprintf(stderr, "    -ol s          : filename of output low-pass filter                      [  N/A]\n");
76    fprintf(stderr, "    -or s          : filename of output raw audio (generated speech)         [  N/A]\n");
77    fprintf(stderr, "    -ow s          : filename of output wav audio (generated speech)         [  N/A]\n");
78    fprintf(stderr, "    -ot s          : filename of output trace information                    [  N/A]\n");
79    fprintf(stderr, "    -vp            : use phoneme alignment for duration                      [  N/A]\n");
80    fprintf(stderr, "    -i  i f1 .. fi : enable interpolation & specify number(i),coefficient(f) [  N/A]\n");
81    fprintf(stderr, "    -s  i          : sampling frequency                                      [ auto][   1--    ]\n");
82    fprintf(stderr, "    -p  i          : frame period (point)                                    [ auto][   1--    ]\n");
83    fprintf(stderr, "    -a  f          : all-pass constant                                       [ auto][ 0.0-- 1.0]\n");
84    fprintf(stderr, "    -b  f          : postfiltering coefficient                               [  0.0][ 0.0-- 1.0]\n");
85    fprintf(stderr, "    -r  f          : speech speed rate                                       [  1.0][ 0.0--    ]\n");
86    fprintf(stderr, "    -fm f          : additional half-tone                                    [  0.0][    --    ]\n");
87    fprintf(stderr, "    -u  f          : voiced/unvoiced threshold                               [  0.5][ 0.0-- 1.0]\n");
88    fprintf(stderr, "    -jm f          : weight of GV for spectrum                               [  1.0][ 0.0--    ]\n");
89    fprintf(stderr, "    -jf f          : weight of GV for log F0                                 [  1.0][ 0.0--    ]\n");
90    fprintf(stderr, "    -g  f          : volume (dB)                                             [  0.0][    --    ]\n");
91    fprintf(stderr, "    -z  i          : audio buffer size (if i==0, turn off)                   [    0][   0--    ]\n");
92    fprintf(stderr, "  infile:\n");
93    fprintf(stderr, "    label file\n");
94    fprintf(stderr, "  note:\n");
95    fprintf(stderr, "    generated spectrum, log F0, and low-pass filter coefficient\n");
96    fprintf(stderr, "    sequences are saved in natural endian, binary (float) format.\n");
97    fprintf(stderr, "\n");
98 
99    exit(0);
100 }
101 
main(int argc,char ** argv)102 int main(int argc, char **argv)
103 {
104    int i;
105    double f;
106 
107    /* hts_engine API */
108    HTS_Engine engine;
109 
110    /* HTS voices */
111    size_t num_voices;
112    char **fn_voices;
113 
114    /* input label file name */
115    char *labfn = NULL;
116 
117    /* output file pointers */
118    FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL;
119 
120    /* interpolation weights */
121    size_t num_interpolation_weights;
122 
123    /* output usage */
124    if (argc <= 1)
125       usage();
126 
127    /* initialize hts_engine API */
128    HTS_Engine_initialize(&engine);
129 
130    /* get HTS voice file names */
131    num_voices = 0;
132    fn_voices = (char **) malloc(argc * sizeof(char *));
133    for (i = 0; i < argc; i++) {
134       if (argv[i][0] == '-' && argv[i][1] == 'm')
135          fn_voices[num_voices++] = argv[++i];
136       if (argv[i][0] == '-' && argv[i][1] == 'h')
137          usage();
138    }
139    if (num_voices == 0) {
140       fprintf(stderr, "Error: HTS voice must be specified.\n");
141       free(fn_voices);
142       exit(1);
143    }
144 
145    /* load HTS voices */
146    if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) {
147       fprintf(stderr, "Error: HTS voices cannot be loaded.\n");
148       free(fn_voices);
149       HTS_Engine_clear(&engine);
150       exit(1);
151    }
152    free(fn_voices);
153 
154    /* get options */
155    while (--argc) {
156       if (**++argv == '-') {
157          switch (*(*argv + 1)) {
158          case 'v':
159             switch (*(*argv + 2)) {
160             case 'p':
161                HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE);
162                break;
163             default:
164                fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2));
165                HTS_Engine_clear(&engine);
166                exit(1);
167             }
168             break;
169          case 'o':
170             switch (*(*argv + 2)) {
171             case 'w':
172                wavfp = fopen(*++argv, "wb");
173                break;
174             case 'r':
175                rawfp = fopen(*++argv, "wb");
176                break;
177             case 'd':
178                durfp = fopen(*++argv, "wt");
179                break;
180             case 'm':
181                mgcfp = fopen(*++argv, "wb");
182                break;
183             case 'f':
184             case 'p':
185                lf0fp = fopen(*++argv, "wb");
186                break;
187             case 'l':
188                lpffp = fopen(*++argv, "wb");
189                break;
190             case 't':
191                tracefp = fopen(*++argv, "wt");
192                break;
193             default:
194                fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2));
195                HTS_Engine_clear(&engine);
196                exit(1);
197             }
198             --argc;
199             break;
200          case 'h':
201             usage();
202             break;
203          case 'm':
204             argv++;             /* HTS voices were already loaded */
205             --argc;
206             break;
207          case 's':
208             HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv));
209             --argc;
210             break;
211          case 'p':
212             HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv));
213             --argc;
214             break;
215          case 'a':
216             HTS_Engine_set_alpha(&engine, atof(*++argv));
217             --argc;
218             break;
219          case 'b':
220             HTS_Engine_set_beta(&engine, atof(*++argv));
221             --argc;
222             break;
223          case 'r':
224             HTS_Engine_set_speed(&engine, atof(*++argv));
225             --argc;
226             break;
227          case 'f':
228             switch (*(*argv + 2)) {
229             case 'm':
230                HTS_Engine_add_half_tone(&engine, atof(*++argv));
231                break;
232             default:
233                fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2));
234                HTS_Engine_clear(&engine);
235                exit(1);
236             }
237             --argc;
238             break;
239          case 'u':
240             HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv));
241             --argc;
242             break;
243          case 'i':
244             num_interpolation_weights = atoi(*++argv);
245             argc--;
246             if (num_interpolation_weights != num_voices) {
247                HTS_Engine_clear(&engine);
248                exit(1);
249             }
250             for (i = 0; i < num_interpolation_weights; i++) {
251                f = atof(*++argv);
252                argc--;
253                HTS_Engine_set_duration_interpolation_weight(&engine, i, f);
254                HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f);
255                HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f);
256                HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f);
257                HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f);
258             }
259             break;
260          case 'j':
261             switch (*(*argv + 2)) {
262             case 'm':
263                HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv));
264                break;
265             case 'f':
266             case 'p':
267                HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv));
268                break;
269             default:
270                fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2));
271                HTS_Engine_clear(&engine);
272                exit(1);
273             }
274             --argc;
275             break;
276          case 'g':
277             HTS_Engine_set_volume(&engine, atof(*++argv));
278             --argc;
279             break;
280          case 'z':
281             HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv));
282             --argc;
283             break;
284          default:
285             fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1));
286             HTS_Engine_clear(&engine);
287             exit(1);
288          }
289       } else {
290          labfn = *argv;
291       }
292    }
293 
294    /* synthesize */
295    if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) {
296       fprintf(stderr, "Error: waveform cannot be synthesized.\n");
297       HTS_Engine_clear(&engine);
298       exit(1);
299    }
300 
301    /* output */
302    if (tracefp != NULL)
303       HTS_Engine_save_information(&engine, tracefp);
304    if (durfp != NULL)
305       HTS_Engine_save_label(&engine, durfp);
306    if (rawfp)
307       HTS_Engine_save_generated_speech(&engine, rawfp);
308    if (wavfp)
309       HTS_Engine_save_riff(&engine, wavfp);
310    if (mgcfp)
311       HTS_Engine_save_generated_parameter(&engine, 0, mgcfp);
312    if (lf0fp)
313       HTS_Engine_save_generated_parameter(&engine, 1, lf0fp);
314    if (lpffp)
315       HTS_Engine_save_generated_parameter(&engine, 2, lpffp);
316 
317    /* reset */
318    HTS_Engine_refresh(&engine);
319 
320    /* free memory */
321    HTS_Engine_clear(&engine);
322 
323    /* close files */
324    if (durfp != NULL)
325       fclose(durfp);
326    if (mgcfp != NULL)
327       fclose(mgcfp);
328    if (lf0fp != NULL)
329       fclose(lf0fp);
330    if (lpffp != NULL)
331       fclose(lpffp);
332    if (wavfp != NULL)
333       fclose(wavfp);
334    if (rawfp != NULL)
335       fclose(rawfp);
336    if (tracefp != NULL)
337       fclose(tracefp);
338 
339    return 0;
340 }
341 
342 HTS_ENGINE_C_END;
343 
344 #endif                          /* !HTS_ENGINE_C */
345