1 /* ----------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 /* developed by HTS Working Group */
4 /* http://hts-engine.sourceforge.net/ */
5 /* ----------------------------------------------------------------- */
6 /* */
7 /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 /* Department of Computer Science */
9 /* */
10 /* 2001-2008 Tokyo Institute of Technology */
11 /* Interdisciplinary Graduate School of */
12 /* Science and Engineering */
13 /* */
14 /* All rights reserved. */
15 /* */
16 /* Redistribution and use in source and binary forms, with or */
17 /* without modification, are permitted provided that the following */
18 /* conditions are met: */
19 /* */
20 /* - Redistributions of source code must retain the above copyright */
21 /* notice, this list of conditions and the following disclaimer. */
22 /* - Redistributions in binary form must reproduce the above */
23 /* copyright notice, this list of conditions and the following */
24 /* disclaimer in the documentation and/or other materials provided */
25 /* with the distribution. */
26 /* - Neither the name of the HTS working group nor the names of its */
27 /* contributors may be used to endorse or promote products derived */
28 /* from this software without specific prior written permission. */
29 /* */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 /* POSSIBILITY OF SUCH DAMAGE. */
43 /* ----------------------------------------------------------------- */
44
45 #ifndef HTS_GSTREAM_C
46 #define HTS_GSTREAM_C
47
48 #ifdef __cplusplus
49 #define HTS_GSTREAM_C_START extern "C" {
50 #define HTS_GSTREAM_C_END }
51 #else
52 #define HTS_GSTREAM_C_START
53 #define HTS_GSTREAM_C_END
54 #endif /* __CPLUSPLUS */
55
56 HTS_GSTREAM_C_START;
57
58 /* hts_engine libraries */
59 #include "HTS_hidden.h"
60
61 /* HTS_GStreamSet_initialize: initialize generated parameter stream set */
HTS_GStreamSet_initialize(HTS_GStreamSet * gss)62 void HTS_GStreamSet_initialize(HTS_GStreamSet * gss)
63 {
64 gss->nstream = 0;
65 gss->total_frame = 0;
66 gss->total_nsample = 0;
67 gss->gstream = NULL;
68 gss->gspeech = NULL;
69 }
70
71 /* HTS_GStreamSet_create: generate speech */
HTS_GStreamSet_create(HTS_GStreamSet * gss,HTS_PStreamSet * pss,size_t stage,HTS_Boolean use_log_gain,size_t sampling_rate,size_t fperiod,double alpha,double beta,HTS_Boolean * stop,double volume,HTS_Audio * audio)72 HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio)
73 {
74 size_t i, j, k;
75 size_t msd_frame;
76 HTS_Vocoder v;
77 size_t nlpf = 0;
78 double *lpf = NULL;
79
80 /* check */
81 if (gss->gstream || gss->gspeech) {
82 HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");
83 return FALSE;
84 }
85
86 /* initialize */
87 gss->nstream = HTS_PStreamSet_get_nstream(pss);
88 gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
89 gss->total_nsample = fperiod * gss->total_frame;
90 gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
91 for (i = 0; i < gss->nstream; i++) {
92 gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i);
93 gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
94 for (j = 0; j < gss->total_frame; j++)
95 gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double));
96 }
97 gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double));
98
99 /* copy generated parameter */
100 for (i = 0; i < gss->nstream; i++) {
101 if (HTS_PStreamSet_is_msd(pss, i)) { /* for MSD */
102 for (j = 0, msd_frame = 0; j < gss->total_frame; j++)
103 if (HTS_PStreamSet_get_msd_flag(pss, i, j) == TRUE) {
104 for (k = 0; k < gss->gstream[i].vector_length; k++)
105 gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k);
106 msd_frame++;
107 } else
108 for (k = 0; k < gss->gstream[i].vector_length; k++)
109 gss->gstream[i].par[j][k] = HTS_NODATA;
110 } else { /* for non MSD */
111 for (j = 0; j < gss->total_frame; j++)
112 for (k = 0; k < gss->gstream[i].vector_length; k++)
113 gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k);
114 }
115 }
116
117 /* check */
118 if (gss->nstream != 2 && gss->nstream != 3) {
119 HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n");
120 HTS_GStreamSet_clear(gss);
121 return FALSE;
122 }
123 if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) {
124 HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");
125 HTS_GStreamSet_clear(gss);
126 return FALSE;
127 }
128 if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) {
129 HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers.");
130 HTS_GStreamSet_clear(gss);
131 return FALSE;
132 }
133
134 /* synthesize speech waveform */
135 HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod);
136 if (gss->nstream >= 3)
137 nlpf = gss->gstream[2].vector_length;
138 for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) {
139 j = i * fperiod;
140 if (gss->nstream >= 3)
141 lpf = &gss->gstream[2].par[i][0];
142 HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio);
143 }
144 HTS_Vocoder_clear(&v);
145 if (audio)
146 HTS_Audio_flush(audio);
147
148 return TRUE;
149 }
150
151 /* HTS_GStreamSet_get_total_nsamples: get total number of sample */
HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss)152 size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss)
153 {
154 return gss->total_nsample;
155 }
156
157 /* HTS_GStreamSet_get_total_frame: get total number of frame */
HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss)158 size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss)
159 {
160 return gss->total_frame;
161 }
162
163 /* HTS_GStreamSet_get_vector_length: get features length */
HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss,size_t stream_index)164 size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index)
165 {
166 return gss->gstream[stream_index].vector_length;
167 }
168
169 /* HTS_GStreamSet_get_speech: get synthesized speech parameter */
HTS_GStreamSet_get_speech(HTS_GStreamSet * gss,size_t sample_index)170 double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index)
171 {
172 return gss->gspeech[sample_index];
173 }
174
175 /* HTS_GStreamSet_get_parameter: get generated parameter */
HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss,size_t stream_index,size_t frame_index,size_t vector_index)176 double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index)
177 {
178 return gss->gstream[stream_index].par[frame_index][vector_index];
179 }
180
181 /* HTS_GStreamSet_clear: free generated parameter stream set */
HTS_GStreamSet_clear(HTS_GStreamSet * gss)182 void HTS_GStreamSet_clear(HTS_GStreamSet * gss)
183 {
184 size_t i, j;
185
186 if (gss->gstream) {
187 for (i = 0; i < gss->nstream; i++) {
188 if (gss->gstream[i].par != NULL) {
189 for (j = 0; j < gss->total_frame; j++)
190 HTS_free(gss->gstream[i].par[j]);
191 HTS_free(gss->gstream[i].par);
192 }
193 }
194 HTS_free(gss->gstream);
195 }
196 if (gss->gspeech)
197 HTS_free(gss->gspeech);
198 HTS_GStreamSet_initialize(gss);
199 }
200
201 HTS_GSTREAM_C_END;
202
203 #endif /* !HTS_GSTREAM_C */
204