1 /************************************************************************************/
2 /* Copyright (c) 2012 The Department of Arts and Culture, */
3 /* The Government of the Republic of South Africa. */
4 /* */
5 /* Contributors: Meraka Institute, CSIR, South Africa. */
6 /* */
7 /* Permission is hereby granted, free of charge, to any person obtaining a copy */
8 /* of this software and associated documentation files (the "Software"), to deal */
9 /* in the Software without restriction, including without limitation the rights */
10 /* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
11 /* copies of the Software, and to permit persons to whom the Software is */
12 /* furnished to do so, subject to the following conditions: */
13 /* The above copyright notice and this permission notice shall be included in */
14 /* all copies or substantial portions of the Software. */
15 /* */
16 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
17 /* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
18 /* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
19 /* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
20 /* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
21 /* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN */
22 /* THE SOFTWARE. */
23 /* */
24 /************************************************************************************/
25 /* */
26 /* AUTHOR : Aby Louw */
27 /* DATE : 14 May 2012 */
28 /* */
29 /************************************************************************************/
30 /* ----------------------------------------------------------------- */
31 /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
32 /* developed by HTS Working Group */
33 /* http://hts-engine.sourceforge.net/ */
34 /* ----------------------------------------------------------------- */
35 /* */
36 /* Copyright (c) 2001-2012 Nagoya Institute of Technology */
37 /* Department of Computer Science */
38 /* */
39 /* 2001-2008 Tokyo Institute of Technology */
40 /* Interdisciplinary Graduate School of */
41 /* Science and Engineering */
42 /* */
43 /* All rights reserved. */
44 /* */
45 /* Redistribution and use in source and binary forms, with or */
46 /* without modification, are permitted provided that the following */
47 /* conditions are met: */
48 /* */
49 /* - Redistributions of source code must retain the above copyright */
50 /* notice, this list of conditions and the following disclaimer. */
51 /* - Redistributions in binary form must reproduce the above */
52 /* copyright notice, this list of conditions and the following */
53 /* disclaimer in the documentation and/or other materials provided */
54 /* with the distribution. */
55 /* - Neither the name of the HTS working group nor the names of its */
56 /* contributors may be used to endorse or promote products derived */
57 /* from this software without specific prior written permission. */
58 /* */
59 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
60 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
61 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
62 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
63 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
64 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
65 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
66 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
67 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
68 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
69 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
70 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
71 /* POSSIBILITY OF SUCH DAMAGE. */
72 /* ----------------------------------------------------------------- */
73 /* */
74 /* Added mixed excitation, see code at end of file. */
75 /*
76 Alok Parlikar modified this file for use with HTS Engine 1.07
77 */
78 /************************************************************************************/
79
80 #ifndef HTS_VOCODER_ME_C
81 #define HTS_VOCODER_ME_C
82
83 #ifdef __cplusplus
84 #define HTS_VOCODER_ME_C_START extern "C" {
85 #define HTS_VOCODER_ME_C_END }
86 #else
87 #define HTS_VOCODER_ME_C_START
88 #define HTS_VOCODER_ME_C_END
89 #endif /* __CPLUSPLUS */
90
91 HTS_VOCODER_ME_C_START;
92
93 #include "./HTS_vocoder_me.h"
94 #include "../hts_engine/HTS_vocoder.c"
95
96 /* HTS_Vocoder_initialize_me: initialize vocoder (mixed excitation) */
HTS_Vocoder_initialize_me(HTS_Vocoder_ME * v_me,const int m,const int stage,HTS_Boolean use_log_gain,const int rate,const int fperiod,int num_filters,int filter_order,double ** h,double * xp_sig,double * xn_sig,double * hp,double * hn)97 void HTS_Vocoder_initialize_me(HTS_Vocoder_ME * v_me,
98 const int m, const int stage,
99 HTS_Boolean use_log_gain,
100 const int rate,
101 const int fperiod,
102 int num_filters, int filter_order,
103 double **h,
104 double *xp_sig, double *xn_sig,
105 double *hp, double *hn) {
106 int i;
107 HTS_Vocoder *v = v_me->v; /* access to original HTS_Vocoder structure */
108
109 /* set parameter */
110 v->stage = stage;
111 if (stage != 0)
112 v->gamma = -1.0 / v->stage;
113 else
114 v->gamma = 0.0;
115 v->use_log_gain = use_log_gain;
116 v->fprd = fperiod;
117 v->next = SEED;
118 v->gauss = GAUSS;
119 v->rate = rate;
120 v->p1 = -1.0;
121 v->sw = 0;
122 v->x = 0x55555555;
123 /* init buffer */
124 v->freqt_buff = NULL;
125 v->freqt_size = 0;
126 v->gc2gc_buff = NULL;
127 v->gc2gc_size = 0;
128 v->lsp2lpc_buff = NULL;
129 v->lsp2lpc_size = 0;
130 v->postfilter_buff = NULL;
131 v->postfilter_size = 0;
132 v->spectrum2en_buff = NULL;
133 v->spectrum2en_size = 0;
134 if (v->stage == 0) { /* for MCP */
135 v->c = (double *) HTS_calloc(m * (3 + PADEORDER) + 5 * PADEORDER + 6, sizeof(double));
136 v->cc = v->c + m + 1;
137 v->cinc = v->cc + m + 1;
138 v->d1 = v->cinc + m + 1;
139 } else { /* for LSP */
140 v->c = (double *) HTS_calloc((m + 1) * (v->stage + 3), sizeof(double));
141 v->cc = v->c + m + 1;
142 v->cinc = v->cc + m + 1;
143 v->d1 = v->cinc + m + 1;
144 }
145 /* ABY: think this is not required */
146 v->pulse_buff = NULL;
147 v->pulse_size = 0;
148
149 /* initialize mixed excitation variables */
150 /*---------------------------------------*/
151
152 v_me->num_filters = num_filters;
153 v_me->filter_order = filter_order;
154 v_me->xp_sig = xp_sig;
155 v_me->xn_sig = xn_sig;
156
157 /* initialise xp_sig and xn_sig */
158 for (i = 0; i < v_me->filter_order; i++)
159 {
160 v_me->xp_sig[i] = 0.0;
161 v_me->xn_sig[i] = 0.0;
162 }
163
164 /* allocate memory for pulse and noise shaping filters */
165 v_me->hp = hp;
166 v_me->hn = hn;
167
168
169 /* get filter coefs */
170 v_me->h = h;
171
172 }
173
174
175 /* HTS_Vocoder_synthesize_me: mixed excitation and MLSA/MGLSA filster
176 * based waveform synthesis */
HTS_Vocoder_synthesize_me(HTS_Vocoder_ME * v_me,const int m,double lf0,double * spectrum,double * strengths,size_t nlpf,double * lpf,double alpha,double beta,double volume,double * rawdata,HTS_Audio * audio)177 void HTS_Vocoder_synthesize_me(HTS_Vocoder_ME * v_me,
178 const int m, double lf0,
179 double *spectrum, double *strengths,
180 size_t nlpf, double *lpf,
181 double alpha,
182 double beta, double volume,
183 double *rawdata, HTS_Audio * audio)
184 {
185 double x;
186 int i, j;
187 int k;
188 short xs;
189 int rawidx = 0;
190 double p;
191 HTS_Vocoder *v = v_me->v; /* access to original HTS_Vocoder struct */
192 double xpulse;
193 double xnoise;
194 double fxpulse;
195 double fxnoise;
196 double e1, e2;
197
198
199 /* Copy in str's and build pulse and noise shaping filter for this frame */
200 for (i = 0; i < v_me->filter_order; i++)
201 {
202 v_me->hp[i] = v_me->hn[i] = 0.0;
203 for (j = 0; j < v_me->num_filters; j++)
204 {
205 v_me->hp[i] += strengths[j] * v_me->h[j][i];
206 v_me->hn[i] += (1 - strengths[j]) * v_me->h[j][i];
207 }
208 }
209
210 /* lf0 -> pitch */
211 if (lf0 == LZERO)
212 p = 0.0;
213 else
214 p = v->rate / exp(lf0);
215
216 /* first time */
217 if (v->p1 < 0.0) {
218 HTS_Vocoder_initialize_excitation(v, 0);
219 if (v->stage == 0) { /* for MCP */
220 HTS_mc2b(spectrum, v->c, m, alpha);
221 } else { /* for LSP */
222 if (v->use_log_gain)
223 v->c[0] = LZERO;
224 else
225 v->c[0] = ZERO;
226 for (i = 1; i <= m; i++)
227 v->c[i] = i * PI / (m + 1);
228 HTS_lsp2mgc(v, v->c, v->c, m, alpha);
229 HTS_mc2b(v->c, v->c, m, alpha);
230 HTS_gnorm(v->c, v->c, m, v->gamma);
231 for (i = 1; i <= m; i++)
232 v->c[i] *= v->gamma;
233 }
234 }
235
236 HTS_Vocoder_start_excitation(v, p, 0);
237 if (v->stage == 0) { /* for MCP */
238 HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
239 HTS_mc2b(spectrum, v->cc, m, alpha);
240 for (i = 0; i <= m; i++)
241 v->cinc[i] = (v->cc[i] - v->c[i]) * IPERIOD / v->fprd;
242 } else { /* for LSP */
243 HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta);
244 HTS_check_lsp_stability(spectrum, m);
245 HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
246 HTS_mc2b(v->cc, v->cc, m, alpha);
247 HTS_gnorm(v->cc, v->cc, m, v->gamma);
248 for (i = 1; i <= m; i++)
249 v->cc[i] *= v->gamma;
250 for (i = 0; i <= m; i++)
251 v->cinc[i] = (v->cc[i] - v->c[i]) * IPERIOD / v->fprd;
252 }
253
254
255 for (j = 0, i = (IPERIOD + 1) / 2; j < v->fprd; j++)
256 {
257 if (v->stage == 0) { /* for MCP */
258 if (v->p1 == 0.0)
259 {
260 x = HTS_white_noise(v);
261
262 /* MIXED EXCITATION */
263 xnoise = x;
264 xpulse = 0.0;
265 }
266 else
267 {
268 if ((v->pc += 1.0) >= v->p1)
269 {
270 x = sqrt(v->p1);
271 v->pc = v->pc - v->p1;
272 }
273 else
274 {
275 x = 0.0;
276 }
277
278 /* MIXED EXCITATION */
279 xpulse = x;
280 xnoise = HTS_mseq(v); /* ABY: plus or minus 1 */
281 }
282
283 /* MIXED EXCITATION */
284 /* The real work -- apply shaping filters to pulse and noise */
285 fxpulse = fxnoise = 0.0;
286 for (k = v_me->filter_order - 1; k > 0; k--)
287 {
288 fxpulse += v_me->hp[k] * v_me->xp_sig[k];
289 fxnoise += v_me->hn[k] * v_me->xn_sig[k];
290
291 v_me->xp_sig[k] = v_me->xp_sig[k-1];
292 v_me->xn_sig[k] = v_me->xn_sig[k-1];
293 }
294
295 fxpulse += v_me->hp[0] * xpulse;
296 fxnoise += v_me->hn[0] * xnoise;
297 v_me->xp_sig[0] = xpulse;
298 v_me->xn_sig[0] = xnoise;
299
300 x = fxpulse + fxnoise; /* excitation is pulse plus noise */
301
302 x *= exp(v->c[0]);
303 x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1);
304
305 } else { /* for LSP */
306 if (!NGAIN)
307 x *= v->c[0];
308 x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
309 }
310
311 x *= volume;
312
313 /* output */
314 if (rawdata)
315 rawdata[rawidx++] = x;
316 if (audio) {
317 if (x > 32767.0)
318 xs = 32767;
319 else if (x < -32768.0)
320 xs = -32768;
321 else
322 xs = (short) x;
323 HTS_Audio_write(audio, xs);
324 }
325
326 if (!--i) {
327 for (i = 0; i <= m; i++)
328 v->c[i] += v->cinc[i];
329 i = IPERIOD;
330 }
331 }
332
333 HTS_Vocoder_end_excitation(v, nlpf);
334 HTS_movem(v->cc, v->c, m + 1);
335 }
336
337 /* HTS_Vocoder_clear_me: clear vocoder (mixed excitation) */
HTS_Vocoder_clear_me(HTS_Vocoder_ME * v_me)338 void HTS_Vocoder_clear_me(HTS_Vocoder_ME * v_me)
339 {
340 HTS_Vocoder *v = v_me->v; /* access to original HTS_Vocoder structure */
341
342 if ((v_me != NULL)
343 && (v != NULL))
344 {
345 /* free buffer */
346 if (v->freqt_buff != NULL) {
347 HTS_free(v->freqt_buff);
348 v->freqt_buff = NULL;
349 }
350 v->freqt_size = 0;
351 if (v->gc2gc_buff != NULL) {
352 HTS_free(v->gc2gc_buff);
353 v->gc2gc_buff = NULL;
354 }
355 v->gc2gc_size = 0;
356 if (v->lsp2lpc_buff != NULL) {
357 HTS_free(v->lsp2lpc_buff);
358 v->lsp2lpc_buff = NULL;
359 }
360 v->lsp2lpc_size = 0;
361 if (v->postfilter_buff != NULL) {
362 HTS_free(v->postfilter_buff);
363 v->postfilter_buff = NULL;
364 }
365 v->postfilter_size = 0;
366 if (v->spectrum2en_buff != NULL) {
367 HTS_free(v->spectrum2en_buff);
368 v->spectrum2en_buff = NULL;
369 }
370 v->spectrum2en_size = 0;
371 if (v->c != NULL) {
372 HTS_free(v->c);
373 v->c = NULL;
374 }
375 v->pulse_size = 0;
376 if (v->pulse_buff != NULL){
377 HTS_free(v->pulse_buff);
378 v->pulse_buff = NULL;
379 }
380
381 v_me->num_filters = 0;
382 v_me->filter_order = 0;
383 v_me->xp_sig = NULL;
384 v_me->xn_sig = NULL;
385 v_me->hp = NULL;
386 v_me->hn = NULL;
387 v_me->h = NULL;
388 }
389 }
390
391 HTS_VOCODER_ME_C_END;
392
393 #endif /* !HTS_VOCODER_ME_C */
394