1 /***************************************************************************
2 * Copyright 1991, 1992, 1993, 1994, 1995, 1996, 2001, 2002 *
3 * David R. Hill, Leonard Manzara, Craig Schock *
4 * *
5 * This program is free software: you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation, either version 3 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program. If not, see <http://www.gnu.org/licenses/>. *
17 ***************************************************************************/
18 // 2014-09
19 // This file was copied from Gnuspeech and modified by Marcelo Y. Matuda.
20
21 #ifndef TRM_TUBE_H_
22 #define TRM_TUBE_H_
23
24 #include <algorithm> /* max, min */
25 #include <istream>
26 #include <memory>
27 #include <vector>
28
29 #include "BandpassFilter.h"
30 #include "MovingAverageFilter.h"
31 #include "NoiseFilter.h"
32 #include "NoiseSource.h"
33 #include "RadiationFilter.h"
34 #include "ReflectionFilter.h"
35 #include "SampleRateConverter.h"
36 #include "Throat.h"
37 #include "VocalTractModelParameterValue.h"
38 #include "WavetableGlottalSource.h"
39
/*
 * Minimum tube radius constant.
 * NOTE(review): where this value is applied is not visible in this header;
 * presumably it is the lower bound for section radii, in the same unit as
 * the radius members of GS::TRM::Tube -- confirm against the implementation.
 */
#define GS_TRM_TUBE_MIN_RADIUS (0.001)
41
42
43
44 namespace GS {
45 namespace TRM {
46
47 class Tube {
48 public:
49 enum { /* OROPHARYNX REGIONS */
50 R1 = 0, /* S1 */
51 R2 = 1, /* S2 */
52 R3 = 2, /* S3 */
53 R4 = 3, /* S4 & S5 */
54 R5 = 4, /* S6 & S7 */
55 R6 = 5, /* S8 */
56 R7 = 6, /* S9 */
57 R8 = 7, /* S10 */
58 TOTAL_REGIONS = 8
59 };
60 enum { /* NASAL TRACT SECTIONS */
61 N1 = 0,
62 N2 = 1,
63 N3 = 2,
64 N4 = 3,
65 N5 = 4,
66 N6 = 5,
67 TOTAL_NASAL_SECTIONS = 6
68 };
69 enum ParameterIndex {
70 PARAM_GLOT_PITCH = 0,
71 PARAM_GLOT_VOL = 1,
72 PARAM_ASP_VOL = 2,
73 PARAM_FRIC_VOL = 3,
74 PARAM_FRIC_POS = 4,
75 PARAM_FRIC_CF = 5,
76 PARAM_FRIC_BW = 6,
77 PARAM_R1 = 7,
78 PARAM_R2 = 8,
79 PARAM_R3 = 9,
80 PARAM_R4 = 10,
81 PARAM_R5 = 11,
82 PARAM_R6 = 12,
83 PARAM_R7 = 13,
84 PARAM_R8 = 14,
85 PARAM_VELUM = 15
86 };
87
88 Tube();
89 ~Tube();
90
91 void synthesizeToFile(std::istream& inputStream, const char* outputFile);
92 void synthesizeToBuffer(std::istream& inputStream, std::vector<float>& outputBuffer);
93
94 template<typename T> void loadConfigurationForInteractiveExecution(const T& config);
95 void initializeSynthesizer();
96 void initializeInputFilters(double period);
97 void loadSingleInput(const VocalTractModelParameterValue pv);
98 void synthesizeForInputSequence();
99 void synthesizeForSingleInput(int numIterations);
100
outputData()101 std::vector<float>& outputData() { return outputData_; }
outputDataPos()102 std::size_t outputDataPos() const { return outputDataPos_; }
setOutputDataPos(std::size_t pos)103 void setOutputDataPos(std::size_t pos) { outputDataPos_ = pos; }
resetOutputData()104 void resetOutputData() {
105 outputData_.clear();
106 outputDataPos_ = 0;
107 }
maximumOutputSampleValue()108 double maximumOutputSampleValue() const { return srConv_->maximumSampleValue(); }
outputRate()109 float outputRate() const { return outputRate_; }
numChannels()110 unsigned int numChannels() const { return channels_; }
111 private:
112 enum {
113 VELUM = N1
114 };
115 enum { /* OROPHARYNX SCATTERING JUNCTION COEFFICIENTS (BETWEEN EACH REGION) */
116 C1 = R1, /* R1-R2 (S1-S2) */
117 C2 = R2, /* R2-R3 (S2-S3) */
118 C3 = R3, /* R3-R4 (S3-S4) */
119 C4 = R4, /* R4-R5 (S5-S6) */
120 C5 = R5, /* R5-R6 (S7-S8) */
121 C6 = R6, /* R6-R7 (S8-S9) */
122 C7 = R7, /* R7-R8 (S9-S10) */
123 C8 = R8, /* R8-AIR (S10-AIR) */
124 TOTAL_COEFFICIENTS = TOTAL_REGIONS
125 };
126 enum { /* OROPHARYNX SECTIONS */
127 S1 = 0, /* R1 */
128 S2 = 1, /* R2 */
129 S3 = 2, /* R3 */
130 S4 = 3, /* R4 */
131 S5 = 4, /* R4 */
132 S6 = 5, /* R5 */
133 S7 = 6, /* R5 */
134 S8 = 7, /* R6 */
135 S9 = 8, /* R7 */
136 S10 = 9, /* R8 */
137 TOTAL_SECTIONS = 10
138 };
139 enum { /* NASAL TRACT COEFFICIENTS */
140 NC1 = N1, /* N1-N2 */
141 NC2 = N2, /* N2-N3 */
142 NC3 = N3, /* N3-N4 */
143 NC4 = N4, /* N4-N5 */
144 NC5 = N5, /* N5-N6 */
145 NC6 = N6, /* N6-AIR */
146 TOTAL_NASAL_COEFFICIENTS = TOTAL_NASAL_SECTIONS
147 };
148 enum { /* THREE-WAY JUNCTION ALPHA COEFFICIENTS */
149 LEFT = 0,
150 RIGHT = 1,
151 UPPER = 2,
152 TOTAL_ALPHA_COEFFICIENTS = 3
153 };
154 enum { /* FRICATION INJECTION COEFFICIENTS */
155 FC1 = 0, /* S3 */
156 FC2 = 1, /* S4 */
157 FC3 = 2, /* S5 */
158 FC4 = 3, /* S6 */
159 FC5 = 4, /* S7 */
160 FC6 = 5, /* S8 */
161 FC7 = 6, /* S9 */
162 FC8 = 7, /* S10 */
163 TOTAL_FRIC_COEFFICIENTS = 8
164 };
165
166 struct InputData {
167 double glotPitch;
168 double glotVol;
169 double aspVol;
170 double fricVol;
171 double fricPos;
172 double fricCF;
173 double fricBW;
174 double radius[TOTAL_REGIONS];
175 double velum;
176 };
177
178 /* VARIABLES FOR INTERPOLATION */
179 struct CurrentData {
180 double glotPitch;
181 double glotPitchDelta;
182 double glotVol;
183 double glotVolDelta;
184 double aspVol;
185 double aspVolDelta;
186 double fricVol;
187 double fricVolDelta;
188 double fricPos;
189 double fricPosDelta;
190 double fricCF;
191 double fricCFDelta;
192 double fricBW;
193 double fricBWDelta;
194 double radius[TOTAL_REGIONS];
195 double radiusDelta[TOTAL_REGIONS];
196 double velum;
197 double velumDelta;
198 };
199
200 struct InputFilters {
201 MovingAverageFilter<double> glotPitchFilter;
202 MovingAverageFilter<double> glotVolFilter;
203 MovingAverageFilter<double> aspVolFilter;
204 MovingAverageFilter<double> fricVolFilter;
205 MovingAverageFilter<double> fricPosFilter;
206 MovingAverageFilter<double> fricCFFilter;
207 MovingAverageFilter<double> fricBWFilter;
208 MovingAverageFilter<double> radius0Filter;
209 MovingAverageFilter<double> radius1Filter;
210 MovingAverageFilter<double> radius2Filter;
211 MovingAverageFilter<double> radius3Filter;
212 MovingAverageFilter<double> radius4Filter;
213 MovingAverageFilter<double> radius5Filter;
214 MovingAverageFilter<double> radius6Filter;
215 MovingAverageFilter<double> radius7Filter;
216 MovingAverageFilter<double> velumFilter;
InputFiltersInputFilters217 InputFilters(double sampleRate, double period)
218 : glotPitchFilter(sampleRate, period)
219 , glotVolFilter(sampleRate, period)
220 , aspVolFilter(sampleRate, period)
221 , fricVolFilter(sampleRate, period)
222 , fricPosFilter(sampleRate, period)
223 , fricCFFilter(sampleRate, period)
224 , fricBWFilter(sampleRate, period)
225 , radius0Filter(sampleRate, period)
226 , radius1Filter(sampleRate, period)
227 , radius2Filter(sampleRate, period)
228 , radius3Filter(sampleRate, period)
229 , radius4Filter(sampleRate, period)
230 , radius5Filter(sampleRate, period)
231 , radius6Filter(sampleRate, period)
232 , radius7Filter(sampleRate, period)
233 , velumFilter(sampleRate, period) {}
resetInputFilters234 void reset() {
235 glotPitchFilter.reset();
236 glotVolFilter.reset();
237 aspVolFilter.reset();
238 fricVolFilter.reset();
239 fricPosFilter.reset();
240 fricCFFilter.reset();
241 fricBWFilter.reset();
242 radius0Filter.reset();
243 radius1Filter.reset();
244 radius2Filter.reset();
245 radius3Filter.reset();
246 radius4Filter.reset();
247 radius5Filter.reset();
248 radius6Filter.reset();
249 radius7Filter.reset();
250 velumFilter.reset();
251 }
252 };
253
254 Tube(const Tube&) = delete;
255 Tube& operator=(const Tube&) = delete;
256
257 void reset();
258 void calculateTubeCoefficients();
259 void initializeNasalCavity();
260 void printInfo(const char* inputFile);
261 void parseInputStream(std::istream& in);
262 void sampleRateInterpolation();
263 void setControlRateParameters(int pos);
264 void setFricationTaps();
265 double vocalTract(double input, double frication);
266 void writeOutputToFile(const char* outputFile);
267 void writeOutputToBuffer(std::vector<float>& outputBuffer);
268 void synthesize();
269 float calculateMonoScale();
270 void calculateStereoScale(float& leftScale, float& rightScale);
271
272 static double amplitude(double decibelLevel);
273 static double frequency(double pitch);
274 static double speedOfSound(double temperature);
275
276 float outputRate_; /* output sample rate (22.05, 44.1) */
277 float controlRate_; /* 1.0-1000.0 input tables/second (Hz) */
278
279 double volume_; /* master volume (0 - 60 dB) */
280 int channels_; /* # of sound output channels (1, 2) */
281 double balance_; /* stereo balance (-1 to +1) */
282
283 int waveform_; /* GS waveform type (0=PULSE, 1=SINE */
284 double tp_; /* % glottal pulse rise time */
285 double tnMin_; /* % glottal pulse fall time minimum */
286 double tnMax_; /* % glottal pulse fall time maximum */
287 double breathiness_; /* % glottal source breathiness */
288
289 double length_; /* nominal tube length (10 - 20 cm) */
290 double temperature_; /* tube temperature (25 - 40 C) */
291 double lossFactor_; /* junction loss factor in (0 - 5 %) */
292
293 double apertureRadius_; /* aperture scl. radius (3.05 - 12 cm) */
294 double mouthCoef_; /* mouth aperture coefficient */
295 double noseCoef_; /* nose aperture coefficient */
296
297 double noseRadius_[TOTAL_NASAL_SECTIONS]; /* fixed nose radii (0 - 3 cm) */
298
299 double throatCutoff_; /* throat lp cutoff (50 - nyquist Hz) */
300 double throatVol_; /* throat volume (0 - 48 dB) */
301
302 int modulation_; /* pulse mod. of noise (0=OFF, 1=ON) */
303 double mixOffset_; /* noise crossmix offset (30 - 60 dB) */
304
305 /* DERIVED VALUES */
306 int controlPeriod_;
307 int sampleRate_;
308 double actualTubeLength_; /* actual length in cm */
309
310 /* MEMORY FOR TUBE AND TUBE COEFFICIENTS */
311 double oropharynx_[TOTAL_SECTIONS][2][2];
312 double oropharynxCoeff_[TOTAL_COEFFICIENTS];
313
314 double nasal_[TOTAL_NASAL_SECTIONS][2][2];
315 double nasalCoeff_[TOTAL_NASAL_COEFFICIENTS];
316
317 double alpha_[TOTAL_ALPHA_COEFFICIENTS];
318 int currentPtr_;
319 int prevPtr_;
320
321 /* MEMORY FOR FRICATION TAPS */
322 double fricationTap_[TOTAL_FRIC_COEFFICIENTS];
323
324 double dampingFactor_; /* calculated damping factor */
325 double crossmixFactor_; /* calculated crossmix factor */
326 double breathinessFactor_;
327
328 double prevGlotAmplitude_;
329
330 std::vector<std::unique_ptr<InputData>> inputData_;
331 CurrentData currentData_;
332 InputData singleInput_;
333 std::size_t outputDataPos_;
334 std::vector<float> outputData_;
335 std::unique_ptr<SampleRateConverter> srConv_;
336 std::unique_ptr<RadiationFilter> mouthRadiationFilter_;
337 std::unique_ptr<ReflectionFilter> mouthReflectionFilter_;
338 std::unique_ptr<RadiationFilter> nasalRadiationFilter_;
339 std::unique_ptr<ReflectionFilter> nasalReflectionFilter_;
340 std::unique_ptr<Throat> throat_;
341 std::unique_ptr<WavetableGlottalSource> glottalSource_;
342 std::unique_ptr<BandpassFilter> bandpassFilter_;
343 std::unique_ptr<NoiseFilter> noiseFilter_;
344 std::unique_ptr<NoiseSource> noiseSource_;
345 std::unique_ptr<InputFilters> inputFilters_;
346 };
347
348
349
350 template<typename T>
351 void
loadConfigurationForInteractiveExecution(const T & config)352 Tube::loadConfigurationForInteractiveExecution(const T& config)
353 {
354 outputRate_ = config.outputRate;
355 controlRate_ = config.controlRate;
356 volume_ = 0.0;
357 channels_ = 1;
358 balance_ = 0.0;
359 waveform_ = 0;
360 tp_ = config.tp;
361 tnMin_ = config.tn;
362 tnMax_ = config.tn;
363 breathiness_ = config.breathiness;
364 length_ = config.length;
365 temperature_ = config.temperature;
366 lossFactor_ = config.lossFactor;
367 apertureRadius_ = config.apertureRadius;
368 mouthCoef_ = config.mouthCoef;
369 noseCoef_ = config.noseCoef;
370
371 noseRadius_[0] = 0.0;
372 noseRadius_[1] = config.staticParamList[0];
373 noseRadius_[2] = config.staticParamList[1];
374 noseRadius_[3] = config.staticParamList[2];
375 noseRadius_[4] = config.staticParamList[3];
376 noseRadius_[5] = config.staticParamList[4];
377
378 throatCutoff_ = config.throatCutoff;
379 throatVol_ = config.throatVol;
380 modulation_ = config.modulation;
381 mixOffset_ = config.mixOffset;
382 }
383
384 } /* namespace TRM */
385 } /* namespace GS */
386
387 #endif /* TRM_TUBE_H_ */
388