1 /***************************************************************************
2  *  Copyright 1991, 1992, 1993, 1994, 1995, 1996, 2001, 2002               *
3  *    David R. Hill, Leonard Manzara, Craig Schock                         *
4  *                                                                         *
5  *  This program is free software: you can redistribute it and/or modify   *
6  *  it under the terms of the GNU General Public License as published by   *
7  *  the Free Software Foundation, either version 3 of the License, or      *
8  *  (at your option) any later version.                                    *
9  *                                                                         *
10  *  This program is distributed in the hope that it will be useful,        *
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of         *
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
13  *  GNU General Public License for more details.                           *
14  *                                                                         *
15  *  You should have received a copy of the GNU General Public License      *
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.  *
17  ***************************************************************************/
18 // 2014-09
19 // This file was copied from Gnuspeech and modified by Marcelo Y. Matuda.
20 
21 #ifndef TRM_TUBE_H_
22 #define TRM_TUBE_H_
23 
24 #include <algorithm> /* max, min */
25 #include <istream>
26 #include <memory>
27 #include <vector>
28 
29 #include "BandpassFilter.h"
30 #include "MovingAverageFilter.h"
31 #include "NoiseFilter.h"
32 #include "NoiseSource.h"
33 #include "RadiationFilter.h"
34 #include "ReflectionFilter.h"
35 #include "SampleRateConverter.h"
36 #include "Throat.h"
37 #include "VocalTractModelParameterValue.h"
38 #include "WavetableGlottalSource.h"
39 
/*
 * Minimum radius constant for the tube model. It is not referenced anywhere
 * in this header; presumably the implementation uses it as the lower bound
 * for tube section radii - TODO confirm. Units are not stated here.
 */
#define GS_TRM_TUBE_MIN_RADIUS (0.001)
41 
42 
43 
44 namespace GS {
45 namespace TRM {
46 
47 class Tube {
48 public:
49 	enum { /*  OROPHARYNX REGIONS  */
50 		R1 = 0, /*  S1  */
51 		R2 = 1, /*  S2  */
52 		R3 = 2, /*  S3  */
53 		R4 = 3, /*  S4 & S5  */
54 		R5 = 4, /*  S6 & S7  */
55 		R6 = 5, /*  S8  */
56 		R7 = 6, /*  S9  */
57 		R8 = 7, /*  S10  */
58 		TOTAL_REGIONS = 8
59 	};
60 	enum { /*  NASAL TRACT SECTIONS  */
61 		N1 = 0,
62 		N2 = 1,
63 		N3 = 2,
64 		N4 = 3,
65 		N5 = 4,
66 		N6 = 5,
67 		TOTAL_NASAL_SECTIONS = 6
68 	};
69 	enum ParameterIndex {
70 		PARAM_GLOT_PITCH = 0,
71 		PARAM_GLOT_VOL   = 1,
72 		PARAM_ASP_VOL    = 2,
73 		PARAM_FRIC_VOL   = 3,
74 		PARAM_FRIC_POS   = 4,
75 		PARAM_FRIC_CF    = 5,
76 		PARAM_FRIC_BW    = 6,
77 		PARAM_R1         = 7,
78 		PARAM_R2         = 8,
79 		PARAM_R3         = 9,
80 		PARAM_R4         = 10,
81 		PARAM_R5         = 11,
82 		PARAM_R6         = 12,
83 		PARAM_R7         = 13,
84 		PARAM_R8         = 14,
85 		PARAM_VELUM      = 15
86 	};
87 
88 	Tube();
89 	~Tube();
90 
91 	void synthesizeToFile(std::istream& inputStream, const char* outputFile);
92 	void synthesizeToBuffer(std::istream& inputStream, std::vector<float>& outputBuffer);
93 
94 	template<typename T> void loadConfigurationForInteractiveExecution(const T& config);
95 	void initializeSynthesizer();
96 	void initializeInputFilters(double period);
97 	void loadSingleInput(const VocalTractModelParameterValue pv);
98 	void synthesizeForInputSequence();
99 	void synthesizeForSingleInput(int numIterations);
100 
outputData()101 	std::vector<float>& outputData() { return outputData_; }
outputDataPos()102 	std::size_t outputDataPos() const { return outputDataPos_; }
setOutputDataPos(std::size_t pos)103 	void setOutputDataPos(std::size_t pos) { outputDataPos_ = pos; }
resetOutputData()104 	void resetOutputData() {
105 		outputData_.clear();
106 		outputDataPos_ = 0;
107 	}
maximumOutputSampleValue()108 	double maximumOutputSampleValue() const { return srConv_->maximumSampleValue(); }
outputRate()109 	float outputRate() const { return outputRate_; }
numChannels()110 	unsigned int numChannels() const { return channels_; }
111 private:
112 	enum {
113 		VELUM = N1
114 	};
115 	enum { /*  OROPHARYNX SCATTERING JUNCTION COEFFICIENTS (BETWEEN EACH REGION)  */
116 		C1 = R1, /*  R1-R2 (S1-S2)  */
117 		C2 = R2, /*  R2-R3 (S2-S3)  */
118 		C3 = R3, /*  R3-R4 (S3-S4)  */
119 		C4 = R4, /*  R4-R5 (S5-S6)  */
120 		C5 = R5, /*  R5-R6 (S7-S8)  */
121 		C6 = R6, /*  R6-R7 (S8-S9)  */
122 		C7 = R7, /*  R7-R8 (S9-S10)  */
123 		C8 = R8, /*  R8-AIR (S10-AIR)  */
124 		TOTAL_COEFFICIENTS = TOTAL_REGIONS
125 	};
126 	enum { /*  OROPHARYNX SECTIONS  */
127 		S1  = 0, /*  R1  */
128 		S2  = 1, /*  R2  */
129 		S3  = 2, /*  R3  */
130 		S4  = 3, /*  R4  */
131 		S5  = 4, /*  R4  */
132 		S6  = 5, /*  R5  */
133 		S7  = 6, /*  R5  */
134 		S8  = 7, /*  R6  */
135 		S9  = 8, /*  R7  */
136 		S10 = 9, /*  R8  */
137 		TOTAL_SECTIONS = 10
138 	};
139 	enum { /*  NASAL TRACT COEFFICIENTS  */
140 		NC1 = N1, /*  N1-N2  */
141 		NC2 = N2, /*  N2-N3  */
142 		NC3 = N3, /*  N3-N4  */
143 		NC4 = N4, /*  N4-N5  */
144 		NC5 = N5, /*  N5-N6  */
145 		NC6 = N6, /*  N6-AIR  */
146 		TOTAL_NASAL_COEFFICIENTS = TOTAL_NASAL_SECTIONS
147 	};
148 	enum { /*  THREE-WAY JUNCTION ALPHA COEFFICIENTS  */
149 		LEFT  = 0,
150 		RIGHT = 1,
151 		UPPER = 2,
152 		TOTAL_ALPHA_COEFFICIENTS = 3
153 	};
154 	enum { /*  FRICATION INJECTION COEFFICIENTS  */
155 		FC1 = 0, /*  S3  */
156 		FC2 = 1, /*  S4  */
157 		FC3 = 2, /*  S5  */
158 		FC4 = 3, /*  S6  */
159 		FC5 = 4, /*  S7  */
160 		FC6 = 5, /*  S8  */
161 		FC7 = 6, /*  S9  */
162 		FC8 = 7, /*  S10  */
163 		TOTAL_FRIC_COEFFICIENTS = 8
164 	};
165 
166 	struct InputData {
167 		double glotPitch;
168 		double glotVol;
169 		double aspVol;
170 		double fricVol;
171 		double fricPos;
172 		double fricCF;
173 		double fricBW;
174 		double radius[TOTAL_REGIONS];
175 		double velum;
176 	};
177 
178 	/*  VARIABLES FOR INTERPOLATION  */
179 	struct CurrentData {
180 		double glotPitch;
181 		double glotPitchDelta;
182 		double glotVol;
183 		double glotVolDelta;
184 		double aspVol;
185 		double aspVolDelta;
186 		double fricVol;
187 		double fricVolDelta;
188 		double fricPos;
189 		double fricPosDelta;
190 		double fricCF;
191 		double fricCFDelta;
192 		double fricBW;
193 		double fricBWDelta;
194 		double radius[TOTAL_REGIONS];
195 		double radiusDelta[TOTAL_REGIONS];
196 		double velum;
197 		double velumDelta;
198 	};
199 
200 	struct InputFilters {
201 		MovingAverageFilter<double> glotPitchFilter;
202 		MovingAverageFilter<double> glotVolFilter;
203 		MovingAverageFilter<double> aspVolFilter;
204 		MovingAverageFilter<double> fricVolFilter;
205 		MovingAverageFilter<double> fricPosFilter;
206 		MovingAverageFilter<double> fricCFFilter;
207 		MovingAverageFilter<double> fricBWFilter;
208 		MovingAverageFilter<double> radius0Filter;
209 		MovingAverageFilter<double> radius1Filter;
210 		MovingAverageFilter<double> radius2Filter;
211 		MovingAverageFilter<double> radius3Filter;
212 		MovingAverageFilter<double> radius4Filter;
213 		MovingAverageFilter<double> radius5Filter;
214 		MovingAverageFilter<double> radius6Filter;
215 		MovingAverageFilter<double> radius7Filter;
216 		MovingAverageFilter<double> velumFilter;
InputFiltersInputFilters217 		InputFilters(double sampleRate, double period)
218 			: glotPitchFilter(sampleRate, period)
219 			, glotVolFilter(sampleRate, period)
220 			, aspVolFilter(sampleRate, period)
221 			, fricVolFilter(sampleRate, period)
222 			, fricPosFilter(sampleRate, period)
223 			, fricCFFilter(sampleRate, period)
224 			, fricBWFilter(sampleRate, period)
225 			, radius0Filter(sampleRate, period)
226 			, radius1Filter(sampleRate, period)
227 			, radius2Filter(sampleRate, period)
228 			, radius3Filter(sampleRate, period)
229 			, radius4Filter(sampleRate, period)
230 			, radius5Filter(sampleRate, period)
231 			, radius6Filter(sampleRate, period)
232 			, radius7Filter(sampleRate, period)
233 			, velumFilter(sampleRate, period) {}
resetInputFilters234 		void reset() {
235 			glotPitchFilter.reset();
236 			glotVolFilter.reset();
237 			aspVolFilter.reset();
238 			fricVolFilter.reset();
239 			fricPosFilter.reset();
240 			fricCFFilter.reset();
241 			fricBWFilter.reset();
242 			radius0Filter.reset();
243 			radius1Filter.reset();
244 			radius2Filter.reset();
245 			radius3Filter.reset();
246 			radius4Filter.reset();
247 			radius5Filter.reset();
248 			radius6Filter.reset();
249 			radius7Filter.reset();
250 			velumFilter.reset();
251 		}
252 	};
253 
254 	Tube(const Tube&) = delete;
255 	Tube& operator=(const Tube&) = delete;
256 
257 	void reset();
258 	void calculateTubeCoefficients();
259 	void initializeNasalCavity();
260 	void printInfo(const char* inputFile);
261 	void parseInputStream(std::istream& in);
262 	void sampleRateInterpolation();
263 	void setControlRateParameters(int pos);
264 	void setFricationTaps();
265 	double vocalTract(double input, double frication);
266 	void writeOutputToFile(const char* outputFile);
267 	void writeOutputToBuffer(std::vector<float>& outputBuffer);
268 	void synthesize();
269 	float calculateMonoScale();
270 	void calculateStereoScale(float& leftScale, float& rightScale);
271 
272 	static double amplitude(double decibelLevel);
273 	static double frequency(double pitch);
274 	static double speedOfSound(double temperature);
275 
276 	float  outputRate_;                  /*  output sample rate (22.05, 44.1)  */
277 	float  controlRate_;                 /*  1.0-1000.0 input tables/second (Hz)  */
278 
279 	double volume_;                      /*  master volume (0 - 60 dB)  */
280 	int    channels_;                    /*  # of sound output channels (1, 2)  */
281 	double balance_;                     /*  stereo balance (-1 to +1)  */
282 
283 	int    waveform_;                    /*  GS waveform type (0=PULSE, 1=SINE  */
284 	double tp_;                          /*  % glottal pulse rise time  */
285 	double tnMin_;                       /*  % glottal pulse fall time minimum  */
286 	double tnMax_;                       /*  % glottal pulse fall time maximum  */
287 	double breathiness_;                 /*  % glottal source breathiness  */
288 
289 	double length_;                      /*  nominal tube length (10 - 20 cm)  */
290 	double temperature_;                 /*  tube temperature (25 - 40 C)  */
291 	double lossFactor_;                  /*  junction loss factor in (0 - 5 %)  */
292 
293 	double apertureRadius_;              /*  aperture scl. radius (3.05 - 12 cm)  */
294 	double mouthCoef_;                   /*  mouth aperture coefficient  */
295 	double noseCoef_;                    /*  nose aperture coefficient  */
296 
297 	double noseRadius_[TOTAL_NASAL_SECTIONS]; /*  fixed nose radii (0 - 3 cm)  */
298 
299 	double throatCutoff_;                /*  throat lp cutoff (50 - nyquist Hz)  */
300 	double throatVol_;                   /*  throat volume (0 - 48 dB) */
301 
302 	int    modulation_;                  /*  pulse mod. of noise (0=OFF, 1=ON)  */
303 	double mixOffset_;                   /*  noise crossmix offset (30 - 60 dB)  */
304 
305 	/*  DERIVED VALUES  */
306 	int    controlPeriod_;
307 	int    sampleRate_;
308 	double actualTubeLength_;            /*  actual length in cm  */
309 
310 	/*  MEMORY FOR TUBE AND TUBE COEFFICIENTS  */
311 	double oropharynx_[TOTAL_SECTIONS][2][2];
312 	double oropharynxCoeff_[TOTAL_COEFFICIENTS];
313 
314 	double nasal_[TOTAL_NASAL_SECTIONS][2][2];
315 	double nasalCoeff_[TOTAL_NASAL_COEFFICIENTS];
316 
317 	double alpha_[TOTAL_ALPHA_COEFFICIENTS];
318 	int currentPtr_;
319 	int prevPtr_;
320 
321 	/*  MEMORY FOR FRICATION TAPS  */
322 	double fricationTap_[TOTAL_FRIC_COEFFICIENTS];
323 
324 	double dampingFactor_;               /*  calculated damping factor  */
325 	double crossmixFactor_;              /*  calculated crossmix factor  */
326 	double breathinessFactor_;
327 
328 	double prevGlotAmplitude_;
329 
330 	std::vector<std::unique_ptr<InputData>> inputData_;
331 	CurrentData currentData_;
332 	InputData singleInput_;
333 	std::size_t outputDataPos_;
334 	std::vector<float> outputData_;
335 	std::unique_ptr<SampleRateConverter> srConv_;
336 	std::unique_ptr<RadiationFilter> mouthRadiationFilter_;
337 	std::unique_ptr<ReflectionFilter> mouthReflectionFilter_;
338 	std::unique_ptr<RadiationFilter> nasalRadiationFilter_;
339 	std::unique_ptr<ReflectionFilter> nasalReflectionFilter_;
340 	std::unique_ptr<Throat> throat_;
341 	std::unique_ptr<WavetableGlottalSource> glottalSource_;
342 	std::unique_ptr<BandpassFilter> bandpassFilter_;
343 	std::unique_ptr<NoiseFilter> noiseFilter_;
344 	std::unique_ptr<NoiseSource> noiseSource_;
345 	std::unique_ptr<InputFilters> inputFilters_;
346 };
347 
348 
349 
350 template<typename T>
351 void
loadConfigurationForInteractiveExecution(const T & config)352 Tube::loadConfigurationForInteractiveExecution(const T& config)
353 {
354 	outputRate_   = config.outputRate;
355 	controlRate_  = config.controlRate;
356 	volume_       = 0.0;
357 	channels_     = 1;
358 	balance_      = 0.0;
359 	waveform_     = 0;
360 	tp_           = config.tp;
361 	tnMin_        = config.tn;
362 	tnMax_        = config.tn;
363 	breathiness_  = config.breathiness;
364 	length_       = config.length;
365 	temperature_  = config.temperature;
366 	lossFactor_   = config.lossFactor;
367 	apertureRadius_ = config.apertureRadius;
368 	mouthCoef_    = config.mouthCoef;
369 	noseCoef_     = config.noseCoef;
370 
371 	noseRadius_[0] = 0.0;
372 	noseRadius_[1] = config.staticParamList[0];
373 	noseRadius_[2] = config.staticParamList[1];
374 	noseRadius_[3] = config.staticParamList[2];
375 	noseRadius_[4] = config.staticParamList[3];
376 	noseRadius_[5] = config.staticParamList[4];
377 
378 	throatCutoff_ = config.throatCutoff;
379 	throatVol_    = config.throatVol;
380 	modulation_   = config.modulation;
381 	mixOffset_    = config.mixOffset;
382 }
383 
384 } /* namespace TRM */
385 } /* namespace GS */
386 
387 #endif /* TRM_TUBE_H_ */
388