1 // Copyright 2016 Emilie Gillet.
2 //
3 // Author: Emilie Gillet (emilie.o.gillet@gmail.com)
4 //
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
11 //
12 // The above copyright notice and this permission notice shall be included in
13 // all copies or substantial portions of the Software.
14 //
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 // THE SOFTWARE.
22 //
23 // See http://creativecommons.org/licenses/MIT/ for more information.
24 //
25 // -----------------------------------------------------------------------------
26 //
27 // SAM-inspired speech synth (as used in Shruthi/Ambika/Braids).
28
29 #include "plaits/dsp/speech/sam_speech_synth.h"
30
31 #include <algorithm>
32
33 #include "stmlib/dsp/dsp.h"
34 #include "stmlib/dsp/parameter_interpolator.h"
35
36 #include "plaits/dsp/oscillator/oscillator.h"
37 #include "plaits/resources.h"
38
39 namespace plaits {
40
41 using namespace std;
42 using namespace stmlib;
43
Init()44 void SAMSpeechSynth::Init() {
45 phase_ = 0.0f;
46 frequency_ = 0.0f;
47 pulse_next_sample_ = 0.0f;
48 pulse_lp_ = 0.0f;
49
50 fill(&formant_phase_[0], &formant_phase_[3], 0);
51 consonant_samples_ = 0;
52 consonant_index_ = 0.0f;
53 }
54
55 // Phoneme data
56
57 /* static */
58 SAMSpeechSynth::Phoneme SAMSpeechSynth::phonemes_[] = {
59 { { { 60, 15 }, { 90, 13 }, { 200, 1 } } },
60 { { { 40, 13 }, { 114, 12 }, { 139, 6 } } },
61 { { { 33, 14 }, { 155, 12 }, { 209, 7 } } },
62 { { { 22, 13 }, { 189, 10 }, { 247, 8 } } },
63 { { { 51, 15 }, { 99, 12 }, { 195, 1 } } },
64 { { { 29, 13 }, { 65, 8 }, { 180, 0 } } },
65 { { { 13, 12 }, { 103, 3 }, { 182, 0 } } },
66 { { { 20, 15 }, { 114, 3 }, { 213, 0 } } },
67 { { { 13, 7 }, { 164, 3 }, { 222, 14 } } },
68 { { { 13, 9 }, { 121, 9 }, { 254, 0 } } },
69 { { { 40, 12 }, { 112, 10 }, { 114, 5 } } },
70 { { { 24, 13 }, { 54, 8 }, { 157, 0 } } },
71 { { { 33, 14 }, { 155, 12 }, { 166, 7 } } },
72 { { { 36, 14 }, { 83, 8 }, { 249, 1 } } },
73 { { { 40, 14 }, { 114, 12 }, { 139, 6 } } },
74 { { { 13, 5 }, { 58, 5 }, { 182, 5 } } },
75 { { { 13, 7 }, { 164, 10 }, { 222, 14 } } }
76 };
77
78 /* static */
79 float SAMSpeechSynth::formant_amplitude_lut[] = {
80 0.03125000f, 0.03756299f, 0.04515131f, 0.05427259f, 0.06523652f,
81 0.07841532f, 0.09425646f, 0.11329776f, 0.13618570f, 0.16369736f,
82 0.19676682f, 0.23651683f, 0.28429697f, 0.34172946f, 0.41076422f,
83 0.49374509f
84 };
85
InterpolatePhonemeData(float phoneme,float formant_shift,uint32_t * formant_frequency,float * formant_amplitude)86 void SAMSpeechSynth::InterpolatePhonemeData(
87 float phoneme,
88 float formant_shift,
89 uint32_t* formant_frequency,
90 float* formant_amplitude) {
91 MAKE_INTEGRAL_FRACTIONAL(phoneme);
92
93 const Phoneme& p_1 = phonemes_[phoneme_integral];
94 const Phoneme& p_2 = phonemes_[phoneme_integral + 1];
95
96 formant_shift = 1.0f + formant_shift * 2.5f;
97 for (int i = 0; i < kSAMNumFormants; ++i) {
98 float f_1 = p_1.formant[i].frequency;
99 float f_2 = p_2.formant[i].frequency;
100 float f = f_1 + (f_2 - f_1) * phoneme_fractional;
101 f *= 8.0f * formant_shift * 4294967296.0f / kSampleRate;
102 formant_frequency[i] = static_cast<uint32_t>(f);
103
104 float a_1 = formant_amplitude_lut[p_1.formant[i].amplitude];
105 float a_2 = formant_amplitude_lut[p_2.formant[i].amplitude];
106 formant_amplitude[i] = a_1 + (a_2 - a_1) * phoneme_fractional;
107 }
108 }
109
Render(bool consonant,float frequency,float vowel,float formant_shift,float * excitation,float * output,size_t size)110 void SAMSpeechSynth::Render(
111 bool consonant,
112 float frequency,
113 float vowel,
114 float formant_shift,
115 float* excitation,
116 float* output,
117 size_t size) {
118 if (frequency >= 0.0625f) {
119 frequency = 0.0625f;
120 }
121
122 if (consonant) {
123 consonant_samples_ = kSampleRate * 0.05f;
124 int r = (vowel + 3.0f * frequency + 7.0f * formant_shift) * 8.0f;
125 consonant_index_ = (r % kSAMNumConsonants);
126 }
127 consonant_samples_ -= min(consonant_samples_, size);
128
129 float phoneme = consonant_samples_
130 ? (consonant_index_ + kSAMNumVowels)
131 : vowel * (kSAMNumVowels - 1.0001f);
132
133 uint32_t formant_frequency[kSAMNumFormants];
134 float formant_amplitude[kSAMNumFormants];
135
136 InterpolatePhonemeData(
137 phoneme,
138 formant_shift,
139 formant_frequency,
140 formant_amplitude);
141
142 ParameterInterpolator fm(&frequency_, frequency, size);
143 float pulse_next_sample = pulse_next_sample_;
144
145 while (size--) {
146 float pulse_this_sample = pulse_next_sample;
147 pulse_next_sample = 0.0f;
148 const float frequency = fm.Next();
149 phase_ += frequency;
150
151 if (phase_ >= 1.0f) {
152 phase_ -= 1.0f;
153 float t = phase_ / frequency;
154 formant_phase_[0] = static_cast<uint32_t>(
155 t * static_cast<float>(formant_frequency[0]));
156 formant_phase_[1] = static_cast<uint32_t>(
157 t * static_cast<float>(formant_frequency[1]));
158 formant_phase_[2] = static_cast<uint32_t>(
159 t * static_cast<float>(formant_frequency[2]));
160 pulse_this_sample -= ThisBlepSample(t);
161 pulse_next_sample -= NextBlepSample(t);
162 } else {
163 formant_phase_[0] += formant_frequency[0];
164 formant_phase_[1] += formant_frequency[1];
165 formant_phase_[2] += formant_frequency[2];
166 }
167 pulse_next_sample += phase_;
168
169 float d = pulse_this_sample - 0.5f - pulse_lp_;
170 pulse_lp_ += min(16.0f * frequency, 1.0f) * d;
171 *excitation++ = d;
172
173 float s = 0;
174 s += lut_sine[formant_phase_[0] >> 22] * formant_amplitude[0];
175 s += lut_sine[formant_phase_[1] >> 22] * formant_amplitude[1];
176 s += lut_sine[formant_phase_[2] >> 22] * formant_amplitude[2];
177 s *= (1.0f - phase_);
178 *output++ = s;
179 }
180 pulse_next_sample_ = pulse_next_sample;
181 }
182
183 } // namespace plaits
184