1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "audio/utility/audio_frame_operations.h"
12
13 #include <algorithm>
14
15 #include "modules/include/module_common_types.h"
16 #include "rtc_base/checks.h"
17 #include "rtc_base/numerics/safe_conversions.h"
18
19 namespace webrtc {
namespace {

// Length of the mute/unmute fade ramp, in samples per channel:
// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
constexpr size_t kMuteFadeFrames = 128;
// Per-sample gain increment for a linear ramp from 0 to 1 over the fade.
constexpr float kMuteFadeInc = 1.0f / kMuteFadeFrames;

}  // namespace
27
Add(const AudioFrame & frame_to_add,AudioFrame * result_frame)28 void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
29 AudioFrame* result_frame) {
30 // Sanity check.
31 RTC_DCHECK(result_frame);
32 RTC_DCHECK_GT(result_frame->num_channels_, 0);
33 RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);
34
35 bool no_previous_data = result_frame->muted();
36 if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
37 // Special case we have no data to start with.
38 RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
39 result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_;
40 no_previous_data = true;
41 }
42
43 if (result_frame->vad_activity_ == AudioFrame::kVadActive ||
44 frame_to_add.vad_activity_ == AudioFrame::kVadActive) {
45 result_frame->vad_activity_ = AudioFrame::kVadActive;
46 } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown ||
47 frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) {
48 result_frame->vad_activity_ = AudioFrame::kVadUnknown;
49 }
50
51 if (result_frame->speech_type_ != frame_to_add.speech_type_)
52 result_frame->speech_type_ = AudioFrame::kUndefined;
53
54 if (!frame_to_add.muted()) {
55 const int16_t* in_data = frame_to_add.data();
56 int16_t* out_data = result_frame->mutable_data();
57 size_t length =
58 frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
59 if (no_previous_data) {
60 std::copy(in_data, in_data + length, out_data);
61 } else {
62 for (size_t i = 0; i < length; i++) {
63 const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
64 static_cast<int32_t>(in_data[i]);
65 out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
66 }
67 }
68 }
69 }
70
MonoToStereo(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)71 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
72 size_t samples_per_channel,
73 int16_t* dst_audio) {
74 for (size_t i = 0; i < samples_per_channel; i++) {
75 dst_audio[2 * i] = src_audio[i];
76 dst_audio[2 * i + 1] = src_audio[i];
77 }
78 }
79
MonoToStereo(AudioFrame * frame)80 int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
81 if (frame->num_channels_ != 1) {
82 return -1;
83 }
84 if ((frame->samples_per_channel_ * 2) >= AudioFrame::kMaxDataSizeSamples) {
85 // Not enough memory to expand from mono to stereo.
86 return -1;
87 }
88
89 if (!frame->muted()) {
90 // TODO(yujo): this operation can be done in place.
91 int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
92 memcpy(data_copy, frame->data(),
93 sizeof(int16_t) * frame->samples_per_channel_);
94 MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data());
95 }
96 frame->num_channels_ = 2;
97
98 return 0;
99 }
100
StereoToMono(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)101 void AudioFrameOperations::StereoToMono(const int16_t* src_audio,
102 size_t samples_per_channel,
103 int16_t* dst_audio) {
104 for (size_t i = 0; i < samples_per_channel; i++) {
105 dst_audio[i] =
106 (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) >> 1;
107 }
108 }
109
StereoToMono(AudioFrame * frame)110 int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
111 if (frame->num_channels_ != 2) {
112 return -1;
113 }
114
115 RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
116 AudioFrame::kMaxDataSizeSamples);
117
118 if (!frame->muted()) {
119 StereoToMono(frame->data(), frame->samples_per_channel_,
120 frame->mutable_data());
121 }
122 frame->num_channels_ = 1;
123
124 return 0;
125 }
126
QuadToStereo(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)127 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
128 size_t samples_per_channel,
129 int16_t* dst_audio) {
130 for (size_t i = 0; i < samples_per_channel; i++) {
131 dst_audio[i * 2] =
132 (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1;
133 dst_audio[i * 2 + 1] =
134 (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >>
135 1;
136 }
137 }
138
QuadToStereo(AudioFrame * frame)139 int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
140 if (frame->num_channels_ != 4) {
141 return -1;
142 }
143
144 RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
145 AudioFrame::kMaxDataSizeSamples);
146
147 if (!frame->muted()) {
148 QuadToStereo(frame->data(), frame->samples_per_channel_,
149 frame->mutable_data());
150 }
151 frame->num_channels_ = 2;
152
153 return 0;
154 }
155
QuadToMono(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)156 void AudioFrameOperations::QuadToMono(const int16_t* src_audio,
157 size_t samples_per_channel,
158 int16_t* dst_audio) {
159 for (size_t i = 0; i < samples_per_channel; i++) {
160 dst_audio[i] =
161 (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1] +
162 src_audio[4 * i + 2] + src_audio[4 * i + 3]) >> 2;
163 }
164 }
165
QuadToMono(AudioFrame * frame)166 int AudioFrameOperations::QuadToMono(AudioFrame* frame) {
167 if (frame->num_channels_ != 4) {
168 return -1;
169 }
170
171 RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
172 AudioFrame::kMaxDataSizeSamples);
173
174 if (!frame->muted()) {
175 QuadToMono(frame->data(), frame->samples_per_channel_,
176 frame->mutable_data());
177 }
178 frame->num_channels_ = 1;
179
180 return 0;
181 }
182
DownmixChannels(const int16_t * src_audio,size_t src_channels,size_t samples_per_channel,size_t dst_channels,int16_t * dst_audio)183 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
184 size_t src_channels,
185 size_t samples_per_channel,
186 size_t dst_channels,
187 int16_t* dst_audio) {
188 if (src_channels == 2 && dst_channels == 1) {
189 StereoToMono(src_audio, samples_per_channel, dst_audio);
190 return;
191 } else if (src_channels == 4 && dst_channels == 2) {
192 QuadToStereo(src_audio, samples_per_channel, dst_audio);
193 return;
194 } else if (src_channels == 4 && dst_channels == 1) {
195 QuadToMono(src_audio, samples_per_channel, dst_audio);
196 return;
197 }
198
199 RTC_NOTREACHED() << "src_channels: " << src_channels
200 << ", dst_channels: " << dst_channels;
201 }
202
DownmixChannels(size_t dst_channels,AudioFrame * frame)203 int AudioFrameOperations::DownmixChannels(size_t dst_channels,
204 AudioFrame* frame) {
205 if (frame->num_channels_ == 2 && dst_channels == 1) {
206 return StereoToMono(frame);
207 } else if (frame->num_channels_ == 4 && dst_channels == 2) {
208 return QuadToStereo(frame);
209 } else if (frame->num_channels_ == 4 && dst_channels == 1) {
210 return QuadToMono(frame);
211 }
212
213 return -1;
214 }
215
SwapStereoChannels(AudioFrame * frame)216 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
217 RTC_DCHECK(frame);
218 if (frame->num_channels_ != 2 || frame->muted()) {
219 return;
220 }
221
222 int16_t* frame_data = frame->mutable_data();
223 for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
224 int16_t temp_data = frame_data[i];
225 frame_data[i] = frame_data[i + 1];
226 frame_data[i + 1] = temp_data;
227 }
228 }
229
Mute(AudioFrame * frame,bool previous_frame_muted,bool current_frame_muted)230 void AudioFrameOperations::Mute(AudioFrame* frame,
231 bool previous_frame_muted,
232 bool current_frame_muted) {
233 RTC_DCHECK(frame);
234 if (!previous_frame_muted && !current_frame_muted) {
235 // Not muted, don't touch.
236 } else if (previous_frame_muted && current_frame_muted) {
237 // Frame fully muted.
238 size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
239 RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
240 frame->Mute();
241 } else {
242 // Fade is a no-op on a muted frame.
243 if (frame->muted()) {
244 return;
245 }
246
247 // Limit number of samples to fade, if frame isn't long enough.
248 size_t count = kMuteFadeFrames;
249 float inc = kMuteFadeInc;
250 if (frame->samples_per_channel_ < kMuteFadeFrames) {
251 count = frame->samples_per_channel_;
252 if (count > 0) {
253 inc = 1.0f / count;
254 }
255 }
256
257 size_t start = 0;
258 size_t end = count;
259 float start_g = 0.0f;
260 if (current_frame_muted) {
261 // Fade out the last |count| samples of frame.
262 RTC_DCHECK(!previous_frame_muted);
263 start = frame->samples_per_channel_ - count;
264 end = frame->samples_per_channel_;
265 start_g = 1.0f;
266 inc = -inc;
267 } else {
268 // Fade in the first |count| samples of frame.
269 RTC_DCHECK(previous_frame_muted);
270 }
271
272 // Perform fade.
273 int16_t* frame_data = frame->mutable_data();
274 size_t channels = frame->num_channels_;
275 for (size_t j = 0; j < channels; ++j) {
276 float g = start_g;
277 for (size_t i = start * channels; i < end * channels; i += channels) {
278 g += inc;
279 frame_data[i + j] *= g;
280 }
281 }
282 }
283 }
284
Mute(AudioFrame * frame)285 void AudioFrameOperations::Mute(AudioFrame* frame) {
286 Mute(frame, true, true);
287 }
288
ApplyHalfGain(AudioFrame * frame)289 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
290 RTC_DCHECK(frame);
291 RTC_DCHECK_GT(frame->num_channels_, 0);
292 if (frame->num_channels_ < 1 || frame->muted()) {
293 return;
294 }
295
296 int16_t* frame_data = frame->mutable_data();
297 for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
298 i++) {
299 frame_data[i] = frame_data[i] >> 1;
300 }
301 }
302
Scale(float left,float right,AudioFrame * frame)303 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
304 if (frame->num_channels_ != 2) {
305 return -1;
306 } else if (frame->muted()) {
307 return 0;
308 }
309
310 int16_t* frame_data = frame->mutable_data();
311 for (size_t i = 0; i < frame->samples_per_channel_; i++) {
312 frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
313 frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]);
314 }
315 return 0;
316 }
317
ScaleWithSat(float scale,AudioFrame * frame)318 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
319 if (frame->muted()) {
320 return 0;
321 }
322
323 int16_t* frame_data = frame->mutable_data();
324 for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
325 i++) {
326 frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]);
327 }
328 return 0;
329 }
330 } // namespace webrtc
331