1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "audio/utility/audio_frame_operations.h"
12 
13 #include <algorithm>
14 
15 #include "modules/include/module_common_types.h"
16 #include "rtc_base/checks.h"
17 #include "rtc_base/numerics/safe_conversions.h"
18 
19 namespace webrtc {
namespace {

// Length, in samples per channel, of the ramp applied when a stream switches
// between muted and unmuted. 128 samples correspond to roughly 2.7 ms at
// 48 kHz, 4 ms at 32 kHz and 8 ms at 16 kHz.
constexpr size_t kMuteFadeFrames = 128;

// Gain change applied per sample so that a full-length ramp spans exactly
// 0.0 to 1.0.
constexpr float kMuteFadeInc = 1.0f / kMuteFadeFrames;

}  // namespace
27 
Add(const AudioFrame & frame_to_add,AudioFrame * result_frame)28 void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
29                                AudioFrame* result_frame) {
30   // Sanity check.
31   RTC_DCHECK(result_frame);
32   RTC_DCHECK_GT(result_frame->num_channels_, 0);
33   RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);
34 
35   bool no_previous_data = result_frame->muted();
36   if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
37     // Special case we have no data to start with.
38     RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
39     result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_;
40     no_previous_data = true;
41   }
42 
43   if (result_frame->vad_activity_ == AudioFrame::kVadActive ||
44       frame_to_add.vad_activity_ == AudioFrame::kVadActive) {
45     result_frame->vad_activity_ = AudioFrame::kVadActive;
46   } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown ||
47              frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) {
48     result_frame->vad_activity_ = AudioFrame::kVadUnknown;
49   }
50 
51   if (result_frame->speech_type_ != frame_to_add.speech_type_)
52     result_frame->speech_type_ = AudioFrame::kUndefined;
53 
54   if (!frame_to_add.muted()) {
55     const int16_t* in_data = frame_to_add.data();
56     int16_t* out_data = result_frame->mutable_data();
57     size_t length =
58         frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
59     if (no_previous_data) {
60       std::copy(in_data, in_data + length, out_data);
61     } else {
62       for (size_t i = 0; i < length; i++) {
63         const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
64                                    static_cast<int32_t>(in_data[i]);
65         out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
66       }
67     }
68   }
69 }
70 
MonoToStereo(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)71 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
72                                         size_t samples_per_channel,
73                                         int16_t* dst_audio) {
74   for (size_t i = 0; i < samples_per_channel; i++) {
75     dst_audio[2 * i] = src_audio[i];
76     dst_audio[2 * i + 1] = src_audio[i];
77   }
78 }
79 
MonoToStereo(AudioFrame * frame)80 int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
81   if (frame->num_channels_ != 1) {
82     return -1;
83   }
84   if ((frame->samples_per_channel_ * 2) >= AudioFrame::kMaxDataSizeSamples) {
85     // Not enough memory to expand from mono to stereo.
86     return -1;
87   }
88 
89   if (!frame->muted()) {
90     // TODO(yujo): this operation can be done in place.
91     int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
92     memcpy(data_copy, frame->data(),
93            sizeof(int16_t) * frame->samples_per_channel_);
94     MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data());
95   }
96   frame->num_channels_ = 2;
97 
98   return 0;
99 }
100 
StereoToMono(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)101 void AudioFrameOperations::StereoToMono(const int16_t* src_audio,
102                                         size_t samples_per_channel,
103                                         int16_t* dst_audio) {
104   for (size_t i = 0; i < samples_per_channel; i++) {
105     dst_audio[i] =
106         (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) >> 1;
107   }
108 }
109 
StereoToMono(AudioFrame * frame)110 int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
111   if (frame->num_channels_ != 2) {
112     return -1;
113   }
114 
115   RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
116                 AudioFrame::kMaxDataSizeSamples);
117 
118   if (!frame->muted()) {
119     StereoToMono(frame->data(), frame->samples_per_channel_,
120                  frame->mutable_data());
121   }
122   frame->num_channels_ = 1;
123 
124   return 0;
125 }
126 
QuadToStereo(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)127 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
128                                         size_t samples_per_channel,
129                                         int16_t* dst_audio) {
130   for (size_t i = 0; i < samples_per_channel; i++) {
131     dst_audio[i * 2] =
132         (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1;
133     dst_audio[i * 2 + 1] =
134         (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >>
135         1;
136   }
137 }
138 
QuadToStereo(AudioFrame * frame)139 int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
140   if (frame->num_channels_ != 4) {
141     return -1;
142   }
143 
144   RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
145                 AudioFrame::kMaxDataSizeSamples);
146 
147   if (!frame->muted()) {
148     QuadToStereo(frame->data(), frame->samples_per_channel_,
149                  frame->mutable_data());
150   }
151   frame->num_channels_ = 2;
152 
153   return 0;
154 }
155 
QuadToMono(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)156 void AudioFrameOperations::QuadToMono(const int16_t* src_audio,
157                                       size_t samples_per_channel,
158                                       int16_t* dst_audio) {
159   for (size_t i = 0; i < samples_per_channel; i++) {
160     dst_audio[i] =
161         (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1] +
162          src_audio[4 * i + 2] + src_audio[4 * i + 3]) >> 2;
163   }
164 }
165 
QuadToMono(AudioFrame * frame)166 int AudioFrameOperations::QuadToMono(AudioFrame* frame) {
167   if (frame->num_channels_ != 4) {
168     return -1;
169   }
170 
171   RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
172                 AudioFrame::kMaxDataSizeSamples);
173 
174   if (!frame->muted()) {
175     QuadToMono(frame->data(), frame->samples_per_channel_,
176                frame->mutable_data());
177   }
178   frame->num_channels_ = 1;
179 
180   return 0;
181 }
182 
DownmixChannels(const int16_t * src_audio,size_t src_channels,size_t samples_per_channel,size_t dst_channels,int16_t * dst_audio)183 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
184                                            size_t src_channels,
185                                            size_t samples_per_channel,
186                                            size_t dst_channels,
187                                            int16_t* dst_audio) {
188   if (src_channels == 2 && dst_channels == 1) {
189     StereoToMono(src_audio, samples_per_channel, dst_audio);
190     return;
191   } else if (src_channels == 4 && dst_channels == 2) {
192     QuadToStereo(src_audio, samples_per_channel, dst_audio);
193     return;
194   } else if (src_channels == 4 && dst_channels == 1) {
195     QuadToMono(src_audio, samples_per_channel, dst_audio);
196     return;
197   }
198 
199   RTC_NOTREACHED() << "src_channels: " << src_channels
200                    << ", dst_channels: " << dst_channels;
201 }
202 
DownmixChannels(size_t dst_channels,AudioFrame * frame)203 int AudioFrameOperations::DownmixChannels(size_t dst_channels,
204                                           AudioFrame* frame) {
205   if (frame->num_channels_ == 2 && dst_channels == 1) {
206     return StereoToMono(frame);
207   } else if (frame->num_channels_ == 4 && dst_channels == 2) {
208     return QuadToStereo(frame);
209   } else if (frame->num_channels_ == 4 && dst_channels == 1) {
210     return QuadToMono(frame);
211   }
212 
213   return -1;
214 }
215 
SwapStereoChannels(AudioFrame * frame)216 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
217   RTC_DCHECK(frame);
218   if (frame->num_channels_ != 2 || frame->muted()) {
219     return;
220   }
221 
222   int16_t* frame_data = frame->mutable_data();
223   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
224     int16_t temp_data = frame_data[i];
225     frame_data[i] = frame_data[i + 1];
226     frame_data[i + 1] = temp_data;
227   }
228 }
229 
Mute(AudioFrame * frame,bool previous_frame_muted,bool current_frame_muted)230 void AudioFrameOperations::Mute(AudioFrame* frame,
231                                 bool previous_frame_muted,
232                                 bool current_frame_muted) {
233   RTC_DCHECK(frame);
234   if (!previous_frame_muted && !current_frame_muted) {
235     // Not muted, don't touch.
236   } else if (previous_frame_muted && current_frame_muted) {
237     // Frame fully muted.
238     size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
239     RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
240     frame->Mute();
241   } else {
242     // Fade is a no-op on a muted frame.
243     if (frame->muted()) {
244       return;
245     }
246 
247     // Limit number of samples to fade, if frame isn't long enough.
248     size_t count = kMuteFadeFrames;
249     float inc = kMuteFadeInc;
250     if (frame->samples_per_channel_ < kMuteFadeFrames) {
251       count = frame->samples_per_channel_;
252       if (count > 0) {
253         inc = 1.0f / count;
254       }
255     }
256 
257     size_t start = 0;
258     size_t end = count;
259     float start_g = 0.0f;
260     if (current_frame_muted) {
261       // Fade out the last |count| samples of frame.
262       RTC_DCHECK(!previous_frame_muted);
263       start = frame->samples_per_channel_ - count;
264       end = frame->samples_per_channel_;
265       start_g = 1.0f;
266       inc = -inc;
267     } else {
268       // Fade in the first |count| samples of frame.
269       RTC_DCHECK(previous_frame_muted);
270     }
271 
272     // Perform fade.
273     int16_t* frame_data = frame->mutable_data();
274     size_t channels = frame->num_channels_;
275     for (size_t j = 0; j < channels; ++j) {
276       float g = start_g;
277       for (size_t i = start * channels; i < end * channels; i += channels) {
278         g += inc;
279         frame_data[i + j] *= g;
280       }
281     }
282   }
283 }
284 
Mute(AudioFrame * frame)285 void AudioFrameOperations::Mute(AudioFrame* frame) {
286   Mute(frame, true, true);
287 }
288 
ApplyHalfGain(AudioFrame * frame)289 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
290   RTC_DCHECK(frame);
291   RTC_DCHECK_GT(frame->num_channels_, 0);
292   if (frame->num_channels_ < 1 || frame->muted()) {
293     return;
294   }
295 
296   int16_t* frame_data = frame->mutable_data();
297   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
298        i++) {
299     frame_data[i] = frame_data[i] >> 1;
300   }
301 }
302 
Scale(float left,float right,AudioFrame * frame)303 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
304   if (frame->num_channels_ != 2) {
305     return -1;
306   } else if (frame->muted()) {
307     return 0;
308   }
309 
310   int16_t* frame_data = frame->mutable_data();
311   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
312     frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
313     frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]);
314   }
315   return 0;
316 }
317 
ScaleWithSat(float scale,AudioFrame * frame)318 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
319   if (frame->muted()) {
320     return 0;
321   }
322 
323   int16_t* frame_data = frame->mutable_data();
324   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
325        i++) {
326     frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]);
327   }
328   return 0;
329 }
330 }  // namespace webrtc
331