1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_device/win/core_audio_input_win.h"
12 
13 #include <memory>
14 
15 #include "modules/audio_device/audio_device_buffer.h"
16 #include "modules/audio_device/fine_audio_buffer.h"
17 #include "rtc_base/checks.h"
18 #include "rtc_base/logging.h"
19 #include "rtc_base/numerics/safe_conversions.h"
20 
21 using Microsoft::WRL::ComPtr;
22 
23 namespace webrtc {
24 namespace webrtc_win {
25 
// Message identifiers used by the audio device implementation. The underlying
// type is fixed to uint32_t.
enum AudioDeviceMessageType : uint32_t {
  // First (and only) enumerator; its value is zero.
  kMessageInputStreamDisconnected = 0,
};
29 
CoreAudioInput(bool automatic_restart)30 CoreAudioInput::CoreAudioInput(bool automatic_restart)
31     : CoreAudioBase(
32           CoreAudioBase::Direction::kInput,
33           automatic_restart,
34           [this](uint64_t freq) { return OnDataCallback(freq); },
__anon7e8dd0640202(ErrorType err) 35           [this](ErrorType err) { return OnErrorCallback(err); }) {
36   RTC_DLOG(INFO) << __FUNCTION__;
37   RTC_DCHECK_RUN_ON(&thread_checker_);
38   thread_checker_audio_.Detach();
39 }
40 
~CoreAudioInput()41 CoreAudioInput::~CoreAudioInput() {
42   RTC_DLOG(INFO) << __FUNCTION__;
43   RTC_DCHECK_RUN_ON(&thread_checker_);
44 }
45 
Init()46 int CoreAudioInput::Init() {
47   RTC_DLOG(INFO) << __FUNCTION__;
48   RTC_DCHECK_RUN_ON(&thread_checker_);
49   return 0;
50 }
51 
Terminate()52 int CoreAudioInput::Terminate() {
53   RTC_DLOG(INFO) << __FUNCTION__;
54   RTC_DCHECK_RUN_ON(&thread_checker_);
55   StopRecording();
56   return 0;
57 }
58 
NumDevices() const59 int CoreAudioInput::NumDevices() const {
60   RTC_DCHECK_RUN_ON(&thread_checker_);
61   return core_audio_utility::NumberOfActiveDevices(eCapture);
62 }
63 
SetDevice(int index)64 int CoreAudioInput::SetDevice(int index) {
65   RTC_DLOG(INFO) << __FUNCTION__ << ": " << index;
66   RTC_DCHECK_GE(index, 0);
67   RTC_DCHECK_RUN_ON(&thread_checker_);
68   return CoreAudioBase::SetDevice(index);
69 }
70 
SetDevice(AudioDeviceModule::WindowsDeviceType device)71 int CoreAudioInput::SetDevice(AudioDeviceModule::WindowsDeviceType device) {
72   RTC_DLOG(INFO) << __FUNCTION__ << ": "
73                  << ((device == AudioDeviceModule::kDefaultDevice)
74                          ? "Default"
75                          : "DefaultCommunication");
76   RTC_DCHECK_RUN_ON(&thread_checker_);
77   return SetDevice((device == AudioDeviceModule::kDefaultDevice) ? 0 : 1);
78 }
79 
DeviceName(int index,std::string * name,std::string * guid)80 int CoreAudioInput::DeviceName(int index,
81                                std::string* name,
82                                std::string* guid) {
83   RTC_DLOG(INFO) << __FUNCTION__ << ": " << index;
84   RTC_DCHECK_RUN_ON(&thread_checker_);
85   RTC_DCHECK(name);
86   return CoreAudioBase::DeviceName(index, name, guid);
87 }
88 
AttachAudioBuffer(AudioDeviceBuffer * audio_buffer)89 void CoreAudioInput::AttachAudioBuffer(AudioDeviceBuffer* audio_buffer) {
90   RTC_DLOG(INFO) << __FUNCTION__;
91   RTC_DCHECK_RUN_ON(&thread_checker_);
92   audio_device_buffer_ = audio_buffer;
93 }
94 
RecordingIsInitialized() const95 bool CoreAudioInput::RecordingIsInitialized() const {
96   RTC_DLOG(INFO) << __FUNCTION__ << ": " << initialized_;
97   RTC_DCHECK_RUN_ON(&thread_checker_);
98   return initialized_;
99 }
100 
InitRecording()101 int CoreAudioInput::InitRecording() {
102   RTC_DLOG(INFO) << __FUNCTION__;
103   RTC_DCHECK(!initialized_);
104   RTC_DCHECK(!Recording());
105   RTC_DCHECK(!audio_capture_client_);
106 
107   // Creates an IAudioClient instance and stores the valid interface pointer in
108   // |audio_client3_|, |audio_client2_|, or |audio_client_| depending on
109   // platform support. The base class will use optimal input parameters and do
110   // an event driven shared mode initialization. The utilized format will be
111   // stored in |format_| and can be used for configuration and allocation of
112   // audio buffers.
113   if (!CoreAudioBase::Init()) {
114     return -1;
115   }
116   RTC_DCHECK(audio_client_);
117 
118   // Configure the recording side of the audio device buffer using |format_|
119   // after a trivial sanity check of the format structure.
120   RTC_DCHECK(audio_device_buffer_);
121   WAVEFORMATEX* format = &format_.Format;
122   RTC_DCHECK_EQ(format->wFormatTag, WAVE_FORMAT_EXTENSIBLE);
123   audio_device_buffer_->SetRecordingSampleRate(format->nSamplesPerSec);
124   audio_device_buffer_->SetRecordingChannels(format->nChannels);
125 
126   // Create a modified audio buffer class which allows us to supply any number
127   // of samples (and not only multiple of 10ms) to match the optimal buffer
128   // size per callback used by Core Audio.
129   // TODO(henrika): can we share one FineAudioBuffer with the output side?
130   fine_audio_buffer_ = std::make_unique<FineAudioBuffer>(audio_device_buffer_);
131 
132   // Create an IAudioCaptureClient for an initialized IAudioClient.
133   // The IAudioCaptureClient interface enables a client to read input data from
134   // a capture endpoint buffer.
135   ComPtr<IAudioCaptureClient> audio_capture_client =
136       core_audio_utility::CreateCaptureClient(audio_client_.Get());
137   if (!audio_capture_client) {
138     return -1;
139   }
140 
141   // Query performance frequency.
142   LARGE_INTEGER ticks_per_sec = {};
143   qpc_to_100ns_.reset();
144   if (::QueryPerformanceFrequency(&ticks_per_sec)) {
145     double qpc_ticks_per_second =
146         rtc::dchecked_cast<double>(ticks_per_sec.QuadPart);
147     qpc_to_100ns_ = 10000000.0 / qpc_ticks_per_second;
148   }
149 
150   // Store valid COM interfaces.
151   audio_capture_client_ = audio_capture_client;
152 
153   initialized_ = true;
154   return 0;
155 }
156 
StartRecording()157 int CoreAudioInput::StartRecording() {
158   RTC_DLOG(INFO) << __FUNCTION__;
159   RTC_DCHECK(!Recording());
160   RTC_DCHECK(fine_audio_buffer_);
161   RTC_DCHECK(audio_device_buffer_);
162   if (!initialized_) {
163     RTC_DLOG(LS_WARNING)
164         << "Recording can not start since InitRecording must succeed first";
165     return 0;
166   }
167 
168   fine_audio_buffer_->ResetRecord();
169   if (!IsRestarting()) {
170     audio_device_buffer_->StartRecording();
171   }
172 
173   if (!Start()) {
174     return -1;
175   }
176 
177   is_active_ = true;
178   return 0;
179 }
180 
StopRecording()181 int CoreAudioInput::StopRecording() {
182   RTC_DLOG(INFO) << __FUNCTION__;
183   if (!initialized_) {
184     return 0;
185   }
186 
187   // Release resources allocated in InitRecording() and then return if this
188   // method is called without any active input audio.
189   if (!Recording()) {
190     RTC_DLOG(WARNING) << "No input stream is active";
191     ReleaseCOMObjects();
192     initialized_ = false;
193     return 0;
194   }
195 
196   if (!Stop()) {
197     RTC_LOG(LS_ERROR) << "StopRecording failed";
198     return -1;
199   }
200 
201   if (!IsRestarting()) {
202     RTC_DCHECK(audio_device_buffer_);
203     audio_device_buffer_->StopRecording();
204   }
205 
206   // Release all allocated resources to allow for a restart without
207   // intermediate destruction.
208   ReleaseCOMObjects();
209   qpc_to_100ns_.reset();
210 
211   initialized_ = false;
212   is_active_ = false;
213   return 0;
214 }
215 
Recording()216 bool CoreAudioInput::Recording() {
217   RTC_DLOG(INFO) << __FUNCTION__ << ": " << is_active_;
218   return is_active_;
219 }
220 
221 // TODO(henrika): finalize support of audio session volume control. As is, we
222 // are not compatible with the old ADM implementation since it allows accessing
223 // the volume control with any active audio output stream.
VolumeIsAvailable(bool * available)224 int CoreAudioInput::VolumeIsAvailable(bool* available) {
225   RTC_DLOG(INFO) << __FUNCTION__;
226   RTC_DCHECK_RUN_ON(&thread_checker_);
227   return IsVolumeControlAvailable(available) ? 0 : -1;
228 }
229 
230 // Triggers the restart sequence. Only used for testing purposes to emulate
231 // a real event where e.g. an active input device is removed.
RestartRecording()232 int CoreAudioInput::RestartRecording() {
233   RTC_DLOG(INFO) << __FUNCTION__;
234   RTC_DCHECK_RUN_ON(&thread_checker_);
235   if (!Recording()) {
236     return 0;
237   }
238 
239   if (!Restart()) {
240     RTC_LOG(LS_ERROR) << "RestartRecording failed";
241     return -1;
242   }
243   return 0;
244 }
245 
Restarting() const246 bool CoreAudioInput::Restarting() const {
247   RTC_DCHECK_RUN_ON(&thread_checker_);
248   return IsRestarting();
249 }
250 
SetSampleRate(uint32_t sample_rate)251 int CoreAudioInput::SetSampleRate(uint32_t sample_rate) {
252   RTC_DLOG(INFO) << __FUNCTION__;
253   RTC_DCHECK_RUN_ON(&thread_checker_);
254   sample_rate_ = sample_rate;
255   return 0;
256 }
257 
ReleaseCOMObjects()258 void CoreAudioInput::ReleaseCOMObjects() {
259   RTC_DLOG(INFO) << __FUNCTION__;
260   CoreAudioBase::ReleaseCOMObjects();
261   if (audio_capture_client_.Get()) {
262     audio_capture_client_.Reset();
263   }
264 }
265 
OnDataCallback(uint64_t device_frequency)266 bool CoreAudioInput::OnDataCallback(uint64_t device_frequency) {
267   RTC_DCHECK_RUN_ON(&thread_checker_audio_);
268 
269   if (!initialized_ || !is_active_) {
270     // This is concurrent examination of state across multiple threads so will
271     // be somewhat error prone, but we should still be defensive and not use
272     // audio_capture_client_ if we know it's not there.
273     return false;
274   }
275   if (num_data_callbacks_ == 0) {
276     RTC_LOG(INFO) << "--- Input audio stream is alive ---";
277   }
278   UINT32 num_frames_in_next_packet = 0;
279   _com_error error =
280       audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
281   if (error.Error() == AUDCLNT_E_DEVICE_INVALIDATED) {
282     // Avoid breaking the thread loop implicitly by returning false and return
283     // true instead for AUDCLNT_E_DEVICE_INVALIDATED even it is a valid error
284     // message. We will use notifications about device changes instead to stop
285     // data callbacks and attempt to restart streaming .
286     RTC_DLOG(LS_ERROR) << "AUDCLNT_E_DEVICE_INVALIDATED";
287     return true;
288   }
289   if (FAILED(error.Error())) {
290     RTC_LOG(LS_ERROR) << "IAudioCaptureClient::GetNextPacketSize failed: "
291                       << core_audio_utility::ErrorToString(error);
292     return false;
293   }
294 
295   // Drain the WASAPI capture buffer fully if audio has been recorded.
296   while (num_frames_in_next_packet > 0) {
297     uint8_t* audio_data;
298     UINT32 num_frames_to_read = 0;
299     DWORD flags = 0;
300     UINT64 device_position_frames = 0;
301     UINT64 capture_time_100ns = 0;
302     error = audio_capture_client_->GetBuffer(&audio_data, &num_frames_to_read,
303                                              &flags, &device_position_frames,
304                                              &capture_time_100ns);
305     if (error.Error() == AUDCLNT_S_BUFFER_EMPTY) {
306       // The call succeeded but no capture data is available to be read.
307       // Return and start waiting for new capture event
308       RTC_DCHECK_EQ(num_frames_to_read, 0u);
309       return true;
310     }
311     if (FAILED(error.Error())) {
312       RTC_LOG(LS_ERROR) << "IAudioCaptureClient::GetBuffer failed: "
313                         << core_audio_utility::ErrorToString(error);
314       return false;
315     }
316 
317     // Update input delay estimate but only about once per second to save
318     // resources. The estimate is usually stable.
319     if (num_data_callbacks_ % 100 == 0) {
320       absl::optional<int> opt_record_delay_ms;
321       // TODO(henrika): note that FineAudioBuffer adds latency as well.
322       opt_record_delay_ms = EstimateLatencyMillis(capture_time_100ns);
323       if (opt_record_delay_ms) {
324         latency_ms_ = *opt_record_delay_ms;
325       } else {
326         RTC_DLOG(LS_WARNING) << "Input latency is set to fixed value";
327         latency_ms_ = 20;
328       }
329     }
330     if (num_data_callbacks_ % 500 == 0) {
331       RTC_DLOG(INFO) << "latency: " << latency_ms_;
332     }
333 
334     // The data in the packet is not correlated with the previous packet's
335     // device position; possibly due to a stream state transition or timing
336     // glitch. The behavior of the AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY flag
337     // is undefined on the application's first call to GetBuffer after Start.
338     if (device_position_frames != 0 &&
339         flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY) {
340       RTC_DLOG(LS_WARNING) << "AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY";
341     }
342     // The time at which the device's stream position was recorded is uncertain.
343     // Thus, the client might be unable to accurately set a time stamp for the
344     // current data packet.
345     if (flags & AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR) {
346       RTC_DLOG(LS_WARNING) << "AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR";
347     }
348 
349     // Treat all of the data in the packet as silence and ignore the actual
350     // data values when AUDCLNT_BUFFERFLAGS_SILENT is set.
351     if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
352       rtc::ExplicitZeroMemory(audio_data,
353                               format_.Format.nBlockAlign * num_frames_to_read);
354       RTC_DLOG(LS_WARNING) << "Captured audio is replaced by silence";
355     } else {
356       // Copy recorded audio in |audio_data| to the WebRTC sink using the
357       // FineAudioBuffer object.
358       fine_audio_buffer_->DeliverRecordedData(
359           rtc::MakeArrayView(reinterpret_cast<const int16_t*>(audio_data),
360                              format_.Format.nChannels * num_frames_to_read),
361 
362           latency_ms_);
363     }
364 
365     error = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
366     if (FAILED(error.Error())) {
367       RTC_LOG(LS_ERROR) << "IAudioCaptureClient::ReleaseBuffer failed: "
368                         << core_audio_utility::ErrorToString(error);
369       return false;
370     }
371 
372     error =
373         audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
374     if (FAILED(error.Error())) {
375       RTC_LOG(LS_ERROR) << "IAudioCaptureClient::GetNextPacketSize failed: "
376                         << core_audio_utility::ErrorToString(error);
377       return false;
378     }
379   }
380   ++num_data_callbacks_;
381   return true;
382 }
383 
OnErrorCallback(ErrorType error)384 bool CoreAudioInput::OnErrorCallback(ErrorType error) {
385   RTC_DLOG(INFO) << __FUNCTION__ << ": " << as_integer(error);
386   RTC_DCHECK_RUN_ON(&thread_checker_audio_);
387   if (error == CoreAudioBase::ErrorType::kStreamDisconnected) {
388     HandleStreamDisconnected();
389   } else {
390     RTC_DLOG(WARNING) << "Unsupported error type";
391   }
392   return true;
393 }
394 
EstimateLatencyMillis(uint64_t capture_time_100ns)395 absl::optional<int> CoreAudioInput::EstimateLatencyMillis(
396     uint64_t capture_time_100ns) {
397   if (!qpc_to_100ns_) {
398     return absl::nullopt;
399   }
400   // Input parameter |capture_time_100ns| contains the performance counter at
401   // the time that the audio endpoint device recorded the device position of
402   // the first audio frame in the data packet converted into 100ns units.
403   // We derive a delay estimate by:
404   // - sampling the current performance counter (qpc_now_raw),
405   // - converting it into 100ns time units (now_time_100ns), and
406   // - subtracting |capture_time_100ns| from now_time_100ns.
407   LARGE_INTEGER perf_counter_now = {};
408   if (!::QueryPerformanceCounter(&perf_counter_now)) {
409     return absl::nullopt;
410   }
411   uint64_t qpc_now_raw = perf_counter_now.QuadPart;
412   uint64_t now_time_100ns = qpc_now_raw * (*qpc_to_100ns_);
413   webrtc::TimeDelta delay_us = webrtc::TimeDelta::Micros(
414       0.1 * (now_time_100ns - capture_time_100ns) + 0.5);
415   return delay_us.ms();
416 }
417 
418 // Called from OnErrorCallback() when error type is kStreamDisconnected.
419 // Note that this method is called on the audio thread and the internal restart
420 // sequence is also executed on that same thread. The audio thread is therefore
421 // not stopped during restart. Such a scheme also makes the restart process less
422 // complex.
423 // Note that, none of the called methods are thread checked since they can also
424 // be called on the main thread. Thread checkers are instead added on one layer
425 // above (in audio_device_module.cc) which ensures that the public API is thread
426 // safe.
427 // TODO(henrika): add more details.
HandleStreamDisconnected()428 bool CoreAudioInput::HandleStreamDisconnected() {
429   RTC_DLOG(INFO) << "<<<--- " << __FUNCTION__;
430   RTC_DCHECK_RUN_ON(&thread_checker_audio_);
431   RTC_DCHECK(automatic_restart());
432 
433   if (StopRecording() != 0) {
434     return false;
435   }
436 
437   if (!SwitchDeviceIfNeeded()) {
438     return false;
439   }
440 
441   if (InitRecording() != 0) {
442     return false;
443   }
444   if (StartRecording() != 0) {
445     return false;
446   }
447 
448   RTC_DLOG(INFO) << __FUNCTION__ << " --->>>";
449   return true;
450 }
451 
452 }  // namespace webrtc_win
453 }  // namespace webrtc
454