1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Implementation of AudioInputStream for Windows using Windows Core Audio 6 // WASAPI for low latency capturing. 7 // 8 // Overview of operation: 9 // 10 // - An object of WASAPIAudioInputStream is created by the AudioManager 11 // factory. 12 // - Next some thread will call Open(), at that point the underlying 13 // Core Audio APIs are utilized to create two WASAPI interfaces called 14 // IAudioClient and IAudioCaptureClient. 15 // - Then some thread will call Start(sink). 16 // A thread called "wasapi_capture_thread" is started and this thread listens 17 // on an event signal which is set periodically by the audio engine for 18 // each recorded data packet. As a result, data samples will be provided 19 // to the registered sink. 20 // - At some point, a thread will call Stop(), which stops and joins the 21 // capture thread and at the same time stops audio streaming. 22 // - The same thread that called stop will call Close() where we cleanup 23 // and notify the audio manager, which likely will destroy this object. 24 // 25 // Implementation notes: 26 // 27 // - The minimum supported client is Windows Vista. 28 // - This implementation is single-threaded, hence: 29 // o Construction and destruction must take place from the same thread. 30 // o It is recommended to call all APIs from the same thread as well. 31 // - It is recommended to first acquire the native sample rate of the default 32 // input device and then use the same rate when creating this object. Use 33 // WASAPIAudioInputStream::HardwareSampleRate() to retrieve the sample rate. 34 // - Calling Close() also leads to self destruction. 35 // 36 // Core Audio API details: 37 // 38 // - Utilized MMDevice interfaces: 39 // o IMMDeviceEnumerator 40 // o IMMDevice 41 // - Utilized WASAPI interfaces: 42 // o IAudioClient 43 // o IAudioCaptureClient 44 // - The stream is initialized in shared mode and the processing of the 45 // audio buffer is event driven. 46 // - The Multimedia Class Scheduler service (MMCSS) is utilized to boost 47 // the priority of the capture thread. 48 // - Audio applications that use the MMDevice API and WASAPI typically use 49 // the ISimpleAudioVolume interface to manage stream volume levels on a 50 // per-session basis. It is also possible to use of the IAudioEndpointVolume 51 // interface to control the master volume level of an audio endpoint device. 52 // This implementation is using the ISimpleAudioVolume interface. 53 // MSDN states that "In rare cases, a specialized audio application might 54 // require the use of the IAudioEndpointVolume". 55 // 56 #ifndef MEDIA_AUDIO_WIN_AUDIO_LOW_LATENCY_INPUT_WIN_H_ 57 #define MEDIA_AUDIO_WIN_AUDIO_LOW_LATENCY_INPUT_WIN_H_ 58 59 #include <Audioclient.h> 60 #include <MMDeviceAPI.h> 61 #include <endpointvolume.h> 62 #include <stddef.h> 63 #include <stdint.h> 64 #include <windows.media.effects.h> 65 #include <wrl/client.h> 66 67 #include <memory> 68 #include <string> 69 #include <vector> 70 71 #include "base/compiler_specific.h" 72 #include "base/macros.h" 73 #include "base/sequence_checker.h" 74 #include "base/threading/platform_thread.h" 75 #include "base/threading/simple_thread.h" 76 #include "base/win/scoped_co_mem.h" 77 #include "base/win/scoped_com_initializer.h" 78 #include "base/win/scoped_handle.h" 79 #include "media/audio/agc_audio_stream.h" 80 #include "media/audio/win/audio_manager_win.h" 81 #include "media/base/audio_converter.h" 82 #include "media/base/audio_parameters.h" 83 #include "media/base/media_export.h" 84 85 namespace media { 86 87 class AudioBlockFifo; 88 class AudioBus; 89 90 // AudioInputStream implementation using Windows Core Audio APIs. 91 class MEDIA_EXPORT WASAPIAudioInputStream 92 : public AgcAudioStream<AudioInputStream>, 93 public base::DelegateSimpleThread::Delegate, 94 public AudioConverter::InputCallback { 95 public: 96 // Used to track down where we fail during initialization which at the 97 // moment seems to be happening frequently and we're not sure why. 98 // The reason might be expected (e.g. trying to open "default" on a machine 99 // that has no audio devices). 100 // Note: This enum is used to record a histogram value and should not be 101 // re-ordered. 102 enum StreamOpenResult { 103 OPEN_RESULT_OK = 0, 104 OPEN_RESULT_CREATE_INSTANCE = 1, 105 OPEN_RESULT_NO_ENDPOINT = 2, 106 OPEN_RESULT_NO_STATE = 3, 107 OPEN_RESULT_DEVICE_NOT_ACTIVE = 4, 108 OPEN_RESULT_ACTIVATION_FAILED = 5, 109 OPEN_RESULT_FORMAT_NOT_SUPPORTED = 6, 110 OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED = 7, 111 OPEN_RESULT_GET_BUFFER_SIZE_FAILED = 8, 112 OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED = 9, 113 OPEN_RESULT_LOOPBACK_INIT_FAILED = 10, 114 OPEN_RESULT_SET_EVENT_HANDLE = 11, 115 OPEN_RESULT_NO_CAPTURE_CLIENT = 12, 116 OPEN_RESULT_NO_AUDIO_VOLUME = 13, 117 OPEN_RESULT_OK_WITH_RESAMPLING = 14, 118 OPEN_RESULT_MAX = OPEN_RESULT_OK_WITH_RESAMPLING 119 }; 120 121 // The ctor takes all the usual parameters, plus |manager| which is the 122 // the audio manager who is creating this object. 123 WASAPIAudioInputStream(AudioManagerWin* manager, 124 const AudioParameters& params, 125 const std::string& device_id, 126 AudioManager::LogCallback log_callback); 127 128 // The dtor is typically called by the AudioManager only and it is usually 129 // triggered by calling AudioInputStream::Close(). 130 ~WASAPIAudioInputStream() override; 131 132 // Implementation of AudioInputStream. 133 bool Open() override; 134 void Start(AudioInputCallback* callback) override; 135 void Stop() override; 136 void Close() override; 137 double GetMaxVolume() override; 138 void SetVolume(double volume) override; 139 double GetVolume() override; 140 bool IsMuted() override; 141 void SetOutputDeviceForAec(const std::string& output_device_id) override; 142 started()143 bool started() const { return started_; } 144 145 private: 146 void SendLogMessage(const char* format, ...) PRINTF_FORMAT(2, 3); 147 148 // DelegateSimpleThread::Delegate implementation. 149 void Run() override; 150 151 // Pulls capture data from the endpoint device and pushes it to the sink. 152 void PullCaptureDataAndPushToSink(); 153 154 // Issues the OnError() callback to the |sink_|. 155 void HandleError(HRESULT err); 156 157 // The Open() method is divided into these sub methods. 158 HRESULT SetCaptureDevice(); 159 // Returns whether raw audio processing is supported or not for the selected 160 // capture device. 161 bool RawProcessingSupported(); 162 // The Windows.Media.Effects.AudioEffectsManager UWP API contains a method 163 // called CreateAudioCaptureEffectsManagerWithMode() which is needed to 164 // enumerate active audio effects on the capture stream. This UWP method 165 // needs a device ID which differs from what can be derived from the default 166 // Win32 API in CoreAudio. The GetUWPDeviceId() method builds up the required 167 // device ID that the audio effects manager needs. Note that it is also 168 // possible to get the ID directly from the Windows.Devices.Enumeration UWP 169 // API but that is rather complex and requires use of asynchronous methods. 170 std::string GetUWPDeviceId(); 171 // For the selected |uwp_device_id|, generate two lists of enabled audio 172 // effects and store them in |default_effect_types_| and |raw_effect_types_|. 173 HRESULT GetAudioCaptureEffects(const std::string& uwp_device_id); 174 HRESULT SetCommunicationsCategoryAndRawCaptureMode(); 175 HRESULT GetAudioEngineStreamFormat(); 176 // Returns whether the desired format is supported or not and writes the 177 // result of a failing system call to |*hr|, or S_OK if successful. If this 178 // function returns false with |*hr| == S_FALSE, the OS supports a closest 179 // match but we don't support conversion to it. 180 bool DesiredFormatIsSupported(HRESULT* hr); 181 void SetupConverterAndStoreFormatInfo(); 182 HRESULT InitializeAudioEngine(); 183 void ReportOpenResult(HRESULT hr); 184 // Reports stats for format related audio client initialization 185 // (IAudioClient::Initialize) errors, that is if |hr| is an error related to 186 // the format. 187 void MaybeReportFormatRelatedInitError(HRESULT hr) const; 188 189 // AudioConverter::InputCallback implementation. 190 double ProvideInput(AudioBus* audio_bus, uint32_t frames_delayed) override; 191 192 // Detects and counts glitches based on |device_position|. 193 void UpdateGlitchCount(UINT64 device_position); 194 195 // Reports glitch stats and resets associated variables. 196 void ReportAndResetGlitchStats(); 197 198 // Our creator, the audio manager needs to be notified when we close. 199 AudioManagerWin* const manager_; 200 201 // Capturing is driven by this thread (which has no message loop). 202 // All OnData() callbacks will be called from this thread. 203 std::unique_ptr<base::DelegateSimpleThread> capture_thread_; 204 205 // Contains the desired output audio format which is set up at construction 206 // and then never modified. It is the audio format this class will output 207 // data to the sink in, or equivalently, the format after the converter if 208 // such is needed. Does not need the extended version since we only support 209 // max stereo at this stage. 210 WAVEFORMATEX output_format_; 211 212 // Contains the audio format we get data from the audio engine in. Initially 213 // set to |output_format_| at construction but it might be changed to a close 214 // match if the audio engine doesn't support the originally set format. Note 215 // that, this is also the format after the FIFO, i.e. the input format to the 216 // converter if any. 217 WAVEFORMATEXTENSIBLE input_format_; 218 219 bool opened_ = false; 220 bool started_ = false; 221 StreamOpenResult open_result_ = OPEN_RESULT_OK; 222 223 // Size in bytes of each audio frame before the converter (4 bytes for 16-bit 224 // stereo PCM). Note that this is the same before and after the fifo. 225 size_t frame_size_bytes_ = 0; 226 227 // Size in audio frames of each audio packet (buffer) after the fifo but 228 // before the converter. 229 size_t packet_size_frames_ = 0; 230 231 // Size in bytes of each audio packet (buffer) after the fifo but before the 232 // converter. 233 size_t packet_size_bytes_ = 0; 234 235 // Length of the audio endpoint buffer, i.e. the buffer size before the fifo. 236 uint32_t endpoint_buffer_size_frames_ = 0; 237 238 // Contains the unique name of the selected endpoint device. 239 // Note that AudioDeviceDescription::kDefaultDeviceId represents the default 240 // device role and is not a valid ID as such. 241 std::string device_id_; 242 243 // Pointer to the object that will receive the recorded audio samples. 244 AudioInputCallback* sink_ = nullptr; 245 246 // Windows Multimedia Device (MMDevice) API interfaces. 247 248 // An IMMDevice interface which represents an audio endpoint device. 249 Microsoft::WRL::ComPtr<IMMDevice> endpoint_device_; 250 251 // Windows Audio Session API (WASAPI) interfaces. 252 253 // An IAudioClient interface which enables a client to create and initialize 254 // an audio stream between an audio application and the audio engine. 255 Microsoft::WRL::ComPtr<IAudioClient> audio_client_; 256 257 // Loopback IAudioClient doesn't support event-driven mode, so a separate 258 // IAudioClient is needed to receive notifications when data is available in 259 // the buffer. For loopback input |audio_client_| is used to receive data, 260 // while |audio_render_client_for_loopback_| is used to get notifications 261 // when a new buffer is ready. See comment in InitializeAudioEngine() for 262 // details. 263 Microsoft::WRL::ComPtr<IAudioClient> audio_render_client_for_loopback_; 264 265 // The IAudioCaptureClient interface enables a client to read input data 266 // from a capture endpoint buffer. 267 Microsoft::WRL::ComPtr<IAudioCaptureClient> audio_capture_client_; 268 269 // The IAudioClock interface is used to get the current timestamp, as the 270 // timestamp from IAudioCaptureClient::GetBuffer can be unreliable with some 271 // devices. 272 Microsoft::WRL::ComPtr<IAudioClock> audio_clock_; 273 274 // The ISimpleAudioVolume interface enables a client to control the 275 // master volume level of an audio session. 276 // The volume-level is a value in the range 0.0 to 1.0. 277 // This interface does only work with shared-mode streams. 278 Microsoft::WRL::ComPtr<ISimpleAudioVolume> simple_audio_volume_; 279 280 // The IAudioEndpointVolume allows a client to control the volume level of 281 // the whole system. 282 Microsoft::WRL::ComPtr<IAudioEndpointVolume> system_audio_volume_; 283 284 // The audio engine will signal this event each time a buffer has been 285 // recorded. 286 base::win::ScopedHandle audio_samples_ready_event_; 287 288 // This event will be signaled when capturing shall stop. 289 base::win::ScopedHandle stop_capture_event_; 290 291 // Never set it through external API. Only used when |device_id_| == 292 // kLoopbackWithMuteDeviceId. 293 // True, if we have muted the system audio for the stream capturing, and 294 // indicates that we need to unmute the system audio when stopping capturing. 295 bool mute_done_ = false; 296 297 // Used for the captured audio on the callback thread. 298 std::unique_ptr<AudioBlockFifo> fifo_; 299 300 // If the caller requires resampling (should only be in exceptional cases and 301 // ideally, never), we support using an AudioConverter. 302 std::unique_ptr<AudioConverter> converter_; 303 std::unique_ptr<AudioBus> convert_bus_; 304 bool imperfect_buffer_size_conversion_ = false; 305 306 // Callback to send log messages to registered clients. 307 AudioManager::LogCallback log_callback_; 308 309 // For detecting and reporting glitches. 310 UINT64 expected_next_device_position_ = 0; 311 int total_glitches_ = 0; 312 UINT64 total_lost_frames_ = 0; 313 UINT64 largest_glitch_frames_ = 0; 314 315 // Enabled if the volume level of the audio session is set to zero when the 316 // session starts. Utilized in UMA histogram. 317 bool audio_session_starts_at_zero_volume_ = false; 318 319 // Set to true if the selected audio device supports raw audio capture. 320 // Also added to a UMS histogram. 321 bool raw_processing_supported_ = false; 322 323 // List of supported and active capture effects for the selected device in 324 // default (normal) audio processing mode. 325 std::vector<ABI::Windows::Media::Effects::AudioEffectType> 326 default_effect_types_; 327 // List of supported and active capture effects for the selected device in 328 // raw (minimal) audio processing mode. Will be empty in most cases. 329 std::vector<ABI::Windows::Media::Effects::AudioEffectType> raw_effect_types_; 330 331 SEQUENCE_CHECKER(sequence_checker_); 332 333 DISALLOW_COPY_AND_ASSIGN(WASAPIAudioInputStream); 334 }; 335 336 } // namespace media 337 338 #endif // MEDIA_AUDIO_WIN_AUDIO_LOW_LATENCY_INPUT_WIN_H_ 339