// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Utility methods for the Core Audio API on Windows.
// Always ensure that Core Audio is supported before using these methods.
// Use media::CoreAudioUtil::IsSupported() for this purpose.
// Also, all methods must be called on a valid COM thread. This can be done
// by using the base::win::ScopedCOMInitializer helper class.

#ifndef MEDIA_AUDIO_WIN_CORE_AUDIO_UTIL_WIN_H_
#define MEDIA_AUDIO_WIN_CORE_AUDIO_UTIL_WIN_H_

#include <audioclient.h>
#include <mmdeviceapi.h>
#include <stdint.h>
#include <wrl/client.h>

#include <string>

#include "base/macros.h"
#include "base/time/time.h"
#include "media/audio/audio_device_name.h"
#include "media/base/audio_parameters.h"
#include "media/base/media_export.h"

namespace media {

// Represents audio channel configuration constants as understood by Windows.
// E.g. KSAUDIO_SPEAKER_MONO.  For a list of possible values see:
// http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
using ChannelConfig = uint32_t;

34 class MEDIA_EXPORT CoreAudioUtil {
35  public:
36   // Helper class which automates casting between WAVEFORMATEX and
37   // WAVEFORMATEXTENSIBLE raw pointers using implicit constructors and
38   // operator overloading. Note that, no memory is allocated by this utility
39   // structure. It only serves as a handle (or a wrapper) of the structure
40   // provided to it at construction.
41   class MEDIA_EXPORT WaveFormatWrapper {
42    public:
WaveFormatWrapper(WAVEFORMATEXTENSIBLE * p)43     WaveFormatWrapper(WAVEFORMATEXTENSIBLE* p)
44         : ptr_(reinterpret_cast<WAVEFORMATEX*>(p)) {}
WaveFormatWrapper(WAVEFORMATEX * p)45     WaveFormatWrapper(WAVEFORMATEX* p) : ptr_(p) {}
46     ~WaveFormatWrapper() = default;
47 
48     operator WAVEFORMATEX*() const { return ptr_; }
49     WAVEFORMATEX* operator->() const { return ptr_; }
get()50     WAVEFORMATEX* get() const { return ptr_; }
51     WAVEFORMATEXTENSIBLE* GetExtensible() const;
52 
53     bool IsExtensible() const;
54     bool IsPcm() const;
55     bool IsFloat() const;
56     size_t size() const;
57 
58    private:
59     WAVEFORMATEX* ptr_;
60   };
61 
62   // Returns true if Windows Core Audio is supported.
63   // Always verify that this method returns true before using any of the
64   // methods in this class.
65   // WARNING: This function must be called once from the main thread before
66   // it is safe to call from other threads.
67   static bool IsSupported();
68 
69   // Converts a COM error into a human-readable string.
70   static std::string ErrorToString(HRESULT hresult);
71 
72   // Prints/logs all fields of the format structure in |format|.
73   // Also supports extended versions (WAVEFORMATEXTENSIBLE).
74   static std::string WaveFormatToString(WaveFormatWrapper format);
75 
76   // Converts between reference time to base::TimeDelta.
77   // One reference-time unit is 100 nanoseconds.
78   // Example: double s = RefererenceTimeToTimeDelta(t).InMillisecondsF();
79   static base::TimeDelta ReferenceTimeToTimeDelta(REFERENCE_TIME time);
80 
81   // Returns 1, 2, or 3 corresponding to the highest version of IAudioClient
82   // the platform supports.
83   static uint32_t GetIAudioClientVersion();
84 
85   // Returns AUDCLNT_SHAREMODE_EXCLUSIVE if --enable-exclusive-mode is used
86   // as command-line flag and AUDCLNT_SHAREMODE_SHARED otherwise (default).
87   static AUDCLNT_SHAREMODE GetShareMode();
88 
89   // The Windows Multimedia Device (MMDevice) API enables audio clients to
90   // discover audio endpoint devices and determine their capabilities.
91 
92   // Number of active audio devices in the specified flow data flow direction.
93   // Set |data_flow| to eAll to retrieve the total number of active audio
94   // devices.
95   static int NumberOfActiveDevices(EDataFlow data_flow);
96 
97   // Creates an IMMDeviceEnumerator interface which provides methods for
98   // enumerating audio endpoint devices.
99   static Microsoft::WRL::ComPtr<IMMDeviceEnumerator> CreateDeviceEnumerator();
100 
101   // Create an endpoint device specified by |device_id| or a default device
102   // specified by data-flow direction and role if
103   // AudioDeviceDescription::IsDefaultDevice(|device_id|).
104   static Microsoft::WRL::ComPtr<IMMDevice>
105   CreateDevice(const std::string& device_id, EDataFlow data_flow, ERole role);
106 
107   // These functions return the device id of the default or communications
108   // input/output device, or an empty string if no such device exists or if the
109   // device has been disabled.
110   static std::string GetDefaultInputDeviceID();
111   static std::string GetDefaultOutputDeviceID();
112   static std::string GetCommunicationsInputDeviceID();
113   static std::string GetCommunicationsOutputDeviceID();
114 
115   // Returns the unique ID and user-friendly name of a given endpoint device.
116   // Example: "{0.0.1.00000000}.{8db6020f-18e3-4f25-b6f5-7726c9122574}", and
117   //          "Microphone (Realtek High Definition Audio)".
118   static HRESULT GetDeviceName(IMMDevice* device, AudioDeviceName* name);
119 
120   // Returns the device ID/path of the controller (a.k.a. physical device that
121   // |device| is connected to.  This ID will be the same for all devices from
122   // the same controller so it is useful for doing things like determining
123   // whether a set of output and input devices belong to the same controller.
124   // The device enumerator is required as well as the device itself since
125   // looking at the device topology is required and we need to open up
126   // associated devices to determine the controller id.
127   // If the ID could not be determined for some reason, an empty string is
128   // returned.
129   static std::string GetAudioControllerID(IMMDevice* device,
130       IMMDeviceEnumerator* enumerator);
131 
132   // Accepts an id of an input device and finds a matching output device id.
133   // If the associated hardware does not have an audio output device (e.g.
134   // a webcam with a mic), an empty string is returned.
135   static std::string GetMatchingOutputDeviceID(
136       const std::string& input_device_id);
137 
138   // Gets the user-friendly name of the endpoint device which is represented
139   // by a unique id in |device_id|.
140   static std::string GetFriendlyName(const std::string& device_id,
141                                      EDataFlow data_flow,
142                                      ERole role);
143 
144   // Query if the audio device is a rendering device or a capture device.
145   static EDataFlow GetDataFlow(IMMDevice* device);
146 
147   // The Windows Audio Session API (WASAPI) enables client applications to
148   // manage the flow of audio data between the application and an audio endpoint
149   // device.
150 
151   // Create an IAudioClient instance for a specific device or the default
152   // device if AudioDeviceDescription::IsDefaultDevice(device_id).
153   static Microsoft::WRL::ComPtr<IAudioClient>
154   CreateClient(const std::string& device_id, EDataFlow data_flow, ERole role);
155   static Microsoft::WRL::ComPtr<IAudioClient3>
156   CreateClient3(const std::string& device_id, EDataFlow data_flow, ERole role);
157 
158   // Get the mix format that the audio engine uses internally for processing
159   // of shared-mode streams. This format is not necessarily a format that the
160   // audio endpoint device supports. The WAVEFORMATEXTENSIBLE structure can
161   // specify both the mapping of channels to speakers and the number of bits of
162   // precision in each sample. The first member of the WAVEFORMATEXTENSIBLE
163   // structure is a WAVEFORMATEX structure and its wFormatTag will be set to
164   // WAVE_FORMAT_EXTENSIBLE if the output structure is extended.
165   // FormatIsExtensible() can be used to determine if that is the case or not.
166   static HRESULT GetSharedModeMixFormat(IAudioClient* client,
167                                         WAVEFORMATEXTENSIBLE* format);
168 
169   // Returns true if the specified |client| supports the format in |format|
170   // for the given |share_mode| (shared or exclusive).
171   static bool IsFormatSupported(IAudioClient* client,
172                                 AUDCLNT_SHAREMODE share_mode,
173                                 WaveFormatWrapper format);
174 
175   // Returns true if the specified |channel_layout| is supported for the
176   // default IMMDevice where flow direction and role is define by |data_flow|
177   // and |role|. If this method returns true for a certain channel layout, it
178   // means that SharedModeInitialize() will succeed using a format based on
179   // the preferred format where the channel layout has been modified.
180   static bool IsChannelLayoutSupported(const std::string& device_id,
181                                        EDataFlow data_flow,
182                                        ERole role,
183                                        ChannelLayout channel_layout);
184 
185   // For a shared-mode stream, the audio engine periodically processes the
186   // data in the endpoint buffer at the period obtained in |device_period|.
187   // For an exclusive mode stream, |device_period| corresponds to the minimum
188   // time interval between successive processing by the endpoint device.
189   // This period plus the stream latency between the buffer and endpoint device
190   // represents the minimum possible latency that an audio application can
191   // achieve. The time in |device_period| is expressed in 100-nanosecond units.
192   static HRESULT GetDevicePeriod(IAudioClient* client,
193                                  AUDCLNT_SHAREMODE share_mode,
194                                  REFERENCE_TIME* device_period);
195 
196   // Get the preferred audio parameters for the given |device_id|. The acquired
197   // values should only be utilized for shared mode streamed since there are no
198   // preferred settings for an exclusive mode stream.
199   static HRESULT GetPreferredAudioParameters(const std::string& device_id,
200                                              bool is_output_device,
201                                              AudioParameters* params);
202 
203   // Retrieves an integer mask which corresponds to the channel layout the
204   // audio engine uses for its internal processing/mixing of shared-mode
205   // streams. This mask indicates which channels are present in the multi-
206   // channel stream. The least significant bit corresponds with the Front Left
207   // speaker, the next least significant bit corresponds to the Front Right
208   // speaker, and so on, continuing in the order defined in KsMedia.h.
209   // See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
210   // for more details.
211   static ChannelConfig GetChannelConfig(const std::string& device_id,
212                                         EDataFlow data_flow);
213 
214   // After activating an IAudioClient interface on an audio endpoint device,
215   // the client must initialize it once, and only once, to initialize the audio
216   // stream between the client and the device. In shared mode, the client
217   // connects indirectly through the audio engine which does the mixing.
218   // In exclusive mode, the client connects directly to the audio hardware.
219   // If a valid event is provided in |event_handle|, the client will be
220   // initialized for event-driven buffer handling. If |event_handle| is set to
221   // NULL, event-driven buffer handling is not utilized.
222   // This function will initialize the audio client as part of the default
223   // audio session if NULL is passed for |session_guid|, otherwise the client
224   // will be associated with the specified session.
225   static HRESULT SharedModeInitialize(IAudioClient* client,
226                                       WaveFormatWrapper format,
227                                       HANDLE event_handle,
228                                       uint32_t requested_buffer_size,
229                                       uint32_t* endpoint_buffer_size,
230                                       const GUID* session_guid);
231 
232   // Create an IAudioRenderClient client for an existing IAudioClient given by
233   // |client|. The IAudioRenderClient interface enables a client to write
234   // output data to a rendering endpoint buffer.
235   static Microsoft::WRL::ComPtr<IAudioRenderClient> CreateRenderClient(
236       IAudioClient* client);
237 
238   // Create an IAudioCaptureClient client for an existing IAudioClient given by
239   // |client|. The IAudioCaptureClient interface enables a client to read
240   // input data from a capture endpoint buffer.
241   static Microsoft::WRL::ComPtr<IAudioCaptureClient> CreateCaptureClient(
242       IAudioClient* client);
243 
244   // Fills up the endpoint rendering buffer with silence for an existing
245   // IAudioClient given by |client| and a corresponding IAudioRenderClient
246   // given by |render_client|.
247   static bool FillRenderEndpointBufferWithSilence(
248       IAudioClient* client, IAudioRenderClient* render_client);
249 
250  private:
CoreAudioUtil()251   CoreAudioUtil() {}
~CoreAudioUtil()252   ~CoreAudioUtil() {}
253   DISALLOW_COPY_AND_ASSIGN(CoreAudioUtil);
254 };
255 
256 // The special audio session identifier we use when opening up the default
257 // communication device.  This has the effect that a separate volume control
258 // will be shown in the system's volume mixer and control over ducking and
259 // visually observing the behavior of ducking, is easier.
260 // Use with |SharedModeInitialize|.
261 extern const GUID kCommunicationsSessionId;

}  // namespace media

#endif  // MEDIA_AUDIO_WIN_CORE_AUDIO_UTIL_WIN_H_
