1 #include "../../media-io/audio-resampler.h"
2 #include "../../util/circlebuf.h"
3 #include "../../util/platform.h"
4 #include "../../util/darray.h"
5 #include "../../util/util_uint64.h"
6 #include "../../obs-internal.h"
7 
8 #include "wasapi-output.h"
9 
/* Defines (rather than merely declares) a constant GUID called `name` in this
 * translation unit. `l`, `w1` and `w2` initialise the first three members and
 * `b1`..`b8` the trailing eight-element array. */
#define ACTUALLY_DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
	EXTERN_C const GUID DECLSPEC_SELECTANY name = {                       \
		l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
13 
/* Logging helpers. do_log() prefixes every message with the name of the
 * monitored source, so all of these macros require a variable named `monitor`
 * (with a usable `source` member) to be in scope at the call site. */
#define do_log(level, format, ...)                      \
	blog(level, "[audio monitoring: '%s'] " format, \
	     obs_source_get_name(monitor->source), ##__VA_ARGS__)

/* Severity-specific shorthands for do_log(). */
#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)
21 
/* COM class / interface identifiers. In this file they are passed to
 * CoCreateInstance() (device enumerator), IMMDevice::Activate() (audio client)
 * and IAudioClient::GetService() (render client). */
ACTUALLY_DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xBCDE0395, 0xE52F, 0x467C, 0x8E,
		     0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E);
ACTUALLY_DEFINE_GUID(IID_IMMDeviceEnumerator, 0xA95664D2, 0x9614, 0x4F35, 0xA7,
		     0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6);
ACTUALLY_DEFINE_GUID(IID_IAudioClient, 0x1CB9AD4C, 0xDBFA, 0x4C32, 0xB1, 0x78,
		     0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2);
ACTUALLY_DEFINE_GUID(IID_IAudioRenderClient, 0xF294ACFC, 0x3146, 0x4483, 0xA7,
		     0xBF, 0xAD, 0xDC, 0xA7, 0xC2, 0x60, 0xE2);
30 
/* Per-source state for playing a source's audio on the monitoring device. */
struct audio_monitor {
	/* Source being monitored; assigned at the start of audio_monitor_init(). */
	obs_source_t *source;
	/* Render endpoint looked up through the device enumerator. */
	IMMDevice *device;
	/* Audio client activated on `device`. */
	IAudioClient *client;
	/* Render service obtained from `client`; receives the output samples. */
	IAudioRenderClient *render;

	/* os_gettime_ns() value of the latest process_audio_delay() call. */
	uint64_t last_recv_time;
	/* Copy of the source's last_frame_ts, captured while this is still 0. */
	uint64_t prev_video_ts;
	/* Nanoseconds of audio received while last_frame_ts == prev_video_ts;
	 * reset to 0 when the two differ. */
	uint64_t time_since_prev;
	/* Converts OBS output audio to the device mix format. */
	audio_resampler_t *resampler;
	/* Sample rate and channel count of the device mix format. */
	uint32_t sample_rate;
	uint32_t channels;
	/* True when the source has the OBS_SOURCE_VIDEO output flag. */
	bool source_has_video;
	/* True when the source must not monitor itself (its own device is the
	 * monitoring device); such a monitor owns no device resources. */
	bool ignore;

	/* Not referenced by the code visible in this file. */
	int64_t lowest_audio_offset;
	/* FIFO of records: uint64_t timestamp, uint32_t frame count, then
	 * frames * channels floats. */
	struct circlebuf delay_buffer;
	/* Not referenced by the code visible in this file. */
	uint32_t delay_size;

	/* Scratch storage for the record most recently popped from
	 * delay_buffer. */
	DARRAY(float) buf;
	/* Try-locked by on_audio_playback(); held by audio_monitor_reset()
	 * while the replacement monitor is being initialised. */
	pthread_mutex_t playback_mutex;
};
53 
54 /* #define DEBUG_AUDIO */
55 
process_audio_delay(struct audio_monitor * monitor,float ** data,uint32_t * frames,uint64_t ts,uint32_t pad)56 static bool process_audio_delay(struct audio_monitor *monitor, float **data,
57 				uint32_t *frames, uint64_t ts, uint32_t pad)
58 {
59 	obs_source_t *s = monitor->source;
60 	uint64_t last_frame_ts = s->last_frame_ts;
61 	uint64_t cur_time = os_gettime_ns();
62 	uint64_t front_ts;
63 	uint64_t cur_ts;
64 	int64_t diff;
65 	uint32_t blocksize = monitor->channels * sizeof(float);
66 
67 	/* cut off audio if long-since leftover audio in delay buffer */
68 	if (cur_time - monitor->last_recv_time > 1000000000)
69 		circlebuf_free(&monitor->delay_buffer);
70 	monitor->last_recv_time = cur_time;
71 
72 	ts += monitor->source->sync_offset;
73 
74 	circlebuf_push_back(&monitor->delay_buffer, &ts, sizeof(ts));
75 	circlebuf_push_back(&monitor->delay_buffer, frames, sizeof(*frames));
76 	circlebuf_push_back(&monitor->delay_buffer, *data, *frames * blocksize);
77 
78 	if (!monitor->prev_video_ts) {
79 		monitor->prev_video_ts = last_frame_ts;
80 
81 	} else if (monitor->prev_video_ts == last_frame_ts) {
82 		monitor->time_since_prev += util_mul_div64(
83 			*frames, 1000000000ULL, monitor->sample_rate);
84 	} else {
85 		monitor->time_since_prev = 0;
86 	}
87 
88 	while (monitor->delay_buffer.size != 0) {
89 		size_t size;
90 		bool bad_diff;
91 
92 		circlebuf_peek_front(&monitor->delay_buffer, &cur_ts,
93 				     sizeof(ts));
94 		front_ts = cur_ts - util_mul_div64(pad, 1000000000ULL,
95 						   monitor->sample_rate);
96 		diff = (int64_t)front_ts - (int64_t)last_frame_ts;
97 		bad_diff = !last_frame_ts || llabs(diff) > 5000000000 ||
98 			   monitor->time_since_prev > 100000000ULL;
99 
100 		/* delay audio if rushing */
101 		if (!bad_diff && diff > 75000000) {
102 #ifdef DEBUG_AUDIO
103 			blog(LOG_INFO,
104 			     "audio rushing, cutting audio, "
105 			     "diff: %lld, delay buffer size: %lu, "
106 			     "v: %llu: a: %llu",
107 			     diff, (int)monitor->delay_buffer.size,
108 			     last_frame_ts, front_ts);
109 #endif
110 			return false;
111 		}
112 
113 		circlebuf_pop_front(&monitor->delay_buffer, NULL, sizeof(ts));
114 		circlebuf_pop_front(&monitor->delay_buffer, frames,
115 				    sizeof(*frames));
116 
117 		size = *frames * blocksize;
118 		da_resize(monitor->buf, size);
119 		circlebuf_pop_front(&monitor->delay_buffer, monitor->buf.array,
120 				    size);
121 
122 		/* cut audio if dragging */
123 		if (!bad_diff && diff < -75000000 &&
124 		    monitor->delay_buffer.size > 0) {
125 #ifdef DEBUG_AUDIO
126 			blog(LOG_INFO,
127 			     "audio dragging, cutting audio, "
128 			     "diff: %lld, delay buffer size: %lu, "
129 			     "v: %llu: a: %llu",
130 			     diff, (int)monitor->delay_buffer.size,
131 			     last_frame_ts, front_ts);
132 #endif
133 			continue;
134 		}
135 
136 		*data = monitor->buf.array;
137 		return true;
138 	}
139 
140 	return false;
141 }
142 
/*
 * Audio capture callback: resamples the source's audio to the device format
 * and writes it to the WASAPI render client.
 *
 * param       the struct audio_monitor registered with the callback.
 * source      source that produced the audio; its volume, activation count
 *             and async flags are read here.
 * audio_data  planar input audio plus its timestamp.
 * muted       when true the device buffer is released flagged as silent
 *             instead of being filled.
 *
 * The call is dropped without blocking if playback_mutex is already held.
 */
static void on_audio_playback(void *param, obs_source_t *source,
			      const struct audio_data *audio_data, bool muted)
{
	struct audio_monitor *monitor = param;
	IAudioRenderClient *render = monitor->render;
	const float vol = source->user_volume;
	uint8_t *planes[MAX_AV_PLANES];
	uint32_t out_frames;
	uint64_t resample_offset;
	UINT32 queued = 0;
	BYTE *device_buf;
	HRESULT hr;

	if (pthread_mutex_trylock(&monitor->playback_mutex) != 0)
		return;

	/* nothing to play while the source is not active */
	if (os_atomic_load_long(&source->activate_refs) == 0)
		goto unlock;

	if (!audio_resampler_resample(monitor->resampler, planes, &out_frames,
				      &resample_offset,
				      (const uint8_t *const *)audio_data->data,
				      (uint32_t)audio_data->frames))
		goto unlock;

	/* frames still waiting in the device; stays 0 if the query fails */
	monitor->client->lpVtbl->GetCurrentPadding(monitor->client, &queued);

	/* sources with video go through the delay buffer, unless they are
	 * both unbuffered and decoupled */
	if (monitor->source_has_video &&
	    !(source->async_unbuffered && source->async_decoupled)) {
		const uint64_t ts = audio_data->timestamp - resample_offset;

		if (!process_audio_delay(monitor, (float **)(&planes[0]),
					 &out_frames, ts, queued))
			goto unlock;
	}

	hr = render->lpVtbl->GetBuffer(render, out_frames, &device_buf);
	if (FAILED(hr))
		goto unlock;

	if (!muted) {
		float *samples = (float *)planes[0];
		const uint32_t total = out_frames * monitor->channels;

		/* apply volume */
		if (!close_float(vol, 1.0f, EPSILON)) {
			for (uint32_t i = 0; i < total; i++)
				samples[i] *= vol;
		}

		memcpy(device_buf, samples, total * sizeof(float));
	}

	render->lpVtbl->ReleaseBuffer(render, out_frames,
				      muted ? AUDCLNT_BUFFERFLAGS_SILENT : 0);

unlock:
	pthread_mutex_unlock(&monitor->playback_mutex);
}
211 
audio_monitor_free(struct audio_monitor * monitor)212 static inline void audio_monitor_free(struct audio_monitor *monitor)
213 {
214 	if (monitor->ignore)
215 		return;
216 
217 	if (monitor->source) {
218 		obs_source_remove_audio_capture_callback(
219 			monitor->source, on_audio_playback, monitor);
220 	}
221 
222 	if (monitor->client)
223 		monitor->client->lpVtbl->Stop(monitor->client);
224 
225 	safe_release(monitor->device);
226 	safe_release(monitor->client);
227 	safe_release(monitor->render);
228 	audio_resampler_destroy(monitor->resampler);
229 	circlebuf_free(&monitor->delay_buffer);
230 	da_free(monitor->buf);
231 }
232 
/*
 * Maps a WASAPI channel mask to the matching speaker_layout value.
 *
 * layout    channel mask to translate.
 * channels  channel count; returned (cast to the enum) when the mask is not
 *           one of the layouts listed below.
 */
static enum speaker_layout convert_speaker_layout(DWORD layout, WORD channels)
{
	static const struct {
		DWORD mask;
		enum speaker_layout speakers;
	} known_layouts[] = {
		{KSAUDIO_SPEAKER_2POINT1, SPEAKERS_2POINT1},
		{KSAUDIO_SPEAKER_SURROUND, SPEAKERS_4POINT0},
		{KSAUDIO_SPEAKER_4POINT1, SPEAKERS_4POINT1},
		{KSAUDIO_SPEAKER_5POINT1, SPEAKERS_5POINT1},
		{KSAUDIO_SPEAKER_7POINT1, SPEAKERS_7POINT1},
	};
	const size_t count = sizeof(known_layouts) / sizeof(known_layouts[0]);

	for (size_t i = 0; i < count; i++) {
		if (known_layouts[i].mask == layout)
			return known_layouts[i].speakers;
	}

	return (enum speaker_layout)channels;
}
250 
251 extern bool devices_match(const char *id1, const char *id2);
252 
audio_monitor_init(struct audio_monitor * monitor,obs_source_t * source)253 static bool audio_monitor_init(struct audio_monitor *monitor,
254 			       obs_source_t *source)
255 {
256 	IMMDeviceEnumerator *immde = NULL;
257 	WAVEFORMATEX *wfex = NULL;
258 	bool success = false;
259 	UINT32 frames;
260 	HRESULT hr;
261 
262 	pthread_mutex_init_value(&monitor->playback_mutex);
263 
264 	monitor->source = source;
265 
266 	const char *id = obs->audio.monitoring_device_id;
267 	if (!id) {
268 		warn("%s: No device ID set", __FUNCTION__);
269 		return false;
270 	}
271 
272 	if (source->info.output_flags & OBS_SOURCE_DO_NOT_SELF_MONITOR) {
273 		obs_data_t *s = obs_source_get_settings(source);
274 		const char *s_dev_id = obs_data_get_string(s, "device_id");
275 		bool match = devices_match(s_dev_id, id);
276 		obs_data_release(s);
277 
278 		if (match) {
279 			monitor->ignore = true;
280 			return true;
281 		}
282 	}
283 
284 	/* ------------------------------------------ *
285 	 * Init device                                */
286 
287 	hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL,
288 			      &IID_IMMDeviceEnumerator, (void **)&immde);
289 	if (FAILED(hr)) {
290 		warn("%s: Failed to create IMMDeviceEnumerator: %08lX",
291 		     __FUNCTION__, hr);
292 		return false;
293 	}
294 
295 	if (strcmp(id, "default") == 0) {
296 		hr = immde->lpVtbl->GetDefaultAudioEndpoint(
297 			immde, eRender, eConsole, &monitor->device);
298 	} else {
299 		wchar_t w_id[512];
300 		os_utf8_to_wcs(id, 0, w_id, 512);
301 
302 		hr = immde->lpVtbl->GetDevice(immde, w_id, &monitor->device);
303 	}
304 
305 	if (FAILED(hr)) {
306 		warn("%s: Failed to get device: %08lX", __FUNCTION__, hr);
307 		goto fail;
308 	}
309 
310 	/* ------------------------------------------ *
311 	 * Init client                                */
312 
313 	hr = monitor->device->lpVtbl->Activate(monitor->device,
314 					       &IID_IAudioClient, CLSCTX_ALL,
315 					       NULL, (void **)&monitor->client);
316 	if (FAILED(hr)) {
317 		warn("%s: Failed to activate device: %08lX", __FUNCTION__, hr);
318 		goto fail;
319 	}
320 
321 	hr = monitor->client->lpVtbl->GetMixFormat(monitor->client, &wfex);
322 	if (FAILED(hr)) {
323 		warn("%s: Failed to get mix format: %08lX", __FUNCTION__, hr);
324 		goto fail;
325 	}
326 
327 	hr = monitor->client->lpVtbl->Initialize(monitor->client,
328 						 AUDCLNT_SHAREMODE_SHARED, 0,
329 						 10000000, 0, wfex, NULL);
330 	if (FAILED(hr)) {
331 		warn("%s: Failed to initialize: %08lX", __FUNCTION__, hr);
332 		goto fail;
333 	}
334 
335 	/* ------------------------------------------ *
336 	 * Init resampler                             */
337 
338 	const struct audio_output_info *info =
339 		audio_output_get_info(obs->audio.audio);
340 	WAVEFORMATEXTENSIBLE *ext = (WAVEFORMATEXTENSIBLE *)wfex;
341 	struct resample_info from;
342 	struct resample_info to;
343 
344 	from.samples_per_sec = info->samples_per_sec;
345 	from.speakers = info->speakers;
346 	from.format = AUDIO_FORMAT_FLOAT_PLANAR;
347 
348 	to.samples_per_sec = (uint32_t)wfex->nSamplesPerSec;
349 	to.speakers =
350 		convert_speaker_layout(ext->dwChannelMask, wfex->nChannels);
351 	to.format = AUDIO_FORMAT_FLOAT;
352 
353 	monitor->sample_rate = (uint32_t)wfex->nSamplesPerSec;
354 	monitor->channels = wfex->nChannels;
355 	monitor->resampler = audio_resampler_create(&to, &from);
356 	if (!monitor->resampler) {
357 		goto fail;
358 	}
359 
360 	/* ------------------------------------------ *
361 	 * Init client                                */
362 
363 	hr = monitor->client->lpVtbl->GetBufferSize(monitor->client, &frames);
364 	if (FAILED(hr)) {
365 		warn("%s: Failed to get buffer size: %08lX", __FUNCTION__, hr);
366 		goto fail;
367 	}
368 
369 	hr = monitor->client->lpVtbl->GetService(monitor->client,
370 						 &IID_IAudioRenderClient,
371 						 (void **)&monitor->render);
372 	if (FAILED(hr)) {
373 		warn("%s: Failed to get IAudioRenderClient: %08lX",
374 		     __FUNCTION__, hr);
375 		goto fail;
376 	}
377 
378 	if (pthread_mutex_init(&monitor->playback_mutex, NULL) != 0) {
379 		warn("%s: Failed to initialize mutex", __FUNCTION__);
380 		goto fail;
381 	}
382 
383 	hr = monitor->client->lpVtbl->Start(monitor->client);
384 	if (FAILED(hr)) {
385 		warn("%s: Failed to start audio: %08lX", __FUNCTION__, hr);
386 		goto fail;
387 	}
388 
389 	success = true;
390 
391 fail:
392 	safe_release(immde);
393 	if (wfex)
394 		CoTaskMemFree(wfex);
395 	return success;
396 }
397 
audio_monitor_init_final(struct audio_monitor * monitor)398 static void audio_monitor_init_final(struct audio_monitor *monitor)
399 {
400 	if (monitor->ignore)
401 		return;
402 
403 	monitor->source_has_video =
404 		(monitor->source->info.output_flags & OBS_SOURCE_VIDEO) != 0;
405 	obs_source_add_audio_capture_callback(monitor->source,
406 					      on_audio_playback, monitor);
407 }
408 
/*
 * Creates a monitor for `source` and adds it to the global monitor list.
 *
 * The monitor is set up in a local struct first and only copied to the heap
 * once initialisation has succeeded; the capture callback is registered
 * afterwards, on the heap copy.
 *
 * Returns the new monitor, or NULL if initialisation failed.
 */
struct audio_monitor *audio_monitor_create(obs_source_t *source)
{
	struct audio_monitor staged = {0};
	struct audio_monitor *created;

	if (!audio_monitor_init(&staged, source)) {
		audio_monitor_free(&staged);
		return NULL;
	}

	created = bmemdup(&staged, sizeof(staged));

	pthread_mutex_lock(&obs->audio.monitoring_mutex);
	da_push_back(obs->audio.monitors, &created);
	pthread_mutex_unlock(&obs->audio.monitoring_mutex);

	audio_monitor_init_final(created);
	return created;
}
431 
audio_monitor_reset(struct audio_monitor * monitor)432 void audio_monitor_reset(struct audio_monitor *monitor)
433 {
434 	struct audio_monitor new_monitor = {0};
435 	bool success;
436 
437 	pthread_mutex_lock(&monitor->playback_mutex);
438 	success = audio_monitor_init(&new_monitor, monitor->source);
439 	pthread_mutex_unlock(&monitor->playback_mutex);
440 
441 	if (success) {
442 		obs_source_t *source = monitor->source;
443 		audio_monitor_free(monitor);
444 		*monitor = new_monitor;
445 		audio_monitor_init_final(monitor);
446 	} else {
447 		audio_monitor_free(&new_monitor);
448 	}
449 }
450 
audio_monitor_destroy(struct audio_monitor * monitor)451 void audio_monitor_destroy(struct audio_monitor *monitor)
452 {
453 	if (monitor) {
454 		audio_monitor_free(monitor);
455 
456 		pthread_mutex_lock(&obs->audio.monitoring_mutex);
457 		da_erase_item(obs->audio.monitors, &monitor);
458 		pthread_mutex_unlock(&obs->audio.monitoring_mutex);
459 
460 		bfree(monitor);
461 	}
462 }
463