1 #include <windows.h>
2 #include "graphics-hook.h"
3 
4 #if COMPILE_D3D12_HOOK
5 
6 #include <d3d11on12.h>
7 #include <d3d12.h>
8 #include <dxgi1_4.h>
9 #include <inttypes.h>
10 
11 #include <detours.h>
12 
13 #include "dxgi-helpers.hpp"
14 
15 #define MAX_BACKBUFFERS 8
16 
/* Function-pointer type with the calling convention and parameter list used
 * by the ID3D12CommandQueue::ExecuteCommandLists detour in this file (the
 * command queue itself is passed as the explicit first argument). */
typedef HRESULT(STDMETHODCALLTYPE *PFN_ExecuteCommandLists)(
	ID3D12CommandQueue *, UINT, ID3D12CommandList *const *);

/* Pointer through which hook_execute_command_lists() forwards to the real
 * implementation. Assigned in hook_d3d12() and reset to nullptr there if the
 * Detours transaction fails. */
static PFN_ExecuteCommandLists RealExecuteCommandLists = nullptr;
21 
/* All state owned by the D3D12 capture path. A single file-scope instance
 * exists; d3d12_free() releases the COM objects and zeroes the whole struct. */
struct d3d12_data {
	/* Swap chain buffer width/height, taken from DXGI_SWAP_CHAIN_DESC. */
	uint32_t cx;
	uint32_t cy;
	/* Swap chain buffer format after strip_dxgi_format_srgb(). */
	DXGI_FORMAT format;
	/* NOTE(review): never read or written in this file's visible code. */
	bool using_shtex;
	/* True when the swap chain's SampleDesc.Count is greater than 1. */
	bool multisampled;
	/* True when the swap chain exposes IDXGISwapChain3 and more than one
	 * backbuffer is tracked; selects how the current buffer index is found. */
	bool dxgi_1_4;

	/* D3D11 device/context created on top of the game's D3D12 device via
	 * D3D11On12CreateDevice, plus the 11on12 interface queried from it. */
	ID3D11Device *device11;
	ID3D11DeviceContext *context11;
	ID3D11On12Device *device11on12;

	union {
		struct {
			/* Filled in by capture_init_shtex(). */
			struct shtex_data *shtex_info;
			/* D3D11 wrappers around the D3D12 backbuffers. */
			ID3D11Resource *backbuffer11[MAX_BACKBUFFERS];
			/* Number of valid entries in backbuffer11. */
			UINT backbuffer_count;
			/* Manually rotated index, used when dxgi_1_4 is false. */
			UINT cur_backbuffer;
			/* Shared D3D11 texture each frame is copied into. */
			ID3D11Texture2D *copy_tex;
			/* Shared handle of copy_tex, passed to capture_init_shtex(). */
			HANDLE handle;
		};
	};
};
45 
/* The one capture-state instance used by every function in this file. */
static struct d3d12_data data = {};

/* State defined outside this file (presumably by the DXGI present hook --
 * TODO confirm). The ExecuteCommandLists detour reads the two flags and
 * appends to the queue list; d3d12_init_11on12() consumes the list and
 * resets the list, its count, and dxgi_present_attempted on success. */
extern thread_local bool dxgi_presenting;
extern ID3D12CommandQueue *dxgi_possible_swap_queues[8];
extern size_t dxgi_possible_swap_queue_count;
extern bool dxgi_present_attempted;
52 
d3d12_free(void)53 void d3d12_free(void)
54 {
55 	if (data.copy_tex)
56 		data.copy_tex->Release();
57 	for (size_t i = 0; i < data.backbuffer_count; i++) {
58 		if (data.backbuffer11[i])
59 			data.backbuffer11[i]->Release();
60 	}
61 	if (data.device11)
62 		data.device11->Release();
63 	if (data.context11)
64 		data.context11->Release();
65 	if (data.device11on12)
66 		data.device11on12->Release();
67 
68 	capture_free();
69 
70 	memset(&data, 0, sizeof(data));
71 
72 	hlog("----------------- d3d12 capture freed ----------------");
73 }
74 
/* Backbuffers collected from the swap chain by d3d12_init_format() and
 * consumed by create_d3d12_tex(). The pointers are not owning references:
 * d3d12_init_format() calls Release() on each one right after GetBuffer(). */
struct bb_info {
	ID3D12Resource *backbuffer[MAX_BACKBUFFERS];
	/* Number of valid entries in backbuffer. */
	UINT count;
};
79 
create_d3d12_tex(bb_info & bb)80 static bool create_d3d12_tex(bb_info &bb)
81 {
82 	D3D11_RESOURCE_FLAGS rf11 = {};
83 	HRESULT hr;
84 
85 	if (!bb.count)
86 		return false;
87 
88 	data.backbuffer_count = bb.count;
89 
90 	for (UINT i = 0; i < bb.count; i++) {
91 		hr = data.device11on12->CreateWrappedResource(
92 			bb.backbuffer[i], &rf11, D3D12_RESOURCE_STATE_PRESENT,
93 			D3D12_RESOURCE_STATE_PRESENT,
94 			IID_PPV_ARGS(&data.backbuffer11[i]));
95 		if (FAILED(hr)) {
96 			hlog_hr("create_d3d12_tex: failed to create "
97 				"backbuffer11",
98 				hr);
99 			return false;
100 		}
101 	}
102 
103 	D3D11_TEXTURE2D_DESC desc11 = {};
104 	desc11.Width = data.cx;
105 	desc11.Height = data.cy;
106 	desc11.MipLevels = 1;
107 	desc11.ArraySize = 1;
108 	desc11.Format = apply_dxgi_format_typeless(
109 		data.format, global_hook_info->allow_srgb_alias);
110 	desc11.SampleDesc.Count = 1;
111 	desc11.BindFlags = D3D11_BIND_SHADER_RESOURCE;
112 	desc11.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
113 
114 	hr = data.device11->CreateTexture2D(&desc11, nullptr, &data.copy_tex);
115 	if (FAILED(hr)) {
116 		hlog_hr("create_d3d12_tex: creation of d3d11 copy tex failed",
117 			hr);
118 		return false;
119 	}
120 
121 	IDXGIResource *dxgi_res;
122 	hr = data.copy_tex->QueryInterface(&dxgi_res);
123 	if (FAILED(hr)) {
124 		hlog_hr("create_d3d12_tex: failed to query "
125 			"IDXGIResource interface from texture",
126 			hr);
127 		return false;
128 	}
129 
130 	hr = dxgi_res->GetSharedHandle(&data.handle);
131 	dxgi_res->Release();
132 	if (FAILED(hr)) {
133 		hlog_hr("create_d3d12_tex: failed to get shared handle", hr);
134 		return false;
135 	}
136 
137 	return true;
138 }
139 
d3d12_init_11on12(ID3D12Device * device)140 static bool d3d12_init_11on12(ID3D12Device *device)
141 {
142 	static HMODULE d3d11 = nullptr;
143 	static PFN_D3D11ON12_CREATE_DEVICE create_11_on_12 = nullptr;
144 	static bool initialized_11 = false;
145 	static bool initialized_func = false;
146 
147 	if (!initialized_11 && !d3d11) {
148 		d3d11 = load_system_library("d3d11.dll");
149 		if (!d3d11) {
150 			hlog("d3d12_init_11on12: failed to load d3d11");
151 		}
152 		initialized_11 = true;
153 	}
154 
155 	if (!d3d11) {
156 		return false;
157 	}
158 
159 	if (!initialized_func && !create_11_on_12) {
160 		create_11_on_12 = (PFN_D3D11ON12_CREATE_DEVICE)GetProcAddress(
161 			d3d11, "D3D11On12CreateDevice");
162 		if (!create_11_on_12) {
163 			hlog("d3d12_init_11on12: Failed to get "
164 			     "D3D11On12CreateDevice address");
165 		}
166 
167 		initialized_func = true;
168 	}
169 
170 	if (!create_11_on_12) {
171 		return false;
172 	}
173 
174 	bool created = false;
175 
176 	for (size_t i = 0; i < dxgi_possible_swap_queue_count; ++i) {
177 		hlog("d3d12_init_11on12: creating 11 device: queue=0x%" PRIX64,
178 		     (uint64_t)(uintptr_t)dxgi_possible_swap_queues[i]);
179 		IUnknown *const queue = dxgi_possible_swap_queues[i];
180 		const HRESULT hr = create_11_on_12(device, 0, nullptr, 0,
181 						   &queue, 1, 0, &data.device11,
182 						   &data.context11, nullptr);
183 		created = SUCCEEDED(hr);
184 		if (created) {
185 			break;
186 		}
187 
188 		hlog_hr("d3d12_init_11on12: failed to create 11 device", hr);
189 	}
190 
191 	if (!created) {
192 		return false;
193 	}
194 
195 	memset(dxgi_possible_swap_queues, 0, sizeof(dxgi_possible_swap_queues));
196 	dxgi_possible_swap_queue_count = 0;
197 	dxgi_present_attempted = false;
198 
199 	const HRESULT hr =
200 		data.device11->QueryInterface(IID_PPV_ARGS(&data.device11on12));
201 	if (FAILED(hr)) {
202 		hlog_hr("d3d12_init_11on12: failed to query 11on12 device", hr);
203 		return false;
204 	}
205 
206 	return true;
207 }
208 
d3d12_shtex_init(ID3D12Device * device,HWND window,bb_info & bb)209 static bool d3d12_shtex_init(ID3D12Device *device, HWND window, bb_info &bb)
210 {
211 	if (!d3d12_init_11on12(device)) {
212 		return false;
213 	}
214 	if (!create_d3d12_tex(bb)) {
215 		return false;
216 	}
217 	if (!capture_init_shtex(&data.shtex_info, window, data.cx, data.cy,
218 				data.format, false, (uintptr_t)data.handle)) {
219 		return false;
220 	}
221 
222 	hlog("d3d12 shared texture capture successful");
223 	return true;
224 }
225 
d3d12_init_format(IDXGISwapChain * swap,HWND & window,bb_info & bb)226 static inline bool d3d12_init_format(IDXGISwapChain *swap, HWND &window,
227 				     bb_info &bb)
228 {
229 	DXGI_SWAP_CHAIN_DESC desc;
230 	IDXGISwapChain3 *swap3;
231 	HRESULT hr;
232 
233 	hr = swap->GetDesc(&desc);
234 	if (FAILED(hr)) {
235 		hlog_hr("d3d12_init_format: swap->GetDesc failed", hr);
236 		return false;
237 	}
238 
239 	data.format = strip_dxgi_format_srgb(desc.BufferDesc.Format);
240 	data.multisampled = desc.SampleDesc.Count > 1;
241 	window = desc.OutputWindow;
242 	data.cx = desc.BufferDesc.Width;
243 	data.cy = desc.BufferDesc.Height;
244 
245 	hr = swap->QueryInterface(&swap3);
246 	if (SUCCEEDED(hr)) {
247 		data.dxgi_1_4 = true;
248 		hlog("We're DXGI1.4 boys!");
249 		swap3->Release();
250 	}
251 
252 	hlog("Buffer count: %d, swap effect: %d", (int)desc.BufferCount,
253 	     (int)desc.SwapEffect);
254 
255 	bb.count = desc.SwapEffect == DXGI_SWAP_EFFECT_DISCARD
256 			   ? 1
257 			   : desc.BufferCount;
258 
259 	if (bb.count == 1)
260 		data.dxgi_1_4 = false;
261 
262 	if (bb.count > MAX_BACKBUFFERS) {
263 		hlog("Somehow it's using more than the max backbuffers.  "
264 		     "Not sure why anyone would do that.");
265 		bb.count = 1;
266 		data.dxgi_1_4 = false;
267 	}
268 
269 	for (UINT i = 0; i < bb.count; i++) {
270 		hr = swap->GetBuffer(i, IID_PPV_ARGS(&bb.backbuffer[i]));
271 		if (SUCCEEDED(hr)) {
272 			bb.backbuffer[i]->Release();
273 		} else {
274 			return false;
275 		}
276 	}
277 
278 	return true;
279 }
280 
d3d12_init(IDXGISwapChain * swap)281 static void d3d12_init(IDXGISwapChain *swap)
282 {
283 	ID3D12Device *device = nullptr;
284 	const HRESULT hr = swap->GetDevice(IID_PPV_ARGS(&device));
285 	if (SUCCEEDED(hr)) {
286 		hlog("d3d12_init: device=0x%" PRIX64,
287 		     (uint64_t)(uintptr_t)device);
288 
289 		HWND window;
290 		bb_info bb = {};
291 		if (d3d12_init_format(swap, window, bb)) {
292 			if (global_hook_info->force_shmem) {
293 				hlog("d3d12_init: shared memory capture currently "
294 				     "unsupported; ignoring");
295 			}
296 
297 			if (!d3d12_shtex_init(device, window, bb))
298 				d3d12_free();
299 		}
300 
301 		device->Release();
302 	} else {
303 		hlog_hr("d3d12_init: failed to get device from swap", hr);
304 	}
305 }
306 
d3d12_copy_texture(ID3D11Resource * dst,ID3D11Resource * src)307 static inline void d3d12_copy_texture(ID3D11Resource *dst, ID3D11Resource *src)
308 {
309 	if (data.multisampled) {
310 		data.context11->ResolveSubresource(dst, 0, src, 0, data.format);
311 	} else {
312 		data.context11->CopyResource(dst, src);
313 	}
314 }
315 
d3d12_shtex_capture(IDXGISwapChain * swap)316 static inline void d3d12_shtex_capture(IDXGISwapChain *swap)
317 {
318 	bool dxgi_1_4 = data.dxgi_1_4;
319 	UINT cur_idx;
320 
321 	if (dxgi_1_4) {
322 		IDXGISwapChain3 *swap3 =
323 			reinterpret_cast<IDXGISwapChain3 *>(swap);
324 		cur_idx = swap3->GetCurrentBackBufferIndex();
325 	} else {
326 		cur_idx = data.cur_backbuffer;
327 	}
328 
329 	ID3D11Resource *backbuffer = data.backbuffer11[cur_idx];
330 
331 	data.device11on12->AcquireWrappedResources(&backbuffer, 1);
332 	d3d12_copy_texture(data.copy_tex, backbuffer);
333 	data.device11on12->ReleaseWrappedResources(&backbuffer, 1);
334 	data.context11->Flush();
335 
336 	if (!dxgi_1_4) {
337 		if (++data.cur_backbuffer >= data.backbuffer_count)
338 			data.cur_backbuffer = 0;
339 	}
340 }
341 
d3d12_capture(void * swap_ptr,void *)342 void d3d12_capture(void *swap_ptr, void *)
343 {
344 	IDXGISwapChain *swap = (IDXGISwapChain *)swap_ptr;
345 
346 	if (capture_should_stop()) {
347 		d3d12_free();
348 	}
349 	if (capture_should_init()) {
350 		d3d12_init(swap);
351 	}
352 	if (capture_ready()) {
353 		d3d12_shtex_capture(swap);
354 	}
355 }
356 
try_append_queue_if_unique(ID3D12CommandQueue * queue)357 static bool try_append_queue_if_unique(ID3D12CommandQueue *queue)
358 {
359 	for (size_t i = 0; i < dxgi_possible_swap_queue_count; ++i) {
360 		if (dxgi_possible_swap_queues[i] == queue)
361 			return false;
362 	}
363 
364 	dxgi_possible_swap_queues[dxgi_possible_swap_queue_count] = queue;
365 	++dxgi_possible_swap_queue_count;
366 	return true;
367 }
368 
369 static HRESULT STDMETHODCALLTYPE
hook_execute_command_lists(ID3D12CommandQueue * queue,UINT NumCommandLists,ID3D12CommandList * const * ppCommandLists)370 hook_execute_command_lists(ID3D12CommandQueue *queue, UINT NumCommandLists,
371 			   ID3D12CommandList *const *ppCommandLists)
372 {
373 	hlog_verbose("ExecuteCommandLists callback: queue=0x%" PRIX64,
374 		     (uint64_t)(uintptr_t)queue);
375 
376 	if (dxgi_possible_swap_queue_count <
377 	    _countof(dxgi_possible_swap_queues)) {
378 		if (dxgi_presenting &&
379 		    (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_DIRECT)) {
380 			if (try_append_queue_if_unique(queue)) {
381 				hlog("Remembering D3D12 queue from present: queue=0x%" PRIX64,
382 				     (uint64_t)(uintptr_t)queue);
383 			}
384 		} else if (dxgi_present_attempted &&
385 			   (queue->GetDesc().Type ==
386 			    D3D12_COMMAND_LIST_TYPE_DIRECT)) {
387 			if (try_append_queue_if_unique(queue)) {
388 				hlog("Remembering D3D12 queue from first direct submit after present: queue=0x%" PRIX64,
389 				     (uint64_t)(uintptr_t)queue);
390 			}
391 		} else {
392 			hlog_verbose("Ignoring D3D12 queue=0x%" PRIX64,
393 				     (uint64_t)(uintptr_t)queue);
394 		}
395 	}
396 
397 	return RealExecuteCommandLists(queue, NumCommandLists, ppCommandLists);
398 }
399 
400 static bool
manually_get_d3d12_addrs(HMODULE d3d12_module,PFN_ExecuteCommandLists * execute_command_lists_addr)401 manually_get_d3d12_addrs(HMODULE d3d12_module,
402 			 PFN_ExecuteCommandLists *execute_command_lists_addr)
403 {
404 	PFN_D3D12_CREATE_DEVICE create =
405 		(PFN_D3D12_CREATE_DEVICE)GetProcAddress(d3d12_module,
406 							"D3D12CreateDevice");
407 	if (!create) {
408 		hlog("Failed to load D3D12CreateDevice");
409 		return false;
410 	}
411 
412 	bool success = false;
413 	ID3D12Device *device;
414 	if (SUCCEEDED(create(NULL, D3D_FEATURE_LEVEL_11_0,
415 			     IID_PPV_ARGS(&device)))) {
416 		D3D12_COMMAND_QUEUE_DESC desc{};
417 		ID3D12CommandQueue *queue;
418 		HRESULT hr =
419 			device->CreateCommandQueue(&desc, IID_PPV_ARGS(&queue));
420 		success = SUCCEEDED(hr);
421 		if (success) {
422 			void **queue_vtable = *(void ***)queue;
423 			*execute_command_lists_addr =
424 				(PFN_ExecuteCommandLists)queue_vtable[10];
425 
426 			queue->Release();
427 		} else {
428 			hlog("Failed to create D3D12 command queue");
429 		}
430 
431 		device->Release();
432 	} else {
433 		hlog("Failed to create D3D12 device");
434 	}
435 
436 	return success;
437 }
438 
hook_d3d12(void)439 bool hook_d3d12(void)
440 {
441 	HMODULE d3d12_module = get_system_module("d3d12.dll");
442 	if (!d3d12_module) {
443 		hlog_verbose(
444 			"Failed to find d3d12.dll. Skipping hook attempt.");
445 		return false;
446 	}
447 
448 	PFN_ExecuteCommandLists execute_command_lists_addr = nullptr;
449 	if (!manually_get_d3d12_addrs(d3d12_module,
450 				      &execute_command_lists_addr)) {
451 		hlog("Failed to get D3D12 values");
452 		return true;
453 	}
454 
455 	if (!execute_command_lists_addr) {
456 		hlog("Invalid D3D12 values");
457 		return true;
458 	}
459 
460 	DetourTransactionBegin();
461 
462 	RealExecuteCommandLists = execute_command_lists_addr;
463 	DetourAttach(&(PVOID &)RealExecuteCommandLists,
464 		     hook_execute_command_lists);
465 
466 	const LONG error = DetourTransactionCommit();
467 	const bool success = error == NO_ERROR;
468 	if (success) {
469 		hlog("Hooked ID3D12CommandQueue::ExecuteCommandLists");
470 		hlog("Hooked D3D12");
471 	} else {
472 		RealExecuteCommandLists = nullptr;
473 		hlog("Failed to attach Detours hook: %ld", error);
474 	}
475 
476 	return success;
477 }
478 
479 #endif
480