1 #include <windows.h>
2 #include <versionhelpers.h>
3 #include <d3d11_1.h>
4 #include <d3d11sdklayers.h>
5 #include <dxgi1_2.h>
6 #include <d3dcompiler.h>
7 #include <spirv_cross_c.h>
8
9 #include "common/msg.h"
10 #include "osdep/io.h"
11 #include "osdep/subprocess.h"
12 #include "osdep/timer.h"
13 #include "osdep/windows_utils.h"
14 #include "video/out/gpu/spirv.h"
15 #include "video/out/gpu/utils.h"
16
17 #include "ra_d3d11.h"
18
19 #ifndef D3D11_1_UAV_SLOT_COUNT
20 #define D3D11_1_UAV_SLOT_COUNT (64)
21 #endif
22 #define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80)
23
// Four-part version number (major.minor.build.revision)
struct dll_version {
    uint16_t major, minor;
    uint16_t build, revision;
};
30
31 struct ra_d3d11 {
32 struct spirv_compiler *spirv;
33
34 ID3D11Device *dev;
35 ID3D11Device1 *dev1;
36 ID3D11DeviceContext *ctx;
37 ID3D11DeviceContext1 *ctx1;
38 pD3DCompile D3DCompile;
39
40 struct dll_version d3d_compiler_ver;
41
42 // Debug interfaces (--gpu-debug)
43 ID3D11Debug *debug;
44 ID3D11InfoQueue *iqueue;
45
46 // Device capabilities
47 D3D_FEATURE_LEVEL fl;
48 bool has_clear_view;
49 bool has_timestamp_queries;
50 int max_uavs;
51
52 // Streaming dynamic vertex buffer, which is used for all renderpasses
53 ID3D11Buffer *vbuf;
54 size_t vbuf_size;
55 size_t vbuf_used;
56
57 // clear() renderpass resources (only used when has_clear_view is false)
58 ID3D11PixelShader *clear_ps;
59 ID3D11VertexShader *clear_vs;
60 ID3D11InputLayout *clear_layout;
61 ID3D11Buffer *clear_vbuf;
62 ID3D11Buffer *clear_cbuf;
63
64 // blit() renderpass resources
65 ID3D11PixelShader *blit_float_ps;
66 ID3D11VertexShader *blit_vs;
67 ID3D11InputLayout *blit_layout;
68 ID3D11Buffer *blit_vbuf;
69 ID3D11SamplerState *blit_sampler;
70 };
71
72 struct d3d_tex {
73 // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
74 // hold an additional reference to the texture object.
75 ID3D11Resource *res;
76
77 ID3D11Texture1D *tex1d;
78 ID3D11Texture2D *tex2d;
79 ID3D11Texture3D *tex3d;
80 int array_slice;
81
82 // Staging texture for tex_download(), 2D only
83 ID3D11Texture2D *staging;
84
85 ID3D11ShaderResourceView *srv;
86 ID3D11RenderTargetView *rtv;
87 ID3D11UnorderedAccessView *uav;
88 ID3D11SamplerState *sampler;
89 };
90
91 struct d3d_buf {
92 ID3D11Buffer *buf;
93 ID3D11UnorderedAccessView *uav;
94 void *data; // System-memory mirror of the data in buf
95 bool dirty; // Is buf out of date?
96 };
97
98 struct d3d_rpass {
99 ID3D11PixelShader *ps;
100 ID3D11VertexShader *vs;
101 ID3D11ComputeShader *cs;
102 ID3D11InputLayout *layout;
103 ID3D11BlendState *bstate;
104 };
105
106 struct d3d_timer {
107 ID3D11Query *ts_start;
108 ID3D11Query *ts_end;
109 ID3D11Query *disjoint;
110 uint64_t result; // Latches the result from the previous use of the timer
111 };
112
113 struct d3d_fmt {
114 const char *name;
115 int components;
116 int bytes;
117 int bits[4];
118 DXGI_FORMAT fmt;
119 enum ra_ctype ctype;
120 bool unordered;
121 };
122
// HLSL vertex shader for the clear() renderpass: forwards the 2D input position
// as the clip-space position with z = 0 and w = 1
static const char clear_vs[] =
    "float4 main(float2 pos : POSITION) : SV_Position\n"
    "{\n"
    "return float4(pos, 0.0, 1.0);\n"
    "}\n";
129
// HLSL pixel shader for the clear() renderpass: outputs the color stored at the
// start of constant buffer b0
static const char clear_ps[] =
    "cbuffer ps_cbuf : register(b0) {\n"
    "float4 color : packoffset(c0);\n"
    "}\n"
    "\n"
    "float4 main(float4 pos : SV_Position) : SV_Target\n"
    "{\n"
    "return color;\n"
    "}\n";
140
// One vertex with four floats.
// NOTE(review): presumably x/y feed POSITION and u/v feed TEXCOORD0 of blit_vs;
// confirm against the blit input layout.
struct blit_vert {
    float x, y;
    float u, v;
};
144
// HLSL vertex shader for the blit() renderpass: passes position and texture
// coordinate through unchanged
static const char blit_vs[] =
    "void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n"
    "out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n"
    "{\n"
    "out_pos = float4(pos, 0.0, 1.0);\n"
    "out_coord = coord;\n"
    "}\n";
153
// HLSL pixel shader for the blit() renderpass: samples the float4 texture bound
// to t0 with the sampler bound to s0
static const char blit_float_ps[] =
    "Texture2D<float4> tex : register(t0);\n"
    "SamplerState samp : register(s0);\n"
    "\n"
    "float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n"
    "{\n"
    "return tex.Sample(samp, coord);\n"
    "}\n";
163
164 #define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t
165 static struct d3d_fmt formats[] = {
166 { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) },
167 { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) },
168 { "rgba8", 4, 4, { 8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) },
169 { "r16", 1, 2, {16}, DXFMT(R16, UNORM) },
170 { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) },
171 { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) },
172
173 { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) },
174 { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) },
175 { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) },
176 { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) },
177
178 { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) },
179 { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) },
180 { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) },
181 { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) },
182 { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) },
183 { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) },
184 { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) },
185
186 { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) },
187 { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = true },
188 { "bgrx8", 3, 4, { 8, 8, 8}, DXFMT(B8G8R8X8, UNORM), .unordered = true },
189 };
190
// Returns true only when all four components of a and b are identical
static bool dll_version_equal(struct dll_version a, struct dll_version b)
{
    if (a.major != b.major || a.minor != b.minor)
        return false;
    return a.build == b.build && a.revision == b.revision;
}
198
fmt_to_dxgi(const struct ra_format * fmt)199 static DXGI_FORMAT fmt_to_dxgi(const struct ra_format *fmt)
200 {
201 struct d3d_fmt *d3d = fmt->priv;
202 return d3d->fmt;
203 }
204
setup_formats(struct ra * ra)205 static void setup_formats(struct ra *ra)
206 {
207 // All formats must be usable as a 2D texture
208 static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D;
209 // SHADER_SAMPLE indicates support for linear sampling, point always works
210 static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE;
211 // RA requires renderable surfaces to be blendable as well
212 static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET |
213 D3D11_FORMAT_SUPPORT_BLENDABLE;
214 // Typed UAVs are equivalent to images. RA only cares if they're storable.
215 static const UINT sup_store = D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW;
216 static const UINT sup2_store = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE;
217
218 struct ra_d3d11 *p = ra->priv;
219 HRESULT hr;
220
221 for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) {
222 struct d3d_fmt *d3dfmt = &formats[i];
223 UINT support = 0;
224 hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support);
225 if (FAILED(hr))
226 continue;
227 if ((support & sup_basic) != sup_basic)
228 continue;
229
230 D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3dfmt->fmt };
231 ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2,
232 ², sizeof(sup2));
233 UINT support2 = sup2.OutFormatSupport2;
234
235 struct ra_format *fmt = talloc_zero(ra, struct ra_format);
236 *fmt = (struct ra_format) {
237 .name = d3dfmt->name,
238 .priv = d3dfmt,
239 .ctype = d3dfmt->ctype,
240 .ordered = !d3dfmt->unordered,
241 .num_components = d3dfmt->components,
242 .pixel_size = d3dfmt->bytes,
243 .linear_filter = (support & sup_filter) == sup_filter,
244 .renderable = (support & sup_render) == sup_render,
245 .storable = p->fl >= D3D_FEATURE_LEVEL_11_0 &&
246 (support & sup_store) == sup_store &&
247 (support2 & sup2_store) == sup2_store,
248 };
249
250 if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D)
251 ra->caps |= RA_CAP_TEX_1D;
252
253 for (int j = 0; j < d3dfmt->components; j++)
254 fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j];
255
256 fmt->glsl_format = ra_fmt_glsl_format(fmt);
257
258 MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
259 }
260 }
261
tex_init(struct ra * ra,struct ra_tex * tex)262 static bool tex_init(struct ra *ra, struct ra_tex *tex)
263 {
264 struct ra_d3d11 *p = ra->priv;
265 struct d3d_tex *tex_p = tex->priv;
266 struct ra_tex_params *params = &tex->params;
267 HRESULT hr;
268
269 // A SRV is required for renderpasses and blitting, since blitting can use
270 // a renderpass internally
271 if (params->render_src || params->blit_src) {
272 // Always specify the SRV format for simplicity. This will match the
273 // texture format for textures created with tex_create, but it can be
274 // different for wrapped planar video textures.
275 D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = {
276 .Format = fmt_to_dxgi(params->format),
277 };
278 switch (params->dimensions) {
279 case 1:
280 if (tex_p->array_slice >= 0) {
281 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY;
282 srvdesc.Texture1DArray.MipLevels = 1;
283 srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
284 srvdesc.Texture1DArray.ArraySize = 1;
285 } else {
286 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
287 srvdesc.Texture1D.MipLevels = 1;
288 }
289 break;
290 case 2:
291 if (tex_p->array_slice >= 0) {
292 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
293 srvdesc.Texture2DArray.MipLevels = 1;
294 srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
295 srvdesc.Texture2DArray.ArraySize = 1;
296 } else {
297 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
298 srvdesc.Texture2D.MipLevels = 1;
299 }
300 break;
301 case 3:
302 // D3D11 does not have Texture3D arrays
303 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
304 srvdesc.Texture3D.MipLevels = 1;
305 break;
306 }
307 hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc,
308 &tex_p->srv);
309 if (FAILED(hr)) {
310 MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr));
311 goto error;
312 }
313 }
314
315 // Samplers are required for renderpasses, but not blitting, since the blit
316 // code uses its own point sampler
317 if (params->render_src) {
318 D3D11_SAMPLER_DESC sdesc = {
319 .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
320 .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
321 .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
322 .ComparisonFunc = D3D11_COMPARISON_NEVER,
323 .MinLOD = 0,
324 .MaxLOD = D3D11_FLOAT32_MAX,
325 .MaxAnisotropy = 1,
326 };
327 if (params->src_linear)
328 sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
329 if (params->src_repeat) {
330 sdesc.AddressU = sdesc.AddressV = sdesc.AddressW =
331 D3D11_TEXTURE_ADDRESS_WRAP;
332 }
333 // The runtime pools sampler state objects internally, so we don't have
334 // to worry about resource usage when creating one for every ra_tex
335 hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler);
336 if (FAILED(hr)) {
337 MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr));
338 goto error;
339 }
340 }
341
342 // Like SRVs, an RTV is required for renderpass output and blitting
343 if (params->render_dst || params->blit_dst) {
344 hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL,
345 &tex_p->rtv);
346 if (FAILED(hr)) {
347 MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr));
348 goto error;
349 }
350 }
351
352 if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) {
353 hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL,
354 &tex_p->uav);
355 if (FAILED(hr)) {
356 MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
357 goto error;
358 }
359 }
360
361 return true;
362 error:
363 return false;
364 }
365
tex_destroy(struct ra * ra,struct ra_tex * tex)366 static void tex_destroy(struct ra *ra, struct ra_tex *tex)
367 {
368 if (!tex)
369 return;
370 struct d3d_tex *tex_p = tex->priv;
371
372 SAFE_RELEASE(tex_p->srv);
373 SAFE_RELEASE(tex_p->rtv);
374 SAFE_RELEASE(tex_p->uav);
375 SAFE_RELEASE(tex_p->sampler);
376 SAFE_RELEASE(tex_p->res);
377 SAFE_RELEASE(tex_p->staging);
378 talloc_free(tex);
379 }
380
tex_create(struct ra * ra,const struct ra_tex_params * params)381 static struct ra_tex *tex_create(struct ra *ra,
382 const struct ra_tex_params *params)
383 {
384 // Only 2D textures may be downloaded for now
385 if (params->downloadable && params->dimensions != 2)
386 return NULL;
387
388 struct ra_d3d11 *p = ra->priv;
389 HRESULT hr;
390
391 struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
392 tex->params = *params;
393 tex->params.initial_data = NULL;
394
395 struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
396 DXGI_FORMAT fmt = fmt_to_dxgi(params->format);
397
398 D3D11_SUBRESOURCE_DATA data;
399 D3D11_SUBRESOURCE_DATA *pdata = NULL;
400 if (params->initial_data) {
401 data = (D3D11_SUBRESOURCE_DATA) {
402 .pSysMem = params->initial_data,
403 .SysMemPitch = params->w * params->format->pixel_size,
404 };
405 if (params->dimensions >= 3)
406 data.SysMemSlicePitch = data.SysMemPitch * params->h;
407 pdata = &data;
408 }
409
410 D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
411 D3D11_BIND_FLAG bind_flags = 0;
412
413 if (params->render_src || params->blit_src)
414 bind_flags |= D3D11_BIND_SHADER_RESOURCE;
415 if (params->render_dst || params->blit_dst)
416 bind_flags |= D3D11_BIND_RENDER_TARGET;
417 if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst)
418 bind_flags |= D3D11_BIND_UNORDERED_ACCESS;
419
420 // Apparently IMMUTABLE textures are efficient, so try to infer whether we
421 // can use one
422 if (params->initial_data && !params->render_dst && !params->storage_dst &&
423 !params->blit_dst && !params->host_mutable)
424 usage = D3D11_USAGE_IMMUTABLE;
425
426 switch (params->dimensions) {
427 case 1:;
428 D3D11_TEXTURE1D_DESC desc1d = {
429 .Width = params->w,
430 .MipLevels = 1,
431 .ArraySize = 1,
432 .Format = fmt,
433 .Usage = usage,
434 .BindFlags = bind_flags,
435 };
436 hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d);
437 if (FAILED(hr)) {
438 MP_ERR(ra, "Failed to create Texture1D: %s\n",
439 mp_HRESULT_to_str(hr));
440 goto error;
441 }
442 tex_p->res = (ID3D11Resource *)tex_p->tex1d;
443 break;
444 case 2:;
445 D3D11_TEXTURE2D_DESC desc2d = {
446 .Width = params->w,
447 .Height = params->h,
448 .MipLevels = 1,
449 .ArraySize = 1,
450 .SampleDesc.Count = 1,
451 .Format = fmt,
452 .Usage = usage,
453 .BindFlags = bind_flags,
454 };
455 hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d);
456 if (FAILED(hr)) {
457 MP_ERR(ra, "Failed to create Texture2D: %s\n",
458 mp_HRESULT_to_str(hr));
459 goto error;
460 }
461 tex_p->res = (ID3D11Resource *)tex_p->tex2d;
462
463 // Create a staging texture with CPU access for tex_download()
464 if (params->downloadable) {
465 desc2d.BindFlags = 0;
466 desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
467 desc2d.Usage = D3D11_USAGE_STAGING;
468
469 hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL,
470 &tex_p->staging);
471 if (FAILED(hr)) {
472 MP_ERR(ra, "Failed to staging texture: %s\n",
473 mp_HRESULT_to_str(hr));
474 goto error;
475 }
476 }
477 break;
478 case 3:;
479 D3D11_TEXTURE3D_DESC desc3d = {
480 .Width = params->w,
481 .Height = params->h,
482 .Depth = params->d,
483 .MipLevels = 1,
484 .Format = fmt,
485 .Usage = usage,
486 .BindFlags = bind_flags,
487 };
488 hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d);
489 if (FAILED(hr)) {
490 MP_ERR(ra, "Failed to create Texture3D: %s\n",
491 mp_HRESULT_to_str(hr));
492 goto error;
493 }
494 tex_p->res = (ID3D11Resource *)tex_p->tex3d;
495 break;
496 default:
497 abort();
498 }
499
500 tex_p->array_slice = -1;
501
502 if (!tex_init(ra, tex))
503 goto error;
504
505 return tex;
506
507 error:
508 tex_destroy(ra, tex);
509 return NULL;
510 }
511
// Wraps an existing D3D11 resource in a ra_tex. Only single-mip, non-array,
// non-multisampled 2D textures with D3D11_USAGE_DEFAULT and a DXGI format known
// to ra->formats are accepted. The ra_tex usage flags are derived from the
// resource's bind flags. Returns NULL on failure.
struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
{
    HRESULT hr;
    D3D11_TEXTURE2D_DESC desc;
    D3D11_RESOURCE_DIMENSION type;

    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
    struct ra_tex_params *params = &tex->params;
    struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);

    ID3D11Resource_GetType(res, &type);
    if (type != D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
        // We could wrap Texture1D/3D as well, but keep it simple, since this
        // function is only used for swapchain backbuffers at the moment
        MP_ERR(ra, "Resource is not suitable to wrap\n");
        goto error;
    }

    // QueryInterface gives the ra_tex its own reference to the texture
    hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D,
                                       (void**)&tex_p->tex2d);
    if (FAILED(hr)) {
        MP_ERR(ra, "Resource is not a ID3D11Texture2D\n");
        goto error;
    }
    tex_p->res = (ID3D11Resource *)tex_p->tex2d;

    ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc);
    if (desc.MipLevels != 1) {
        MP_ERR(ra, "Mipmapped textures not supported for wrapping\n");
        goto error;
    }
    if (desc.ArraySize != 1) {
        MP_ERR(ra, "Texture arrays not supported for wrapping\n");
        goto error;
    }
    if (desc.SampleDesc.Count != 1) {
        MP_ERR(ra, "Multisampled textures not supported for wrapping\n");
        goto error;
    }

    params->dimensions = 2;
    params->w = desc.Width;
    params->h = desc.Height;
    params->d = 1;

    // Pick the first RA format that maps to the texture's DXGI format
    for (int i = 0; i < ra->num_formats && !params->format; i++) {
        if (fmt_to_dxgi(ra->formats[i]) == desc.Format)
            params->format = ra->formats[i];
    }
    if (!params->format) {
        MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n");
        goto error;
    }

    if (desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) {
        params->render_src = params->blit_src = true;
        params->src_linear = params->format->linear_filter;
    }
    if (desc.BindFlags & D3D11_BIND_RENDER_TARGET)
        params->render_dst = params->blit_dst = true;
    if (desc.BindFlags & D3D11_BIND_UNORDERED_ACCESS)
        params->storage_dst = true;

    if (desc.Usage != D3D11_USAGE_DEFAULT) {
        MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n");
        goto error;
    }

    tex_p->array_slice = -1;

    if (tex_init(ra, tex))
        return tex;

error:
    tex_destroy(ra, tex);
    return NULL;
}
602
// Wraps a video texture (or one slice of a texture array) as a w x h render
// source with linear sampling. The ra_tex takes its own reference to res.
// fmt selects the SRV format. Returns NULL if the texture cannot be bound as a
// shader resource or if creating the views fails.
struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
                                       int w, int h, int array_slice,
                                       const struct ra_format *fmt)
{
    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
    struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);

    // Take a reference that tex_destroy() drops again through tex_p->res
    ID3D11Texture2D_AddRef(res);
    tex_p->tex2d = res;
    tex_p->res = (ID3D11Resource *)res;

    D3D11_TEXTURE2D_DESC desc;
    ID3D11Texture2D_GetDesc(res, &desc);

    bool ok = false;
    if (desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) {
        tex->params.dimensions = 2;
        tex->params.w = w;
        tex->params.h = h;
        tex->params.d = 1;
        tex->params.render_src = true;
        tex->params.src_linear = true;
        // fmt can be different to the texture format for planar video textures
        tex->params.format = fmt;

        // The slice index is only meaningful for actual texture arrays
        tex_p->array_slice = desc.ArraySize > 1 ? array_slice : -1;

        ok = tex_init(ra, tex);
    } else {
        MP_ERR(ra, "Video resource is not bindable\n");
    }

    if (!ok) {
        tex_destroy(ra, tex);
        return NULL;
    }
    return tex;
}
645
tex_upload(struct ra * ra,const struct ra_tex_upload_params * params)646 static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params)
647 {
648 struct ra_d3d11 *p = ra->priv;
649 struct ra_tex *tex = params->tex;
650 struct d3d_tex *tex_p = tex->priv;
651
652 if (!params->src) {
653 MP_ERR(ra, "Pixel buffers are not supported\n");
654 return false;
655 }
656
657 const char *src = params->src;
658 ptrdiff_t stride = tex->params.dimensions >= 2 ? tex->params.w : 0;
659 ptrdiff_t pitch = tex->params.dimensions >= 3 ? stride * tex->params.h : 0;
660 bool invalidate = true;
661 D3D11_BOX rc;
662 D3D11_BOX *prc = NULL;
663
664 if (tex->params.dimensions == 2) {
665 stride = params->stride;
666
667 if (params->rc && (params->rc->x0 != 0 || params->rc->y0 != 0 ||
668 params->rc->x1 != tex->params.w || params->rc->y1 != tex->params.h))
669 {
670 rc = (D3D11_BOX) {
671 .left = params->rc->x0,
672 .top = params->rc->y0,
673 .front = 0,
674 .right = params->rc->x1,
675 .bottom = params->rc->y1,
676 .back = 1,
677 };
678 prc = &rc;
679 invalidate = params->invalidate;
680 }
681 }
682
683 int subresource = tex_p->array_slice >= 0 ? tex_p->array_slice : 0;
684 if (p->ctx1) {
685 ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res,
686 subresource, prc, src, stride, pitch,
687 invalidate ? D3D11_COPY_DISCARD : 0);
688 } else {
689 ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource,
690 prc, src, stride, pitch);
691 }
692
693 return true;
694 }
695
tex_download(struct ra * ra,struct ra_tex_download_params * params)696 static bool tex_download(struct ra *ra, struct ra_tex_download_params *params)
697 {
698 struct ra_d3d11 *p = ra->priv;
699 struct ra_tex *tex = params->tex;
700 struct d3d_tex *tex_p = tex->priv;
701 HRESULT hr;
702
703 if (!tex_p->staging)
704 return false;
705
706 ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource*)tex_p->staging,
707 tex_p->res);
708
709 D3D11_MAPPED_SUBRESOURCE lock;
710 hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource*)tex_p->staging, 0,
711 D3D11_MAP_READ, 0, &lock);
712 if (FAILED(hr)) {
713 MP_ERR(ra, "Failed to map staging texture: %s\n", mp_HRESULT_to_str(hr));
714 return false;
715 }
716
717 char *cdst = params->dst;
718 char *csrc = lock.pData;
719 for (int y = 0; y < tex->params.h; y++) {
720 memcpy(cdst + y * params->stride, csrc + y * lock.RowPitch,
721 MPMIN(params->stride, lock.RowPitch));
722 }
723
724 ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource*)tex_p->staging, 0);
725
726 return true;
727 }
728
buf_destroy(struct ra * ra,struct ra_buf * buf)729 static void buf_destroy(struct ra *ra, struct ra_buf *buf)
730 {
731 if (!buf)
732 return;
733 struct d3d_buf *buf_p = buf->priv;
734 SAFE_RELEASE(buf_p->buf);
735 SAFE_RELEASE(buf_p->uav);
736 talloc_free(buf);
737 }
738
buf_create(struct ra * ra,const struct ra_buf_params * params)739 static struct ra_buf *buf_create(struct ra *ra,
740 const struct ra_buf_params *params)
741 {
742 // D3D11 does not support permanent mapping or pixel buffers
743 if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD)
744 return NULL;
745
746 struct ra_d3d11 *p = ra->priv;
747 HRESULT hr;
748
749 struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
750 buf->params = *params;
751 buf->params.initial_data = NULL;
752
753 struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf);
754
755 D3D11_SUBRESOURCE_DATA data;
756 D3D11_SUBRESOURCE_DATA *pdata = NULL;
757 if (params->initial_data) {
758 data = (D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data };
759 pdata = &data;
760 }
761
762 D3D11_BUFFER_DESC desc = { .ByteWidth = params->size };
763 switch (params->type) {
764 case RA_BUF_TYPE_SHADER_STORAGE:
765 desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
766 desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float));
767 desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
768 break;
769 case RA_BUF_TYPE_UNIFORM:
770 desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
771 desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4]));
772 break;
773 }
774
775 hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf);
776 if (FAILED(hr)) {
777 MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr));
778 goto error;
779 }
780
781 // D3D11 doesn't allow constant buffer updates that aren't aligned to a
782 // full constant boundary (vec4,) and some drivers don't allow partial
783 // constant buffer updates at all. To support partial buffer updates, keep
784 // a mirror of the buffer data in system memory and upload the whole thing
785 // before the buffer is used.
786 if (params->host_mutable)
787 buf_p->data = talloc_zero_size(buf, desc.ByteWidth);
788
789 if (params->type == RA_BUF_TYPE_SHADER_STORAGE) {
790 D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = {
791 .Format = DXGI_FORMAT_R32_TYPELESS,
792 .ViewDimension = D3D11_UAV_DIMENSION_BUFFER,
793 .Buffer = {
794 .NumElements = desc.ByteWidth / sizeof(float),
795 .Flags = D3D11_BUFFER_UAV_FLAG_RAW,
796 },
797 };
798 hr = ID3D11Device_CreateUnorderedAccessView(p->dev,
799 (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav);
800 if (FAILED(hr)) {
801 MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
802 goto error;
803 }
804 }
805
806 return buf;
807 error:
808 buf_destroy(ra, buf);
809 return NULL;
810 }
811
buf_resolve(struct ra * ra,struct ra_buf * buf)812 static void buf_resolve(struct ra *ra, struct ra_buf *buf)
813 {
814 struct ra_d3d11 *p = ra->priv;
815 struct d3d_buf *buf_p = buf->priv;
816
817 if (!buf->params.host_mutable || !buf_p->dirty)
818 return;
819
820 // Synchronize the GPU buffer with the system-memory copy
821 ID3D11DeviceContext_UpdateSubresource(p->ctx, (ID3D11Resource *)buf_p->buf,
822 0, NULL, buf_p->data, 0, 0);
823 buf_p->dirty = false;
824 }
825
buf_update(struct ra * ra,struct ra_buf * buf,ptrdiff_t offset,const void * data,size_t size)826 static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
827 const void *data, size_t size)
828 {
829 struct d3d_buf *buf_p = buf->priv;
830
831 char *cdata = buf_p->data;
832 memcpy(cdata + offset, data, size);
833 buf_p->dirty = true;
834 }
835
get_shader_target(struct ra * ra,enum glsl_shader type)836 static const char *get_shader_target(struct ra *ra, enum glsl_shader type)
837 {
838 struct ra_d3d11 *p = ra->priv;
839 switch (p->fl) {
840 default:
841 switch (type) {
842 case GLSL_SHADER_VERTEX: return "vs_5_0";
843 case GLSL_SHADER_FRAGMENT: return "ps_5_0";
844 case GLSL_SHADER_COMPUTE: return "cs_5_0";
845 }
846 break;
847 case D3D_FEATURE_LEVEL_10_1:
848 switch (type) {
849 case GLSL_SHADER_VERTEX: return "vs_4_1";
850 case GLSL_SHADER_FRAGMENT: return "ps_4_1";
851 case GLSL_SHADER_COMPUTE: return "cs_4_1";
852 }
853 break;
854 case D3D_FEATURE_LEVEL_10_0:
855 switch (type) {
856 case GLSL_SHADER_VERTEX: return "vs_4_0";
857 case GLSL_SHADER_FRAGMENT: return "ps_4_0";
858 case GLSL_SHADER_COMPUTE: return "cs_4_0";
859 }
860 break;
861 case D3D_FEATURE_LEVEL_9_3:
862 switch (type) {
863 case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3";
864 case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3";
865 }
866 break;
867 case D3D_FEATURE_LEVEL_9_2:
868 case D3D_FEATURE_LEVEL_9_1:
869 switch (type) {
870 case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1";
871 case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1";
872 }
873 break;
874 }
875 return NULL;
876 }
877
shader_type_name(enum glsl_shader type)878 static const char *shader_type_name(enum glsl_shader type)
879 {
880 switch (type) {
881 case GLSL_SHADER_VERTEX: return "vertex";
882 case GLSL_SHADER_FRAGMENT: return "fragment";
883 case GLSL_SHADER_COMPUTE: return "compute";
884 default: return "unknown";
885 }
886 }
887
setup_clear_rpass(struct ra * ra)888 static bool setup_clear_rpass(struct ra *ra)
889 {
890 struct ra_d3d11 *p = ra->priv;
891 ID3DBlob *vs_blob = NULL;
892 ID3DBlob *ps_blob = NULL;
893 HRESULT hr;
894
895 hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main",
896 get_shader_target(ra, GLSL_SHADER_VERTEX),
897 D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
898 if (FAILED(hr)) {
899 MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n",
900 mp_HRESULT_to_str(hr));
901 goto error;
902 }
903
904 hr = ID3D11Device_CreateVertexShader(p->dev,
905 ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
906 NULL, &p->clear_vs);
907 if (FAILED(hr)) {
908 MP_ERR(ra, "Failed to create clear() vertex shader: %s\n",
909 mp_HRESULT_to_str(hr));
910 goto error;
911 }
912
913 hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main",
914 get_shader_target(ra, GLSL_SHADER_FRAGMENT),
915 D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL);
916 if (FAILED(hr)) {
917 MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n",
918 mp_HRESULT_to_str(hr));
919 goto error;
920 }
921
922 hr = ID3D11Device_CreatePixelShader(p->dev,
923 ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob),
924 NULL, &p->clear_ps);
925 if (FAILED(hr)) {
926 MP_ERR(ra, "Failed to create clear() pixel shader: %s\n",
927 mp_HRESULT_to_str(hr));
928 goto error;
929 }
930
931 D3D11_INPUT_ELEMENT_DESC in_descs[] = {
932 { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
933 };
934 hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
935 MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
936 ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout);
937 if (FAILED(hr)) {
938 MP_ERR(ra, "Failed to create clear() IA layout: %s\n",
939 mp_HRESULT_to_str(hr));
940 goto error;
941 }
942
943 // clear() always draws to a quad covering the whole viewport
944 static const float verts[] = {
945 -1, -1,
946 1, -1,
947 1, 1,
948 -1, 1,
949 -1, -1,
950 1, 1,
951 };
952 D3D11_BUFFER_DESC vdesc = {
953 .ByteWidth = sizeof(verts),
954 .Usage = D3D11_USAGE_IMMUTABLE,
955 .BindFlags = D3D11_BIND_VERTEX_BUFFER,
956 };
957 D3D11_SUBRESOURCE_DATA vdata = {
958 .pSysMem = verts,
959 };
960 hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf);
961 if (FAILED(hr)) {
962 MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n",
963 mp_HRESULT_to_str(hr));
964 goto error;
965 }
966
967 D3D11_BUFFER_DESC cdesc = {
968 .ByteWidth = sizeof(float[4]),
969 .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
970 };
971 hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf);
972 if (FAILED(hr)) {
973 MP_ERR(ra, "Failed to create clear() constant buffer: %s\n",
974 mp_HRESULT_to_str(hr));
975 goto error;
976 }
977
978 SAFE_RELEASE(vs_blob);
979 SAFE_RELEASE(ps_blob);
980 return true;
981 error:
982 SAFE_RELEASE(vs_blob);
983 SAFE_RELEASE(ps_blob);
984 return false;
985 }
986
clear_rpass(struct ra * ra,struct ra_tex * tex,float color[4],struct mp_rect * rc)987 static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4],
988 struct mp_rect *rc)
989 {
990 struct ra_d3d11 *p = ra->priv;
991 struct d3d_tex *tex_p = tex->priv;
992 struct ra_tex_params *params = &tex->params;
993
994 ID3D11DeviceContext_UpdateSubresource(p->ctx,
995 (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0);
996
997 ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout);
998 ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf,
999 &(UINT) { sizeof(float[2]) }, &(UINT) { 0 });
1000 ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
1001 D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
1002
1003 ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0);
1004
1005 ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
1006 .Width = params->w,
1007 .Height = params->h,
1008 .MinDepth = 0,
1009 .MaxDepth = 1,
1010 }));
1011 ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
1012 .left = rc->x0,
1013 .top = rc->y0,
1014 .right = rc->x1,
1015 .bottom = rc->y1,
1016 }));
1017 ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0);
1018 ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf);
1019
1020 ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL);
1021 ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
1022 D3D11_DEFAULT_SAMPLE_MASK);
1023
1024 ID3D11DeviceContext_Draw(p->ctx, 6, 0);
1025
1026 ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1,
1027 &(ID3D11Buffer *){ NULL });
1028 ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
1029 }
1030
clear(struct ra * ra,struct ra_tex * tex,float color[4],struct mp_rect * rc)1031 static void clear(struct ra *ra, struct ra_tex *tex, float color[4],
1032 struct mp_rect *rc)
1033 {
1034 struct ra_d3d11 *p = ra->priv;
1035 struct d3d_tex *tex_p = tex->priv;
1036 struct ra_tex_params *params = &tex->params;
1037
1038 if (!tex_p->rtv)
1039 return;
1040
1041 if (rc->x0 || rc->y0 || rc->x1 != params->w || rc->y1 != params->h) {
1042 if (p->has_clear_view) {
1043 ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv,
1044 color, (&(D3D11_RECT) {
1045 .left = rc->x0,
1046 .top = rc->y0,
1047 .right = rc->x1,
1048 .bottom = rc->y1,
1049 }), 1);
1050 } else {
1051 clear_rpass(ra, tex, color, rc);
1052 }
1053 } else {
1054 ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color);
1055 }
1056 }
1057
setup_blit_rpass(struct ra * ra)1058 static bool setup_blit_rpass(struct ra *ra)
1059 {
1060 struct ra_d3d11 *p = ra->priv;
1061 ID3DBlob *vs_blob = NULL;
1062 ID3DBlob *float_ps_blob = NULL;
1063 HRESULT hr;
1064
1065 hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main",
1066 get_shader_target(ra, GLSL_SHADER_VERTEX),
1067 D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
1068 if (FAILED(hr)) {
1069 MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n",
1070 mp_HRESULT_to_str(hr));
1071 goto error;
1072 }
1073
1074 hr = ID3D11Device_CreateVertexShader(p->dev,
1075 ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
1076 NULL, &p->blit_vs);
1077 if (FAILED(hr)) {
1078 MP_ERR(ra, "Failed to create blit() vertex shader: %s\n",
1079 mp_HRESULT_to_str(hr));
1080 goto error;
1081 }
1082
1083 hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, NULL, NULL,
1084 "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT),
1085 D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL);
1086 if (FAILED(hr)) {
1087 MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n",
1088 mp_HRESULT_to_str(hr));
1089 goto error;
1090 }
1091
1092 hr = ID3D11Device_CreatePixelShader(p->dev,
1093 ID3D10Blob_GetBufferPointer(float_ps_blob),
1094 ID3D10Blob_GetBufferSize(float_ps_blob),
1095 NULL, &p->blit_float_ps);
1096 if (FAILED(hr)) {
1097 MP_ERR(ra, "Failed to create blit() pixel shader: %s\n",
1098 mp_HRESULT_to_str(hr));
1099 goto error;
1100 }
1101
1102 D3D11_INPUT_ELEMENT_DESC in_descs[] = {
1103 { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
1104 { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 },
1105 };
1106 hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
1107 MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
1108 ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout);
1109 if (FAILED(hr)) {
1110 MP_ERR(ra, "Failed to create blit() IA layout: %s\n",
1111 mp_HRESULT_to_str(hr));
1112 goto error;
1113 }
1114
1115 D3D11_BUFFER_DESC vdesc = {
1116 .ByteWidth = sizeof(struct blit_vert[6]),
1117 .Usage = D3D11_USAGE_DEFAULT,
1118 .BindFlags = D3D11_BIND_VERTEX_BUFFER,
1119 };
1120 hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf);
1121 if (FAILED(hr)) {
1122 MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n",
1123 mp_HRESULT_to_str(hr));
1124 goto error;
1125 }
1126
1127 // Blit always uses point sampling, regardless of the source texture
1128 D3D11_SAMPLER_DESC sdesc = {
1129 .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
1130 .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
1131 .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
1132 .ComparisonFunc = D3D11_COMPARISON_NEVER,
1133 .MinLOD = 0,
1134 .MaxLOD = D3D11_FLOAT32_MAX,
1135 .MaxAnisotropy = 1,
1136 };
1137 hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->blit_sampler);
1138 if (FAILED(hr)) {
1139 MP_ERR(ra, "Failed to create blit() sampler: %s\n",
1140 mp_HRESULT_to_str(hr));
1141 goto error;
1142 }
1143
1144 SAFE_RELEASE(vs_blob);
1145 SAFE_RELEASE(float_ps_blob);
1146 return true;
1147 error:
1148 SAFE_RELEASE(vs_blob);
1149 SAFE_RELEASE(float_ps_blob);
1150 return false;
1151 }
1152
blit_rpass(struct ra * ra,struct ra_tex * dst,struct ra_tex * src,struct mp_rect * dst_rc,struct mp_rect * src_rc)1153 static void blit_rpass(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
1154 struct mp_rect *dst_rc, struct mp_rect *src_rc)
1155 {
1156 struct ra_d3d11 *p = ra->priv;
1157 struct d3d_tex *dst_p = dst->priv;
1158 struct d3d_tex *src_p = src->priv;
1159
1160 float u_min = (double)src_rc->x0 / src->params.w;
1161 float u_max = (double)src_rc->x1 / src->params.w;
1162 float v_min = (double)src_rc->y0 / src->params.h;
1163 float v_max = (double)src_rc->y1 / src->params.h;
1164
1165 struct blit_vert verts[6] = {
1166 { .x = -1, .y = -1, .u = u_min, .v = v_max },
1167 { .x = 1, .y = -1, .u = u_max, .v = v_max },
1168 { .x = 1, .y = 1, .u = u_max, .v = v_min },
1169 { .x = -1, .y = 1, .u = u_min, .v = v_min },
1170 };
1171 verts[4] = verts[0];
1172 verts[5] = verts[2];
1173 ID3D11DeviceContext_UpdateSubresource(p->ctx,
1174 (ID3D11Resource *)p->blit_vbuf, 0, NULL, verts, 0, 0);
1175
1176 ID3D11DeviceContext_IASetInputLayout(p->ctx, p->blit_layout);
1177 ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->blit_vbuf,
1178 &(UINT) { sizeof(verts[0]) }, &(UINT) { 0 });
1179 ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
1180 D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
1181
1182 ID3D11DeviceContext_VSSetShader(p->ctx, p->blit_vs, NULL, 0);
1183
1184 ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
1185 .TopLeftX = dst_rc->x0,
1186 .TopLeftY = dst_rc->y0,
1187 .Width = mp_rect_w(*dst_rc),
1188 .Height = mp_rect_h(*dst_rc),
1189 .MinDepth = 0,
1190 .MaxDepth = 1,
1191 }));
1192 ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
1193 .left = dst_rc->x0,
1194 .top = dst_rc->y0,
1195 .right = dst_rc->x1,
1196 .bottom = dst_rc->y1,
1197 }));
1198
1199 ID3D11DeviceContext_PSSetShader(p->ctx, p->blit_float_ps, NULL, 0);
1200 ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, &src_p->srv);
1201 ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, &p->blit_sampler);
1202
1203 ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &dst_p->rtv, NULL);
1204 ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
1205 D3D11_DEFAULT_SAMPLE_MASK);
1206
1207 ID3D11DeviceContext_Draw(p->ctx, 6, 0);
1208
1209 ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1,
1210 &(ID3D11ShaderResourceView *) { NULL });
1211 ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1,
1212 &(ID3D11SamplerState *) { NULL });
1213 ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
1214 }
1215
blit(struct ra * ra,struct ra_tex * dst,struct ra_tex * src,struct mp_rect * dst_rc_ptr,struct mp_rect * src_rc_ptr)1216 static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
1217 struct mp_rect *dst_rc_ptr, struct mp_rect *src_rc_ptr)
1218 {
1219 struct ra_d3d11 *p = ra->priv;
1220 struct d3d_tex *dst_p = dst->priv;
1221 struct d3d_tex *src_p = src->priv;
1222 struct mp_rect dst_rc = *dst_rc_ptr;
1223 struct mp_rect src_rc = *src_rc_ptr;
1224
1225 assert(dst->params.dimensions == 2);
1226 assert(src->params.dimensions == 2);
1227
1228 // A zero-sized target rectangle is a no-op
1229 if (!mp_rect_w(dst_rc) || !mp_rect_h(dst_rc))
1230 return;
1231
1232 // ra.h seems to imply that both dst_rc and src_rc can be flipped, but it's
1233 // easier for blit_rpass() if only src_rc can be flipped, so unflip dst_rc.
1234 if (dst_rc.x0 > dst_rc.x1) {
1235 MPSWAP(int, dst_rc.x0, dst_rc.x1);
1236 MPSWAP(int, src_rc.x0, src_rc.x1);
1237 }
1238 if (dst_rc.y0 > dst_rc.y1) {
1239 MPSWAP(int, dst_rc.y0, dst_rc.y1);
1240 MPSWAP(int, src_rc.y0, src_rc.y1);
1241 }
1242
1243 // If format conversion, stretching or flipping is required, a renderpass
1244 // must be used
1245 if (dst->params.format != src->params.format ||
1246 mp_rect_w(dst_rc) != mp_rect_w(src_rc) ||
1247 mp_rect_h(dst_rc) != mp_rect_h(src_rc))
1248 {
1249 blit_rpass(ra, dst, src, &dst_rc, &src_rc);
1250 } else {
1251 int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0;
1252 int src_sr = src_p->array_slice >= 0 ? src_p->array_slice : 0;
1253 ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr,
1254 dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) {
1255 .left = src_rc.x0,
1256 .top = src_rc.y0,
1257 .front = 0,
1258 .right = src_rc.x1,
1259 .bottom = src_rc.y1,
1260 .back = 1,
1261 }));
1262 }
1263 }
1264
desc_namespace(struct ra * ra,enum ra_vartype type)1265 static int desc_namespace(struct ra *ra, enum ra_vartype type)
1266 {
1267 // Images and SSBOs both use UAV bindings
1268 if (type == RA_VARTYPE_IMG_W)
1269 type = RA_VARTYPE_BUF_RW;
1270 return type;
1271 }
1272
// Compiles a GLSL shader to D3D bytecode in three stages:
//   1. GLSL  -> SPIR-V via the configured spirv_compiler
//   2. SPIR-V -> HLSL  via SPIRV-Cross (shader model chosen by feature level)
//   3. HLSL  -> bytecode via D3DCompile
// On success, *out receives the bytecode blob (owned by the caller) and true
// is returned. On failure, false is returned and the GLSL source (plus the
// HLSL source, if stage 2 got that far) is logged at error level.
static bool compile_glsl(struct ra *ra, enum glsl_shader type,
                         const char *glsl, ID3DBlob **out)
{
    struct ra_d3d11 *p = ra->priv;
    struct spirv_compiler *spirv = p->spirv;
    void *ta_ctx = talloc_new(NULL);
    spvc_result sc_res = SPVC_SUCCESS;
    spvc_context sc_ctx = NULL;
    spvc_parsed_ir sc_ir = NULL;
    spvc_compiler sc_compiler = NULL;
    spvc_compiler_options sc_opts = NULL;
    const char *hlsl = NULL;
    ID3DBlob *errors = NULL;
    bool success = false;
    HRESULT hr;

    int sc_shader_model;
    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
        sc_shader_model = 50;
    } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) {
        sc_shader_model = 41;
    } else {
        sc_shader_model = 40;
    }

    int64_t start_us = mp_time_us();

    bstr spv_module = {0};
    if (!spirv->fns->compile_glsl(spirv, ta_ctx, type, glsl, &spv_module))
        goto done;

    int64_t shaderc_us = mp_time_us();

    sc_res = spvc_context_create(&sc_ctx);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_context_parse_spirv(sc_ctx, (SpvId *)spv_module.start,
                                      spv_module.len / sizeof(SpvId), &sc_ir);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_context_create_compiler(sc_ctx, SPVC_BACKEND_HLSL, sc_ir,
                                          SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
                                          &sc_compiler);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_compiler_create_compiler_options(sc_compiler, &sc_opts);
    if (sc_res != SPVC_SUCCESS)
        goto done;
    sc_res = spvc_compiler_options_set_uint(sc_opts,
        SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model);
    if (sc_res != SPVC_SUCCESS)
        goto done;
    if (type == GLSL_SHADER_VERTEX) {
        // FLIP_VERTEX_Y is only valid for vertex shaders
        sc_res = spvc_compiler_options_set_bool(sc_opts,
            SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE);
        if (sc_res != SPVC_SUCCESS)
            goto done;
    }
    sc_res = spvc_compiler_install_compiler_options(sc_compiler, sc_opts);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_compiler_compile(sc_compiler, &hlsl);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    int64_t cross_us = mp_time_us();

    hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main",
        get_shader_target(ra, type), D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out,
        &errors);
    if (FAILED(hr)) {
        // D3DCompile can fail without producing an error blob, so never
        // dereference errors unconditionally.
        int err_len = 0;
        const char *err_str = "";
        if (errors) {
            err_len = (int)ID3D10Blob_GetBufferSize(errors);
            err_str = (char*)ID3D10Blob_GetBufferPointer(errors);
        }
        MP_ERR(ra, "D3DCompile failed: %s\n%.*s", mp_HRESULT_to_str(hr),
               err_len, err_str);
        goto done;
    }

    int64_t d3dcompile_us = mp_time_us();

    // The casts keep the arguments in sync with the %lld conversions
    MP_VERBOSE(ra, "Compiled a %s shader in %lldus\n", shader_type_name(type),
               (long long)(d3dcompile_us - start_us));
    MP_VERBOSE(ra, "shaderc: %lldus, SPIRV-Cross: %lldus, D3DCompile: %lldus\n",
               (long long)(shaderc_us - start_us),
               (long long)(cross_us - shaderc_us),
               (long long)(d3dcompile_us - cross_us));

    success = true;
done:
    if (sc_res != SPVC_SUCCESS) {
        // If spvc_context_create() itself failed there is no context to
        // query for an error string.
        MP_MSG(ra, MSGL_ERR, "SPIRV-Cross failed: %s\n",
               sc_ctx ? spvc_context_get_last_error_string(sc_ctx)
                      : "could not create context");
    }
    int level = success ? MSGL_DEBUG : MSGL_ERR;
    MP_MSG(ra, level, "GLSL source:\n");
    mp_log_source(ra->log, level, glsl);
    if (hlsl) {
        // hlsl is released together with sc_ctx, so log it before destroying
        MP_MSG(ra, level, "HLSL source:\n");
        mp_log_source(ra->log, level, hlsl);
    }
    SAFE_RELEASE(errors);
    if (sc_ctx)
        spvc_context_destroy(sc_ctx);
    talloc_free(ta_ctx);
    return success;
}
1383
renderpass_destroy(struct ra * ra,struct ra_renderpass * pass)1384 static void renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
1385 {
1386 if (!pass)
1387 return;
1388 struct d3d_rpass *pass_p = pass->priv;
1389
1390 SAFE_RELEASE(pass_p->vs);
1391 SAFE_RELEASE(pass_p->ps);
1392 SAFE_RELEASE(pass_p->cs);
1393 SAFE_RELEASE(pass_p->layout);
1394 SAFE_RELEASE(pass_p->bstate);
1395 talloc_free(pass);
1396 }
1397
map_ra_blend(enum ra_blend blend)1398 static D3D11_BLEND map_ra_blend(enum ra_blend blend)
1399 {
1400 switch (blend) {
1401 default:
1402 case RA_BLEND_ZERO: return D3D11_BLEND_ZERO;
1403 case RA_BLEND_ONE: return D3D11_BLEND_ONE;
1404 case RA_BLEND_SRC_ALPHA: return D3D11_BLEND_SRC_ALPHA;
1405 case RA_BLEND_ONE_MINUS_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA;
1406 };
1407 }
1408
vbuf_upload(struct ra * ra,void * data,size_t size)1409 static size_t vbuf_upload(struct ra *ra, void *data, size_t size)
1410 {
1411 struct ra_d3d11 *p = ra->priv;
1412 HRESULT hr;
1413
1414 // Arbitrary size limit in case there is an insane number of vertices
1415 if (size > 1e9) {
1416 MP_ERR(ra, "Vertex buffer is too large\n");
1417 return -1;
1418 }
1419
1420 // If the vertex data doesn't fit, realloc the vertex buffer
1421 if (size > p->vbuf_size) {
1422 size_t new_size = p->vbuf_size;
1423 // Arbitrary base size
1424 if (!new_size)
1425 new_size = 64 * 1024;
1426 while (new_size < size)
1427 new_size *= 2;
1428
1429 ID3D11Buffer *new_buf;
1430 D3D11_BUFFER_DESC vbuf_desc = {
1431 .ByteWidth = new_size,
1432 .Usage = D3D11_USAGE_DYNAMIC,
1433 .BindFlags = D3D11_BIND_VERTEX_BUFFER,
1434 .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
1435 };
1436 hr = ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf);
1437 if (FAILED(hr)) {
1438 MP_ERR(ra, "Failed to create vertex buffer: %s\n",
1439 mp_HRESULT_to_str(hr));
1440 return -1;
1441 }
1442
1443 SAFE_RELEASE(p->vbuf);
1444 p->vbuf = new_buf;
1445 p->vbuf_size = new_size;
1446 p->vbuf_used = 0;
1447 }
1448
1449 bool discard = false;
1450 size_t offset = p->vbuf_used;
1451 if (offset + size > p->vbuf_size) {
1452 // We reached the end of the buffer, so discard and wrap around
1453 discard = true;
1454 offset = 0;
1455 }
1456
1457 D3D11_MAPPED_SUBRESOURCE map = { 0 };
1458 hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)p->vbuf, 0,
1459 discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE,
1460 0, &map);
1461 if (FAILED(hr)) {
1462 MP_ERR(ra, "Failed to map vertex buffer: %s\n", mp_HRESULT_to_str(hr));
1463 return -1;
1464 }
1465
1466 char *cdata = map.pData;
1467 memcpy(cdata + offset, data, size);
1468
1469 ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)p->vbuf, 0);
1470
1471 p->vbuf_used = offset + size;
1472 return offset;
1473 }
1474
// Identifies a cached program blob written by this backend. The array holds
// exactly the four characters with no terminating NUL.
static const char cache_magic[4] = "RD11";
// Compared against cache_header.cache_version when loading; a cache with a
// different value is ignored.
static const int cache_version = 3;

// Header stored at the start of ra_renderpass_params.cached_program. It is
// followed by the vertex, fragment and compute bytecode, in that order.
// load_cached_program() rejects the cache unless every identifying field
// matches the running configuration. The struct is written and read as raw
// bytes, so changing its layout changes the cache format.
struct cache_header {
    // Must equal cache_magic
    char magic[sizeof(cache_magic)];
    // Must equal cache_version
    int cache_version;
    // Name and version of the spirv_compiler that produced the SPIR-V
    char compiler[SPIRV_NAME_MAX_LEN];
    int spv_compiler_version;
    // Version triple reported by spvc_get_version()
    unsigned spvc_compiler_major;
    unsigned spvc_compiler_minor;
    unsigned spvc_compiler_patch;
    // Version of the D3D compiler DLL (ra_d3d11.d3d_compiler_ver)
    struct dll_version d3d_compiler_version;
    // Device feature level (ra_d3d11.fl)
    int feature_level;
    // Byte lengths of the bytecode blobs following the header; 0 if absent
    size_t vert_bytecode_len;
    size_t frag_bytecode_len;
    size_t comp_bytecode_len;
};
1492
// Tries to load precompiled shader bytecode from params->cached_program.
//
// The cache is only used if its header matches the magic, the cache format
// version, the SPIR-V compiler name and version, the SPIRV-Cross version, the
// D3D compiler DLL version and the device feature level, and if it actually
// contains as many bytes of bytecode as the header claims. Otherwise the
// output parameters are left untouched so the caller recompiles from source.
//
// vert_bc, frag_bc and comp_bc may each be NULL if the caller is not
// interested in that stage. The returned bstrs point into the cached_program
// buffer and are not copies.
static void load_cached_program(struct ra *ra,
                                const struct ra_renderpass_params *params,
                                bstr *vert_bc,
                                bstr *frag_bc,
                                bstr *comp_bc)
{
    struct ra_d3d11 *p = ra->priv;
    struct spirv_compiler *spirv = p->spirv;
    bstr cache = params->cached_program;

    if (cache.len < sizeof(struct cache_header))
        return;

    // Copy the header out rather than aliasing the cache bytes, so nothing is
    // assumed about the alignment of the cache buffer
    struct cache_header header;
    memcpy(&header, cache.start, sizeof(header));
    cache = bstr_cut(cache, sizeof(header));

    unsigned spvc_major, spvc_minor, spvc_patch;
    spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch);

    if (strncmp(header.magic, cache_magic, sizeof(cache_magic)) != 0)
        return;
    if (header.cache_version != cache_version)
        return;
    if (strncmp(header.compiler, spirv->name, sizeof(header.compiler)) != 0)
        return;
    if (header.spv_compiler_version != spirv->compiler_version)
        return;
    if (header.spvc_compiler_major != spvc_major)
        return;
    if (header.spvc_compiler_minor != spvc_minor)
        return;
    if (header.spvc_compiler_patch != spvc_patch)
        return;
    if (!dll_version_equal(header.d3d_compiler_version, p->d3d_compiler_ver))
        return;
    if (header.feature_level != p->fl)
        return;

    // Reject truncated caches. Without this check the splices below would be
    // clamped silently and hand partial bytecode to the shader creation calls.
    // Subtracting step by step avoids overflowing a sum of the three lengths.
    size_t remaining = cache.len;
    if (header.vert_bytecode_len > remaining)
        return;
    remaining -= header.vert_bytecode_len;
    if (header.frag_bytecode_len > remaining)
        return;
    remaining -= header.frag_bytecode_len;
    if (header.comp_bytecode_len > remaining)
        return;

    if (header.vert_bytecode_len && vert_bc) {
        *vert_bc = bstr_splice(cache, 0, header.vert_bytecode_len);
        MP_VERBOSE(ra, "Using cached vertex shader\n");
    }
    cache = bstr_cut(cache, header.vert_bytecode_len);

    if (header.frag_bytecode_len && frag_bc) {
        *frag_bc = bstr_splice(cache, 0, header.frag_bytecode_len);
        MP_VERBOSE(ra, "Using cached fragment shader\n");
    }
    cache = bstr_cut(cache, header.frag_bytecode_len);

    if (header.comp_bytecode_len && comp_bc) {
        *comp_bc = bstr_splice(cache, 0, header.comp_bytecode_len);
        MP_VERBOSE(ra, "Using cached compute shader\n");
    }
}
1549
// Serializes the given bytecode into pass->params.cached_program: a
// cache_header describing the current compiler configuration, followed by the
// vertex, fragment and compute bytecode in that order. Stages that are not
// part of the pass are passed as empty bstrs and get a length of 0.
static void save_cached_program(struct ra *ra, struct ra_renderpass *pass,
                                bstr vert_bc,
                                bstr frag_bc,
                                bstr comp_bc)
{
    struct ra_d3d11 *p = ra->priv;
    struct spirv_compiler *spirv = p->spirv;

    unsigned major, minor, patch;
    spvc_get_version(&major, &minor, &patch);

    struct cache_header hdr = {
        .cache_version = cache_version,
        .spv_compiler_version = spirv->compiler_version,
        .spvc_compiler_major = major,
        .spvc_compiler_minor = minor,
        .spvc_compiler_patch = patch,
        .d3d_compiler_version = p->d3d_compiler_ver,
        .feature_level = p->fl,
        .vert_bytecode_len = vert_bc.len,
        .frag_bytecode_len = frag_bc.len,
        .comp_bytecode_len = comp_bc.len,
    };
    // The magic is a fixed 4-byte tag without a NUL terminator
    memcpy(hdr.magic, cache_magic, sizeof(hdr.magic));
    // strncpy() zero-pads the remainder of the field
    strncpy(hdr.compiler, spirv->name, sizeof(hdr.compiler));

    struct bstr *out = &pass->params.cached_program;
    bstr hdr_bytes = { (char *) &hdr, sizeof(hdr) };
    bstr_xappend(pass, out, hdr_bytes);
    bstr_xappend(pass, out, vert_bc);
    bstr_xappend(pass, out, frag_bc);
    bstr_xappend(pass, out, comp_bc);
}
1582
renderpass_create_raster(struct ra * ra,struct ra_renderpass * pass,const struct ra_renderpass_params * params)1583 static struct ra_renderpass *renderpass_create_raster(struct ra *ra,
1584 struct ra_renderpass *pass, const struct ra_renderpass_params *params)
1585 {
1586 struct ra_d3d11 *p = ra->priv;
1587 struct d3d_rpass *pass_p = pass->priv;
1588 ID3DBlob *vs_blob = NULL;
1589 ID3DBlob *ps_blob = NULL;
1590 HRESULT hr;
1591
1592 // load_cached_program will load compiled shader bytecode into vert_bc and
1593 // frag_bc if the cache is valid. If not, vert_bc/frag_bc will remain NULL.
1594 bstr vert_bc = {0};
1595 bstr frag_bc = {0};
1596 load_cached_program(ra, params, &vert_bc, &frag_bc, NULL);
1597
1598 if (!vert_bc.start) {
1599 if (!compile_glsl(ra, GLSL_SHADER_VERTEX, params->vertex_shader,
1600 &vs_blob))
1601 goto error;
1602 vert_bc = (bstr){
1603 ID3D10Blob_GetBufferPointer(vs_blob),
1604 ID3D10Blob_GetBufferSize(vs_blob),
1605 };
1606 }
1607
1608 hr = ID3D11Device_CreateVertexShader(p->dev, vert_bc.start, vert_bc.len,
1609 NULL, &pass_p->vs);
1610 if (FAILED(hr)) {
1611 MP_ERR(ra, "Failed to create vertex shader: %s\n",
1612 mp_HRESULT_to_str(hr));
1613 goto error;
1614 }
1615
1616 if (!frag_bc.start) {
1617 if (!compile_glsl(ra, GLSL_SHADER_FRAGMENT, params->frag_shader,
1618 &ps_blob))
1619 goto error;
1620 frag_bc = (bstr){
1621 ID3D10Blob_GetBufferPointer(ps_blob),
1622 ID3D10Blob_GetBufferSize(ps_blob),
1623 };
1624 }
1625
1626 hr = ID3D11Device_CreatePixelShader(p->dev, frag_bc.start, frag_bc.len,
1627 NULL, &pass_p->ps);
1628 if (FAILED(hr)) {
1629 MP_ERR(ra, "Failed to create pixel shader: %s\n",
1630 mp_HRESULT_to_str(hr));
1631 goto error;
1632 }
1633
1634 D3D11_INPUT_ELEMENT_DESC *in_descs = talloc_array(pass,
1635 D3D11_INPUT_ELEMENT_DESC, params->num_vertex_attribs);
1636 for (int i = 0; i < params->num_vertex_attribs; i++) {
1637 struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i];
1638
1639 DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
1640 switch (inp->type) {
1641 case RA_VARTYPE_FLOAT:
1642 switch (inp->dim_v) {
1643 case 1: fmt = DXGI_FORMAT_R32_FLOAT; break;
1644 case 2: fmt = DXGI_FORMAT_R32G32_FLOAT; break;
1645 case 3: fmt = DXGI_FORMAT_R32G32B32_FLOAT; break;
1646 case 4: fmt = DXGI_FORMAT_R32G32B32A32_FLOAT; break;
1647 }
1648 break;
1649 case RA_VARTYPE_BYTE_UNORM:
1650 switch (inp->dim_v) {
1651 case 1: fmt = DXGI_FORMAT_R8_UNORM; break;
1652 case 2: fmt = DXGI_FORMAT_R8G8_UNORM; break;
1653 // There is no 3-component 8-bit DXGI format
1654 case 4: fmt = DXGI_FORMAT_R8G8B8A8_UNORM; break;
1655 }
1656 break;
1657 }
1658 if (fmt == DXGI_FORMAT_UNKNOWN) {
1659 MP_ERR(ra, "Could not find suitable vertex input format\n");
1660 goto error;
1661 }
1662
1663 in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) {
1664 // The semantic name doesn't mean much and is just used to verify
1665 // the input description matches the shader. SPIRV-Cross always
1666 // uses TEXCOORD, so we should too.
1667 .SemanticName = "TEXCOORD",
1668 .SemanticIndex = i,
1669 .AlignedByteOffset = inp->offset,
1670 .Format = fmt,
1671 };
1672 }
1673
1674 hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
1675 params->num_vertex_attribs, vert_bc.start, vert_bc.len,
1676 &pass_p->layout);
1677 if (FAILED(hr)) {
1678 MP_ERR(ra, "Failed to create IA layout: %s\n", mp_HRESULT_to_str(hr));
1679 goto error;
1680 }
1681 talloc_free(in_descs);
1682 in_descs = NULL;
1683
1684 D3D11_BLEND_DESC bdesc = {
1685 .RenderTarget[0] = {
1686 .BlendEnable = params->enable_blend,
1687 .SrcBlend = map_ra_blend(params->blend_src_rgb),
1688 .DestBlend = map_ra_blend(params->blend_dst_rgb),
1689 .BlendOp = D3D11_BLEND_OP_ADD,
1690 .SrcBlendAlpha = map_ra_blend(params->blend_src_alpha),
1691 .DestBlendAlpha = map_ra_blend(params->blend_dst_alpha),
1692 .BlendOpAlpha = D3D11_BLEND_OP_ADD,
1693 .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
1694 },
1695 };
1696 hr = ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate);
1697 if (FAILED(hr)) {
1698 MP_ERR(ra, "Failed to create blend state: %s\n", mp_HRESULT_to_str(hr));
1699 goto error;
1700 }
1701
1702 save_cached_program(ra, pass, vert_bc, frag_bc, (bstr){0});
1703
1704 SAFE_RELEASE(vs_blob);
1705 SAFE_RELEASE(ps_blob);
1706 return pass;
1707
1708 error:
1709 renderpass_destroy(ra, pass);
1710 SAFE_RELEASE(vs_blob);
1711 SAFE_RELEASE(ps_blob);
1712 return NULL;
1713 }
1714
renderpass_create_compute(struct ra * ra,struct ra_renderpass * pass,const struct ra_renderpass_params * params)1715 static struct ra_renderpass *renderpass_create_compute(struct ra *ra,
1716 struct ra_renderpass *pass, const struct ra_renderpass_params *params)
1717 {
1718 struct ra_d3d11 *p = ra->priv;
1719 struct d3d_rpass *pass_p = pass->priv;
1720 ID3DBlob *cs_blob = NULL;
1721 HRESULT hr;
1722
1723 bstr comp_bc = {0};
1724 load_cached_program(ra, params, NULL, NULL, &comp_bc);
1725
1726 if (!comp_bc.start) {
1727 if (!compile_glsl(ra, GLSL_SHADER_COMPUTE, params->compute_shader,
1728 &cs_blob))
1729 goto error;
1730 comp_bc = (bstr){
1731 ID3D10Blob_GetBufferPointer(cs_blob),
1732 ID3D10Blob_GetBufferSize(cs_blob),
1733 };
1734 }
1735 hr = ID3D11Device_CreateComputeShader(p->dev, comp_bc.start, comp_bc.len,
1736 NULL, &pass_p->cs);
1737 if (FAILED(hr)) {
1738 MP_ERR(ra, "Failed to create compute shader: %s\n",
1739 mp_HRESULT_to_str(hr));
1740 goto error;
1741 }
1742
1743 save_cached_program(ra, pass, (bstr){0}, (bstr){0}, comp_bc);
1744
1745 SAFE_RELEASE(cs_blob);
1746 return pass;
1747 error:
1748 renderpass_destroy(ra, pass);
1749 SAFE_RELEASE(cs_blob);
1750 return NULL;
1751 }
1752
renderpass_create(struct ra * ra,const struct ra_renderpass_params * params)1753 static struct ra_renderpass *renderpass_create(struct ra *ra,
1754 const struct ra_renderpass_params *params)
1755 {
1756 struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
1757 pass->params = *ra_renderpass_params_copy(pass, params);
1758 pass->params.cached_program = (bstr){0};
1759 pass->priv = talloc_zero(pass, struct d3d_rpass);
1760
1761 if (params->type == RA_RENDERPASS_TYPE_COMPUTE) {
1762 return renderpass_create_compute(ra, pass, params);
1763 } else {
1764 return renderpass_create_raster(ra, pass, params);
1765 }
1766 }
1767
// Executes a raster renderpass. The vertex data is uploaded to the streaming
// vertex buffer, the resources gathered by renderpass_run() are bound to the
// pixel shader stage (UAVs start at slot 1, after the render target), the
// draw call is issued and everything is unbound again.
// The ubos, samplers, srvs and uavs arrays are cleared to NULL as a side
// effect. If the vertex upload fails, nothing is drawn.
static void renderpass_run_raster(struct ra *ra,
                                  const struct ra_renderpass_run_params *params,
                                  ID3D11Buffer *ubos[], int ubos_len,
                                  ID3D11SamplerState *samplers[],
                                  ID3D11ShaderResourceView *srvs[],
                                  int samplers_len,
                                  ID3D11UnorderedAccessView *uavs[],
                                  int uavs_len)
{
    struct ra_d3d11 *p = ra->priv;
    struct ra_renderpass *pass = params->pass;
    struct d3d_rpass *pass_p = pass->priv;
    ID3D11DeviceContext *ctx = p->ctx;

    // vbuf_upload() signals failure with an all-ones return value
    UINT vbuf_offset = vbuf_upload(ra, params->vertex_data,
        pass->params.vertex_stride * params->vertex_count);
    if (vbuf_offset == (UINT)-1)
        return;

    ID3D11DeviceContext_IASetInputLayout(ctx, pass_p->layout);
    ID3D11DeviceContext_IASetVertexBuffers(ctx, 0, 1, &p->vbuf,
        &pass->params.vertex_stride, &vbuf_offset);
    ID3D11DeviceContext_IASetPrimitiveTopology(ctx,
        D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

    ID3D11DeviceContext_VSSetShader(ctx, pass_p->vs, NULL, 0);

    D3D11_VIEWPORT viewport = {
        .TopLeftX = params->viewport.x0,
        .TopLeftY = params->viewport.y0,
        .Width = mp_rect_w(params->viewport),
        .Height = mp_rect_h(params->viewport),
        .MinDepth = 0,
        .MaxDepth = 1,
    };
    D3D11_RECT scissor = {
        .left = params->scissors.x0,
        .top = params->scissors.y0,
        .right = params->scissors.x1,
        .bottom = params->scissors.y1,
    };
    ID3D11DeviceContext_RSSetViewports(ctx, 1, &viewport);
    ID3D11DeviceContext_RSSetScissorRects(ctx, 1, &scissor);

    ID3D11DeviceContext_PSSetShader(ctx, pass_p->ps, NULL, 0);
    ID3D11DeviceContext_PSSetConstantBuffers(ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_PSSetShaderResources(ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_PSSetSamplers(ctx, 0, samplers_len, samplers);

    struct d3d_tex *target_p = params->target->priv;
    ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(ctx, 1,
        &target_p->rtv, NULL, 1, uavs_len, uavs, NULL);
    ID3D11DeviceContext_OMSetBlendState(ctx, pass_p->bstate, NULL,
                                        D3D11_DEFAULT_SAMPLE_MASK);

    ID3D11DeviceContext_Draw(ctx, params->vertex_count, 0);

    // Unbind everything. It's easier to do this than to actually track state,
    // and if we leave the RTV bound, it could trip up D3D's conflict checker.
    // The arrays are reused as the NULL-filled inputs for the unbind calls.
    for (int n = 0; n < ubos_len; n++)
        ubos[n] = NULL;
    for (int n = 0; n < samplers_len; n++) {
        samplers[n] = NULL;
        srvs[n] = NULL;
    }
    for (int n = 0; n < uavs_len; n++)
        uavs[n] = NULL;
    ID3D11DeviceContext_PSSetConstantBuffers(ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_PSSetShaderResources(ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_PSSetSamplers(ctx, 0, samplers_len, samplers);
    ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(ctx, 0,
        NULL, NULL, 1, uavs_len, uavs, NULL);
}
1838
// Executes a compute renderpass. The resources gathered by renderpass_run()
// are bound to the compute shader stage, params->compute_groups thread groups
// are dispatched and everything is unbound again.
// The ubos, samplers, srvs and uavs arrays are cleared to NULL as a side
// effect.
static void renderpass_run_compute(struct ra *ra,
                                   const struct ra_renderpass_run_params *params,
                                   ID3D11Buffer *ubos[], int ubos_len,
                                   ID3D11SamplerState *samplers[],
                                   ID3D11ShaderResourceView *srvs[],
                                   int samplers_len,
                                   ID3D11UnorderedAccessView *uavs[],
                                   int uavs_len)
{
    struct ra_d3d11 *p = ra->priv;
    struct d3d_rpass *pass_p = params->pass->priv;
    ID3D11DeviceContext *ctx = p->ctx;

    ID3D11DeviceContext_CSSetShader(ctx, pass_p->cs, NULL, 0);
    ID3D11DeviceContext_CSSetConstantBuffers(ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_CSSetShaderResources(ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_CSSetSamplers(ctx, 0, samplers_len, samplers);
    ID3D11DeviceContext_CSSetUnorderedAccessViews(ctx, 0, uavs_len, uavs,
                                                  NULL);

    ID3D11DeviceContext_Dispatch(ctx, params->compute_groups[0],
                                      params->compute_groups[1],
                                      params->compute_groups[2]);

    // Unbind everything by re-issuing the bind calls with NULL-filled arrays
    for (int n = 0; n < ubos_len; n++)
        ubos[n] = NULL;
    for (int n = 0; n < samplers_len; n++) {
        samplers[n] = NULL;
        srvs[n] = NULL;
    }
    for (int n = 0; n < uavs_len; n++)
        uavs[n] = NULL;
    ID3D11DeviceContext_CSSetConstantBuffers(ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_CSSetShaderResources(ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_CSSetSamplers(ctx, 0, samplers_len, samplers);
    ID3D11DeviceContext_CSSetUnorderedAccessViews(ctx, 0, uavs_len, uavs,
                                                  NULL);
}
1877
renderpass_run(struct ra * ra,const struct ra_renderpass_run_params * params)1878 static void renderpass_run(struct ra *ra,
1879 const struct ra_renderpass_run_params *params)
1880 {
1881 struct ra_d3d11 *p = ra->priv;
1882 struct ra_renderpass *pass = params->pass;
1883 enum ra_renderpass_type type = pass->params.type;
1884
1885 ID3D11Buffer *ubos[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = {0};
1886 int ubos_len = 0;
1887
1888 ID3D11SamplerState *samplers[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
1889 ID3D11ShaderResourceView *srvs[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
1890 int samplers_len = 0;
1891
1892 ID3D11UnorderedAccessView *uavs[D3D11_1_UAV_SLOT_COUNT] = {0};
1893 int uavs_len = 0;
1894
1895 // In a raster pass, one of the UAV slots is used by the runtime for the RTV
1896 int uavs_max = type == RA_RENDERPASS_TYPE_COMPUTE ? p->max_uavs
1897 : p->max_uavs - 1;
1898
1899 // Gather the input variables used in this pass. These will be mapped to
1900 // HLSL registers.
1901 for (int i = 0; i < params->num_values; i++) {
1902 struct ra_renderpass_input_val *val = ¶ms->values[i];
1903 int binding = pass->params.inputs[val->index].binding;
1904 switch (pass->params.inputs[val->index].type) {
1905 case RA_VARTYPE_BUF_RO:
1906 if (binding > MP_ARRAY_SIZE(ubos)) {
1907 MP_ERR(ra, "Too many constant buffers in pass\n");
1908 return;
1909 }
1910 struct ra_buf *buf_ro = *(struct ra_buf **)val->data;
1911 buf_resolve(ra, buf_ro);
1912 struct d3d_buf *buf_ro_p = buf_ro->priv;
1913 ubos[binding] = buf_ro_p->buf;
1914 ubos_len = MPMAX(ubos_len, binding + 1);
1915 break;
1916 case RA_VARTYPE_BUF_RW:
1917 if (binding > uavs_max) {
1918 MP_ERR(ra, "Too many UAVs in pass\n");
1919 return;
1920 }
1921 struct ra_buf *buf_rw = *(struct ra_buf **)val->data;
1922 buf_resolve(ra, buf_rw);
1923 struct d3d_buf *buf_rw_p = buf_rw->priv;
1924 uavs[binding] = buf_rw_p->uav;
1925 uavs_len = MPMAX(uavs_len, binding + 1);
1926 break;
1927 case RA_VARTYPE_TEX:
1928 if (binding > MP_ARRAY_SIZE(samplers)) {
1929 MP_ERR(ra, "Too many textures in pass\n");
1930 return;
1931 }
1932 struct ra_tex *tex = *(struct ra_tex **)val->data;
1933 struct d3d_tex *tex_p = tex->priv;
1934 samplers[binding] = tex_p->sampler;
1935 srvs[binding] = tex_p->srv;
1936 samplers_len = MPMAX(samplers_len, binding + 1);
1937 break;
1938 case RA_VARTYPE_IMG_W:
1939 if (binding > uavs_max) {
1940 MP_ERR(ra, "Too many UAVs in pass\n");
1941 return;
1942 }
1943 struct ra_tex *img = *(struct ra_tex **)val->data;
1944 struct d3d_tex *img_p = img->priv;
1945 uavs[binding] = img_p->uav;
1946 uavs_len = MPMAX(uavs_len, binding + 1);
1947 break;
1948 }
1949 }
1950
1951 if (type == RA_RENDERPASS_TYPE_COMPUTE) {
1952 renderpass_run_compute(ra, params, ubos, ubos_len, samplers, srvs,
1953 samplers_len, uavs, uavs_len);
1954 } else {
1955 renderpass_run_raster(ra, params, ubos, ubos_len, samplers, srvs,
1956 samplers_len, uavs, uavs_len);
1957 }
1958 }
1959
// Release the query objects owned by a timer and free the timer itself.
// Passing a NULL timer is allowed and does nothing.
static void timer_destroy(struct ra *ra, ra_timer *ratimer)
{
    struct d3d_timer *t = ratimer;

    if (t) {
        SAFE_RELEASE(t->ts_start);
        SAFE_RELEASE(t->ts_end);
        SAFE_RELEASE(t->disjoint);
        talloc_free(t);
    }
}
1971
timer_create(struct ra * ra)1972 static ra_timer *timer_create(struct ra *ra)
1973 {
1974 struct ra_d3d11 *p = ra->priv;
1975 if (!p->has_timestamp_queries)
1976 return NULL;
1977
1978 struct d3d_timer *timer = talloc_zero(NULL, struct d3d_timer);
1979 HRESULT hr;
1980
1981 hr = ID3D11Device_CreateQuery(p->dev,
1982 &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_start);
1983 if (FAILED(hr)) {
1984 MP_ERR(ra, "Failed to create start query: %s\n", mp_HRESULT_to_str(hr));
1985 goto error;
1986 }
1987
1988 hr = ID3D11Device_CreateQuery(p->dev,
1989 &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_end);
1990 if (FAILED(hr)) {
1991 MP_ERR(ra, "Failed to create end query: %s\n", mp_HRESULT_to_str(hr));
1992 goto error;
1993 }
1994
1995 // Measuring duration in D3D11 requires three queries: start and end
1996 // timestamps, and a disjoint query containing a flag which says whether
1997 // the timestamps are usable or if a discontinuity occured between them,
1998 // like a change in power state or clock speed. The disjoint query also
1999 // contains the timer frequency, so the timestamps are useless without it.
2000 hr = ID3D11Device_CreateQuery(p->dev,
2001 &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &timer->disjoint);
2002 if (FAILED(hr)) {
2003 MP_ERR(ra, "Failed to create timer query: %s\n", mp_HRESULT_to_str(hr));
2004 goto error;
2005 }
2006
2007 return timer;
2008 error:
2009 timer_destroy(ra, timer);
2010 return NULL;
2011 }
2012
// Convert a tick count to nanoseconds, given the tick frequency in Hz.
// Whole seconds and the sub-second remainder are scaled separately, so the
// multiplication by 10^9 is applied to smaller operands than the raw tick
// count. freq must be non-zero.
static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq)
{
    const uint64_t ns_per_s = 1000000000ull;
    const uint64_t whole_seconds = timestamp / freq;
    const uint64_t leftover_ticks = timestamp % freq;

    return whole_seconds * ns_per_s + leftover_ticks * ns_per_s / freq;
}
2018
// Read back the duration measured by a timer, in nanoseconds. The queries are
// polled without flushing; 0 is returned if any query fails or has no data
// yet, if the disjoint flag is set, or if the reported frequency is zero.
static uint64_t timer_get_result(struct ra *ra, ra_timer *ratimer)
{
    struct ra_d3d11 *p = ra->priv;
    struct d3d_timer *timer = ratimer;
    const UINT flags = D3D11_ASYNC_GETDATA_DONOTFLUSH;
    HRESULT hr;

    UINT64 ts_end;
    hr = ID3D11DeviceContext_GetData(p->ctx,
        (ID3D11Asynchronous *)timer->ts_end, &ts_end, sizeof(ts_end), flags);
    if (FAILED(hr) || hr == S_FALSE)
        return 0;

    UINT64 ts_begin;
    hr = ID3D11DeviceContext_GetData(p->ctx,
        (ID3D11Asynchronous *)timer->ts_start, &ts_begin, sizeof(ts_begin),
        flags);
    if (FAILED(hr) || hr == S_FALSE)
        return 0;

    D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj;
    hr = ID3D11DeviceContext_GetData(p->ctx,
        (ID3D11Asynchronous *)timer->disjoint, &dj, sizeof(dj), flags);

    // dj is only inspected once the call is known to have produced data
    bool usable = SUCCEEDED(hr) && hr != S_FALSE && !dj.Disjoint &&
                  dj.Frequency;
    return usable ? timestamp_to_ns(ts_end - ts_begin, dj.Frequency) : 0;
}
2046
// Begin a timed section: store the previous measurement in the timer, then
// open the disjoint query and record the start timestamp.
static void timer_start(struct ra *ra, ra_timer *ratimer)
{
    struct d3d_timer *timer = ratimer;
    struct ra_d3d11 *p = ra->priv;
    ID3D11Asynchronous *disjoint = (ID3D11Asynchronous *)timer->disjoint;
    ID3D11Asynchronous *ts_start = (ID3D11Asynchronous *)timer->ts_start;

    // The previous result has to be read before the queries are reused; it is
    // what timer_stop hands back to the caller
    timer->result = timer_get_result(ra, ratimer);

    ID3D11DeviceContext_Begin(p->ctx, disjoint);
    ID3D11DeviceContext_End(p->ctx, ts_start);
}
2058
// End a timed section: record the end timestamp and close the disjoint query.
// Returns the result stored by the preceding timer_start call, not the
// duration of the section that was just closed.
static uint64_t timer_stop(struct ra *ra, ra_timer *ratimer)
{
    struct d3d_timer *timer = ratimer;
    struct ra_d3d11 *p = ra->priv;
    ID3D11Asynchronous *ts_end = (ID3D11Asynchronous *)timer->ts_end;
    ID3D11Asynchronous *disjoint = (ID3D11Asynchronous *)timer->disjoint;

    ID3D11DeviceContext_End(p->ctx, ts_end);
    ID3D11DeviceContext_End(p->ctx, disjoint);

    return timer->result;
}
2069
// Translate a D3D11 debug-layer message severity into an mpv log level.
static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev)
{
    if (sev == D3D11_MESSAGE_SEVERITY_CORRUPTION)
        return MSGL_FATAL;
    if (sev == D3D11_MESSAGE_SEVERITY_ERROR)
        return MSGL_ERR;
    if (sev == D3D11_MESSAGE_SEVERITY_WARNING)
        return MSGL_WARN;

    // INFO, MESSAGE and any other value are logged at debug level
    return MSGL_DEBUG;
}
2085
debug_marker(struct ra * ra,const char * msg)2086 static void debug_marker(struct ra *ra, const char *msg)
2087 {
2088 struct ra_d3d11 *p = ra->priv;
2089 void *talloc_ctx = talloc_new(NULL);
2090 HRESULT hr;
2091
2092 if (!p->iqueue)
2093 goto done;
2094
2095 // Copy debug-layer messages to mpv's log output
2096 bool printed_header = false;
2097 uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue);
2098 for (uint64_t i = 0; i < messages; i++) {
2099 size_t len;
2100 hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len);
2101 if (FAILED(hr) || !len)
2102 goto done;
2103
2104 D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len);
2105 hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len);
2106 if (FAILED(hr))
2107 goto done;
2108
2109 int msgl = map_msg_severity(d3dmsg->Severity);
2110 if (mp_msg_test(ra->log, msgl)) {
2111 if (!printed_header)
2112 MP_INFO(ra, "%s:\n", msg);
2113 printed_header = true;
2114
2115 MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID,
2116 (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription);
2117 talloc_free(d3dmsg);
2118 }
2119 }
2120
2121 ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
2122 done:
2123 talloc_free(talloc_ctx);
2124 }
2125
destroy(struct ra * ra)2126 static void destroy(struct ra *ra)
2127 {
2128 struct ra_d3d11 *p = ra->priv;
2129
2130 // Release everything except the interfaces needed to perform leak checking
2131 SAFE_RELEASE(p->clear_ps);
2132 SAFE_RELEASE(p->clear_vs);
2133 SAFE_RELEASE(p->clear_layout);
2134 SAFE_RELEASE(p->clear_vbuf);
2135 SAFE_RELEASE(p->clear_cbuf);
2136 SAFE_RELEASE(p->blit_float_ps);
2137 SAFE_RELEASE(p->blit_vs);
2138 SAFE_RELEASE(p->blit_layout);
2139 SAFE_RELEASE(p->blit_vbuf);
2140 SAFE_RELEASE(p->blit_sampler);
2141 SAFE_RELEASE(p->vbuf);
2142 SAFE_RELEASE(p->ctx1);
2143 SAFE_RELEASE(p->dev1);
2144 SAFE_RELEASE(p->dev);
2145
2146 if (p->debug && p->ctx) {
2147 // Destroy the device context synchronously so referenced objects don't
2148 // show up in the leak check
2149 ID3D11DeviceContext_ClearState(p->ctx);
2150 ID3D11DeviceContext_Flush(p->ctx);
2151 }
2152 SAFE_RELEASE(p->ctx);
2153
2154 if (p->debug) {
2155 // Report any leaked objects
2156 debug_marker(ra, "after destroy");
2157 ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL);
2158 debug_marker(ra, "after leak check");
2159 ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY);
2160 debug_marker(ra, "after leak summary");
2161 }
2162 SAFE_RELEASE(p->debug);
2163 SAFE_RELEASE(p->iqueue);
2164
2165 talloc_free(ra);
2166 }
2167
2168 static struct ra_fns ra_fns_d3d11 = {
2169 .destroy = destroy,
2170 .tex_create = tex_create,
2171 .tex_destroy = tex_destroy,
2172 .tex_upload = tex_upload,
2173 .tex_download = tex_download,
2174 .buf_create = buf_create,
2175 .buf_destroy = buf_destroy,
2176 .buf_update = buf_update,
2177 .clear = clear,
2178 .blit = blit,
2179 .uniform_layout = std140_layout,
2180 .desc_namespace = desc_namespace,
2181 .renderpass_create = renderpass_create,
2182 .renderpass_destroy = renderpass_destroy,
2183 .renderpass_run = renderpass_run,
2184 .timer_create = timer_create,
2185 .timer_destroy = timer_destroy,
2186 .timer_start = timer_start,
2187 .timer_stop = timer_stop,
2188 .debug_marker = debug_marker,
2189 };
2190
ra_d3d11_flush(struct ra * ra)2191 void ra_d3d11_flush(struct ra *ra)
2192 {
2193 struct ra_d3d11 *p = ra->priv;
2194 ID3D11DeviceContext_Flush(p->ctx);
2195 }
2196
init_debug_layer(struct ra * ra)2197 static void init_debug_layer(struct ra *ra)
2198 {
2199 struct ra_d3d11 *p = ra->priv;
2200 HRESULT hr;
2201
2202 hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug,
2203 (void**)&p->debug);
2204 if (FAILED(hr)) {
2205 MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr));
2206 return;
2207 }
2208
2209 hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue,
2210 (void**)&p->iqueue);
2211 if (FAILED(hr)) {
2212 MP_ERR(ra, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr));
2213 return;
2214 }
2215
2216 // Store an unlimited amount of messages in the buffer. This is fine
2217 // because we flush stored messages regularly (in debug_marker.)
2218 ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1);
2219
2220 // Filter some annoying messages
2221 D3D11_MESSAGE_ID deny_ids[] = {
2222 // This error occurs during context creation when we try to figure out
2223 // the real maximum texture size by attempting to create a texture
2224 // larger than the current feature level allows.
2225 D3D11_MESSAGE_ID_CREATETEXTURE2D_INVALIDDIMENSIONS,
2226
2227 // These are normal. The RA timer queue habitually reuses timer objects
2228 // without retrieving the results.
2229 D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS,
2230 D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS,
2231 };
2232 D3D11_INFO_QUEUE_FILTER filter = {
2233 .DenyList = {
2234 .NumIDs = MP_ARRAY_SIZE(deny_ids),
2235 .pIDList = deny_ids,
2236 },
2237 };
2238 ID3D11InfoQueue_PushStorageFilter(p->iqueue, &filter);
2239 }
2240
// Read the file version of a loaded DLL from its embedded version resource.
// Returns an all-zero dll_version if the resource is missing, empty, cannot
// be loaded, or does not contain a complete VS_FIXEDFILEINFO.
static struct dll_version get_dll_version(HMODULE dll)
{
    void *ctx = talloc_new(NULL);
    struct dll_version ret = { 0 };

    HRSRC rsrc = FindResourceW(dll, MAKEINTRESOURCEW(VS_VERSION_INFO),
                               MAKEINTRESOURCEW(VS_FILE_INFO));
    if (!rsrc)
        goto done;
    // SizeofResource reports failure by returning 0; don't pass an empty
    // buffer on to VerQueryValueW
    DWORD size = SizeofResource(dll, rsrc);
    if (!size)
        goto done;
    HGLOBAL res = LoadResource(dll, rsrc);
    if (!res)
        goto done;
    void *ptr = LockResource(res);
    if (!ptr)
        goto done;
    // VerQueryValueW is run on a private copy of the resource data, which is
    // freed together with ctx
    void *copy = talloc_memdup(ctx, ptr, size);

    VS_FIXEDFILEINFO *ffi;
    UINT ffi_len;
    if (!VerQueryValueW(copy, L"\\", (void**)&ffi, &ffi_len))
        goto done;
    if (ffi_len < sizeof(*ffi))
        goto done;

    // The version is packed as four 16-bit fields in two DWORDs
    ret.major = HIWORD(ffi->dwFileVersionMS);
    ret.minor = LOWORD(ffi->dwFileVersionMS);
    ret.build = HIWORD(ffi->dwFileVersionLS);
    ret.revision = LOWORD(ffi->dwFileVersionLS);

done:
    talloc_free(ctx);
    return ret;
}
2275
load_d3d_compiler(struct ra * ra)2276 static bool load_d3d_compiler(struct ra *ra)
2277 {
2278 struct ra_d3d11 *p = ra->priv;
2279 HMODULE d3dcompiler = NULL;
2280
2281 // Try the inbox D3DCompiler first (Windows 8.1 and up)
2282 if (IsWindows8Point1OrGreater()) {
2283 d3dcompiler = LoadLibraryExW(L"d3dcompiler_47.dll", NULL,
2284 LOAD_LIBRARY_SEARCH_SYSTEM32);
2285 }
2286 // Check for a packaged version of d3dcompiler_47.dll
2287 if (!d3dcompiler)
2288 d3dcompiler = LoadLibraryW(L"d3dcompiler_47.dll");
2289 // Try d3dcompiler_46.dll from the Windows 8 SDK
2290 if (!d3dcompiler)
2291 d3dcompiler = LoadLibraryW(L"d3dcompiler_46.dll");
2292 // Try d3dcompiler_43.dll from the June 2010 DirectX SDK
2293 if (!d3dcompiler)
2294 d3dcompiler = LoadLibraryW(L"d3dcompiler_43.dll");
2295 // Can't find any compiler DLL, so give up
2296 if (!d3dcompiler)
2297 return false;
2298
2299 p->d3d_compiler_ver = get_dll_version(d3dcompiler);
2300
2301 p->D3DCompile = (pD3DCompile)GetProcAddress(d3dcompiler, "D3DCompile");
2302 if (!p->D3DCompile)
2303 return false;
2304 return true;
2305 }
2306
find_max_texture_dimension(struct ra * ra)2307 static void find_max_texture_dimension(struct ra *ra)
2308 {
2309 struct ra_d3d11 *p = ra->priv;
2310
2311 D3D11_TEXTURE2D_DESC desc = {
2312 .Width = ra->max_texture_wh,
2313 .Height = ra->max_texture_wh,
2314 .MipLevels = 1,
2315 .ArraySize = 1,
2316 .SampleDesc.Count = 1,
2317 .Format = DXGI_FORMAT_R8_UNORM,
2318 .BindFlags = D3D11_BIND_SHADER_RESOURCE,
2319 };
2320 while (true) {
2321 desc.Height = desc.Width *= 2;
2322 if (desc.Width >= 0x8000000u)
2323 return;
2324 if (FAILED(ID3D11Device_CreateTexture2D(p->dev, &desc, NULL, NULL)))
2325 return;
2326 ra->max_texture_wh = desc.Width;
2327 }
2328 }
2329
// Create an RA on top of an existing D3D11 device. The RA takes its own
// reference to dev. Capabilities are derived from the device's feature level
// and the SPIR-V compiler's caps. Returns NULL (after tearing down whatever
// was set up) if the D3DCompiler DLL cannot be loaded or any of the required
// pipeline objects cannot be created.
struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
                           struct spirv_compiler *spirv)
{
    struct ra *ra = talloc_zero(NULL, struct ra);
    ra->log = log;
    ra->fns = &ra_fns_d3d11;
    ra->glsl_version = spirv->glsl_version;
    ra->glsl_vulkan = true;

    // Even Direct3D 10level9 supports 3D textures
    ra->caps = RA_CAP_TEX_3D | RA_CAP_DIRECT_UPLOAD | RA_CAP_BUF_RO |
               RA_CAP_BLIT | spirv->ra_caps;

    struct ra_d3d11 *p = talloc_zero(ra, struct ra_d3d11);
    ra->priv = p;
    p->spirv = spirv;

    ID3D11Device_AddRef(dev);
    p->dev = dev;
    ID3D11Device_GetImmediateContext(p->dev, &p->ctx);

    // The 11.1 interfaces are optional; ClearView is only queried when they
    // are available
    int runtime_minor = 0;
    HRESULT hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1,
                                             (void**)&p->dev1);
    if (SUCCEEDED(hr)) {
        runtime_minor = 1;
        ID3D11Device1_GetImmediateContext1(p->dev1, &p->ctx1);

        D3D11_FEATURE_DATA_D3D11_OPTIONS fopts = { 0 };
        hr = ID3D11Device_CheckFeatureSupport(p->dev,
            D3D11_FEATURE_D3D11_OPTIONS, &fopts, sizeof(fopts));
        if (SUCCEEDED(hr))
            p->has_clear_view = fopts.ClearView;
    }

    MP_VERBOSE(ra, "Using Direct3D 11.%d runtime\n", runtime_minor);

    p->fl = ID3D11Device_GetFeatureLevel(p->dev);

    // Baseline texture size for the feature level (refined further below)
    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
        ra->max_texture_wh = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
    } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
        ra->max_texture_wh = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
    } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) {
        ra->max_texture_wh = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION;
    } else {
        ra->max_texture_wh = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION;
    }

    if (p->fl >= D3D_FEATURE_LEVEL_10_0)
        ra->caps |= RA_CAP_FRAGCOORD;

    // Some 10_0 hardware has compute shaders, but only 11_0 has image load/store
    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
        ra->caps |= RA_CAP_GATHER | RA_CAP_COMPUTE | RA_CAP_BUF_RW;
        ra->max_shmem = 32 * 1024;
    }

    p->max_uavs = p->fl >= D3D_FEATURE_LEVEL_11_1
                      ? D3D11_1_UAV_SLOT_COUNT
                      : D3D11_PS_CS_UAV_REGISTER_COUNT;

    if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG)
        init_debug_layer(ra);

    // Some level 9_x devices don't have timestamp queries
    hr = ID3D11Device_CreateQuery(p->dev,
        &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL);
    p->has_timestamp_queries = SUCCEEDED(hr);

    // According to MSDN, the above texture sizes are just minimums and drivers
    // may support larger textures. See:
    // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx
    find_max_texture_dimension(ra);
    MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh,
               ra->max_texture_wh);

    if (!load_d3d_compiler(ra)) {
        MP_FATAL(ra, "Could not find D3DCompiler DLL\n");
        goto fail;
    }

    MP_VERBOSE(ra, "D3DCompiler version: %u.%u.%u.%u\n",
               p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor,
               p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision);

    setup_formats(ra);

    // The rasterizer state never changes, so set it up here
    D3D11_RASTERIZER_DESC rdesc = {0};
    rdesc.FillMode = D3D11_FILL_SOLID;
    rdesc.CullMode = D3D11_CULL_NONE;
    rdesc.FrontCounterClockwise = FALSE;
    rdesc.DepthClipEnable = TRUE; // Required for 10level9
    rdesc.ScissorEnable = TRUE;

    ID3D11RasterizerState *rstate;
    hr = ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &rstate);
    if (FAILED(hr)) {
        MP_ERR(ra, "Failed to create rasterizer state: %s\n", mp_HRESULT_to_str(hr));
        goto fail;
    }
    ID3D11DeviceContext_RSSetState(p->ctx, rstate);
    SAFE_RELEASE(rstate);

    // If the device doesn't support ClearView, we have to set up a
    // shader-based clear() implementation
    if (!p->has_clear_view && !setup_clear_rpass(ra))
        goto fail;

    if (!setup_blit_rpass(ra))
        goto fail;

    return ra;

fail:
    destroy(ra);
    return NULL;
}
2454
ra_d3d11_get_device(struct ra * ra)2455 ID3D11Device *ra_d3d11_get_device(struct ra *ra)
2456 {
2457 struct ra_d3d11 *p = ra->priv;
2458 ID3D11Device_AddRef(p->dev);
2459 return p->dev;
2460 }
2461
ra_is_d3d11(struct ra * ra)2462 bool ra_is_d3d11(struct ra *ra)
2463 {
2464 return ra->fns == &ra_fns_d3d11;
2465 }
2466