1 #include <windows.h>
2 #include <versionhelpers.h>
3 #include <d3d11_1.h>
4 #include <d3d11sdklayers.h>
5 #include <dxgi1_2.h>
6 #include <d3dcompiler.h>
7 #include <spirv_cross_c.h>
8 
9 #include "common/msg.h"
10 #include "osdep/io.h"
11 #include "osdep/subprocess.h"
12 #include "osdep/timer.h"
13 #include "osdep/windows_utils.h"
14 #include "video/out/gpu/spirv.h"
15 #include "video/out/gpu/utils.h"
16 
17 #include "ra_d3d11.h"
18 
// Fallback for D3D11_1_UAV_SLOT_COUNT, used only when the included headers do
// not already define it
#ifndef D3D11_1_UAV_SLOT_COUNT
#define D3D11_1_UAV_SLOT_COUNT (64)
#endif
// Bit tested against OutFormatSupport2 in setup_formats() to find formats
// whose typed UAVs can be stored to.
// NOTE(review): defined unconditionally, presumably because some toolchain
// headers lack this constant - confirm.
#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80)
23 
// Four-part version number. Two versions are compared field by field with
// dll_version_equal().
struct dll_version {
    uint16_t major;
    uint16_t minor;
    uint16_t build;
    uint16_t revision;
};
30 
// Backend-private state of the D3D11 RA implementation. The functions in this
// file reach it through ra->priv.
struct ra_d3d11 {
    struct spirv_compiler *spirv;

    ID3D11Device *dev;
    ID3D11Device1 *dev1;
    ID3D11DeviceContext *ctx;
    // May be NULL; tex_upload() falls back to ctx in that case
    ID3D11DeviceContext1 *ctx1;
    // HLSL compiler entry point, called to build the internal shaders
    pD3DCompile D3DCompile;

    // NOTE(review): presumably the version of the DLL that provided
    // D3DCompile - confirm where it is filled in
    struct dll_version d3d_compiler_ver;

    // Debug interfaces (--gpu-debug)
    ID3D11Debug *debug;
    ID3D11InfoQueue *iqueue;

    // Device capabilities
    // Feature level: gates UAV/storage support and selects the shader target
    // profile (see get_shader_target())
    D3D_FEATURE_LEVEL fl;
    bool has_clear_view;
    bool has_timestamp_queries;
    int max_uavs;

    // Streaming dynamic vertex buffer, which is used for all renderpasses
    ID3D11Buffer *vbuf;
    size_t vbuf_size;
    size_t vbuf_used;

    // clear() renderpass resources (only used when has_clear_view is false)
    ID3D11PixelShader *clear_ps;
    ID3D11VertexShader *clear_vs;
    ID3D11InputLayout *clear_layout;
    ID3D11Buffer *clear_vbuf;
    ID3D11Buffer *clear_cbuf;

    // blit() renderpass resources
    ID3D11PixelShader *blit_float_ps;
    ID3D11VertexShader *blit_vs;
    ID3D11InputLayout *blit_layout;
    ID3D11Buffer *blit_vbuf;
    ID3D11SamplerState *blit_sampler;
};
71 
// Backend-private data attached to a ra_tex through tex->priv
struct d3d_tex {
    // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
    // hold an additional reference to the texture object.
    ID3D11Resource *res;

    ID3D11Texture1D *tex1d;
    ID3D11Texture2D *tex2d;
    ID3D11Texture3D *tex3d;
    // Array slice selected by the SRV and used as the subresource index for
    // uploads. Negative (-1) when the texture is not treated as an array.
    int array_slice;

    // Staging texture for tex_download(), 2D only
    ID3D11Texture2D *staging;

    // Views and sampler created by tex_init() according to the usage flags in
    // the texture's params. Entries that are not needed stay NULL.
    ID3D11ShaderResourceView *srv;
    ID3D11RenderTargetView *rtv;
    ID3D11UnorderedAccessView *uav;
    ID3D11SamplerState *sampler;
};
90 
// Backend-private data attached to a ra_buf through buf->priv
struct d3d_buf {
    ID3D11Buffer *buf;
    // Raw view of buf, only created for RA_BUF_TYPE_SHADER_STORAGE buffers
    ID3D11UnorderedAccessView *uav;
    // Only allocated for host_mutable buffers (see buf_create())
    void *data; // System-memory mirror of the data in buf
    // Set by buf_update(), cleared again by buf_resolve()
    bool dirty; // Is buf out of date?
};
97 
// D3D11 objects belonging to one renderpass.
// NOTE(review): the code that fills and uses this struct is not visible here;
// presumably ps/vs/layout/bstate serve raster passes and cs serves compute
// passes - confirm.
struct d3d_rpass {
    ID3D11PixelShader *ps;
    ID3D11VertexShader *vs;
    ID3D11ComputeShader *cs;
    ID3D11InputLayout *layout;
    ID3D11BlendState *bstate;
};
105 
// Query objects backing one timer.
// NOTE(review): the timer code is not visible here; presumably ts_start and
// ts_end are timestamp queries bracketed by a disjoint query - confirm.
struct d3d_timer {
    ID3D11Query *ts_start;
    ID3D11Query *ts_end;
    ID3D11Query *disjoint;
    uint64_t result; // Latches the result from the previous use of the timer
};
112 
// Static description of one texture format. setup_formats() turns every entry
// the device supports into a ra_format. The leading fields are filled
// positionally by the formats[] table, so their order must not change.
struct d3d_fmt {
    const char *name;     // Copied to ra_format.name
    int components;       // Copied to ra_format.num_components
    int bytes;            // Copied to ra_format.pixel_size
    int bits[4];          // Per-component size, copied to component_size/depth
    DXGI_FORMAT fmt;      // Format handed to the D3D11 API
    enum ra_ctype ctype;  // Copied to ra_format.ctype
    bool unordered;       // Inverted into ra_format.ordered
};
122 
// HLSL vertex shader for the clear() renderpass: forwards the 2D input
// position as SV_Position with z = 0 and w = 1
static const char clear_vs[] = "\
float4 main(float2 pos : POSITION) : SV_Position\n\
{\n\
    return float4(pos, 0.0, 1.0);\n\
}\n\
";
129 
// HLSL pixel shader for the clear() renderpass: outputs the colour stored in
// constant buffer slot b0 for every pixel
static const char clear_ps[] = "\
cbuffer ps_cbuf : register(b0) {\n\
    float4 color : packoffset(c0);\n\
}\n\
\n\
float4 main(float4 pos : SV_Position) : SV_Target\n\
{\n\
    return color;\n\
}\n\
";
140 
// One vertex with a 2D position (x, y) and a 2D texture coordinate (u, v).
// NOTE(review): presumably the vertex layout consumed by blit_vs (POSITION +
// TEXCOORD0) - confirm against the input layout setup.
struct blit_vert {
    float x, y, u, v;
};
144 
// HLSL vertex shader for the blit() renderpass: forwards the 2D position as
// SV_Position (z = 0, w = 1) and passes the texture coordinate through
static const char blit_vs[] = "\
void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\
          out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\
{\n\
    out_pos = float4(pos, 0.0, 1.0);\n\
    out_coord = coord;\n\
}\n\
";
153 
// HLSL pixel shader for the blit() renderpass: samples the float4 texture in
// slot t0 with the sampler in slot s0 at the interpolated coordinate
static const char blit_float_ps[] = "\
Texture2D<float4> tex : register(t0);\n\
SamplerState samp : register(s0);\n\
\n\
float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\
{\n\
    return tex.Sample(samp, coord);\n\
}\n\
";
163 
// DXFMT(f, t) fills in both the DXGI format (DXGI_FORMAT_<f>_<t>) and the
// matching RA component type (RA_CTYPE_<t>) of a table entry
#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t
// Candidate formats, probed against the device by setup_formats().
// Columns: name, component count, bytes per pixel, bits per component,
// DXGI format + component type, optional flags.
static struct d3d_fmt formats[] = {
    { "r8",       1,  1, { 8},             DXFMT(R8, UNORM)           },
    { "rg8",      2,  2, { 8,  8},         DXFMT(R8G8, UNORM)         },
    { "rgba8",    4,  4, { 8,  8,  8,  8}, DXFMT(R8G8B8A8, UNORM)     },
    { "r16",      1,  2, {16},             DXFMT(R16, UNORM)          },
    { "rg16",     2,  4, {16, 16},         DXFMT(R16G16, UNORM)       },
    { "rgba16",   4,  8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) },

    { "r32ui",    1,  4, {32},             DXFMT(R32, UINT)           },
    { "rg32ui",   2,  8, {32, 32},         DXFMT(R32G32, UINT)        },
    { "rgb32ui",  3, 12, {32, 32, 32},     DXFMT(R32G32B32, UINT)     },
    { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT)  },

    { "r16hf",    1,  2, {16},             DXFMT(R16, FLOAT)          },
    { "rg16hf",   2,  4, {16, 16},         DXFMT(R16G16, FLOAT)       },
    { "rgba16hf", 4,  8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) },
    { "r32f",     1,  4, {32},             DXFMT(R32, FLOAT)          },
    { "rg32f",    2,  8, {32, 32},         DXFMT(R32G32, FLOAT)       },
    { "rgb32f",   3, 12, {32, 32, 32},     DXFMT(R32G32B32, FLOAT)    },
    { "rgba32f",  4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) },

    // The entries below are flagged unordered, which setup_formats() reports
    // as ra_format.ordered == false
    { "rgb10_a2", 4,  4, {10, 10, 10,  2}, DXFMT(R10G10B10A2, UNORM)  },
    { "bgra8",    4,  4, { 8,  8,  8,  8}, DXFMT(B8G8R8A8, UNORM), .unordered = true },
    { "bgrx8",    3,  4, { 8,  8,  8},     DXFMT(B8G8R8X8, UNORM), .unordered = true },
};
190 
// Returns true only when all four components of a and b are identical
static bool dll_version_equal(struct dll_version a, struct dll_version b)
{
    if (a.major != b.major || a.minor != b.minor)
        return false;
    if (a.build != b.build)
        return false;
    return a.revision == b.revision;
}
198 
fmt_to_dxgi(const struct ra_format * fmt)199 static DXGI_FORMAT fmt_to_dxgi(const struct ra_format *fmt)
200 {
201     struct d3d_fmt *d3d = fmt->priv;
202     return d3d->fmt;
203 }
204 
setup_formats(struct ra * ra)205 static void setup_formats(struct ra *ra)
206 {
207     // All formats must be usable as a 2D texture
208     static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D;
209     // SHADER_SAMPLE indicates support for linear sampling, point always works
210     static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE;
211     // RA requires renderable surfaces to be blendable as well
212     static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET |
213                                    D3D11_FORMAT_SUPPORT_BLENDABLE;
214     // Typed UAVs are equivalent to images. RA only cares if they're storable.
215     static const UINT sup_store = D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW;
216     static const UINT sup2_store = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE;
217 
218     struct ra_d3d11 *p = ra->priv;
219     HRESULT hr;
220 
221     for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) {
222         struct d3d_fmt *d3dfmt = &formats[i];
223         UINT support = 0;
224         hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support);
225         if (FAILED(hr))
226             continue;
227         if ((support & sup_basic) != sup_basic)
228             continue;
229 
230         D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3dfmt->fmt };
231         ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2,
232                                          &sup2, sizeof(sup2));
233         UINT support2 = sup2.OutFormatSupport2;
234 
235         struct ra_format *fmt = talloc_zero(ra, struct ra_format);
236         *fmt = (struct ra_format) {
237             .name           = d3dfmt->name,
238             .priv           = d3dfmt,
239             .ctype          = d3dfmt->ctype,
240             .ordered        = !d3dfmt->unordered,
241             .num_components = d3dfmt->components,
242             .pixel_size     = d3dfmt->bytes,
243             .linear_filter  = (support & sup_filter) == sup_filter,
244             .renderable     = (support & sup_render) == sup_render,
245             .storable       = p->fl >= D3D_FEATURE_LEVEL_11_0 &&
246                               (support & sup_store) == sup_store &&
247                               (support2 & sup2_store) == sup2_store,
248         };
249 
250         if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D)
251             ra->caps |= RA_CAP_TEX_1D;
252 
253         for (int j = 0; j < d3dfmt->components; j++)
254             fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j];
255 
256         fmt->glsl_format = ra_fmt_glsl_format(fmt);
257 
258         MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
259     }
260 }
261 
tex_init(struct ra * ra,struct ra_tex * tex)262 static bool tex_init(struct ra *ra, struct ra_tex *tex)
263 {
264     struct ra_d3d11 *p = ra->priv;
265     struct d3d_tex *tex_p = tex->priv;
266     struct ra_tex_params *params = &tex->params;
267     HRESULT hr;
268 
269     // A SRV is required for renderpasses and blitting, since blitting can use
270     // a renderpass internally
271     if (params->render_src || params->blit_src) {
272         // Always specify the SRV format for simplicity. This will match the
273         // texture format for textures created with tex_create, but it can be
274         // different for wrapped planar video textures.
275         D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = {
276             .Format = fmt_to_dxgi(params->format),
277         };
278         switch (params->dimensions) {
279         case 1:
280             if (tex_p->array_slice >= 0) {
281                 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY;
282                 srvdesc.Texture1DArray.MipLevels = 1;
283                 srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
284                 srvdesc.Texture1DArray.ArraySize = 1;
285             } else {
286                 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
287                 srvdesc.Texture1D.MipLevels = 1;
288             }
289             break;
290         case 2:
291             if (tex_p->array_slice >= 0) {
292                 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
293                 srvdesc.Texture2DArray.MipLevels = 1;
294                 srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
295                 srvdesc.Texture2DArray.ArraySize = 1;
296             } else {
297                 srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
298                 srvdesc.Texture2D.MipLevels = 1;
299             }
300             break;
301         case 3:
302             // D3D11 does not have Texture3D arrays
303             srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
304             srvdesc.Texture3D.MipLevels = 1;
305             break;
306         }
307         hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc,
308                                                    &tex_p->srv);
309         if (FAILED(hr)) {
310             MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr));
311             goto error;
312         }
313     }
314 
315     // Samplers are required for renderpasses, but not blitting, since the blit
316     // code uses its own point sampler
317     if (params->render_src) {
318         D3D11_SAMPLER_DESC sdesc = {
319             .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
320             .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
321             .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
322             .ComparisonFunc = D3D11_COMPARISON_NEVER,
323             .MinLOD = 0,
324             .MaxLOD = D3D11_FLOAT32_MAX,
325             .MaxAnisotropy = 1,
326         };
327         if (params->src_linear)
328             sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
329         if (params->src_repeat) {
330             sdesc.AddressU = sdesc.AddressV = sdesc.AddressW =
331                 D3D11_TEXTURE_ADDRESS_WRAP;
332         }
333         // The runtime pools sampler state objects internally, so we don't have
334         // to worry about resource usage when creating one for every ra_tex
335         hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler);
336         if (FAILED(hr)) {
337             MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr));
338             goto error;
339         }
340     }
341 
342     // Like SRVs, an RTV is required for renderpass output and blitting
343     if (params->render_dst || params->blit_dst) {
344         hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL,
345                                                  &tex_p->rtv);
346         if (FAILED(hr)) {
347             MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr));
348             goto error;
349         }
350     }
351 
352     if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) {
353         hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL,
354                                                     &tex_p->uav);
355         if (FAILED(hr)) {
356             MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
357             goto error;
358         }
359     }
360 
361     return true;
362 error:
363     return false;
364 }
365 
tex_destroy(struct ra * ra,struct ra_tex * tex)366 static void tex_destroy(struct ra *ra, struct ra_tex *tex)
367 {
368     if (!tex)
369         return;
370     struct d3d_tex *tex_p = tex->priv;
371 
372     SAFE_RELEASE(tex_p->srv);
373     SAFE_RELEASE(tex_p->rtv);
374     SAFE_RELEASE(tex_p->uav);
375     SAFE_RELEASE(tex_p->sampler);
376     SAFE_RELEASE(tex_p->res);
377     SAFE_RELEASE(tex_p->staging);
378     talloc_free(tex);
379 }
380 
tex_create(struct ra * ra,const struct ra_tex_params * params)381 static struct ra_tex *tex_create(struct ra *ra,
382                                  const struct ra_tex_params *params)
383 {
384     // Only 2D textures may be downloaded for now
385     if (params->downloadable && params->dimensions != 2)
386         return NULL;
387 
388     struct ra_d3d11 *p = ra->priv;
389     HRESULT hr;
390 
391     struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
392     tex->params = *params;
393     tex->params.initial_data = NULL;
394 
395     struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
396     DXGI_FORMAT fmt = fmt_to_dxgi(params->format);
397 
398     D3D11_SUBRESOURCE_DATA data;
399     D3D11_SUBRESOURCE_DATA *pdata = NULL;
400     if (params->initial_data) {
401         data = (D3D11_SUBRESOURCE_DATA) {
402             .pSysMem = params->initial_data,
403             .SysMemPitch = params->w * params->format->pixel_size,
404         };
405         if (params->dimensions >= 3)
406             data.SysMemSlicePitch = data.SysMemPitch * params->h;
407         pdata = &data;
408     }
409 
410     D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
411     D3D11_BIND_FLAG bind_flags = 0;
412 
413     if (params->render_src || params->blit_src)
414         bind_flags |= D3D11_BIND_SHADER_RESOURCE;
415     if (params->render_dst || params->blit_dst)
416         bind_flags |= D3D11_BIND_RENDER_TARGET;
417     if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst)
418         bind_flags |= D3D11_BIND_UNORDERED_ACCESS;
419 
420     // Apparently IMMUTABLE textures are efficient, so try to infer whether we
421     // can use one
422     if (params->initial_data && !params->render_dst && !params->storage_dst &&
423         !params->blit_dst && !params->host_mutable)
424         usage = D3D11_USAGE_IMMUTABLE;
425 
426     switch (params->dimensions) {
427     case 1:;
428         D3D11_TEXTURE1D_DESC desc1d = {
429             .Width = params->w,
430             .MipLevels = 1,
431             .ArraySize = 1,
432             .Format = fmt,
433             .Usage = usage,
434             .BindFlags = bind_flags,
435         };
436         hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d);
437         if (FAILED(hr)) {
438             MP_ERR(ra, "Failed to create Texture1D: %s\n",
439                    mp_HRESULT_to_str(hr));
440             goto error;
441         }
442         tex_p->res = (ID3D11Resource *)tex_p->tex1d;
443         break;
444     case 2:;
445         D3D11_TEXTURE2D_DESC desc2d = {
446             .Width = params->w,
447             .Height = params->h,
448             .MipLevels = 1,
449             .ArraySize = 1,
450             .SampleDesc.Count = 1,
451             .Format = fmt,
452             .Usage = usage,
453             .BindFlags = bind_flags,
454         };
455         hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d);
456         if (FAILED(hr)) {
457             MP_ERR(ra, "Failed to create Texture2D: %s\n",
458                    mp_HRESULT_to_str(hr));
459             goto error;
460         }
461         tex_p->res = (ID3D11Resource *)tex_p->tex2d;
462 
463         // Create a staging texture with CPU access for tex_download()
464         if (params->downloadable) {
465             desc2d.BindFlags = 0;
466             desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
467             desc2d.Usage = D3D11_USAGE_STAGING;
468 
469             hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL,
470                                               &tex_p->staging);
471             if (FAILED(hr)) {
472                 MP_ERR(ra, "Failed to staging texture: %s\n",
473                        mp_HRESULT_to_str(hr));
474                 goto error;
475             }
476         }
477         break;
478     case 3:;
479         D3D11_TEXTURE3D_DESC desc3d = {
480             .Width = params->w,
481             .Height = params->h,
482             .Depth = params->d,
483             .MipLevels = 1,
484             .Format = fmt,
485             .Usage = usage,
486             .BindFlags = bind_flags,
487         };
488         hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d);
489         if (FAILED(hr)) {
490             MP_ERR(ra, "Failed to create Texture3D: %s\n",
491                    mp_HRESULT_to_str(hr));
492             goto error;
493         }
494         tex_p->res = (ID3D11Resource *)tex_p->tex3d;
495         break;
496     default:
497         abort();
498     }
499 
500     tex_p->array_slice = -1;
501 
502     if (!tex_init(ra, tex))
503         goto error;
504 
505     return tex;
506 
507 error:
508     tex_destroy(ra, tex);
509     return NULL;
510 }
511 
// Wraps an existing D3D11 resource in a new ra_tex. Only single-sample,
// non-array, non-mipmapped 2D textures with D3D11_USAGE_DEFAULT and a format
// known to ra->formats are accepted. The usage flags of the ra_tex are derived
// from the bind flags of the resource. Returns NULL (after logging) when the
// resource cannot be wrapped.
struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
{
    HRESULT hr;

    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
    struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
    struct ra_tex_params *params = &tex->params;

    // Texture1D/3D could be wrapped too, but this is kept simple because the
    // function is only used for swapchain backbuffers at the moment
    D3D11_RESOURCE_DIMENSION dim;
    ID3D11Resource_GetType(res, &dim);
    if (dim != D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
        MP_ERR(ra, "Resource is not suitable to wrap\n");
        goto error;
    }

    // QueryInterface hands out the reference that tex_destroy() later drops
    // through tex_p->res
    hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D,
                                       (void**)&tex_p->tex2d);
    if (FAILED(hr)) {
        MP_ERR(ra, "Resource is not a ID3D11Texture2D\n");
        goto error;
    }
    tex_p->res = (ID3D11Resource *)tex_p->tex2d;

    D3D11_TEXTURE2D_DESC desc;
    ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc);
    if (desc.MipLevels != 1) {
        MP_ERR(ra, "Mipmapped textures not supported for wrapping\n");
        goto error;
    }
    if (desc.ArraySize != 1) {
        MP_ERR(ra, "Texture arrays not supported for wrapping\n");
        goto error;
    }
    if (desc.SampleDesc.Count != 1) {
        MP_ERR(ra, "Multisampled textures not supported for wrapping\n");
        goto error;
    }

    params->dimensions = 2;
    params->w = desc.Width;
    params->h = desc.Height;
    params->d = 1;

    // Pick the first RA format backed by the same DXGI format
    for (int i = 0; i < ra->num_formats && !params->format; i++) {
        if (fmt_to_dxgi(ra->formats[i]) == desc.Format)
            params->format = ra->formats[i];
    }
    if (!params->format) {
        MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n");
        goto error;
    }

    // Translate bind flags into RA usage flags
    const D3D11_BIND_FLAG binds = desc.BindFlags;
    if (binds & D3D11_BIND_SHADER_RESOURCE) {
        params->render_src = true;
        params->blit_src = true;
        params->src_linear = params->format->linear_filter;
    }
    if (binds & D3D11_BIND_RENDER_TARGET) {
        params->render_dst = true;
        params->blit_dst = true;
    }
    if (binds & D3D11_BIND_UNORDERED_ACCESS)
        params->storage_dst = true;

    if (desc.Usage != D3D11_USAGE_DEFAULT) {
        MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n");
        goto error;
    }

    tex_p->array_slice = -1;

    if (tex_init(ra, tex))
        return tex;

error:
    tex_destroy(ra, tex);
    return NULL;
}
602 
// Wraps a video texture (or one slice of a video texture array) in a
// render-source-only ra_tex of size w x h. fmt becomes the format of the
// ra_tex and of its SRV. array_slice is only honoured when the texture really
// is an array. The wrapper takes its own reference on res. Returns NULL
// (after logging) on failure.
struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
                                       int w, int h, int array_slice,
                                       const struct ra_format *fmt)
{
    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
    struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);

    // Take the reference first, so that tex_destroy() balances it on every
    // exit path
    ID3D11Texture2D_AddRef(res);
    tex_p->tex2d = res;
    tex_p->res = (ID3D11Resource *)res;

    D3D11_TEXTURE2D_DESC desc;
    ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc);
    if (!(desc.BindFlags & D3D11_BIND_SHADER_RESOURCE)) {
        MP_ERR(ra, "Video resource is not bindable\n");
        goto error;
    }

    // tex was zero-allocated, so all remaining params stay at zero
    tex->params = (struct ra_tex_params) {
        .dimensions = 2,
        .w = w,
        .h = h,
        .d = 1,
        .render_src = true,
        .src_linear = true,
        // fmt can be different to the texture format for planar video textures
        .format = fmt,
    };

    tex_p->array_slice = desc.ArraySize > 1 ? array_slice : -1;

    if (tex_init(ra, tex))
        return tex;

error:
    tex_destroy(ra, tex);
    return NULL;
}
645 
tex_upload(struct ra * ra,const struct ra_tex_upload_params * params)646 static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params)
647 {
648     struct ra_d3d11 *p = ra->priv;
649     struct ra_tex *tex = params->tex;
650     struct d3d_tex *tex_p = tex->priv;
651 
652     if (!params->src) {
653         MP_ERR(ra, "Pixel buffers are not supported\n");
654         return false;
655     }
656 
657     const char *src = params->src;
658     ptrdiff_t stride = tex->params.dimensions >= 2 ? tex->params.w : 0;
659     ptrdiff_t pitch = tex->params.dimensions >= 3 ? stride * tex->params.h : 0;
660     bool invalidate = true;
661     D3D11_BOX rc;
662     D3D11_BOX *prc = NULL;
663 
664     if (tex->params.dimensions == 2) {
665         stride = params->stride;
666 
667         if (params->rc && (params->rc->x0 != 0 || params->rc->y0 != 0 ||
668             params->rc->x1 != tex->params.w || params->rc->y1 != tex->params.h))
669         {
670             rc = (D3D11_BOX) {
671                 .left = params->rc->x0,
672                 .top = params->rc->y0,
673                 .front = 0,
674                 .right = params->rc->x1,
675                 .bottom = params->rc->y1,
676                 .back = 1,
677             };
678             prc = &rc;
679             invalidate = params->invalidate;
680         }
681     }
682 
683     int subresource = tex_p->array_slice >= 0 ? tex_p->array_slice : 0;
684     if (p->ctx1) {
685         ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res,
686             subresource, prc, src, stride, pitch,
687             invalidate ? D3D11_COPY_DISCARD : 0);
688     } else {
689         ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource,
690             prc, src, stride, pitch);
691     }
692 
693     return true;
694 }
695 
tex_download(struct ra * ra,struct ra_tex_download_params * params)696 static bool tex_download(struct ra *ra, struct ra_tex_download_params *params)
697 {
698     struct ra_d3d11 *p = ra->priv;
699     struct ra_tex *tex = params->tex;
700     struct d3d_tex *tex_p = tex->priv;
701     HRESULT hr;
702 
703     if (!tex_p->staging)
704         return false;
705 
706     ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource*)tex_p->staging,
707         tex_p->res);
708 
709     D3D11_MAPPED_SUBRESOURCE lock;
710     hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource*)tex_p->staging, 0,
711                                  D3D11_MAP_READ, 0, &lock);
712     if (FAILED(hr)) {
713         MP_ERR(ra, "Failed to map staging texture: %s\n", mp_HRESULT_to_str(hr));
714         return false;
715     }
716 
717     char *cdst = params->dst;
718     char *csrc = lock.pData;
719     for (int y = 0; y < tex->params.h; y++) {
720         memcpy(cdst + y * params->stride, csrc + y * lock.RowPitch,
721                MPMIN(params->stride, lock.RowPitch));
722     }
723 
724     ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource*)tex_p->staging, 0);
725 
726     return true;
727 }
728 
buf_destroy(struct ra * ra,struct ra_buf * buf)729 static void buf_destroy(struct ra *ra, struct ra_buf *buf)
730 {
731     if (!buf)
732         return;
733     struct d3d_buf *buf_p = buf->priv;
734     SAFE_RELEASE(buf_p->buf);
735     SAFE_RELEASE(buf_p->uav);
736     talloc_free(buf);
737 }
738 
buf_create(struct ra * ra,const struct ra_buf_params * params)739 static struct ra_buf *buf_create(struct ra *ra,
740                                  const struct ra_buf_params *params)
741 {
742     // D3D11 does not support permanent mapping or pixel buffers
743     if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD)
744         return NULL;
745 
746     struct ra_d3d11 *p = ra->priv;
747     HRESULT hr;
748 
749     struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
750     buf->params = *params;
751     buf->params.initial_data = NULL;
752 
753     struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf);
754 
755     D3D11_SUBRESOURCE_DATA data;
756     D3D11_SUBRESOURCE_DATA *pdata = NULL;
757     if (params->initial_data) {
758         data = (D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data };
759         pdata = &data;
760     }
761 
762     D3D11_BUFFER_DESC desc = { .ByteWidth = params->size };
763     switch (params->type) {
764     case RA_BUF_TYPE_SHADER_STORAGE:
765         desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
766         desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float));
767         desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
768         break;
769     case RA_BUF_TYPE_UNIFORM:
770         desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
771         desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4]));
772         break;
773     }
774 
775     hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf);
776     if (FAILED(hr)) {
777         MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr));
778         goto error;
779     }
780 
781     // D3D11 doesn't allow constant buffer updates that aren't aligned to a
782     // full constant boundary (vec4,) and some drivers don't allow partial
783     // constant buffer updates at all. To support partial buffer updates, keep
784     // a mirror of the buffer data in system memory and upload the whole thing
785     // before the buffer is used.
786     if (params->host_mutable)
787         buf_p->data = talloc_zero_size(buf, desc.ByteWidth);
788 
789     if (params->type == RA_BUF_TYPE_SHADER_STORAGE) {
790         D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = {
791             .Format = DXGI_FORMAT_R32_TYPELESS,
792             .ViewDimension = D3D11_UAV_DIMENSION_BUFFER,
793             .Buffer = {
794                 .NumElements = desc.ByteWidth / sizeof(float),
795                 .Flags = D3D11_BUFFER_UAV_FLAG_RAW,
796             },
797         };
798         hr = ID3D11Device_CreateUnorderedAccessView(p->dev,
799             (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav);
800         if (FAILED(hr)) {
801             MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
802             goto error;
803         }
804     }
805 
806     return buf;
807 error:
808     buf_destroy(ra, buf);
809     return NULL;
810 }
811 
buf_resolve(struct ra * ra,struct ra_buf * buf)812 static void buf_resolve(struct ra *ra, struct ra_buf *buf)
813 {
814     struct ra_d3d11 *p = ra->priv;
815     struct d3d_buf *buf_p = buf->priv;
816 
817     if (!buf->params.host_mutable || !buf_p->dirty)
818         return;
819 
820     // Synchronize the GPU buffer with the system-memory copy
821     ID3D11DeviceContext_UpdateSubresource(p->ctx, (ID3D11Resource *)buf_p->buf,
822         0, NULL, buf_p->data, 0, 0);
823     buf_p->dirty = false;
824 }
825 
buf_update(struct ra * ra,struct ra_buf * buf,ptrdiff_t offset,const void * data,size_t size)826 static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
827                        const void *data, size_t size)
828 {
829     struct d3d_buf *buf_p = buf->priv;
830 
831     char *cdata = buf_p->data;
832     memcpy(cdata + offset, data, size);
833     buf_p->dirty = true;
834 }
835 
get_shader_target(struct ra * ra,enum glsl_shader type)836 static const char *get_shader_target(struct ra *ra, enum glsl_shader type)
837 {
838     struct ra_d3d11 *p = ra->priv;
839     switch (p->fl) {
840     default:
841         switch (type) {
842         case GLSL_SHADER_VERTEX:   return "vs_5_0";
843         case GLSL_SHADER_FRAGMENT: return "ps_5_0";
844         case GLSL_SHADER_COMPUTE:  return "cs_5_0";
845         }
846         break;
847     case D3D_FEATURE_LEVEL_10_1:
848         switch (type) {
849         case GLSL_SHADER_VERTEX:   return "vs_4_1";
850         case GLSL_SHADER_FRAGMENT: return "ps_4_1";
851         case GLSL_SHADER_COMPUTE:  return "cs_4_1";
852         }
853         break;
854     case D3D_FEATURE_LEVEL_10_0:
855         switch (type) {
856         case GLSL_SHADER_VERTEX:   return "vs_4_0";
857         case GLSL_SHADER_FRAGMENT: return "ps_4_0";
858         case GLSL_SHADER_COMPUTE:  return "cs_4_0";
859         }
860         break;
861     case D3D_FEATURE_LEVEL_9_3:
862         switch (type) {
863         case GLSL_SHADER_VERTEX:   return "vs_4_0_level_9_3";
864         case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3";
865         }
866         break;
867     case D3D_FEATURE_LEVEL_9_2:
868     case D3D_FEATURE_LEVEL_9_1:
869         switch (type) {
870         case GLSL_SHADER_VERTEX:   return "vs_4_0_level_9_1";
871         case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1";
872         }
873         break;
874     }
875     return NULL;
876 }
877 
shader_type_name(enum glsl_shader type)878 static const char *shader_type_name(enum glsl_shader type)
879 {
880     switch (type) {
881     case GLSL_SHADER_VERTEX:   return "vertex";
882     case GLSL_SHADER_FRAGMENT: return "fragment";
883     case GLSL_SHADER_COMPUTE:  return "compute";
884     default:                   return "unknown";
885     }
886 }
887 
setup_clear_rpass(struct ra * ra)888 static bool setup_clear_rpass(struct ra *ra)
889 {
890     struct ra_d3d11 *p = ra->priv;
891     ID3DBlob *vs_blob = NULL;
892     ID3DBlob *ps_blob = NULL;
893     HRESULT hr;
894 
895     hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main",
896         get_shader_target(ra, GLSL_SHADER_VERTEX),
897         D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
898     if (FAILED(hr)) {
899         MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n",
900                mp_HRESULT_to_str(hr));
901         goto error;
902     }
903 
904     hr = ID3D11Device_CreateVertexShader(p->dev,
905         ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
906         NULL, &p->clear_vs);
907     if (FAILED(hr)) {
908         MP_ERR(ra, "Failed to create clear() vertex shader: %s\n",
909                mp_HRESULT_to_str(hr));
910         goto error;
911     }
912 
913     hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main",
914         get_shader_target(ra, GLSL_SHADER_FRAGMENT),
915         D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL);
916     if (FAILED(hr)) {
917         MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n",
918                mp_HRESULT_to_str(hr));
919         goto error;
920     }
921 
922     hr = ID3D11Device_CreatePixelShader(p->dev,
923         ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob),
924         NULL, &p->clear_ps);
925     if (FAILED(hr)) {
926         MP_ERR(ra, "Failed to create clear() pixel shader: %s\n",
927                mp_HRESULT_to_str(hr));
928         goto error;
929     }
930 
931     D3D11_INPUT_ELEMENT_DESC in_descs[] = {
932         { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
933     };
934     hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
935         MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
936         ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout);
937     if (FAILED(hr)) {
938         MP_ERR(ra, "Failed to create clear() IA layout: %s\n",
939                mp_HRESULT_to_str(hr));
940         goto error;
941     }
942 
943     // clear() always draws to a quad covering the whole viewport
944     static const float verts[] = {
945         -1, -1,
946          1, -1,
947          1,  1,
948         -1,  1,
949         -1, -1,
950          1,  1,
951     };
952     D3D11_BUFFER_DESC vdesc = {
953         .ByteWidth = sizeof(verts),
954         .Usage = D3D11_USAGE_IMMUTABLE,
955         .BindFlags = D3D11_BIND_VERTEX_BUFFER,
956     };
957     D3D11_SUBRESOURCE_DATA vdata = {
958         .pSysMem = verts,
959     };
960     hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf);
961     if (FAILED(hr)) {
962         MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n",
963                mp_HRESULT_to_str(hr));
964         goto error;
965     }
966 
967     D3D11_BUFFER_DESC cdesc = {
968         .ByteWidth = sizeof(float[4]),
969         .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
970     };
971     hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf);
972     if (FAILED(hr)) {
973         MP_ERR(ra, "Failed to create clear() constant buffer: %s\n",
974                mp_HRESULT_to_str(hr));
975         goto error;
976     }
977 
978     SAFE_RELEASE(vs_blob);
979     SAFE_RELEASE(ps_blob);
980     return true;
981 error:
982     SAFE_RELEASE(vs_blob);
983     SAFE_RELEASE(ps_blob);
984     return false;
985 }
986 
clear_rpass(struct ra * ra,struct ra_tex * tex,float color[4],struct mp_rect * rc)987 static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4],
988                         struct mp_rect *rc)
989 {
990     struct ra_d3d11 *p = ra->priv;
991     struct d3d_tex *tex_p = tex->priv;
992     struct ra_tex_params *params = &tex->params;
993 
994     ID3D11DeviceContext_UpdateSubresource(p->ctx,
995         (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0);
996 
997     ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout);
998     ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf,
999         &(UINT) { sizeof(float[2]) }, &(UINT) { 0 });
1000     ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
1001         D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
1002 
1003     ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0);
1004 
1005     ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
1006         .Width = params->w,
1007         .Height = params->h,
1008         .MinDepth = 0,
1009         .MaxDepth = 1,
1010     }));
1011     ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
1012         .left = rc->x0,
1013         .top = rc->y0,
1014         .right = rc->x1,
1015         .bottom = rc->y1,
1016     }));
1017     ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0);
1018     ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf);
1019 
1020     ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL);
1021     ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
1022                                         D3D11_DEFAULT_SAMPLE_MASK);
1023 
1024     ID3D11DeviceContext_Draw(p->ctx, 6, 0);
1025 
1026     ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1,
1027         &(ID3D11Buffer *){ NULL });
1028     ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
1029 }
1030 
clear(struct ra * ra,struct ra_tex * tex,float color[4],struct mp_rect * rc)1031 static void clear(struct ra *ra, struct ra_tex *tex, float color[4],
1032                   struct mp_rect *rc)
1033 {
1034     struct ra_d3d11 *p = ra->priv;
1035     struct d3d_tex *tex_p = tex->priv;
1036     struct ra_tex_params *params = &tex->params;
1037 
1038     if (!tex_p->rtv)
1039         return;
1040 
1041     if (rc->x0 || rc->y0 || rc->x1 != params->w || rc->y1 != params->h) {
1042         if (p->has_clear_view) {
1043             ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv,
1044                 color, (&(D3D11_RECT) {
1045                     .left = rc->x0,
1046                     .top = rc->y0,
1047                     .right = rc->x1,
1048                     .bottom = rc->y1,
1049                 }), 1);
1050         } else {
1051             clear_rpass(ra, tex, color, rc);
1052         }
1053     } else {
1054         ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color);
1055     }
1056 }
1057 
setup_blit_rpass(struct ra * ra)1058 static bool setup_blit_rpass(struct ra *ra)
1059 {
1060     struct ra_d3d11 *p = ra->priv;
1061     ID3DBlob *vs_blob = NULL;
1062     ID3DBlob *float_ps_blob = NULL;
1063     HRESULT hr;
1064 
1065     hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main",
1066         get_shader_target(ra, GLSL_SHADER_VERTEX),
1067         D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
1068     if (FAILED(hr)) {
1069         MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n",
1070                mp_HRESULT_to_str(hr));
1071         goto error;
1072     }
1073 
1074     hr = ID3D11Device_CreateVertexShader(p->dev,
1075         ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
1076         NULL, &p->blit_vs);
1077     if (FAILED(hr)) {
1078         MP_ERR(ra, "Failed to create blit() vertex shader: %s\n",
1079                mp_HRESULT_to_str(hr));
1080         goto error;
1081     }
1082 
1083     hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, NULL, NULL,
1084         "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT),
1085         D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL);
1086     if (FAILED(hr)) {
1087         MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n",
1088                mp_HRESULT_to_str(hr));
1089         goto error;
1090     }
1091 
1092     hr = ID3D11Device_CreatePixelShader(p->dev,
1093         ID3D10Blob_GetBufferPointer(float_ps_blob),
1094         ID3D10Blob_GetBufferSize(float_ps_blob),
1095         NULL, &p->blit_float_ps);
1096     if (FAILED(hr)) {
1097         MP_ERR(ra, "Failed to create blit() pixel shader: %s\n",
1098                mp_HRESULT_to_str(hr));
1099         goto error;
1100     }
1101 
1102     D3D11_INPUT_ELEMENT_DESC in_descs[] = {
1103         { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
1104         { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 },
1105     };
1106     hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
1107         MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
1108         ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout);
1109     if (FAILED(hr)) {
1110         MP_ERR(ra, "Failed to create blit() IA layout: %s\n",
1111                mp_HRESULT_to_str(hr));
1112         goto error;
1113     }
1114 
1115     D3D11_BUFFER_DESC vdesc = {
1116         .ByteWidth = sizeof(struct blit_vert[6]),
1117         .Usage = D3D11_USAGE_DEFAULT,
1118         .BindFlags = D3D11_BIND_VERTEX_BUFFER,
1119     };
1120     hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf);
1121     if (FAILED(hr)) {
1122         MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n",
1123                mp_HRESULT_to_str(hr));
1124         goto error;
1125     }
1126 
1127     // Blit always uses point sampling, regardless of the source texture
1128     D3D11_SAMPLER_DESC sdesc = {
1129         .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
1130         .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
1131         .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
1132         .ComparisonFunc = D3D11_COMPARISON_NEVER,
1133         .MinLOD = 0,
1134         .MaxLOD = D3D11_FLOAT32_MAX,
1135         .MaxAnisotropy = 1,
1136     };
1137     hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->blit_sampler);
1138     if (FAILED(hr)) {
1139         MP_ERR(ra, "Failed to create blit() sampler: %s\n",
1140                mp_HRESULT_to_str(hr));
1141         goto error;
1142     }
1143 
1144     SAFE_RELEASE(vs_blob);
1145     SAFE_RELEASE(float_ps_blob);
1146     return true;
1147 error:
1148     SAFE_RELEASE(vs_blob);
1149     SAFE_RELEASE(float_ps_blob);
1150     return false;
1151 }
1152 
blit_rpass(struct ra * ra,struct ra_tex * dst,struct ra_tex * src,struct mp_rect * dst_rc,struct mp_rect * src_rc)1153 static void blit_rpass(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
1154                        struct mp_rect *dst_rc, struct mp_rect *src_rc)
1155 {
1156     struct ra_d3d11 *p = ra->priv;
1157     struct d3d_tex *dst_p = dst->priv;
1158     struct d3d_tex *src_p = src->priv;
1159 
1160     float u_min = (double)src_rc->x0 / src->params.w;
1161     float u_max = (double)src_rc->x1 / src->params.w;
1162     float v_min = (double)src_rc->y0 / src->params.h;
1163     float v_max = (double)src_rc->y1 / src->params.h;
1164 
1165     struct blit_vert verts[6] = {
1166         { .x = -1, .y = -1, .u = u_min, .v = v_max },
1167         { .x =  1, .y = -1, .u = u_max, .v = v_max },
1168         { .x =  1, .y =  1, .u = u_max, .v = v_min },
1169         { .x = -1, .y =  1, .u = u_min, .v = v_min },
1170     };
1171     verts[4] = verts[0];
1172     verts[5] = verts[2];
1173     ID3D11DeviceContext_UpdateSubresource(p->ctx,
1174         (ID3D11Resource *)p->blit_vbuf, 0, NULL, verts, 0, 0);
1175 
1176     ID3D11DeviceContext_IASetInputLayout(p->ctx, p->blit_layout);
1177     ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->blit_vbuf,
1178         &(UINT) { sizeof(verts[0]) }, &(UINT) { 0 });
1179     ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
1180         D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
1181 
1182     ID3D11DeviceContext_VSSetShader(p->ctx, p->blit_vs, NULL, 0);
1183 
1184     ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
1185         .TopLeftX = dst_rc->x0,
1186         .TopLeftY = dst_rc->y0,
1187         .Width = mp_rect_w(*dst_rc),
1188         .Height = mp_rect_h(*dst_rc),
1189         .MinDepth = 0,
1190         .MaxDepth = 1,
1191     }));
1192     ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
1193         .left = dst_rc->x0,
1194         .top = dst_rc->y0,
1195         .right = dst_rc->x1,
1196         .bottom = dst_rc->y1,
1197     }));
1198 
1199     ID3D11DeviceContext_PSSetShader(p->ctx, p->blit_float_ps, NULL, 0);
1200     ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, &src_p->srv);
1201     ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, &p->blit_sampler);
1202 
1203     ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &dst_p->rtv, NULL);
1204     ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
1205                                         D3D11_DEFAULT_SAMPLE_MASK);
1206 
1207     ID3D11DeviceContext_Draw(p->ctx, 6, 0);
1208 
1209     ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1,
1210         &(ID3D11ShaderResourceView *) { NULL });
1211     ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1,
1212         &(ID3D11SamplerState *) { NULL });
1213     ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
1214 }
1215 
blit(struct ra * ra,struct ra_tex * dst,struct ra_tex * src,struct mp_rect * dst_rc_ptr,struct mp_rect * src_rc_ptr)1216 static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
1217                  struct mp_rect *dst_rc_ptr, struct mp_rect *src_rc_ptr)
1218 {
1219     struct ra_d3d11 *p = ra->priv;
1220     struct d3d_tex *dst_p = dst->priv;
1221     struct d3d_tex *src_p = src->priv;
1222     struct mp_rect dst_rc = *dst_rc_ptr;
1223     struct mp_rect src_rc = *src_rc_ptr;
1224 
1225     assert(dst->params.dimensions == 2);
1226     assert(src->params.dimensions == 2);
1227 
1228     // A zero-sized target rectangle is a no-op
1229     if (!mp_rect_w(dst_rc) || !mp_rect_h(dst_rc))
1230         return;
1231 
1232     // ra.h seems to imply that both dst_rc and src_rc can be flipped, but it's
1233     // easier for blit_rpass() if only src_rc can be flipped, so unflip dst_rc.
1234     if (dst_rc.x0 > dst_rc.x1) {
1235         MPSWAP(int, dst_rc.x0, dst_rc.x1);
1236         MPSWAP(int, src_rc.x0, src_rc.x1);
1237     }
1238     if (dst_rc.y0 > dst_rc.y1) {
1239         MPSWAP(int, dst_rc.y0, dst_rc.y1);
1240         MPSWAP(int, src_rc.y0, src_rc.y1);
1241     }
1242 
1243     // If format conversion, stretching or flipping is required, a renderpass
1244     // must be used
1245     if (dst->params.format != src->params.format ||
1246         mp_rect_w(dst_rc) != mp_rect_w(src_rc) ||
1247         mp_rect_h(dst_rc) != mp_rect_h(src_rc))
1248     {
1249         blit_rpass(ra, dst, src, &dst_rc, &src_rc);
1250     } else {
1251         int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0;
1252         int src_sr = src_p->array_slice >= 0 ? src_p->array_slice : 0;
1253         ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr,
1254             dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) {
1255                 .left = src_rc.x0,
1256                 .top = src_rc.y0,
1257                 .front = 0,
1258                 .right = src_rc.x1,
1259                 .bottom = src_rc.y1,
1260                 .back = 1,
1261             }));
1262     }
1263 }
1264 
desc_namespace(struct ra * ra,enum ra_vartype type)1265 static int desc_namespace(struct ra *ra, enum ra_vartype type)
1266 {
1267     // Images and SSBOs both use UAV bindings
1268     if (type == RA_VARTYPE_IMG_W)
1269         type = RA_VARTYPE_BUF_RW;
1270     return type;
1271 }
1272 
// Compile a GLSL shader to D3D bytecode in three steps: GLSL -> SPIR-V (via
// the configured spirv compiler), SPIR-V -> HLSL (via SPIRV-Cross) and
// HLSL -> bytecode (via D3DCompile).
//
//   type: shader stage being compiled
//   glsl: GLSL source text
//   out:  receives the bytecode blob from D3DCompile; only meaningful when
//         true is returned, and the caller then owns the reference
//
// Returns true on success. On failure an error is logged and the GLSL source
// (plus the generated HLSL, if that stage was reached) is dumped at error
// level; on success the sources are dumped at debug level.
static bool compile_glsl(struct ra *ra, enum glsl_shader type,
                         const char *glsl, ID3DBlob **out)
{
    struct ra_d3d11 *p = ra->priv;
    struct spirv_compiler *spirv = p->spirv;
    void *ta_ctx = talloc_new(NULL);
    spvc_result sc_res = SPVC_SUCCESS;
    spvc_context sc_ctx = NULL;
    spvc_parsed_ir sc_ir = NULL;
    spvc_compiler sc_compiler = NULL;
    spvc_compiler_options sc_opts = NULL;
    const char *hlsl = NULL;
    ID3DBlob *errors = NULL;
    bool success = false;
    HRESULT hr;

    // Shader model requested from SPIRV-Cross, matching the feature level
    int sc_shader_model;
    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
        sc_shader_model = 50;
    } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) {
        sc_shader_model = 41;
    } else {
        sc_shader_model = 40;
    }

    // get_shader_target() returns NULL when the feature level has no profile
    // for this stage. Fail early instead of handing NULL to D3DCompile.
    const char *target = get_shader_target(ra, type);
    if (!target) {
        MP_ERR(ra, "No shader target for %s shaders at this feature level\n",
               shader_type_name(type));
        goto done;
    }

    int64_t start_us = mp_time_us();

    bstr spv_module;
    if (!spirv->fns->compile_glsl(spirv, ta_ctx, type, glsl, &spv_module))
        goto done;

    int64_t shaderc_us = mp_time_us();

    sc_res = spvc_context_create(&sc_ctx);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_context_parse_spirv(sc_ctx, (SpvId *)spv_module.start,
                                      spv_module.len / sizeof(SpvId), &sc_ir);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_context_create_compiler(sc_ctx, SPVC_BACKEND_HLSL, sc_ir,
                                          SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
                                          &sc_compiler);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_compiler_create_compiler_options(sc_compiler, &sc_opts);
    if (sc_res != SPVC_SUCCESS)
        goto done;
    sc_res = spvc_compiler_options_set_uint(sc_opts,
        SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model);
    if (sc_res != SPVC_SUCCESS)
        goto done;
    if (type == GLSL_SHADER_VERTEX) {
        // FLIP_VERTEX_Y is only valid for vertex shaders
        sc_res = spvc_compiler_options_set_bool(sc_opts,
            SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE);
        if (sc_res != SPVC_SUCCESS)
            goto done;
    }
    sc_res = spvc_compiler_install_compiler_options(sc_compiler, sc_opts);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    sc_res = spvc_compiler_compile(sc_compiler, &hlsl);
    if (sc_res != SPVC_SUCCESS)
        goto done;

    int64_t cross_us = mp_time_us();

    hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main",
        target, D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out, &errors);
    if (FAILED(hr)) {
        // D3DCompile can fail without producing an error blob (for example
        // on invalid arguments), so don't dereference it unconditionally
        const char *err_text = "";
        int err_len = 0;
        if (errors) {
            err_text = ID3D10Blob_GetBufferPointer(errors);
            err_len = (int)ID3D10Blob_GetBufferSize(errors);
        }
        MP_ERR(ra, "D3DCompile failed: %s\n%.*s", mp_HRESULT_to_str(hr),
               err_len, err_text);
        goto done;
    }

    int64_t d3dcompile_us = mp_time_us();

    // The casts keep the arguments in sync with %lld regardless of how
    // int64_t is defined on the target
    MP_VERBOSE(ra, "Compiled a %s shader in %lldus\n", shader_type_name(type),
               (long long)(d3dcompile_us - start_us));
    MP_VERBOSE(ra, "shaderc: %lldus, SPIRV-Cross: %lldus, D3DCompile: %lldus\n",
               (long long)(shaderc_us - start_us),
               (long long)(cross_us - shaderc_us),
               (long long)(d3dcompile_us - cross_us));

    success = true;
done:
    if (sc_res != SPVC_SUCCESS) {
        // If creating the context itself failed there is no context to ask
        // for an error string
        MP_MSG(ra, MSGL_ERR, "SPIRV-Cross failed: %s\n",
               sc_ctx ? spvc_context_get_last_error_string(sc_ctx)
                      : "could not create context");
    }
    int level = success ? MSGL_DEBUG : MSGL_ERR;
    MP_MSG(ra, level, "GLSL source:\n");
    mp_log_source(ra->log, level, glsl);
    if (hlsl) {
        MP_MSG(ra, level, "HLSL source:\n");
        mp_log_source(ra->log, level, hlsl);
    }
    SAFE_RELEASE(errors);
    // Destroying the context happens only after the HLSL text was logged
    if (sc_ctx)
        spvc_context_destroy(sc_ctx);
    talloc_free(ta_ctx);
    return success;
}
1383 
renderpass_destroy(struct ra * ra,struct ra_renderpass * pass)1384 static void renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
1385 {
1386     if (!pass)
1387         return;
1388     struct d3d_rpass *pass_p = pass->priv;
1389 
1390     SAFE_RELEASE(pass_p->vs);
1391     SAFE_RELEASE(pass_p->ps);
1392     SAFE_RELEASE(pass_p->cs);
1393     SAFE_RELEASE(pass_p->layout);
1394     SAFE_RELEASE(pass_p->bstate);
1395     talloc_free(pass);
1396 }
1397 
map_ra_blend(enum ra_blend blend)1398 static D3D11_BLEND map_ra_blend(enum ra_blend blend)
1399 {
1400     switch (blend) {
1401     default:
1402     case RA_BLEND_ZERO:                return D3D11_BLEND_ZERO;
1403     case RA_BLEND_ONE:                 return D3D11_BLEND_ONE;
1404     case RA_BLEND_SRC_ALPHA:           return D3D11_BLEND_SRC_ALPHA;
1405     case RA_BLEND_ONE_MINUS_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA;
1406     };
1407 }
1408 
vbuf_upload(struct ra * ra,void * data,size_t size)1409 static size_t vbuf_upload(struct ra *ra, void *data, size_t size)
1410 {
1411     struct ra_d3d11 *p = ra->priv;
1412     HRESULT hr;
1413 
1414     // Arbitrary size limit in case there is an insane number of vertices
1415     if (size > 1e9) {
1416         MP_ERR(ra, "Vertex buffer is too large\n");
1417         return -1;
1418     }
1419 
1420     // If the vertex data doesn't fit, realloc the vertex buffer
1421     if (size > p->vbuf_size) {
1422         size_t new_size = p->vbuf_size;
1423         // Arbitrary base size
1424         if (!new_size)
1425             new_size = 64 * 1024;
1426         while (new_size < size)
1427             new_size *= 2;
1428 
1429         ID3D11Buffer *new_buf;
1430         D3D11_BUFFER_DESC vbuf_desc = {
1431             .ByteWidth = new_size,
1432             .Usage = D3D11_USAGE_DYNAMIC,
1433             .BindFlags = D3D11_BIND_VERTEX_BUFFER,
1434             .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
1435         };
1436         hr = ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf);
1437         if (FAILED(hr)) {
1438             MP_ERR(ra, "Failed to create vertex buffer: %s\n",
1439                    mp_HRESULT_to_str(hr));
1440             return -1;
1441         }
1442 
1443         SAFE_RELEASE(p->vbuf);
1444         p->vbuf = new_buf;
1445         p->vbuf_size = new_size;
1446         p->vbuf_used = 0;
1447     }
1448 
1449     bool discard = false;
1450     size_t offset = p->vbuf_used;
1451     if (offset + size > p->vbuf_size) {
1452         // We reached the end of the buffer, so discard and wrap around
1453         discard = true;
1454         offset = 0;
1455     }
1456 
1457     D3D11_MAPPED_SUBRESOURCE map = { 0 };
1458     hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)p->vbuf, 0,
1459         discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE,
1460         0, &map);
1461     if (FAILED(hr)) {
1462         MP_ERR(ra, "Failed to map vertex buffer: %s\n", mp_HRESULT_to_str(hr));
1463         return -1;
1464     }
1465 
1466     char *cdata = map.pData;
1467     memcpy(cdata + offset, data, size);
1468 
1469     ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)p->vbuf, 0);
1470 
1471     p->vbuf_used = offset + size;
1472     return offset;
1473 }
1474 
// Magic bytes at the start of a cached program. The array holds exactly the
// four characters and no NUL terminator; the load/save code copies and
// compares it with an explicit length.
static const char cache_magic[4] = { 'R', 'D', '1', '1' };
// Version of the cache layout; a cache with a different version is ignored
static const int cache_version = 3;
1477 
1478 struct cache_header {
1479     char magic[sizeof(cache_magic)];
1480     int cache_version;
1481     char compiler[SPIRV_NAME_MAX_LEN];
1482     int spv_compiler_version;
1483     unsigned spvc_compiler_major;
1484     unsigned spvc_compiler_minor;
1485     unsigned spvc_compiler_patch;
1486     struct dll_version d3d_compiler_version;
1487     int feature_level;
1488     size_t vert_bytecode_len;
1489     size_t frag_bytecode_len;
1490     size_t comp_bytecode_len;
1491 };
1492 
// Try to extract precompiled shader bytecode from params->cached_program.
//
//   vert_bc, frag_bc, comp_bc: each may be NULL if the caller is not
//       interested in that stage. A non-NULL output is set to a slice of the
//       cache (no copy is made) when the cache contains bytecode for that
//       stage; otherwise it is left untouched.
//
// The cache is ignored as a whole (no output is touched) when it is too
// short, when the magic, cache version, compiler name/version, SPIRV-Cross
// version, D3D compiler version or feature level in its header do not match
// the current environment, or when the bytecode lengths in the header exceed
// the data actually present.
static void load_cached_program(struct ra *ra,
                                const struct ra_renderpass_params *params,
                                bstr *vert_bc,
                                bstr *frag_bc,
                                bstr *comp_bc)
{
    struct ra_d3d11 *p = ra->priv;
    struct spirv_compiler *spirv = p->spirv;
    bstr cache = params->cached_program;

    if (cache.len < sizeof(struct cache_header))
        return;

    // Copy the header out of the blob instead of reading it through a cast
    // pointer: the blob is plain bytes without any alignment guarantee
    struct cache_header header;
    memcpy(&header, cache.start, sizeof(header));
    cache = bstr_cut(cache, sizeof(header));

    unsigned spvc_major, spvc_minor, spvc_patch;
    spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch);

    if (strncmp(header.magic, cache_magic, sizeof(cache_magic)) != 0)
        return;
    if (header.cache_version != cache_version)
        return;
    if (strncmp(header.compiler, spirv->name, sizeof(header.compiler)) != 0)
        return;
    if (header.spv_compiler_version != spirv->compiler_version)
        return;
    if (header.spvc_compiler_major != spvc_major)
        return;
    if (header.spvc_compiler_minor != spvc_minor)
        return;
    if (header.spvc_compiler_patch != spvc_patch)
        return;
    if (!dll_version_equal(header.d3d_compiler_version, p->d3d_compiler_ver))
        return;
    if (header.feature_level != p->fl)
        return;

    // Reject truncated or corrupt caches before handing out any slice: all
    // three blobs have to fit into what follows the header. The lengths are
    // checked one at a time so that their sum cannot overflow.
    size_t remaining = cache.len;
    if (header.vert_bytecode_len > remaining)
        return;
    remaining -= header.vert_bytecode_len;
    if (header.frag_bytecode_len > remaining)
        return;
    remaining -= header.frag_bytecode_len;
    if (header.comp_bytecode_len > remaining)
        return;

    // The blobs are stored back to back in the order vertex, fragment,
    // compute; skip over each one even if the caller doesn't want it
    if (header.vert_bytecode_len && vert_bc) {
        *vert_bc = bstr_splice(cache, 0, header.vert_bytecode_len);
        MP_VERBOSE(ra, "Using cached vertex shader\n");
    }
    cache = bstr_cut(cache, header.vert_bytecode_len);

    if (header.frag_bytecode_len && frag_bc) {
        *frag_bc = bstr_splice(cache, 0, header.frag_bytecode_len);
        MP_VERBOSE(ra, "Using cached fragment shader\n");
    }
    cache = bstr_cut(cache, header.frag_bytecode_len);

    if (header.comp_bytecode_len && comp_bc) {
        *comp_bc = bstr_splice(cache, 0, header.comp_bytecode_len);
        MP_VERBOSE(ra, "Using cached compute shader\n");
    }
}
1549 
// Serialize compiled shader bytecode into pass->params.cached_program: a
// cache_header describing the current environment (compiler name and
// versions, SPIRV-Cross version, D3D compiler version, feature level) and the
// blob lengths, followed by the vertex, fragment and compute bytecode in that
// order. Stages that are not used are passed as empty bstrs. The data is
// appended with `pass` as the talloc parent.
static void save_cached_program(struct ra *ra, struct ra_renderpass *pass,
                                bstr vert_bc,
                                bstr frag_bc,
                                bstr comp_bc)
{
    struct ra_d3d11 *p = ra->priv;
    struct spirv_compiler *spirv = p->spirv;

    unsigned spvc_major, spvc_minor, spvc_patch;
    spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch);

    // The header is written out byte for byte, so clear the whole object
    // first. That way any padding between fields is zero rather than
    // leftover stack content, and the cache is deterministic.
    struct cache_header header;
    memset(&header, 0, sizeof(header));

    memcpy(header.magic, cache_magic, sizeof(header.magic));
    header.cache_version = cache_version;
    // strncpy is intentional: `compiler` is a fixed-width field that is
    // compared with a bounded strncmp when loading, and strncpy zero-fills
    // the unused tail
    strncpy(header.compiler, spirv->name, sizeof(header.compiler));
    header.spv_compiler_version = spirv->compiler_version;
    header.spvc_compiler_major = spvc_major;
    header.spvc_compiler_minor = spvc_minor;
    header.spvc_compiler_patch = spvc_patch;
    header.d3d_compiler_version = p->d3d_compiler_ver;
    header.feature_level = p->fl;
    header.vert_bytecode_len = vert_bc.len;
    header.frag_bytecode_len = frag_bc.len;
    header.comp_bytecode_len = comp_bc.len;

    struct bstr *prog = &pass->params.cached_program;
    bstr_xappend(pass, prog, (bstr){ (char *) &header, sizeof(header) });
    bstr_xappend(pass, prog, vert_bc);
    bstr_xappend(pass, prog, frag_bc);
    bstr_xappend(pass, prog, comp_bc);
}
1582 
renderpass_create_raster(struct ra * ra,struct ra_renderpass * pass,const struct ra_renderpass_params * params)1583 static struct ra_renderpass *renderpass_create_raster(struct ra *ra,
1584     struct ra_renderpass *pass, const struct ra_renderpass_params *params)
1585 {
1586     struct ra_d3d11 *p = ra->priv;
1587     struct d3d_rpass *pass_p = pass->priv;
1588     ID3DBlob *vs_blob = NULL;
1589     ID3DBlob *ps_blob = NULL;
1590     HRESULT hr;
1591 
1592     // load_cached_program will load compiled shader bytecode into vert_bc and
1593     // frag_bc if the cache is valid. If not, vert_bc/frag_bc will remain NULL.
1594     bstr vert_bc = {0};
1595     bstr frag_bc = {0};
1596     load_cached_program(ra, params, &vert_bc, &frag_bc, NULL);
1597 
1598     if (!vert_bc.start) {
1599         if (!compile_glsl(ra, GLSL_SHADER_VERTEX, params->vertex_shader,
1600                           &vs_blob))
1601             goto error;
1602         vert_bc = (bstr){
1603             ID3D10Blob_GetBufferPointer(vs_blob),
1604             ID3D10Blob_GetBufferSize(vs_blob),
1605         };
1606     }
1607 
1608     hr = ID3D11Device_CreateVertexShader(p->dev, vert_bc.start, vert_bc.len,
1609                                          NULL, &pass_p->vs);
1610     if (FAILED(hr)) {
1611         MP_ERR(ra, "Failed to create vertex shader: %s\n",
1612                mp_HRESULT_to_str(hr));
1613         goto error;
1614     }
1615 
1616     if (!frag_bc.start) {
1617         if (!compile_glsl(ra, GLSL_SHADER_FRAGMENT, params->frag_shader,
1618                           &ps_blob))
1619             goto error;
1620         frag_bc = (bstr){
1621             ID3D10Blob_GetBufferPointer(ps_blob),
1622             ID3D10Blob_GetBufferSize(ps_blob),
1623         };
1624     }
1625 
1626     hr = ID3D11Device_CreatePixelShader(p->dev, frag_bc.start, frag_bc.len,
1627                                         NULL, &pass_p->ps);
1628     if (FAILED(hr)) {
1629         MP_ERR(ra, "Failed to create pixel shader: %s\n",
1630                mp_HRESULT_to_str(hr));
1631         goto error;
1632     }
1633 
1634     D3D11_INPUT_ELEMENT_DESC *in_descs = talloc_array(pass,
1635         D3D11_INPUT_ELEMENT_DESC, params->num_vertex_attribs);
1636     for (int i = 0; i < params->num_vertex_attribs; i++) {
1637         struct ra_renderpass_input *inp = &params->vertex_attribs[i];
1638 
1639         DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
1640         switch (inp->type) {
1641         case RA_VARTYPE_FLOAT:
1642             switch (inp->dim_v) {
1643             case 1: fmt = DXGI_FORMAT_R32_FLOAT;          break;
1644             case 2: fmt = DXGI_FORMAT_R32G32_FLOAT;       break;
1645             case 3: fmt = DXGI_FORMAT_R32G32B32_FLOAT;    break;
1646             case 4: fmt = DXGI_FORMAT_R32G32B32A32_FLOAT; break;
1647             }
1648             break;
1649         case RA_VARTYPE_BYTE_UNORM:
1650             switch (inp->dim_v) {
1651             case 1: fmt = DXGI_FORMAT_R8_UNORM;       break;
1652             case 2: fmt = DXGI_FORMAT_R8G8_UNORM;     break;
1653             // There is no 3-component 8-bit DXGI format
1654             case 4: fmt = DXGI_FORMAT_R8G8B8A8_UNORM; break;
1655             }
1656             break;
1657         }
1658         if (fmt == DXGI_FORMAT_UNKNOWN) {
1659             MP_ERR(ra, "Could not find suitable vertex input format\n");
1660             goto error;
1661         }
1662 
1663         in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) {
1664             // The semantic name doesn't mean much and is just used to verify
1665             // the input description matches the shader. SPIRV-Cross always
1666             // uses TEXCOORD, so we should too.
1667             .SemanticName = "TEXCOORD",
1668             .SemanticIndex = i,
1669             .AlignedByteOffset = inp->offset,
1670             .Format = fmt,
1671         };
1672     }
1673 
1674     hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
1675         params->num_vertex_attribs, vert_bc.start, vert_bc.len,
1676         &pass_p->layout);
1677     if (FAILED(hr)) {
1678         MP_ERR(ra, "Failed to create IA layout: %s\n", mp_HRESULT_to_str(hr));
1679         goto error;
1680     }
1681     talloc_free(in_descs);
1682     in_descs = NULL;
1683 
1684     D3D11_BLEND_DESC bdesc = {
1685         .RenderTarget[0] = {
1686             .BlendEnable = params->enable_blend,
1687             .SrcBlend = map_ra_blend(params->blend_src_rgb),
1688             .DestBlend = map_ra_blend(params->blend_dst_rgb),
1689             .BlendOp = D3D11_BLEND_OP_ADD,
1690             .SrcBlendAlpha = map_ra_blend(params->blend_src_alpha),
1691             .DestBlendAlpha = map_ra_blend(params->blend_dst_alpha),
1692             .BlendOpAlpha = D3D11_BLEND_OP_ADD,
1693             .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
1694         },
1695     };
1696     hr = ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate);
1697     if (FAILED(hr)) {
1698         MP_ERR(ra, "Failed to create blend state: %s\n", mp_HRESULT_to_str(hr));
1699         goto error;
1700     }
1701 
1702     save_cached_program(ra, pass, vert_bc, frag_bc, (bstr){0});
1703 
1704     SAFE_RELEASE(vs_blob);
1705     SAFE_RELEASE(ps_blob);
1706     return pass;
1707 
1708 error:
1709     renderpass_destroy(ra, pass);
1710     SAFE_RELEASE(vs_blob);
1711     SAFE_RELEASE(ps_blob);
1712     return NULL;
1713 }
1714 
renderpass_create_compute(struct ra * ra,struct ra_renderpass * pass,const struct ra_renderpass_params * params)1715 static struct ra_renderpass *renderpass_create_compute(struct ra *ra,
1716     struct ra_renderpass *pass, const struct ra_renderpass_params *params)
1717 {
1718     struct ra_d3d11 *p = ra->priv;
1719     struct d3d_rpass *pass_p = pass->priv;
1720     ID3DBlob *cs_blob = NULL;
1721     HRESULT hr;
1722 
1723     bstr comp_bc = {0};
1724     load_cached_program(ra, params, NULL, NULL, &comp_bc);
1725 
1726     if (!comp_bc.start) {
1727         if (!compile_glsl(ra, GLSL_SHADER_COMPUTE, params->compute_shader,
1728                           &cs_blob))
1729             goto error;
1730         comp_bc = (bstr){
1731             ID3D10Blob_GetBufferPointer(cs_blob),
1732             ID3D10Blob_GetBufferSize(cs_blob),
1733         };
1734     }
1735     hr = ID3D11Device_CreateComputeShader(p->dev, comp_bc.start, comp_bc.len,
1736                                           NULL, &pass_p->cs);
1737     if (FAILED(hr)) {
1738         MP_ERR(ra, "Failed to create compute shader: %s\n",
1739                mp_HRESULT_to_str(hr));
1740         goto error;
1741     }
1742 
1743     save_cached_program(ra, pass, (bstr){0}, (bstr){0}, comp_bc);
1744 
1745     SAFE_RELEASE(cs_blob);
1746     return pass;
1747 error:
1748     renderpass_destroy(ra, pass);
1749     SAFE_RELEASE(cs_blob);
1750     return NULL;
1751 }
1752 
renderpass_create(struct ra * ra,const struct ra_renderpass_params * params)1753 static struct ra_renderpass *renderpass_create(struct ra *ra,
1754     const struct ra_renderpass_params *params)
1755 {
1756     struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
1757     pass->params = *ra_renderpass_params_copy(pass, params);
1758     pass->params.cached_program = (bstr){0};
1759     pass->priv = talloc_zero(pass, struct d3d_rpass);
1760 
1761     if (params->type == RA_RENDERPASS_TYPE_COMPUTE) {
1762         return renderpass_create_compute(ra, pass, params);
1763     } else {
1764         return renderpass_create_raster(ra, pass, params);
1765     }
1766 }
1767 
// Run a raster renderpass: upload the vertex data, bind the pass state and
// the given resources to the pixel shader stage, draw, and unbind again.
// The ubos, samplers, srvs and uavs arrays are overwritten with NULLs (up to
// their respective lengths) before returning, unless the vertex upload fails.
static void renderpass_run_raster(struct ra *ra,
                                  const struct ra_renderpass_run_params *params,
                                  ID3D11Buffer *ubos[], int ubos_len,
                                  ID3D11SamplerState *samplers[],
                                  ID3D11ShaderResourceView *srvs[],
                                  int samplers_len,
                                  ID3D11UnorderedAccessView *uavs[],
                                  int uavs_len)
{
    struct ra_d3d11 *p = ra->priv;
    struct ra_renderpass *pass = params->pass;
    struct d3d_rpass *pass_p = pass->priv;

    // vbuf_upload signals failure with (UINT)-1
    UINT offset = vbuf_upload(ra, params->vertex_data,
        pass->params.vertex_stride * params->vertex_count);
    if (offset == (UINT)-1)
        return;

    // Input assembler
    ID3D11DeviceContext_IASetInputLayout(p->ctx, pass_p->layout);
    ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->vbuf,
        &pass->params.vertex_stride, &offset);
    ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
        D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

    // Vertex shader
    ID3D11DeviceContext_VSSetShader(p->ctx, pass_p->vs, NULL, 0);

    // Rasterizer
    D3D11_VIEWPORT viewport = {
        .TopLeftX = params->viewport.x0,
        .TopLeftY = params->viewport.y0,
        .Width = mp_rect_w(params->viewport),
        .Height = mp_rect_h(params->viewport),
        .MinDepth = 0,
        .MaxDepth = 1,
    };
    ID3D11DeviceContext_RSSetViewports(p->ctx, 1, &viewport);

    D3D11_RECT scissor = {
        .left = params->scissors.x0,
        .top = params->scissors.y0,
        .right = params->scissors.x1,
        .bottom = params->scissors.y1,
    };
    ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, &scissor);

    // Pixel shader and its resources
    ID3D11DeviceContext_PSSetShader(p->ctx, pass_p->ps, NULL, 0);
    ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers);

    // Output merger: the RTV plus the UAVs, which start at slot 1
    struct d3d_tex *target_p = params->target->priv;
    ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 1,
        &target_p->rtv, NULL, 1, uavs_len, uavs, NULL);
    ID3D11DeviceContext_OMSetBlendState(p->ctx, pass_p->bstate, NULL,
                                        D3D11_DEFAULT_SAMPLE_MASK);

    ID3D11DeviceContext_Draw(p->ctx, params->vertex_count, 0);

    // Unbind everything. It's easier to do this than to actually track state,
    // and if we leave the RTV bound, it could trip up D3D's conflict checker.
    // The caller's arrays are reused as the all-NULL binding lists.
    for (int n = 0; n < ubos_len; n++)
        ubos[n] = NULL;
    for (int n = 0; n < samplers_len; n++) {
        samplers[n] = NULL;
        srvs[n] = NULL;
    }
    for (int n = 0; n < uavs_len; n++)
        uavs[n] = NULL;

    ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers);
    ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 0,
        NULL, NULL, 1, uavs_len, uavs, NULL);
}
1838 
// Run a compute renderpass: bind the compute shader and the given resources,
// dispatch params->compute_groups thread groups, and unbind the resources.
// The ubos, samplers, srvs and uavs arrays are overwritten with NULLs (up to
// their respective lengths) before returning.
static void renderpass_run_compute(struct ra *ra,
                                   const struct ra_renderpass_run_params *params,
                                   ID3D11Buffer *ubos[], int ubos_len,
                                   ID3D11SamplerState *samplers[],
                                   ID3D11ShaderResourceView *srvs[],
                                   int samplers_len,
                                   ID3D11UnorderedAccessView *uavs[],
                                   int uavs_len)
{
    struct ra_d3d11 *p = ra->priv;
    struct d3d_rpass *pass_p = params->pass->priv;

    ID3D11DeviceContext_CSSetShader(p->ctx, pass_p->cs, NULL, 0);
    ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers);
    ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs,
                                                  NULL);

    ID3D11DeviceContext_Dispatch(p->ctx, params->compute_groups[0],
                                         params->compute_groups[1],
                                         params->compute_groups[2]);

    // Reuse the caller's arrays as all-NULL lists to unbind every slot that
    // was bound above
    for (int n = 0; n < ubos_len; n++)
        ubos[n] = NULL;
    for (int n = 0; n < samplers_len; n++) {
        samplers[n] = NULL;
        srvs[n] = NULL;
    }
    for (int n = 0; n < uavs_len; n++)
        uavs[n] = NULL;

    ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
    ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs);
    ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers);
    ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs,
                                                  NULL);
}
1877 
renderpass_run(struct ra * ra,const struct ra_renderpass_run_params * params)1878 static void renderpass_run(struct ra *ra,
1879                            const struct ra_renderpass_run_params *params)
1880 {
1881     struct ra_d3d11 *p = ra->priv;
1882     struct ra_renderpass *pass = params->pass;
1883     enum ra_renderpass_type type = pass->params.type;
1884 
1885     ID3D11Buffer *ubos[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = {0};
1886     int ubos_len = 0;
1887 
1888     ID3D11SamplerState *samplers[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
1889     ID3D11ShaderResourceView *srvs[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
1890     int samplers_len = 0;
1891 
1892     ID3D11UnorderedAccessView *uavs[D3D11_1_UAV_SLOT_COUNT] = {0};
1893     int uavs_len = 0;
1894 
1895     // In a raster pass, one of the UAV slots is used by the runtime for the RTV
1896     int uavs_max = type == RA_RENDERPASS_TYPE_COMPUTE ? p->max_uavs
1897                                                       : p->max_uavs - 1;
1898 
1899     // Gather the input variables used in this pass. These will be mapped to
1900     // HLSL registers.
1901     for (int i = 0; i < params->num_values; i++) {
1902         struct ra_renderpass_input_val *val = &params->values[i];
1903         int binding = pass->params.inputs[val->index].binding;
1904         switch (pass->params.inputs[val->index].type) {
1905         case RA_VARTYPE_BUF_RO:
1906             if (binding > MP_ARRAY_SIZE(ubos)) {
1907                 MP_ERR(ra, "Too many constant buffers in pass\n");
1908                 return;
1909             }
1910             struct ra_buf *buf_ro = *(struct ra_buf **)val->data;
1911             buf_resolve(ra, buf_ro);
1912             struct d3d_buf *buf_ro_p = buf_ro->priv;
1913             ubos[binding] = buf_ro_p->buf;
1914             ubos_len = MPMAX(ubos_len, binding + 1);
1915             break;
1916         case RA_VARTYPE_BUF_RW:
1917             if (binding > uavs_max) {
1918                 MP_ERR(ra, "Too many UAVs in pass\n");
1919                 return;
1920             }
1921             struct ra_buf *buf_rw = *(struct ra_buf **)val->data;
1922             buf_resolve(ra, buf_rw);
1923             struct d3d_buf *buf_rw_p = buf_rw->priv;
1924             uavs[binding] = buf_rw_p->uav;
1925             uavs_len = MPMAX(uavs_len, binding + 1);
1926             break;
1927         case RA_VARTYPE_TEX:
1928             if (binding > MP_ARRAY_SIZE(samplers)) {
1929                 MP_ERR(ra, "Too many textures in pass\n");
1930                 return;
1931             }
1932             struct ra_tex *tex = *(struct ra_tex **)val->data;
1933             struct d3d_tex *tex_p = tex->priv;
1934             samplers[binding] = tex_p->sampler;
1935             srvs[binding] = tex_p->srv;
1936             samplers_len = MPMAX(samplers_len, binding + 1);
1937             break;
1938         case RA_VARTYPE_IMG_W:
1939             if (binding > uavs_max) {
1940                 MP_ERR(ra, "Too many UAVs in pass\n");
1941                 return;
1942             }
1943             struct ra_tex *img = *(struct ra_tex **)val->data;
1944             struct d3d_tex *img_p = img->priv;
1945             uavs[binding] = img_p->uav;
1946             uavs_len = MPMAX(uavs_len, binding + 1);
1947             break;
1948         }
1949     }
1950 
1951     if (type == RA_RENDERPASS_TYPE_COMPUTE) {
1952         renderpass_run_compute(ra, params, ubos, ubos_len, samplers, srvs,
1953                                samplers_len, uavs, uavs_len);
1954     } else {
1955         renderpass_run_raster(ra, params, ubos, ubos_len, samplers, srvs,
1956                               samplers_len, uavs, uavs_len);
1957     }
1958 }
1959 
// Release the three query objects owned by a timer and free the timer
// itself. Passing a NULL timer is a no-op.
static void timer_destroy(struct ra *ra, ra_timer *ratimer)
{
    struct d3d_timer *timer = ratimer;

    if (timer) {
        SAFE_RELEASE(timer->ts_start);
        SAFE_RELEASE(timer->ts_end);
        SAFE_RELEASE(timer->disjoint);
        talloc_free(timer);
    }
}
1971 
timer_create(struct ra * ra)1972 static ra_timer *timer_create(struct ra *ra)
1973 {
1974     struct ra_d3d11 *p = ra->priv;
1975     if (!p->has_timestamp_queries)
1976         return NULL;
1977 
1978     struct d3d_timer *timer = talloc_zero(NULL, struct d3d_timer);
1979     HRESULT hr;
1980 
1981     hr = ID3D11Device_CreateQuery(p->dev,
1982         &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_start);
1983     if (FAILED(hr)) {
1984         MP_ERR(ra, "Failed to create start query: %s\n", mp_HRESULT_to_str(hr));
1985         goto error;
1986     }
1987 
1988     hr = ID3D11Device_CreateQuery(p->dev,
1989         &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_end);
1990     if (FAILED(hr)) {
1991         MP_ERR(ra, "Failed to create end query: %s\n", mp_HRESULT_to_str(hr));
1992         goto error;
1993     }
1994 
1995     // Measuring duration in D3D11 requires three queries: start and end
1996     // timestamps, and a disjoint query containing a flag which says whether
1997     // the timestamps are usable or if a discontinuity occured between them,
1998     // like a change in power state or clock speed. The disjoint query also
1999     // contains the timer frequency, so the timestamps are useless without it.
2000     hr = ID3D11Device_CreateQuery(p->dev,
2001         &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &timer->disjoint);
2002     if (FAILED(hr)) {
2003         MP_ERR(ra, "Failed to create timer query: %s\n", mp_HRESULT_to_str(hr));
2004         goto error;
2005     }
2006 
2007     return timer;
2008 error:
2009     timer_destroy(ra, timer);
2010     return NULL;
2011 }
2012 
// Convert a tick count to nanoseconds, given the tick frequency in Hz. The
// whole seconds and the sub-second remainder are converted separately, so the
// intermediate products stay smaller than timestamp * 1e9 would be.
// freq must be non-zero.
static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq)
{
    const uint64_t ns_per_s = 1000000000llu;
    uint64_t whole_seconds = timestamp / freq;
    uint64_t leftover_ticks = timestamp % freq;

    return whole_seconds * ns_per_s + leftover_ticks * ns_per_s / freq;
}
2018 
// Poll the timer's queries without flushing the context and return the
// measured duration in nanoseconds. Returns 0 if any query fails or isn't
// ready yet (S_FALSE), if the disjoint flag is set, or if the reported
// frequency is zero.
static uint64_t timer_get_result(struct ra *ra, ra_timer *ratimer)
{
    struct ra_d3d11 *p = ra->priv;
    struct d3d_timer *timer = ratimer;
    const UINT flags = D3D11_ASYNC_GETDATA_DONOTFLUSH;
    HRESULT hr;

    UINT64 ticks_end;
    hr = ID3D11DeviceContext_GetData(p->ctx,
        (ID3D11Asynchronous *)timer->ts_end, &ticks_end, sizeof(ticks_end),
        flags);
    if (FAILED(hr) || hr == S_FALSE)
        return 0;

    UINT64 ticks_start;
    hr = ID3D11DeviceContext_GetData(p->ctx,
        (ID3D11Asynchronous *)timer->ts_start, &ticks_start,
        sizeof(ticks_start), flags);
    if (FAILED(hr) || hr == S_FALSE)
        return 0;

    D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
    hr = ID3D11DeviceContext_GetData(p->ctx,
        (ID3D11Asynchronous *)timer->disjoint, &disjoint, sizeof(disjoint),
        flags);
    if (FAILED(hr) || hr == S_FALSE)
        return 0;

    // The timestamps are only meaningful if no discontinuity happened and
    // the frequency is known
    if (disjoint.Disjoint || !disjoint.Frequency)
        return 0;

    return timestamp_to_ns(ticks_end - ticks_start, disjoint.Frequency);
}
2046 
// Begin a measurement. The result of the timer's previous measurement is
// stored in timer->result first, then the disjoint query is begun and the
// start timestamp is issued.
static void timer_start(struct ra *ra, ra_timer *ratimer)
{
    struct d3d_timer *timer = ratimer;
    struct ra_d3d11 *p = ra->priv;

    // Latch the last result of this ra_timer (returned by timer_stop)
    timer->result = timer_get_result(ra, ratimer);

    ID3D11Asynchronous *disjoint = (ID3D11Asynchronous *)timer->disjoint;
    ID3D11Asynchronous *start = (ID3D11Asynchronous *)timer->ts_start;
    ID3D11DeviceContext_Begin(p->ctx, disjoint);
    ID3D11DeviceContext_End(p->ctx, start);
}
2058 
// End a measurement: issue the end timestamp and end the disjoint query.
// Returns the result latched by the preceding timer_start() call, not the
// duration of the measurement that was just ended.
static uint64_t timer_stop(struct ra *ra, ra_timer *ratimer)
{
    struct d3d_timer *timer = ratimer;
    struct ra_d3d11 *p = ra->priv;

    ID3D11Asynchronous *end = (ID3D11Asynchronous *)timer->ts_end;
    ID3D11Asynchronous *disjoint = (ID3D11Asynchronous *)timer->disjoint;
    ID3D11DeviceContext_End(p->ctx, end);
    ID3D11DeviceContext_End(p->ctx, disjoint);

    return timer->result;
}
2069 
// Translate a D3D11 debug-layer message severity to an mpv log level.
// Corruption, error and warning map to fatal, error and warning; everything
// else (including info, message and unknown values) maps to debug.
static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev)
{
    if (sev == D3D11_MESSAGE_SEVERITY_CORRUPTION)
        return MSGL_FATAL;
    if (sev == D3D11_MESSAGE_SEVERITY_ERROR)
        return MSGL_ERR;
    if (sev == D3D11_MESSAGE_SEVERITY_WARNING)
        return MSGL_WARN;

    return MSGL_DEBUG;
}
2085 
debug_marker(struct ra * ra,const char * msg)2086 static void debug_marker(struct ra *ra, const char *msg)
2087 {
2088     struct ra_d3d11 *p = ra->priv;
2089     void *talloc_ctx = talloc_new(NULL);
2090     HRESULT hr;
2091 
2092     if (!p->iqueue)
2093         goto done;
2094 
2095     // Copy debug-layer messages to mpv's log output
2096     bool printed_header = false;
2097     uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue);
2098     for (uint64_t i = 0; i < messages; i++) {
2099         size_t len;
2100         hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len);
2101         if (FAILED(hr) || !len)
2102             goto done;
2103 
2104         D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len);
2105         hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len);
2106         if (FAILED(hr))
2107             goto done;
2108 
2109         int msgl = map_msg_severity(d3dmsg->Severity);
2110         if (mp_msg_test(ra->log, msgl)) {
2111             if (!printed_header)
2112                 MP_INFO(ra, "%s:\n", msg);
2113             printed_header = true;
2114 
2115             MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID,
2116                 (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription);
2117             talloc_free(d3dmsg);
2118         }
2119     }
2120 
2121     ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
2122 done:
2123     talloc_free(talloc_ctx);
2124 }
2125 
destroy(struct ra * ra)2126 static void destroy(struct ra *ra)
2127 {
2128     struct ra_d3d11 *p = ra->priv;
2129 
2130     // Release everything except the interfaces needed to perform leak checking
2131     SAFE_RELEASE(p->clear_ps);
2132     SAFE_RELEASE(p->clear_vs);
2133     SAFE_RELEASE(p->clear_layout);
2134     SAFE_RELEASE(p->clear_vbuf);
2135     SAFE_RELEASE(p->clear_cbuf);
2136     SAFE_RELEASE(p->blit_float_ps);
2137     SAFE_RELEASE(p->blit_vs);
2138     SAFE_RELEASE(p->blit_layout);
2139     SAFE_RELEASE(p->blit_vbuf);
2140     SAFE_RELEASE(p->blit_sampler);
2141     SAFE_RELEASE(p->vbuf);
2142     SAFE_RELEASE(p->ctx1);
2143     SAFE_RELEASE(p->dev1);
2144     SAFE_RELEASE(p->dev);
2145 
2146     if (p->debug && p->ctx) {
2147         // Destroy the device context synchronously so referenced objects don't
2148         // show up in the leak check
2149         ID3D11DeviceContext_ClearState(p->ctx);
2150         ID3D11DeviceContext_Flush(p->ctx);
2151     }
2152     SAFE_RELEASE(p->ctx);
2153 
2154     if (p->debug) {
2155         // Report any leaked objects
2156         debug_marker(ra, "after destroy");
2157         ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL);
2158         debug_marker(ra, "after leak check");
2159         ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY);
2160         debug_marker(ra, "after leak summary");
2161     }
2162     SAFE_RELEASE(p->debug);
2163     SAFE_RELEASE(p->iqueue);
2164 
2165     talloc_free(ra);
2166 }
2167 
2168 static struct ra_fns ra_fns_d3d11 = {
2169     .destroy            = destroy,
2170     .tex_create         = tex_create,
2171     .tex_destroy        = tex_destroy,
2172     .tex_upload         = tex_upload,
2173     .tex_download       = tex_download,
2174     .buf_create         = buf_create,
2175     .buf_destroy        = buf_destroy,
2176     .buf_update         = buf_update,
2177     .clear              = clear,
2178     .blit               = blit,
2179     .uniform_layout     = std140_layout,
2180     .desc_namespace     = desc_namespace,
2181     .renderpass_create  = renderpass_create,
2182     .renderpass_destroy = renderpass_destroy,
2183     .renderpass_run     = renderpass_run,
2184     .timer_create       = timer_create,
2185     .timer_destroy      = timer_destroy,
2186     .timer_start        = timer_start,
2187     .timer_stop         = timer_stop,
2188     .debug_marker       = debug_marker,
2189 };
2190 
ra_d3d11_flush(struct ra * ra)2191 void ra_d3d11_flush(struct ra *ra)
2192 {
2193     struct ra_d3d11 *p = ra->priv;
2194     ID3D11DeviceContext_Flush(p->ctx);
2195 }
2196 
init_debug_layer(struct ra * ra)2197 static void init_debug_layer(struct ra *ra)
2198 {
2199     struct ra_d3d11 *p = ra->priv;
2200     HRESULT hr;
2201 
2202     hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug,
2203                                      (void**)&p->debug);
2204     if (FAILED(hr)) {
2205         MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr));
2206         return;
2207     }
2208 
2209     hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue,
2210                                      (void**)&p->iqueue);
2211     if (FAILED(hr)) {
2212         MP_ERR(ra, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr));
2213         return;
2214     }
2215 
2216     // Store an unlimited amount of messages in the buffer. This is fine
2217     // because we flush stored messages regularly (in debug_marker.)
2218     ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1);
2219 
2220     // Filter some annoying messages
2221     D3D11_MESSAGE_ID deny_ids[] = {
2222         // This error occurs during context creation when we try to figure out
2223         // the real maximum texture size by attempting to create a texture
2224         // larger than the current feature level allows.
2225         D3D11_MESSAGE_ID_CREATETEXTURE2D_INVALIDDIMENSIONS,
2226 
2227         // These are normal. The RA timer queue habitually reuses timer objects
2228         // without retrieving the results.
2229         D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS,
2230         D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS,
2231     };
2232     D3D11_INFO_QUEUE_FILTER filter = {
2233         .DenyList = {
2234             .NumIDs = MP_ARRAY_SIZE(deny_ids),
2235             .pIDList = deny_ids,
2236         },
2237     };
2238     ID3D11InfoQueue_PushStorageFilter(p->iqueue, &filter);
2239 }
2240 
// Read the file version from the version resource of a loaded module.
// Returns an all-zero version if the resource can't be found, loaded or
// parsed.
static struct dll_version get_dll_version(HMODULE dll)
{
    struct dll_version version = { 0 };
    void *tmp = talloc_new(NULL);

    HRSRC info = FindResourceW(dll, MAKEINTRESOURCEW(VS_VERSION_INFO),
                               MAKEINTRESOURCEW(VS_FILE_INFO));
    HGLOBAL handle = info ? LoadResource(dll, info) : NULL;
    void *data = handle ? LockResource(handle) : NULL;

    if (data) {
        // VerQueryValueW is run on a private copy of the resource data
        DWORD size = SizeofResource(dll, info);
        void *copy = talloc_memdup(tmp, data, size);

        VS_FIXEDFILEINFO *ffi = NULL;
        UINT ffi_len = 0;
        if (VerQueryValueW(copy, L"\\", (void**)&ffi, &ffi_len) &&
            ffi_len >= sizeof(*ffi))
        {
            // Each DWORD packs two 16-bit version components
            version.major = HIWORD(ffi->dwFileVersionMS);
            version.minor = LOWORD(ffi->dwFileVersionMS);
            version.build = HIWORD(ffi->dwFileVersionLS);
            version.revision = LOWORD(ffi->dwFileVersionLS);
        }
    }

    talloc_free(tmp);
    return version;
}
2275 
load_d3d_compiler(struct ra * ra)2276 static bool load_d3d_compiler(struct ra *ra)
2277 {
2278     struct ra_d3d11 *p = ra->priv;
2279     HMODULE d3dcompiler = NULL;
2280 
2281     // Try the inbox D3DCompiler first (Windows 8.1 and up)
2282     if (IsWindows8Point1OrGreater()) {
2283         d3dcompiler = LoadLibraryExW(L"d3dcompiler_47.dll", NULL,
2284                                      LOAD_LIBRARY_SEARCH_SYSTEM32);
2285     }
2286     // Check for a packaged version of d3dcompiler_47.dll
2287     if (!d3dcompiler)
2288         d3dcompiler = LoadLibraryW(L"d3dcompiler_47.dll");
2289     // Try d3dcompiler_46.dll from the Windows 8 SDK
2290     if (!d3dcompiler)
2291         d3dcompiler = LoadLibraryW(L"d3dcompiler_46.dll");
2292     // Try d3dcompiler_43.dll from the June 2010 DirectX SDK
2293     if (!d3dcompiler)
2294         d3dcompiler = LoadLibraryW(L"d3dcompiler_43.dll");
2295     // Can't find any compiler DLL, so give up
2296     if (!d3dcompiler)
2297         return false;
2298 
2299     p->d3d_compiler_ver = get_dll_version(d3dcompiler);
2300 
2301     p->D3DCompile = (pD3DCompile)GetProcAddress(d3dcompiler, "D3DCompile");
2302     if (!p->D3DCompile)
2303         return false;
2304     return true;
2305 }
2306 
find_max_texture_dimension(struct ra * ra)2307 static void find_max_texture_dimension(struct ra *ra)
2308 {
2309     struct ra_d3d11 *p = ra->priv;
2310 
2311     D3D11_TEXTURE2D_DESC desc = {
2312         .Width = ra->max_texture_wh,
2313         .Height = ra->max_texture_wh,
2314         .MipLevels = 1,
2315         .ArraySize = 1,
2316         .SampleDesc.Count = 1,
2317         .Format = DXGI_FORMAT_R8_UNORM,
2318         .BindFlags = D3D11_BIND_SHADER_RESOURCE,
2319     };
2320     while (true) {
2321         desc.Height = desc.Width *= 2;
2322         if (desc.Width >= 0x8000000u)
2323             return;
2324         if (FAILED(ID3D11Device_CreateTexture2D(p->dev, &desc, NULL, NULL)))
2325             return;
2326         ra->max_texture_wh = desc.Width;
2327     }
2328 }
2329 
// Create an ra instance on top of an existing D3D11 device. The ra takes its
// own reference to dev. Capabilities are derived from the device's feature
// level and from the given SPIR-V compiler. Returns NULL (after tearing down
// the partially initialized ra) if the D3DCompiler DLL, the rasterizer state
// or the internal clear/blit passes can't be set up.
struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
                           struct spirv_compiler *spirv)
{
    HRESULT hr;

    struct ra *ra = talloc_zero(NULL, struct ra);
    ra->fns = &ra_fns_d3d11;
    ra->log = log;
    ra->glsl_version = spirv->glsl_version;
    ra->glsl_vulkan = true;

    // Even Direct3D 10level9 supports 3D textures
    ra->caps = RA_CAP_TEX_3D | RA_CAP_DIRECT_UPLOAD | RA_CAP_BUF_RO |
               RA_CAP_BLIT | spirv->ra_caps;

    struct ra_d3d11 *p = talloc_zero(ra, struct ra_d3d11);
    ra->priv = p;
    p->spirv = spirv;

    ID3D11Device_AddRef(dev);
    p->dev = dev;
    ID3D11Device_GetImmediateContext(p->dev, &p->ctx);

    // The 11.1 interfaces are optional
    int runtime_minor = 0;
    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1,
                                     (void**)&p->dev1);
    if (SUCCEEDED(hr)) {
        runtime_minor = 1;
        ID3D11Device1_GetImmediateContext1(p->dev1, &p->ctx1);

        D3D11_FEATURE_DATA_D3D11_OPTIONS options = { 0 };
        hr = ID3D11Device_CheckFeatureSupport(p->dev,
            D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));
        if (SUCCEEDED(hr))
            p->has_clear_view = options.ClearView;
    }

    MP_VERBOSE(ra, "Using Direct3D 11.%d runtime\n", runtime_minor);

    // Derive the baseline texture size limit and the capabilities from the
    // feature level
    p->fl = ID3D11Device_GetFeatureLevel(p->dev);
    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
        ra->max_texture_wh = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
        // Some 10_0 hardware has compute shaders, but only 11_0 has image
        // load/store
        ra->caps |= RA_CAP_GATHER | RA_CAP_FRAGCOORD | RA_CAP_COMPUTE |
                    RA_CAP_BUF_RW;
        ra->max_shmem = 32 * 1024;
    } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
        ra->max_texture_wh = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
        ra->caps |= RA_CAP_FRAGCOORD;
    } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) {
        ra->max_texture_wh = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION;
    } else {
        ra->max_texture_wh = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION;
    }

    p->max_uavs = p->fl >= D3D_FEATURE_LEVEL_11_1
                      ? D3D11_1_UAV_SLOT_COUNT
                      : D3D11_PS_CS_UAV_REGISTER_COUNT;

    if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG)
        init_debug_layer(ra);

    // Some level 9_x devices don't have timestamp queries
    D3D11_QUERY_DESC probe_desc = { .Query = D3D11_QUERY_TIMESTAMP };
    hr = ID3D11Device_CreateQuery(p->dev, &probe_desc, NULL);
    p->has_timestamp_queries = SUCCEEDED(hr);

    // According to MSDN, the above texture sizes are just minimums and drivers
    // may support larger textures. See:
    // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx
    find_max_texture_dimension(ra);
    MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh,
               ra->max_texture_wh);

    if (!load_d3d_compiler(ra)) {
        MP_FATAL(ra, "Could not find D3DCompiler DLL\n");
        goto error;
    }

    MP_VERBOSE(ra, "D3DCompiler version: %u.%u.%u.%u\n",
               p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor,
               p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision);

    setup_formats(ra);

    // The rasterizer state never changes, so set it up here
    ID3D11RasterizerState *rstate = NULL;
    D3D11_RASTERIZER_DESC rdesc = {
        .FillMode = D3D11_FILL_SOLID,
        .CullMode = D3D11_CULL_NONE,
        .FrontCounterClockwise = FALSE,
        .DepthClipEnable = TRUE, // Required for 10level9
        .ScissorEnable = TRUE,
    };
    hr = ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &rstate);
    if (FAILED(hr)) {
        MP_ERR(ra, "Failed to create rasterizer state: %s\n", mp_HRESULT_to_str(hr));
        goto error;
    }
    ID3D11DeviceContext_RSSetState(p->ctx, rstate);
    // Drop our local reference now that the state has been set on the context
    SAFE_RELEASE(rstate);

    // If the device doesn't support ClearView, we have to set up a
    // shader-based clear() implementation
    if (!p->has_clear_view) {
        if (!setup_clear_rpass(ra))
            goto error;
    }

    if (!setup_blit_rpass(ra))
        goto error;

    return ra;

error:
    destroy(ra);
    return NULL;
}
2454 
ra_d3d11_get_device(struct ra * ra)2455 ID3D11Device *ra_d3d11_get_device(struct ra *ra)
2456 {
2457     struct ra_d3d11 *p = ra->priv;
2458     ID3D11Device_AddRef(p->dev);
2459     return p->dev;
2460 }
2461 
ra_is_d3d11(struct ra * ra)2462 bool ra_is_d3d11(struct ra *ra)
2463 {
2464     return ra->fns == &ra_fns_d3d11;
2465 }
2466