1 /*
2 * This file is part of libplacebo.
3 *
4 * libplacebo is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * libplacebo is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "gpu.h"
19 #include "formats.h"
20 #include "glsl/spirv.h"
21
// One slice of data to suballocate out of a dynamic streaming buffer
struct stream_buf_slice {
    const void *data;     // in: source data to copy into the buffer
    unsigned int size;    // in: size of `data` in bytes
    unsigned int offset;  // out: byte offset of the slice within the buffer,
                          //      filled in by stream_buf_upload()
};
27
28 // Upload one or more slices of single-use data to a suballocated dynamic
29 // buffer. Only call this once per-buffer per-pass, since it will discard or
30 // reallocate the buffer when full.
stream_buf_upload(pl_gpu gpu,struct d3d_stream_buf * stream,struct stream_buf_slice * slices,int num_slices)31 static bool stream_buf_upload(pl_gpu gpu, struct d3d_stream_buf *stream,
32 struct stream_buf_slice *slices, int num_slices)
33 {
34 struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
35 struct d3d11_ctx *ctx = p->ctx;
36 unsigned int align = PL_DEF(stream->align, sizeof(float));
37
38 // Get total size, rounded up to the buffer's alignment
39 size_t size = 0;
40 for (int i = 0; i < num_slices; i++)
41 size += PL_ALIGN2(slices[i].size, align);
42
43 if (size > gpu->limits.max_buf_size) {
44 PL_ERR(gpu, "Streaming buffer is too large");
45 return -1;
46 }
47
48 // If the data doesn't fit, realloc the buffer
49 if (size > stream->size) {
50 size_t new_size = stream->size;
51 // Arbitrary base size
52 if (!new_size)
53 new_size = 16 * 1024;
54 while (new_size < size)
55 new_size *= 2;
56 new_size = PL_MIN(new_size, gpu->limits.max_buf_size);
57
58 ID3D11Buffer *new_buf;
59 D3D11_BUFFER_DESC vbuf_desc = {
60 .ByteWidth = new_size,
61 .Usage = D3D11_USAGE_DYNAMIC,
62 .BindFlags = stream->bind_flags,
63 .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
64 };
65 D3D(ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf));
66
67 SAFE_RELEASE(stream->buf);
68 stream->buf = new_buf;
69 stream->size = new_size;
70 stream->used = 0;
71 }
72
73 bool discard = false;
74 size_t offset = stream->used;
75 if (offset + size > stream->size) {
76 // We reached the end of the buffer, so discard and wrap around
77 discard = true;
78 offset = 0;
79 }
80
81 D3D11_MAPPED_SUBRESOURCE map = {0};
82 UINT type = discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE;
83 D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) stream->buf, 0, type,
84 0, &map));
85
86 // Upload each slice
87 char *cdata = map.pData;
88 stream->used = offset;
89 for (int i = 0; i < num_slices; i++) {
90 slices[i].offset = stream->used;
91 memcpy(cdata + slices[i].offset, slices[i].data, slices[i].size);
92 stream->used += PL_ALIGN2(slices[i].size, align);
93 }
94
95 ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) stream->buf, 0);
96
97 return true;
98
99 error:
100 return false;
101 }
102
// Return the HLSL target profile string for the given shader stage at the
// device's feature level, or NULL if the stage is unsupported there (FL9_x
// has no compute profile).
static const char *get_shader_target(pl_gpu gpu, enum glsl_shader_stage stage)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    switch (stage) {
    case GLSL_SHADER_VERTEX:
        switch (p->fl) {
        default:                     return "vs_5_0";
        case D3D_FEATURE_LEVEL_10_1: return "vs_4_1";
        case D3D_FEATURE_LEVEL_10_0: return "vs_4_0";
        case D3D_FEATURE_LEVEL_9_3:  return "vs_4_0_level_9_3";
        case D3D_FEATURE_LEVEL_9_2:
        case D3D_FEATURE_LEVEL_9_1:  return "vs_4_0_level_9_1";
        }
        break;
    case GLSL_SHADER_FRAGMENT:
        switch (p->fl) {
        default:                     return "ps_5_0";
        case D3D_FEATURE_LEVEL_10_1: return "ps_4_1";
        case D3D_FEATURE_LEVEL_10_0: return "ps_4_0";
        case D3D_FEATURE_LEVEL_9_3:  return "ps_4_0_level_9_3";
        case D3D_FEATURE_LEVEL_9_2:
        case D3D_FEATURE_LEVEL_9_1:  return "ps_4_0_level_9_1";
        }
        break;
    case GLSL_SHADER_COMPUTE:
        switch (p->fl) {
        default:                     return "cs_5_0";
        case D3D_FEATURE_LEVEL_10_1: return "cs_4_1";
        case D3D_FEATURE_LEVEL_10_0: return "cs_4_0";
        case D3D_FEATURE_LEVEL_9_3:
        case D3D_FEATURE_LEVEL_9_2:
        case D3D_FEATURE_LEVEL_9_1:  return NULL;
        }
        break;
    }
    return NULL;
}
146
// Check the result of a SPIRV-Cross call: on failure, log the failing
// expression (plus the context's last error string, if a context exists) and
// jump to the enclosing function's `error` label. Requires `gpu` and
// `pass_s` to be in scope at the call site.
#define SC(cmd)                                                             \
    do {                                                                    \
        spvc_result res = (cmd);                                            \
        if (res != SPVC_SUCCESS) {                                          \
            PL_ERR(gpu, "%s: %s (%d) (%s:%d)",                              \
                   #cmd, pass_s->sc ?                                       \
                       spvc_context_get_last_error_string(pass_s->sc) : "", \
                   res, __FILE__, __LINE__);                                \
            goto error;                                                     \
        }                                                                   \
    } while (0)
158
mark_resources_used(pl_pass pass,spvc_compiler sc_comp,spvc_resources resources,spvc_resource_type res_type,enum glsl_shader_stage stage)159 static spvc_result mark_resources_used(pl_pass pass, spvc_compiler sc_comp,
160 spvc_resources resources,
161 spvc_resource_type res_type,
162 enum glsl_shader_stage stage)
163 {
164 struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
165 const spvc_reflected_resource *res_list;
166 size_t res_count;
167 spvc_result res;
168
169 res = spvc_resources_get_resource_list_for_type(resources, res_type,
170 &res_list, &res_count);
171 if (res != SPVC_SUCCESS)
172 return res;
173
174 for (int i = 0; i < res_count; i++) {
175 unsigned int binding = spvc_compiler_get_decoration(sc_comp,
176 res_list[i].id, SpvDecorationBinding);
177 unsigned int descriptor_set = spvc_compiler_get_decoration(sc_comp,
178 res_list[i].id, SpvDecorationDescriptorSet);
179 if (descriptor_set != 0)
180 continue;
181
182 // Find the pl_desc with this binding and mark it as used
183 for (int j = 0; j < pass->params.num_descriptors; j++) {
184 struct pl_desc *desc = &pass->params.descriptors[j];
185 if (desc->binding != binding)
186 continue;
187
188 struct pl_desc_d3d11 *desc_p = &pass_p->descriptors[j];
189 if (stage == GLSL_SHADER_VERTEX) {
190 desc_p->vertex.used = true;
191 } else {
192 desc_p->main.used = true;
193 }
194 }
195 }
196
197 return res;
198 }
199
// Human-readable shader stage names, used in log/error messages
static const char *shader_names[] = {
    [GLSL_SHADER_VERTEX] = "vertex",
    [GLSL_SHADER_FRAGMENT] = "fragment",
    [GLSL_SHADER_COMPUTE] = "compute",
};
205
// Compile GLSL to SPIR-V and set up a SPIRV-Cross HLSL compiler for it in
// `pass_s`. Also performs reflection to mark which pass descriptors are
// statically used in this stage (via mark_resources_used). On failure, the
// GLSL source is logged and the SPIRV-Cross context is destroyed.
static bool shader_compile_glsl(pl_gpu gpu, pl_pass pass,
                                struct d3d_pass_stage *pass_s,
                                enum glsl_shader_stage stage, const char *glsl)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    void *tmp = pl_tmp(NULL);
    bool success = false;

    clock_t start = clock();
    pl_str spirv = spirv_compile_glsl(p->spirv, tmp, &gpu->glsl, stage, glsl);
    if (!spirv.len)
        goto error;

    pl_log_cpu_time(gpu->log, start, clock(), "translating GLSL to SPIR-V");

    SC(spvc_context_create(&pass_s->sc));

    spvc_parsed_ir sc_ir;
    SC(spvc_context_parse_spirv(pass_s->sc, (SpvId *) spirv.buf,
                                spirv.len / sizeof(SpvId), &sc_ir));

    // TAKE_OWNERSHIP: the compiler owns `sc_ir` from this point on
    SC(spvc_context_create_compiler(pass_s->sc, SPVC_BACKEND_HLSL, sc_ir,
                                    SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
                                    &pass_s->sc_comp));

    spvc_compiler_options sc_opts;
    SC(spvc_compiler_create_compiler_options(pass_s->sc_comp, &sc_opts));

    // Pick the HLSL shader model from the device's feature level
    int sc_shader_model;
    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
        sc_shader_model = 50;
    } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) {
        sc_shader_model = 41;
    } else {
        sc_shader_model = 40;
    }

    SC(spvc_compiler_options_set_uint(sc_opts,
        SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model));

    // Unlike Vulkan and OpenGL, in D3D11, the clip-space is "flipped" with
    // respect to framebuffer-space. In other words, if you render to a pixel at
    // (0, -1), you have to sample from (0, 1) to get the value back. We unflip
    // it by setting the following option, which inserts the equivalent of
    // `gl_Position.y = -gl_Position.y` into the vertex shader
    if (stage == GLSL_SHADER_VERTEX) {
        SC(spvc_compiler_options_set_bool(sc_opts,
            SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE));
    }

    // Bind readonly images and imageBuffers as SRVs. This is done because a lot
    // of hardware (especially FL11_x hardware) has very poor format support for
    // reading values from UAVs. It allows the common case of readonly and
    // writeonly images to support more formats, though the less common case of
    // readwrite images still requires format support for UAV loads (represented
    // by the PL_FMT_CAP_READWRITE cap in libplacebo.)
    //
    // Note that setting this option comes at the cost of GLSL support. Readonly
    // and readwrite images are the same type in GLSL, but SRV and UAV bound
    // textures are different types in HLSL, so for example, a GLSL function
    // with an image parameter may fail to compile as HLSL if it's called with a
    // readonly image and a readwrite image at different call sites.
    SC(spvc_compiler_options_set_bool(sc_opts,
        SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV, SPVC_TRUE));

    SC(spvc_compiler_install_compiler_options(pass_s->sc_comp, sc_opts));

    spvc_set active = NULL;
    SC(spvc_compiler_get_active_interface_variables(pass_s->sc_comp, &active));
    spvc_resources resources = NULL;
    SC(spvc_compiler_create_shader_resources_for_active_variables(
        pass_s->sc_comp, &resources, active));

    // In D3D11, the vertex shader and fragment shader can have a different set
    // of bindings. At this point, SPIRV-Cross knows which resources are
    // statically used in each stage. We can use this information to optimize
    // HLSL register allocation by not binding resources to shader stages
    // they're not used in.
    mark_resources_used(pass, pass_s->sc_comp, resources,
                        SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, stage);
    mark_resources_used(pass, pass_s->sc_comp, resources,
                        SPVC_RESOURCE_TYPE_STORAGE_BUFFER, stage);
    mark_resources_used(pass, pass_s->sc_comp, resources,
                        SPVC_RESOURCE_TYPE_STORAGE_IMAGE, stage);
    mark_resources_used(pass, pass_s->sc_comp, resources,
                        SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, stage);

    success = true;
error:;
    if (!success) {
        PL_ERR(gpu, "%s shader GLSL source:", shader_names[stage]);
        pl_msg_source(gpu->ctx, PL_LOG_ERR, glsl);

        // Destroying the context frees everything allocated from it,
        // including the compiler and options
        if (pass_s->sc) {
            spvc_context_destroy(pass_s->sc);
            pass_s->sc = NULL;
        }
    }
    pl_free(tmp);

    return success;
}
308
// Translate the stage's SPIR-V (prepared by shader_compile_glsl) to HLSL and
// compile it to DXBC with D3DCompile, returning the bytecode in `*out`.
// Assigns HLSL register numbers for each used descriptor, optionally emulates
// gl_NumWorkGroups with an extra constant buffer, and prunes descriptors
// SPIRV-Cross ended up not referencing. Always destroys the SPIRV-Cross
// context before returning.
static bool shader_compile_hlsl(pl_gpu gpu, pl_pass pass,
                                struct d3d_pass_stage *pass_s,
                                enum glsl_shader_stage stage, const char *glsl,
                                ID3DBlob **out)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
    const char *hlsl = NULL;
    ID3DBlob *errors = NULL;
    bool success = false;
    HRESULT hr;

    // Tracks the highest GLSL binding seen, so the emulated gl_NumWorkGroups
    // buffer can be given a binding that doesn't collide
    int max_binding = -1;

    // This should not be called without first calling shader_compile_glsl
    pl_assert(pass_s->sc_comp);

    static const SpvExecutionModel spv_execution_model[] = {
        [GLSL_SHADER_VERTEX] = SpvExecutionModelVertex,
        [GLSL_SHADER_FRAGMENT] = SpvExecutionModelFragment,
        [GLSL_SHADER_COMPUTE] = SpvExecutionModelGLCompute,
    };

    // Assign the HLSL register numbers we want to use for each resource
    for (int i = 0; i < pass->params.num_descriptors; i++) {
        struct pl_desc *desc = &pass->params.descriptors[i];
        struct pl_desc_d3d11 *desc_p = &pass_p->descriptors[i];
        struct d3d_desc_stage *desc_s =
            stage == GLSL_SHADER_VERTEX ? &desc_p->vertex : &desc_p->main;

        // Skip resources that aren't in this shader stage
        if (!desc_s->used)
            continue;

        spvc_hlsl_resource_binding binding;
        spvc_hlsl_resource_binding_init(&binding);
        binding.stage = spv_execution_model[stage];
        binding.binding = desc->binding;
        max_binding = PL_MAX(max_binding, desc->binding);
        // Only the slot types allocated for this descriptor are valid (>= 0)
        if (desc_s->cbv_slot > 0)
            binding.cbv.register_binding = desc_s->cbv_slot;
        if (desc_s->srv_slot > 0)
            binding.srv.register_binding = desc_s->srv_slot;
        if (desc_s->sampler_slot > 0)
            binding.sampler.register_binding = desc_s->sampler_slot;
        if (desc_s->uav_slot > 0)
            binding.uav.register_binding = desc_s->uav_slot;
        SC(spvc_compiler_hlsl_add_resource_binding(pass_s->sc_comp, &binding));
    }

    if (stage == GLSL_SHADER_COMPUTE) {
        // Check if the gl_NumWorkGroups builtin is used. If it is, we have to
        // emulate it with a constant buffer, so allocate it a CBV register.
        spvc_variable_id num_workgroups_id =
            spvc_compiler_hlsl_remap_num_workgroups_builtin(pass_s->sc_comp);
        if (num_workgroups_id) {
            pass_p->num_workgroups_used = true;

            spvc_hlsl_resource_binding binding;
            spvc_hlsl_resource_binding_init(&binding);
            binding.stage = spv_execution_model[stage];
            // Use the first binding above all user descriptors
            binding.binding = max_binding + 1;

            // Allocate a CBV register for the buffer
            binding.cbv.register_binding = pass_s->cbvs.num;
            PL_ARRAY_APPEND(pass, pass_s->cbvs, HLSL_BINDING_NUM_WORKGROUPS);
            if (pass_s->cbvs.num >
                    D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) {
                PL_ERR(gpu, "Not enough constant buffer slots for gl_NumWorkGroups");
                goto error;
            }

            spvc_compiler_set_decoration(pass_s->sc_comp, num_workgroups_id,
                                         SpvDecorationDescriptorSet, 0);
            spvc_compiler_set_decoration(pass_s->sc_comp, num_workgroups_id,
                                         SpvDecorationBinding, binding.binding);

            SC(spvc_compiler_hlsl_add_resource_binding(pass_s->sc_comp, &binding));
        }
    }

    clock_t start = clock();
    SC(spvc_compiler_compile(pass_s->sc_comp, &hlsl));

    clock_t after_spvc = clock();
    pl_log_cpu_time(gpu->log, start, after_spvc, "translating SPIR-V to HLSL");

    // Check if each resource binding was actually used by SPIRV-Cross in the
    // compiled HLSL. This information can be used to optimize resource binding
    // to the pipeline.
    for (int i = 0; i < pass->params.num_descriptors; i++) {
        struct pl_desc *desc = &pass->params.descriptors[i];
        struct pl_desc_d3d11 *desc_p = &pass_p->descriptors[i];
        struct d3d_desc_stage *desc_s =
            stage == GLSL_SHADER_VERTEX ? &desc_p->vertex : &desc_p->main;

        // Skip resources that aren't in this shader stage
        if (!desc_s->used)
            continue;

        bool used = spvc_compiler_hlsl_is_resource_used(pass_s->sc_comp,
            spv_execution_model[stage], 0, desc->binding);
        if (!used)
            desc_s->used = false;
    }

    // SKIP_VALIDATION is safe because SPIRV-Cross emits well-formed HLSL
    hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main",
        get_shader_target(gpu, stage),
        D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out,
        &errors);
    if (FAILED(hr)) {
        // NOTE(review): D3DCompile may leave `errors` NULL on some failures;
        // this would dereference NULL in the log call — verify
        PL_ERR(gpu, "D3DCompile failed: %s\n%.*s", pl_hresult_to_str(hr),
               (int) ID3D10Blob_GetBufferSize(errors),
               (char *) ID3D10Blob_GetBufferPointer(errors));
        goto error;
    }

    pl_log_cpu_time(gpu->log, after_spvc, clock(), "translating HLSL to DXBC");

    success = true;
error:;
    // Log the sources: at debug level on success, error level on failure
    int level = success ? PL_LOG_DEBUG : PL_LOG_ERR;
    PL_MSG(gpu, level, "%s shader GLSL source:", shader_names[stage]);
    pl_msg_source(gpu->ctx, level, glsl);
    if (hlsl) {
        PL_MSG(gpu, level, "%s shader HLSL source:", shader_names[stage]);
        pl_msg_source(gpu->ctx, level, hlsl);
    }

    // The SPIRV-Cross context (and the `hlsl` string it owns) is no longer
    // needed once the DXBC blob exists
    if (pass_s->sc) {
        spvc_context_destroy(pass_s->sc);
        pass_s->sc = NULL;
    }
    SAFE_RELEASE(errors);
    return success;
}
445
// Release all D3D11 and SPIRV-Cross state owned by the pass, then free it
void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    struct d3d11_ctx *ctx = p->ctx;
    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);

    // Destroy any SPIRV-Cross contexts that are still alive (they normally
    // get destroyed during shader compilation, but may survive on error)
    struct d3d_pass_stage *stages[] = { &pass_p->main, &pass_p->vertex };
    for (int i = 0; i < PL_ARRAY_SIZE(stages); i++) {
        if (stages[i]->sc) {
            spvc_context_destroy(stages[i]->sc);
            stages[i]->sc = NULL;
        }
    }

    SAFE_RELEASE(pass_p->vs);
    SAFE_RELEASE(pass_p->ps);
    SAFE_RELEASE(pass_p->cs);
    SAFE_RELEASE(pass_p->layout);
    SAFE_RELEASE(pass_p->bstate);

    pl_d3d11_flush_message_queue(ctx, "After pass destroy");

    pl_free((void *) pass);
}
471
// Create the D3D11 objects for a raster pass: vertex and pixel shaders, the
// input layout for the pass's vertex attributes, and the blend state.
static bool pass_create_raster(pl_gpu gpu, struct pl_pass *pass,
                               const struct pl_pass_params *params)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    struct d3d11_ctx *ctx = p->ctx;
    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
    ID3DBlob *vs_blob = NULL;
    ID3DBlob *ps_blob = NULL;
    D3D11_INPUT_ELEMENT_DESC *in_descs = NULL;
    bool success = false;

    if (!shader_compile_hlsl(gpu, pass, &pass_p->vertex, GLSL_SHADER_VERTEX,
                             params->vertex_shader, &vs_blob))
        goto error;

    D3D(ID3D11Device_CreateVertexShader(p->dev,
        ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
        NULL, &pass_p->vs));

    if (!shader_compile_hlsl(gpu, pass, &pass_p->main, GLSL_SHADER_FRAGMENT,
                             params->glsl_shader, &ps_blob))
        goto error;

    D3D(ID3D11Device_CreatePixelShader(p->dev,
        ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob),
        NULL, &pass_p->ps));

    // Build one input element per vertex attribute
    in_descs = pl_calloc_ptr(pass, params->num_vertex_attribs, in_descs);
    for (int i = 0; i < params->num_vertex_attribs; i++) {
        struct pl_vertex_attrib *va = &params->vertex_attribs[i];

        in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) {
            // The semantic name doesn't mean much and is just used to verify
            // the input description matches the shader. SPIRV-Cross always
            // uses TEXCOORD, so we should too.
            .SemanticName = "TEXCOORD",
            .SemanticIndex = va->location,
            .AlignedByteOffset = va->offset,
            .Format = fmt_to_dxgi(va->fmt),
        };
    }
    // The input layout is validated against the vertex shader's bytecode
    D3D(ID3D11Device_CreateInputLayout(p->dev, in_descs,
        params->num_vertex_attribs, ID3D10Blob_GetBufferPointer(vs_blob),
        ID3D10Blob_GetBufferSize(vs_blob), &pass_p->layout));

    // Map libplacebo blend factors to their D3D11 equivalents
    static const D3D11_BLEND blend_options[] = {
        [PL_BLEND_ZERO] = D3D11_BLEND_ZERO,
        [PL_BLEND_ONE] = D3D11_BLEND_ONE,
        [PL_BLEND_SRC_ALPHA] = D3D11_BLEND_SRC_ALPHA,
        [PL_BLEND_ONE_MINUS_SRC_ALPHA] = D3D11_BLEND_INV_SRC_ALPHA,
    };

    // Default: blending disabled, but all color channels writable
    D3D11_BLEND_DESC bdesc = {
        .RenderTarget[0] = {
            .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
        },
    };
    if (params->blend_params) {
        bdesc.RenderTarget[0] = (D3D11_RENDER_TARGET_BLEND_DESC) {
            .BlendEnable = TRUE,
            .SrcBlend = blend_options[params->blend_params->src_rgb],
            .DestBlend = blend_options[params->blend_params->dst_rgb],
            .BlendOp = D3D11_BLEND_OP_ADD,
            .SrcBlendAlpha = blend_options[params->blend_params->src_alpha],
            .DestBlendAlpha = blend_options[params->blend_params->dst_alpha],
            .BlendOpAlpha = D3D11_BLEND_OP_ADD,
            .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
        };
    }
    D3D(ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate));

    success = true;
error:
    SAFE_RELEASE(vs_blob);
    SAFE_RELEASE(ps_blob);
    pl_free(in_descs);
    return success;
}
550
// Create the D3D11 objects for a compute pass: the compute shader, plus a
// small constant buffer for emulating gl_NumWorkGroups if the shader uses it.
static bool pass_create_compute(pl_gpu gpu, struct pl_pass *pass,
                                const struct pl_pass_params *params)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    struct d3d11_ctx *ctx = p->ctx;
    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
    ID3DBlob *cs_blob = NULL;
    bool success = false;

    if (!shader_compile_hlsl(gpu, pass, &pass_p->main, GLSL_SHADER_COMPUTE,
                             params->glsl_shader, &cs_blob))
        goto error;

    D3D(ID3D11Device_CreateComputeShader(p->dev,
        ID3D10Blob_GetBufferPointer(cs_blob), ID3D10Blob_GetBufferSize(cs_blob),
        NULL, &pass_p->cs));

    // `num_workgroups_used` is set by shader_compile_hlsl when the shader
    // references gl_NumWorkGroups, which D3D11 has no builtin for
    if (pass_p->num_workgroups_used) {
        D3D11_BUFFER_DESC bdesc = {
            .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
            .ByteWidth = sizeof(pass_p->last_num_wgs),
        };
        D3D(ID3D11Device_CreateBuffer(p->dev, &bdesc, NULL,
                                      &pass_p->num_workgroups_buf));
    }

    success = true;
error:
    SAFE_RELEASE(cs_blob);
    return success;
}
582
// Create a render or compute pass: compile GLSL to SPIR-V, allocate HLSL
// register slots (CBV/SRV/sampler/UAV) for each descriptor per shader stage,
// then build the stage-specific D3D11 objects. Returns NULL on failure.
const struct pl_pass *pl_d3d11_pass_create(pl_gpu gpu,
                                           const struct pl_pass_params *params)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    struct d3d11_ctx *ctx = p->ctx;

    struct pl_pass *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_d3d11);
    pass->params = pl_pass_params_copy(pass, params);

    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);

    // Initialize all register slots to -1 ("not allocated")
    pass_p->descriptors = pl_calloc_ptr(pass, params->num_descriptors,
                                        pass_p->descriptors);
    for (int i = 0; i < params->num_descriptors; i++) {
        struct pl_desc_d3d11 *desc_p = &pass_p->descriptors[i];
        *desc_p = (struct pl_desc_d3d11) {
            .main = {
                .cbv_slot = -1,
                .srv_slot = -1,
                .sampler_slot = -1,
                .uav_slot = -1,
            },
            .vertex = {
                .cbv_slot = -1,
                .srv_slot = -1,
                .sampler_slot = -1,
            },
        };
    }

    // Compile GLSL to SPIR-V. This also sets `desc_stage.used` based on which
    // resources are statically used in the shader for each pass.
    if (params->type == PL_PASS_RASTER) {
        if (!shader_compile_glsl(gpu, pass, &pass_p->vertex, GLSL_SHADER_VERTEX,
                                 params->vertex_shader))
            goto error;
        if (!shader_compile_glsl(gpu, pass, &pass_p->main, GLSL_SHADER_FRAGMENT,
                                 params->glsl_shader))
            goto error;
    } else {
        if (!shader_compile_glsl(gpu, pass, &pass_p->main, GLSL_SHADER_COMPUTE,
                                 params->glsl_shader))
            goto error;
    }

    // In a raster pass, one of the UAV slots is used by the runtime for the RTV
    int uav_offset = params->type == PL_PASS_COMPUTE ? 0 : 1;
    int max_uavs = p->max_uavs - uav_offset;

    for (int desc_idx = 0; desc_idx < params->num_descriptors; desc_idx++) {
        struct pl_desc *desc = &params->descriptors[desc_idx];
        struct pl_desc_d3d11 *desc_p = &pass_p->descriptors[desc_idx];

        // Determine which kinds of HLSL registers this descriptor type needs
        bool has_cbv = false, has_srv = false, has_sampler = false, has_uav = false;

        switch (desc->type) {
        case PL_DESC_SAMPLED_TEX:
            has_sampler = true;
            has_srv = true;
            break;
        case PL_DESC_BUF_STORAGE:
        case PL_DESC_STORAGE_IMG:
        case PL_DESC_BUF_TEXEL_STORAGE:
            // Read-only storage resources are bound as SRVs for wider format
            // support (see shader_compile_glsl)
            if (desc->access == PL_DESC_ACCESS_READONLY) {
                has_srv = true;
            } else {
                has_uav = true;
            }
            break;
        case PL_DESC_BUF_UNIFORM:
            has_cbv = true;
            break;
        case PL_DESC_BUF_TEXEL_UNIFORM:
            has_srv = true;
            break;
        case PL_DESC_INVALID:
        case PL_DESC_TYPE_COUNT:
            pl_unreachable();
        }

        // Allocate HLSL register numbers for each shader stage
        struct d3d_pass_stage *stages[] = { &pass_p->main, &pass_p->vertex };
        for (int j = 0; j < PL_ARRAY_SIZE(stages); j++) {
            struct d3d_pass_stage *pass_s = stages[j];
            struct d3d_desc_stage *desc_s =
                pass_s == &pass_p->vertex ? &desc_p->vertex : &desc_p->main;
            if (!desc_s->used)
                continue;

            if (has_cbv) {
                desc_s->cbv_slot = pass_s->cbvs.num;
                PL_ARRAY_APPEND(pass, pass_s->cbvs, desc_idx);
                if (pass_s->cbvs.num > D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) {
                    PL_ERR(gpu, "Too many constant buffers in shader");
                    goto error;
                }
            }

            if (has_srv) {
                desc_s->srv_slot = pass_s->srvs.num;
                PL_ARRAY_APPEND(pass, pass_s->srvs, desc_idx);
                if (pass_s->srvs.num > p->max_srvs) {
                    PL_ERR(gpu, "Too many SRVs in shader");
                    goto error;
                }
            }

            if (has_sampler) {
                desc_s->sampler_slot = pass_s->samplers.num;
                PL_ARRAY_APPEND(pass, pass_s->samplers, desc_idx);
                // Fixed: this overflow check previously tested `srvs.num`
                // instead of `samplers.num`
                if (pass_s->samplers.num > D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) {
                    PL_ERR(gpu, "Too many samplers in shader");
                    goto error;
                }
            }
        }

        // UAV bindings are shared between all shader stages
        if (has_uav && (desc_p->main.used || desc_p->vertex.used)) {
            desc_p->main.uav_slot = pass_p->uavs.num + uav_offset;
            PL_ARRAY_APPEND(pass, pass_p->uavs, desc_idx);
            if (pass_p->uavs.num > max_uavs) {
                PL_ERR(gpu, "Too many UAVs in shader");
                goto error;
            }
        }
    }

    if (params->type == PL_PASS_COMPUTE) {
        if (!pass_create_compute(gpu, pass, params))
            goto error;
    } else {
        if (!pass_create_raster(gpu, pass, params))
            goto error;
    }

    // Pre-allocate resource arrays to use in pl_pass_run
    pass_p->cbv_arr = pl_calloc(pass,
        PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num),
        sizeof(*pass_p->cbv_arr));
    pass_p->srv_arr = pl_calloc(pass,
        PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num),
        sizeof(*pass_p->srv_arr));
    pass_p->sampler_arr = pl_calloc(pass,
        PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num),
        sizeof(*pass_p->sampler_arr));
    pass_p->uav_arr = pl_calloc(pass, pass_p->uavs.num, sizeof(*pass_p->uav_arr));

    pl_d3d11_flush_message_queue(ctx, "After pass create");

    return pass;

error:
    pl_d3d11_pass_destroy(gpu, pass);
    return NULL;
}
739
// Shared logic between VS, PS and CS for filling the resource arrays that are
// passed to ID3D11DeviceContext methods
static void fill_resources(pl_gpu gpu, pl_pass pass,
                           struct d3d_pass_stage *pass_s,
                           const struct pl_pass_run_params *params,
                           ID3D11Buffer **cbvs, ID3D11ShaderResourceView **srvs,
                           ID3D11SamplerState **samplers)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);

    // Constant buffers: either a descriptor's buffer, the gl_NumWorkGroups
    // emulation buffer, or NULL for unused slots
    for (int i = 0; i < pass_s->cbvs.num; i++) {
        int binding = pass_s->cbvs.elem[i];
        if (binding == HLSL_BINDING_NOT_USED) {
            cbvs[i] = NULL;
            continue;
        } else if (binding == HLSL_BINDING_NUM_WORKGROUPS) {
            cbvs[i] = pass_p->num_workgroups_buf;
            continue;
        }

        pl_buf buf = params->desc_bindings[binding].object;
        pl_d3d11_buf_resolve(gpu, buf);
        struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
        cbvs[i] = buf_p->buf;
    }

    // Shader resource views: pick the right SRV depending on whether the
    // descriptor is a texture, a raw buffer or a texel buffer
    for (int i = 0; i < pass_s->srvs.num; i++) {
        int binding = pass_s->srvs.elem[i];
        if (binding == HLSL_BINDING_NOT_USED) {
            srvs[i] = NULL;
            continue;
        }

        pl_tex tex;
        struct pl_tex_d3d11 *tex_p;
        pl_buf buf;
        struct pl_buf_d3d11 *buf_p;
        switch (pass->params.descriptors[binding].type) {
        case PL_DESC_SAMPLED_TEX:
        case PL_DESC_STORAGE_IMG:
            tex = params->desc_bindings[binding].object;
            tex_p = PL_PRIV(tex);
            srvs[i] = tex_p->srv;
            break;
        case PL_DESC_BUF_STORAGE:
            buf = params->desc_bindings[binding].object;
            buf_p = PL_PRIV(buf);
            srvs[i] = buf_p->raw_srv;
            break;
        case PL_DESC_BUF_TEXEL_UNIFORM:
        case PL_DESC_BUF_TEXEL_STORAGE:
            buf = params->desc_bindings[binding].object;
            buf_p = PL_PRIV(buf);
            srvs[i] = buf_p->texel_srv;
            break;
        default:
            break;
        }
    }

    // Samplers: looked up from the shared sampler table by the binding's
    // sample/address mode
    for (int i = 0; i < pass_s->samplers.num; i++) {
        int binding = pass_s->samplers.elem[i];
        if (binding == HLSL_BINDING_NOT_USED) {
            samplers[i] = NULL;
            continue;
        }

        struct pl_desc_binding *db = &params->desc_bindings[binding];
        samplers[i] = p->samplers[db->sample_mode][db->address_mode];
    }
}
812
// Fill the UAV array that is passed to ID3D11DeviceContext methods, picking
// the appropriate UAV for each bound storage descriptor
static void fill_uavs(pl_pass pass, const struct pl_pass_run_params *params,
                      ID3D11UnorderedAccessView **uavs)
{
    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);

    for (int idx = 0; idx < pass_p->uavs.num; idx++) {
        int binding = pass_p->uavs.elem[idx];
        if (binding == HLSL_BINDING_NOT_USED) {
            uavs[idx] = NULL;
            continue;
        }

        switch (pass->params.descriptors[binding].type) {
        case PL_DESC_BUF_STORAGE: {
            pl_buf buf = params->desc_bindings[binding].object;
            struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
            uavs[idx] = buf_p->raw_uav;
            break;
        }
        case PL_DESC_STORAGE_IMG: {
            pl_tex tex = params->desc_bindings[binding].object;
            struct pl_tex_d3d11 *tex_p = PL_PRIV(tex);
            uavs[idx] = tex_p->uav;
            break;
        }
        case PL_DESC_BUF_TEXEL_STORAGE: {
            pl_buf buf = params->desc_bindings[binding].object;
            struct pl_buf_d3d11 *buf_p = PL_PRIV(buf);
            uavs[idx] = buf_p->texel_uav;
            break;
        }
        default:
            break;
        }
    }
}
850
pass_run_raster(pl_gpu gpu,const struct pl_pass_run_params * params)851 static void pass_run_raster(pl_gpu gpu, const struct pl_pass_run_params *params)
852 {
853 struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
854 pl_pass pass = params->pass;
855 struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);
856
857 if (p->fl <= D3D_FEATURE_LEVEL_9_3 && params->index_buf) {
858 // Index buffers are unsupported because we can't tell if they are an
859 // index buffer or a vertex buffer on creation, and FL9_x allows only
860 // one binding type per-buffer
861 PL_ERR(gpu, "Index buffers are unsupported in FL9_x");
862 return;
863 }
864
865 // Figure out how much vertex data to upload, if any
866 size_t vertex_alloc = 0;
867 if (params->vertex_data) {
868 int num_verticies = 0;
869 if (params->index_data) {
870 // Indexed draw, so we need to store all indexed vertices
871 for (int i = 0; i < params->vertex_count; i++)
872 num_verticies = PL_MAX(num_verticies, params->index_data[i] + 1);
873 } else {
874 num_verticies = params->vertex_count;
875 }
876 vertex_alloc = num_verticies * pass->params.vertex_stride;
877 }
878
879 // Figure out how much index data to upload, if any
880 size_t index_alloc = 0;
881 if (params->index_data)
882 index_alloc = params->vertex_count * sizeof(uint16_t);
883
884 // Upload vertex data. On >=FL10_0 we use the same buffer for index data, so
885 // upload that too.
886 bool share_vertex_index_buf = p->fl > D3D_FEATURE_LEVEL_9_3;
887 if (vertex_alloc || (share_vertex_index_buf && index_alloc)) {
888 struct stream_buf_slice slices[] = {
889 { .data = params->vertex_data, .size = vertex_alloc },
890 { .data = params->index_data, .size = index_alloc },
891 };
892
893 if (!stream_buf_upload(gpu, &p->vbuf, slices,
894 share_vertex_index_buf ? 2 : 1)) {
895 PL_ERR(gpu, "Failed to upload vertex data");
896 return;
897 }
898
899 if (vertex_alloc) {
900 ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &p->vbuf.buf,
901 &(UINT) { pass->params.vertex_stride }, &slices[0].offset);
902 }
903 if (share_vertex_index_buf && index_alloc) {
904 ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->vbuf.buf,
905 DXGI_FORMAT_R16_UINT, slices[1].offset);
906 }
907 }
908
909 // Upload index data for <=FL9_3, which must be in its own buffer
910 if (!share_vertex_index_buf && index_alloc) {
911 struct stream_buf_slice slices[] = {
912 { .data = params->index_data, .size = index_alloc },
913 };
914
915 if (!stream_buf_upload(gpu, &p->ibuf, slices, PL_ARRAY_SIZE(slices))) {
916 PL_ERR(gpu, "Failed to upload index data");
917 return;
918 }
919
920 ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->ibuf.buf,
921 DXGI_FORMAT_R16_UINT, slices[0].offset);
922 }
923
924 if (params->vertex_buf) {
925 struct pl_buf_d3d11 *buf_p = PL_PRIV(params->vertex_buf);
926 ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &buf_p->buf,
927 &(UINT) { pass->params.vertex_stride },
928 &(UINT) { params->buf_offset });
929 }
930
931 if (params->index_buf) {
932 struct pl_buf_d3d11 *buf_p = PL_PRIV(params->index_buf);
933 ID3D11DeviceContext_IASetIndexBuffer(p->imm, buf_p->buf,
934 DXGI_FORMAT_R16_UINT, params->index_offset);
935 }
936
937 ID3D11DeviceContext_IASetInputLayout(p->imm, pass_p->layout);
938
939 static const D3D_PRIMITIVE_TOPOLOGY prim_topology[] = {
940 [PL_PRIM_TRIANGLE_LIST] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
941 [PL_PRIM_TRIANGLE_STRIP] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,
942 };
943 ID3D11DeviceContext_IASetPrimitiveTopology(p->imm,
944 prim_topology[pass->params.vertex_type]);
945
946 ID3D11DeviceContext_VSSetShader(p->imm, pass_p->vs, NULL, 0);
947
948 ID3D11Buffer **cbvs = pass_p->cbv_arr;
949 ID3D11ShaderResourceView **srvs = pass_p->srv_arr;
950 ID3D11SamplerState **samplers = pass_p->sampler_arr;
951 ID3D11UnorderedAccessView **uavs = pass_p->uav_arr;
952
953 // Set vertex shader resources. The device context is called conditionally
954 // because the debug layer complains if these are called with 0 resources.
955 fill_resources(gpu, pass, &pass_p->vertex, params, cbvs, srvs, samplers);
956 if (pass_p->vertex.cbvs.num)
957 ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs);
958 if (pass_p->vertex.srvs.num)
959 ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs);
960 if (pass_p->vertex.samplers.num)
961 ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers);
962
963 ID3D11DeviceContext_RSSetState(p->imm, p->rstate);
964 ID3D11DeviceContext_RSSetViewports(p->imm, 1, (&(D3D11_VIEWPORT) {
965 .TopLeftX = params->viewport.x0,
966 .TopLeftY = params->viewport.y0,
967 .Width = pl_rect_w(params->viewport),
968 .Height = pl_rect_h(params->viewport),
969 .MinDepth = 0,
970 .MaxDepth = 1,
971 }));
972 ID3D11DeviceContext_RSSetScissorRects(p->imm, 1, (&(D3D11_RECT) {
973 .left = params->scissors.x0,
974 .top = params->scissors.y0,
975 .right = params->scissors.x1,
976 .bottom = params->scissors.y1,
977 }));
978
979 ID3D11DeviceContext_PSSetShader(p->imm, pass_p->ps, NULL, 0);
980
981 // Set pixel shader resources
982 fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers);
983 if (pass_p->main.cbvs.num)
984 ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
985 if (pass_p->main.srvs.num)
986 ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
987 if (pass_p->main.samplers.num)
988 ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
989
990 ID3D11DeviceContext_OMSetBlendState(p->imm, pass_p->bstate, NULL,
991 D3D11_DEFAULT_SAMPLE_MASK);
992 ID3D11DeviceContext_OMSetDepthStencilState(p->imm, p->dsstate, 0);
993
994 fill_uavs(pass, params, uavs);
995
996 struct pl_tex_d3d11 *target_p = PL_PRIV(params->target);
997 ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(
998 p->imm, 1, &target_p->rtv, NULL, 1, pass_p->uavs.num, uavs, NULL);
999
1000 if (params->index_data || params->index_buf) {
1001 ID3D11DeviceContext_DrawIndexed(p->imm, params->vertex_count, 0, 0);
1002 } else {
1003 ID3D11DeviceContext_Draw(p->imm, params->vertex_count, 0);
1004 }
1005
1006 // Unbind everything. It's easier to do this than to actually track state,
1007 // and if we leave the RTV bound, it could trip up D3D's conflict checker.
1008 // Also, apparently unbinding SRVs can prevent a 10level9 bug?
1009 // https://docs.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-downlevel-prevent-null-srvs
1010 for (int i = 0; i < PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num); i++)
1011 cbvs[i] = NULL;
1012 for (int i = 0; i < PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num); i++)
1013 srvs[i] = NULL;
1014 for (int i = 0; i < PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num); i++)
1015 samplers[i] = NULL;
1016 for (int i = 0; i < pass_p->uavs.num; i++)
1017 uavs[i] = NULL;
1018 if (pass_p->vertex.cbvs.num)
1019 ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs);
1020 if (pass_p->vertex.srvs.num)
1021 ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs);
1022 if (pass_p->vertex.samplers.num)
1023 ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers);
1024 if (pass_p->main.cbvs.num)
1025 ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
1026 if (pass_p->main.srvs.num)
1027 ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
1028 if (pass_p->main.samplers.num)
1029 ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
1030 ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(
1031 p->imm, 0, NULL, NULL, 1, pass_p->uavs.num, uavs, NULL);
1032 }
1033
// Execute a compute pass on the immediate context: update the
// gl_NumWorkGroups emulation buffer if needed, bind the compute shader and
// its resources, dispatch, then unbind everything again.
static void pass_run_compute(pl_gpu gpu, const struct pl_pass_run_params *params)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    pl_pass pass = params->pass;
    struct pl_pass_d3d11 *pass_p = PL_PRIV(pass);

    // Update gl_NumWorkGroups emulation buffer if necessary. The constant
    // buffer is only rewritten when the dispatch dimensions actually changed
    // since the last run, to avoid a redundant UpdateSubresource per pass.
    if (pass_p->num_workgroups_used) {
        bool needs_update = false;
        for (int i = 0; i < 3; i++) {
            if (pass_p->last_num_wgs.num_wgs[i] != params->compute_groups[i])
                needs_update = true;
            pass_p->last_num_wgs.num_wgs[i] = params->compute_groups[i];
        }

        if (needs_update) {
            ID3D11DeviceContext_UpdateSubresource(p->imm,
                (ID3D11Resource *) pass_p->num_workgroups_buf, 0, NULL,
                &pass_p->last_num_wgs, 0, 0);
        }
    }

    ID3D11DeviceContext_CSSetShader(p->imm, pass_p->cs, NULL, 0);

    // Pre-allocated scratch arrays for the resource bindings of this pass
    ID3D11Buffer **cbvs = pass_p->cbv_arr;
    ID3D11ShaderResourceView **srvs = pass_p->srv_arr;
    ID3D11UnorderedAccessView **uavs = pass_p->uav_arr;
    ID3D11SamplerState **samplers = pass_p->sampler_arr;

    fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers);
    fill_uavs(pass, params, uavs);

    // Each CSSet* call is guarded by its resource count, since the debug
    // layer complains if these are called with 0 resources (see the matching
    // note in the raster path)
    if (pass_p->main.cbvs.num)
        ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
    if (pass_p->main.srvs.num)
        ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
    if (pass_p->main.samplers.num)
        ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
    if (pass_p->uavs.num)
        ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL);

    ID3D11DeviceContext_Dispatch(p->imm, params->compute_groups[0],
                                 params->compute_groups[1],
                                 params->compute_groups[2]);

    // Unbind everything: NULL out the scratch arrays and re-issue the same
    // CSSet* calls, so no stale SRV/UAV bindings linger to conflict with
    // later passes that use the same resources
    for (int i = 0; i < pass_p->main.cbvs.num; i++)
        cbvs[i] = NULL;
    for (int i = 0; i < pass_p->main.srvs.num; i++)
        srvs[i] = NULL;
    for (int i = 0; i < pass_p->main.samplers.num; i++)
        samplers[i] = NULL;
    for (int i = 0; i < pass_p->uavs.num; i++)
        uavs[i] = NULL;
    if (pass_p->main.cbvs.num)
        ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs);
    if (pass_p->main.srvs.num)
        ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs);
    if (pass_p->main.samplers.num)
        ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers);
    if (pass_p->uavs.num)
        ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL);
}
1097
// Public entry point for running a pass. Wraps the type-specific
// implementation in timer start/end calls and flushes the D3D11 message
// queue afterwards so any validation output gets logged.
void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params)
{
    struct pl_gpu_d3d11 *p = PL_PRIV(gpu);
    struct d3d11_ctx *ctx = p->ctx;

    pl_d3d11_timer_start(gpu, params->timer);

    // Dispatch to the compute or raster path based on the pass type
    if (params->pass->params.type == PL_PASS_COMPUTE)
        pass_run_compute(gpu, params);
    else
        pass_run_raster(gpu, params);

    pl_d3d11_timer_end(gpu, params->timer);
    pl_d3d11_flush_message_queue(ctx, "After pass run");
}
1115