1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "dzn_private.h"
25
26 #include "spirv_to_dxil.h"
27
28 #include "vk_alloc.h"
29 #include "vk_util.h"
30 #include "vk_format.h"
31
32 #include <directx/d3d12.h>
33 #include <dxguids/dxguids.h>
34
35 #include <dxcapi.h>
36 #include <wrl/client.h>
37
38 #include "util/u_debug.h"
39
40 using Microsoft::WRL::ComPtr;
41
42 static dxil_spirv_shader_stage
to_dxil_shader_stage(VkShaderStageFlagBits in)43 to_dxil_shader_stage(VkShaderStageFlagBits in)
44 {
45 switch (in) {
46 case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
47 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
48 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
49 case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
50 case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
51 case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
52 default: unreachable("Unsupported stage");
53 }
54 }
55
/* Compile one Vulkan pipeline shader stage down to validated DXIL and store
 * the resulting bytecode in @slot.
 *
 * The stage's SPIR-V module is lowered with spirv_to_dxil(), using @layout's
 * binding translation tables for descriptor remapping and the yz-flip
 * parameters to patch the position output, then the DXIL container is run
 * through DXC's validator (which also signs it in place, via
 * DxcValidatorFlags_InPlaceEdit).
 *
 * Returns VK_SUCCESS and fills @slot on success. Every failure — allocation,
 * spirv_to_dxil() failure, or DXIL validation failure — is currently
 * reported as VK_ERROR_OUT_OF_HOST_MEMORY.
 */
static VkResult
dzn_pipeline_compile_shader(dzn_device *device,
                            const VkAllocationCallbacks *alloc,
                            dzn_pipeline_layout *layout,
                            const VkPipelineShaderStageCreateInfo *stage_info,
                            enum dxil_spirv_yz_flip_mode yz_flip_mode,
                            uint16_t y_flip_mask, uint16_t z_flip_mask,
                            D3D12_SHADER_BYTECODE *slot)
{
   dzn_instance *instance =
      container_of(device->vk.physical->instance, dzn_instance, vk);
   IDxcValidator *validator = instance->dxc.validator;
   IDxcLibrary *library = instance->dxc.library;
   IDxcCompiler *compiler = instance->dxc.compiler;
   const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct dxil_spirv_object dxil_object;

   /* convert VkSpecializationInfo into the dxil_spirv_specialization array
    * spirv_to_dxil() consumes. */
   struct dxil_spirv_specialization *spec = NULL;
   uint32_t num_spec = 0;

   if (spec_info && spec_info->mapEntryCount) {
      spec = (struct dxil_spirv_specialization *)
         vk_alloc2(&device->vk.alloc, alloc,
                   spec_info->mapEntryCount * sizeof(*spec), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!spec)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
         const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
         const uint8_t *data = (const uint8_t *)spec_info->pData + entry->offset;
         /* Entries must stay inside the app-provided data blob. */
         assert(data + entry->size <= (const uint8_t *)spec_info->pData + spec_info->dataSize);
         spec[i].id = entry->constantID;
         /* The union member is picked from the entry size; the spec only
          * allows 1/2/4/8-byte specialization constants. */
         switch (entry->size) {
         case 8:
            spec[i].value.u64 = *(const uint64_t *)data;
            break;
         case 4:
            spec[i].value.u32 = *(const uint32_t *)data;
            break;
         case 2:
            spec[i].value.u16 = *(const uint16_t *)data;
            break;
         case 1:
            spec[i].value.u8 = *(const uint8_t *)data;
            break;
         default:
            assert(!"Invalid spec constant size");
            break;
         }

         spec[i].defined_on_module = false;
      }

      num_spec = spec_info->mapEntryCount;
   }

   /* Runtime configuration for the SPIR-V->DXIL lowering: where the sysval
    * and push-constant CBVs live, and how descriptor sets are remapped. */
   struct dxil_spirv_runtime_conf conf = {
      .runtime_data_cbv = {
         .register_space = DZN_REGISTER_SPACE_SYSVALS,
         .base_shader_register = 0,
      },
      .push_constant_cbv = {
         .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
         .base_shader_register = 0,
      },
      .descriptor_set_count = layout->set_count,
      .descriptor_sets = layout->binding_translation,
      .zero_based_vertex_instance_id = false,
      .yz_flip = {
         .mode = yz_flip_mode,
         .y_mask = y_flip_mask,
         .z_mask = z_flip_mask,
      },
      .read_only_images_as_srvs = true,
   };

   struct dxil_spirv_debug_options dbg_opts = {
      .dump_nir = !!(instance->debug_flags & DZN_DEBUG_NIR),
   };

   /* TODO: Extend spirv_to_dxil() to allow passing a custom allocator */
   bool success =
      spirv_to_dxil((uint32_t *)module->data, module->size / sizeof(uint32_t),
                    spec, num_spec,
                    to_dxil_shader_stage(stage_info->stage),
                    stage_info->pName, &dbg_opts, &conf, &dxil_object);

   /* The spec-constant array is only needed during lowering. */
   vk_free2(&device->vk.alloc, alloc, spec);

   if (!success)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Validate (and sign, in place) the DXIL container. */
   dzn_shader_blob blob(dxil_object.binary.buffer, dxil_object.binary.size);
   ComPtr<IDxcOperationResult> result;
   validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit, &result);

   if (instance->debug_flags & DZN_DEBUG_DXIL) {
      IDxcBlobEncoding *disassembly;
      compiler->Disassemble(&blob, &disassembly);
      ComPtr<IDxcBlobEncoding> blobUtf8;
      library->GetBlobAsUtf8(disassembly, blobUtf8.GetAddressOf());
      char *disasm = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
      /* NOTE(review): overwrites the blob's last byte to guarantee NUL
       * termination — assumes that byte is a trailing newline/terminator;
       * confirm against DXC's UTF-8 blob contract. */
      disasm[blobUtf8->GetBufferSize() - 1] = 0;
      fprintf(stderr, "== BEGIN SHADER ============================================\n"
              "%s\n"
              "== END SHADER ==============================================\n",
              disasm);
      disassembly->Release();
   }

   HRESULT validationStatus;
   result->GetStatus(&validationStatus);
   if (FAILED(validationStatus)) {
      if (instance->debug_flags & DZN_DEBUG_DXIL) {
         ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
         result->GetErrorBuffer(&printBlob);
         library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());

         char *errorString;
         if (printBlobUtf8) {
            errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());

            /* Same in-place NUL termination trick as the disassembly dump. */
            errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
            fprintf(stderr,
                    "== VALIDATION ERROR =============================================\n"
                    "%s\n"
                    "== END ==========================================================\n",
                    errorString);
         }
      }

      /* NOTE(review): dxil_object.binary.buffer does not appear to be
       * released on this path — confirm whether spirv_to_dxil() output
       * needs an explicit free here. */
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* Hand the DXIL buffer to the caller; it lives as long as the PSO
    * description referencing it. */
   slot->pShaderBytecode = dxil_object.binary.buffer;
   slot->BytecodeLength = dxil_object.binary.size;
   return VK_SUCCESS;
}
197
198 static D3D12_SHADER_BYTECODE *
dzn_pipeline_get_gfx_shader_slot(D3D12_GRAPHICS_PIPELINE_STATE_DESC * desc,VkShaderStageFlagBits in)199 dzn_pipeline_get_gfx_shader_slot(D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc,
200 VkShaderStageFlagBits in)
201 {
202 switch (in) {
203 case VK_SHADER_STAGE_VERTEX_BIT: return &desc->VS;
204 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return &desc->DS;
205 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return &desc->HS;
206 case VK_SHADER_STAGE_GEOMETRY_BIT: return &desc->GS;
207 case VK_SHADER_STAGE_FRAGMENT_BIT: return &desc->PS;
208 default: unreachable("Unsupported stage");
209 }
210 }
211
212 static VkResult
dzn_graphics_pipeline_translate_vi(dzn_graphics_pipeline * pipeline,const VkAllocationCallbacks * alloc,D3D12_GRAPHICS_PIPELINE_STATE_DESC * out,const VkGraphicsPipelineCreateInfo * in,D3D12_INPUT_ELEMENT_DESC ** input_elems)213 dzn_graphics_pipeline_translate_vi(dzn_graphics_pipeline *pipeline,
214 const VkAllocationCallbacks *alloc,
215 D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
216 const VkGraphicsPipelineCreateInfo *in,
217 D3D12_INPUT_ELEMENT_DESC **input_elems)
218 {
219 dzn_device *device =
220 container_of(pipeline->base.base.device, dzn_device, vk);
221 const VkPipelineVertexInputStateCreateInfo *in_vi =
222 in->pVertexInputState;
223
224 if (!in_vi->vertexAttributeDescriptionCount) {
225 out->InputLayout.pInputElementDescs = NULL;
226 out->InputLayout.NumElements = 0;
227 *input_elems = NULL;
228 return VK_SUCCESS;
229 }
230
231 *input_elems = (D3D12_INPUT_ELEMENT_DESC *)
232 vk_alloc2(&device->vk.alloc, alloc,
233 sizeof(**input_elems) * in_vi->vertexAttributeDescriptionCount, 8,
234 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
235 if (!*input_elems)
236 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
237
238 D3D12_INPUT_ELEMENT_DESC *inputs = *input_elems;
239 D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];
240
241 pipeline->vb.count = 0;
242 for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
243 const struct VkVertexInputBindingDescription *bdesc =
244 &in_vi->pVertexBindingDescriptions[i];
245
246 pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
247 pipeline->vb.strides[bdesc->binding] = bdesc->stride;
248 if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
249 slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
250 } else {
251 assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
252 slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
253 }
254 }
255
256 for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
257 const VkVertexInputAttributeDescription *attr =
258 &in_vi->pVertexAttributeDescriptions[i];
259
260 /* nir_to_dxil() name all vertex inputs as TEXCOORDx */
261 inputs[i].SemanticName = "TEXCOORD";
262 inputs[i].SemanticIndex = attr->location;
263 inputs[i].Format = dzn_buffer_get_dxgi_format(attr->format);
264 inputs[i].InputSlot = attr->binding;
265 inputs[i].InputSlotClass = slot_class[attr->binding];
266 inputs[i].InstanceDataStepRate =
267 inputs[i].InputSlotClass == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0;
268 inputs[i].AlignedByteOffset = attr->offset;
269 }
270
271 out->InputLayout.pInputElementDescs = inputs;
272 out->InputLayout.NumElements = in_vi->vertexAttributeDescriptionCount;
273 return VK_SUCCESS;
274 }
275
276 static D3D12_PRIMITIVE_TOPOLOGY_TYPE
to_prim_topology_type(VkPrimitiveTopology in)277 to_prim_topology_type(VkPrimitiveTopology in)
278 {
279 switch (in) {
280 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
281 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
282 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
283 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
284 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
285 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
286 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
287 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
288 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
289 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
290 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
291 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
292 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
293 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
294 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
295 default: unreachable("Invalid primitive topology");
296 }
297 }
298
299 static D3D12_PRIMITIVE_TOPOLOGY
to_prim_topology(VkPrimitiveTopology in,unsigned patch_control_points)300 to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points)
301 {
302 switch (in) {
303 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
304 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
305 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
306 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
307 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
308 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
309 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
310 /* Triangle fans are emulated using an intermediate index buffer. */
311 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
312 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
313 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
314 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
315 assert(patch_control_points);
316 return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
317 default: unreachable("Invalid primitive topology");
318 }
319 }
320
321 static void
dzn_graphics_pipeline_translate_ia(dzn_graphics_pipeline * pipeline,D3D12_GRAPHICS_PIPELINE_STATE_DESC * out,const VkGraphicsPipelineCreateInfo * in)322 dzn_graphics_pipeline_translate_ia(dzn_graphics_pipeline *pipeline,
323 D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
324 const VkGraphicsPipelineCreateInfo *in)
325 {
326 const VkPipelineInputAssemblyStateCreateInfo *in_ia =
327 in->pInputAssemblyState;
328 const VkPipelineTessellationStateCreateInfo *in_tes =
329 (out->DS.pShaderBytecode && out->HS.pShaderBytecode) ?
330 in->pTessellationState : NULL;
331
332 out->PrimitiveTopologyType = to_prim_topology_type(in_ia->topology);
333 pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
334 pipeline->ia.topology =
335 to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0);
336
337 /* FIXME: does that work for u16 index buffers? */
338 if (in_ia->primitiveRestartEnable)
339 out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
340 else
341 out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
342 }
343
344 static D3D12_FILL_MODE
translate_polygon_mode(VkPolygonMode in)345 translate_polygon_mode(VkPolygonMode in)
346 {
347 switch (in) {
348 case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
349 case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
350 default: unreachable("Unsupported polygon mode");
351 }
352 }
353
354 static D3D12_CULL_MODE
translate_cull_mode(VkCullModeFlags in)355 translate_cull_mode(VkCullModeFlags in)
356 {
357 switch (in) {
358 case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
359 case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
360 case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
361 /* Front+back face culling is equivalent to 'rasterization disabled' */
362 case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
363 default: unreachable("Unsupported cull mode");
364 }
365 }
366
367 static void
dzn_graphics_pipeline_translate_rast(dzn_graphics_pipeline * pipeline,D3D12_GRAPHICS_PIPELINE_STATE_DESC * out,const VkGraphicsPipelineCreateInfo * in)368 dzn_graphics_pipeline_translate_rast(dzn_graphics_pipeline *pipeline,
369 D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
370 const VkGraphicsPipelineCreateInfo *in)
371 {
372 const VkPipelineRasterizationStateCreateInfo *in_rast =
373 in->pRasterizationState;
374 const VkPipelineViewportStateCreateInfo *in_vp =
375 in->pViewportState;
376
377 if (in_vp) {
378 pipeline->vp.count = in_vp->viewportCount;
379 if (in_vp->pViewports) {
380 for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++)
381 dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
382 }
383
384 pipeline->scissor.count = in_vp->scissorCount;
385 if (in_vp->pScissors) {
386 for (uint32_t i = 0; i < in_vp->scissorCount; i++)
387 dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
388 }
389 }
390
391 out->RasterizerState.DepthClipEnable = !in_rast->depthClampEnable;
392 out->RasterizerState.FillMode = translate_polygon_mode(in_rast->polygonMode);
393 out->RasterizerState.CullMode = translate_cull_mode(in_rast->cullMode);
394 out->RasterizerState.FrontCounterClockwise =
395 in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
396 if (in_rast->depthBiasEnable) {
397 out->RasterizerState.DepthBias = in_rast->depthBiasConstantFactor;
398 out->RasterizerState.SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
399 out->RasterizerState.DepthBiasClamp = in_rast->depthBiasClamp;
400 }
401
402 assert(in_rast->lineWidth == 1.0f);
403 }
404
405 static void
dzn_graphics_pipeline_translate_ms(dzn_graphics_pipeline * pipeline,D3D12_GRAPHICS_PIPELINE_STATE_DESC * out,const VkGraphicsPipelineCreateInfo * in)406 dzn_graphics_pipeline_translate_ms(dzn_graphics_pipeline *pipeline,
407 D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
408 const VkGraphicsPipelineCreateInfo *in)
409 {
410 const VkPipelineMultisampleStateCreateInfo *in_ms =
411 in->pMultisampleState;
412
413 /* TODO: sampleShadingEnable, minSampleShading,
414 * alphaToOneEnable
415 */
416 out->SampleDesc.Count = in_ms ? in_ms->rasterizationSamples : 1;
417 out->SampleDesc.Quality = 0;
418 out->SampleMask = in_ms && in_ms->pSampleMask ?
419 *in_ms->pSampleMask :
420 (1 << out->SampleDesc.Count) - 1;
421 }
422
423 static D3D12_STENCIL_OP
translate_stencil_op(VkStencilOp in)424 translate_stencil_op(VkStencilOp in)
425 {
426 switch (in) {
427 case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
428 case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
429 case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
430 case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
431 case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
432 case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
433 case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
434 case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
435 default: unreachable("Invalid stencil op");
436 }
437 }
438
439 static void
translate_stencil_test(dzn_graphics_pipeline * pipeline,D3D12_GRAPHICS_PIPELINE_STATE_DESC * out,const VkGraphicsPipelineCreateInfo * in)440 translate_stencil_test(dzn_graphics_pipeline *pipeline,
441 D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
442 const VkGraphicsPipelineCreateInfo *in)
443 {
444 const VkPipelineDepthStencilStateCreateInfo *in_zsa =
445 in->pDepthStencilState;
446
447 bool front_test_uses_ref =
448 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
449 in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
450 in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
451 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
452 in_zsa->front.compareMask != 0);
453 bool back_test_uses_ref =
454 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
455 in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
456 in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
457 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
458 in_zsa->back.compareMask != 0);
459
460 if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
461 pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
462 else if (front_test_uses_ref)
463 pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
464 else
465 pipeline->zsa.stencil_test.front.compare_mask = 0;
466
467 if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
468 pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
469 else if (back_test_uses_ref)
470 pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
471 else
472 pipeline->zsa.stencil_test.back.compare_mask = 0;
473
474 bool diff_wr_mask =
475 in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
476 (pipeline->zsa.stencil_test.dynamic_write_mask ||
477 in_zsa->back.writeMask != in_zsa->front.writeMask);
478 bool diff_ref =
479 in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
480 (pipeline->zsa.stencil_test.dynamic_ref ||
481 in_zsa->back.reference != in_zsa->front.reference);
482 bool diff_cmp_mask =
483 back_test_uses_ref && front_test_uses_ref &&
484 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
485 pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask);
486
487 if (diff_cmp_mask || diff_wr_mask)
488 pipeline->zsa.stencil_test.independent_front_back = true;
489
490 bool back_wr_uses_ref =
491 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
492 (in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
493 in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
494 (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
495 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
496 in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
497 (in_zsa->depthTestEnable &&
498 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
499 in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE);
500 bool front_wr_uses_ref =
501 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
502 (in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
503 in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
504 (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
505 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
506 in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
507 (in_zsa->depthTestEnable &&
508 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
509 in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE);
510
511 pipeline->zsa.stencil_test.front.write_mask =
512 (pipeline->zsa.stencil_test.dynamic_write_mask ||
513 (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
514 0 : in_zsa->front.writeMask;
515 pipeline->zsa.stencil_test.back.write_mask =
516 (pipeline->zsa.stencil_test.dynamic_write_mask ||
517 (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
518 0 : in_zsa->back.writeMask;
519
520 pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
521 pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;
522
523 if (diff_ref &&
524 pipeline->zsa.stencil_test.front.uses_ref &&
525 pipeline->zsa.stencil_test.back.uses_ref)
526 pipeline->zsa.stencil_test.independent_front_back = true;
527
528 pipeline->zsa.stencil_test.front.ref =
529 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
530 pipeline->zsa.stencil_test.back.ref =
531 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;
532
533 /* FIXME: We don't support independent {compare,write}_mask and stencil
534 * reference. Until we have proper support for independent front/back
535 * stencil test, let's prioritize the front setup when both are active.
536 */
537 out->DepthStencilState.StencilReadMask =
538 front_test_uses_ref ?
539 pipeline->zsa.stencil_test.front.compare_mask :
540 back_test_uses_ref ?
541 pipeline->zsa.stencil_test.back.compare_mask : 0;
542 out->DepthStencilState.StencilWriteMask =
543 pipeline->zsa.stencil_test.front.write_mask ?
544 pipeline->zsa.stencil_test.front.write_mask :
545 pipeline->zsa.stencil_test.back.write_mask;
546
547 assert(!pipeline->zsa.stencil_test.independent_front_back);
548 }
549
550 static void
dzn_graphics_pipeline_translate_zsa(dzn_graphics_pipeline * pipeline,D3D12_GRAPHICS_PIPELINE_STATE_DESC * out,const VkGraphicsPipelineCreateInfo * in)551 dzn_graphics_pipeline_translate_zsa(dzn_graphics_pipeline *pipeline,
552 D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
553 const VkGraphicsPipelineCreateInfo *in)
554 {
555 const VkPipelineDepthStencilStateCreateInfo *in_zsa =
556 in->pDepthStencilState;
557
558 if (!in_zsa)
559 return;
560
561 /* TODO: depthBoundsTestEnable */
562
563 out->DepthStencilState.DepthEnable = in_zsa->depthTestEnable;
564 out->DepthStencilState.DepthWriteMask =
565 in_zsa->depthWriteEnable ?
566 D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
567 out->DepthStencilState.DepthFunc =
568 dzn_translate_compare_op(in_zsa->depthCompareOp);
569 out->DepthStencilState.StencilEnable = in_zsa->stencilTestEnable;
570 if (in_zsa->stencilTestEnable) {
571 out->DepthStencilState.FrontFace.StencilFailOp =
572 translate_stencil_op(in_zsa->front.failOp);
573 out->DepthStencilState.FrontFace.StencilDepthFailOp =
574 translate_stencil_op(in_zsa->front.depthFailOp);
575 out->DepthStencilState.FrontFace.StencilPassOp =
576 translate_stencil_op(in_zsa->front.passOp);
577 out->DepthStencilState.FrontFace.StencilFunc =
578 dzn_translate_compare_op(in_zsa->front.compareOp);
579 out->DepthStencilState.BackFace.StencilFailOp =
580 translate_stencil_op(in_zsa->back.failOp);
581 out->DepthStencilState.BackFace.StencilDepthFailOp =
582 translate_stencil_op(in_zsa->back.depthFailOp);
583 out->DepthStencilState.BackFace.StencilPassOp =
584 translate_stencil_op(in_zsa->back.passOp);
585 out->DepthStencilState.BackFace.StencilFunc =
586 dzn_translate_compare_op(in_zsa->back.compareOp);
587
588 pipeline->zsa.stencil_test.enable = true;
589
590 translate_stencil_test(pipeline, out, in);
591 }
592 }
593
594 static D3D12_BLEND
translate_blend_factor(VkBlendFactor in)595 translate_blend_factor(VkBlendFactor in)
596 {
597 switch (in) {
598 case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
599 case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
600 case VK_BLEND_FACTOR_SRC_COLOR: return D3D12_BLEND_SRC_COLOR;
601 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR;
602 case VK_BLEND_FACTOR_DST_COLOR: return D3D12_BLEND_DEST_COLOR;
603 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
604 case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
605 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
606 case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
607 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
608 /* FIXME: no way to isolate the alpla and color constants */
609 case VK_BLEND_FACTOR_CONSTANT_COLOR:
610 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
611 return D3D12_BLEND_BLEND_FACTOR;
612 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
613 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
614 return D3D12_BLEND_INV_BLEND_FACTOR;
615 case VK_BLEND_FACTOR_SRC1_COLOR: return D3D12_BLEND_SRC1_COLOR;
616 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: return D3D12_BLEND_INV_SRC1_COLOR;
617 case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
618 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
619 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
620 default: unreachable("Invalid blend factor");
621 }
622 }
623
624 static D3D12_BLEND_OP
translate_blend_op(VkBlendOp in)625 translate_blend_op(VkBlendOp in)
626 {
627 switch (in) {
628 case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
629 case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
630 case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
631 case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
632 case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
633 default: unreachable("Invalid blend op");
634 }
635 }
636
637 static D3D12_LOGIC_OP
translate_logic_op(VkLogicOp in)638 translate_logic_op(VkLogicOp in)
639 {
640 switch (in) {
641 case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
642 case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
643 case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
644 case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
645 case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
646 case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
647 case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
648 case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
649 case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
650 case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
651 case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
652 case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
653 case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
654 case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
655 case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
656 case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
657 default: unreachable("Invalid logic op");
658 }
659 }
660
661 static void
dzn_graphics_pipeline_translate_blend(dzn_graphics_pipeline * pipeline,D3D12_GRAPHICS_PIPELINE_STATE_DESC * out,const VkGraphicsPipelineCreateInfo * in)662 dzn_graphics_pipeline_translate_blend(dzn_graphics_pipeline *pipeline,
663 D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
664 const VkGraphicsPipelineCreateInfo *in)
665 {
666 const VkPipelineColorBlendStateCreateInfo *in_blend =
667 in->pColorBlendState;
668 const VkPipelineMultisampleStateCreateInfo *in_ms =
669 in->pMultisampleState;
670
671 if (!in_blend || !in_ms)
672 return;
673
674 D3D12_LOGIC_OP logicop =
675 in_blend->logicOpEnable ?
676 translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
677 out->BlendState.AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
678 for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
679 if (i > 0 &&
680 !memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
681 sizeof(*in_blend->pAttachments)))
682 out->BlendState.IndependentBlendEnable = true;
683
684 out->BlendState.RenderTarget[i].BlendEnable =
685 in_blend->pAttachments[i].blendEnable;
686 in_blend->logicOpEnable;
687 out->BlendState.RenderTarget[i].RenderTargetWriteMask =
688 in_blend->pAttachments[i].colorWriteMask;
689 if (in_blend->logicOpEnable) {
690 out->BlendState.RenderTarget[i].LogicOpEnable = true;
691 out->BlendState.RenderTarget[i].LogicOp = logicop;
692 } else {
693 out->BlendState.RenderTarget[i].SrcBlend =
694 translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor);
695 out->BlendState.RenderTarget[i].DestBlend =
696 translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor);
697 out->BlendState.RenderTarget[i].BlendOp =
698 translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
699 out->BlendState.RenderTarget[i].SrcBlendAlpha =
700 translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor);
701 out->BlendState.RenderTarget[i].DestBlendAlpha =
702 translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor);
703 out->BlendState.RenderTarget[i].BlendOpAlpha =
704 translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
705 }
706 }
707 }
708
709
710 static void
dzn_pipeline_init(dzn_pipeline * pipeline,dzn_device * device,VkPipelineBindPoint type,dzn_pipeline_layout * layout)711 dzn_pipeline_init(dzn_pipeline *pipeline,
712 dzn_device *device,
713 VkPipelineBindPoint type,
714 dzn_pipeline_layout *layout)
715 {
716 pipeline->type = type;
717 pipeline->root.sets_param_count = layout->root.sets_param_count;
718 pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
719 pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
720 STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
721 memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
722 pipeline->root.sig = layout->root.sig;
723 pipeline->root.sig->AddRef();
724
725 STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
726 memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));
727
728 STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
729 memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
730 vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
731 }
732
733 static void
dzn_pipeline_finish(dzn_pipeline * pipeline)734 dzn_pipeline_finish(dzn_pipeline *pipeline)
735 {
736 if (pipeline->state)
737 pipeline->state->Release();
738 if (pipeline->root.sig)
739 pipeline->root.sig->Release();
740
741 vk_object_base_finish(&pipeline->base);
742 }
743
744 static void
dzn_graphics_pipeline_destroy(dzn_graphics_pipeline * pipeline,const VkAllocationCallbacks * alloc)745 dzn_graphics_pipeline_destroy(dzn_graphics_pipeline *pipeline,
746 const VkAllocationCallbacks *alloc)
747 {
748 if (!pipeline)
749 return;
750
751 for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
752 if (pipeline->indirect_cmd_sigs[i])
753 pipeline->indirect_cmd_sigs[i]->Release();
754 }
755
756 dzn_pipeline_finish(&pipeline->base);
757 vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
758 }
759
760 static VkResult
dzn_graphics_pipeline_create(dzn_device * device,VkPipelineCache cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * out)761 dzn_graphics_pipeline_create(dzn_device *device,
762 VkPipelineCache cache,
763 const VkGraphicsPipelineCreateInfo *pCreateInfo,
764 const VkAllocationCallbacks *pAllocator,
765 VkPipeline *out)
766 {
767 VK_FROM_HANDLE(dzn_render_pass, pass, pCreateInfo->renderPass);
768 VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
769 const dzn_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
770 uint32_t stage_mask = 0;
771 VkResult ret;
772 HRESULT hres = 0;
773
774 dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *)
775 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
776 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
777 if (!pipeline)
778 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
779
780 dzn_pipeline_init(&pipeline->base, device,
781 VK_PIPELINE_BIND_POINT_GRAPHICS,
782 layout);
783 D3D12_INPUT_ELEMENT_DESC *inputs = NULL;
784 D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {
785 .pRootSignature = pipeline->base.root.sig,
786 .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
787 };
788
789 ret = dzn_graphics_pipeline_translate_vi(pipeline, pAllocator, &desc, pCreateInfo, &inputs);
790 if (ret != VK_SUCCESS)
791 goto out;
792
793 if (pCreateInfo->pDynamicState) {
794 for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
795 switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
796 case VK_DYNAMIC_STATE_VIEWPORT:
797 pipeline->vp.dynamic = true;
798 break;
799 case VK_DYNAMIC_STATE_SCISSOR:
800 pipeline->scissor.dynamic = true;
801 break;
802 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
803 pipeline->zsa.stencil_test.dynamic_ref = true;
804 break;
805 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
806 pipeline->zsa.stencil_test.dynamic_compare_mask = true;
807 break;
808 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
809 pipeline->zsa.stencil_test.dynamic_write_mask = true;
810 break;
811 default: unreachable("Unsupported dynamic state");
812 }
813 }
814 }
815
816 dzn_graphics_pipeline_translate_ia(pipeline, &desc, pCreateInfo);
817 dzn_graphics_pipeline_translate_rast(pipeline, &desc, pCreateInfo);
818 dzn_graphics_pipeline_translate_ms(pipeline, &desc, pCreateInfo);
819 dzn_graphics_pipeline_translate_zsa(pipeline, &desc, pCreateInfo);
820 dzn_graphics_pipeline_translate_blend(pipeline, &desc, pCreateInfo);
821
822 desc.NumRenderTargets = subpass->color_count;
823 for (uint32_t i = 0; i < subpass->color_count; i++) {
824 uint32_t idx = subpass->colors[i].idx;
825
826 if (idx == VK_ATTACHMENT_UNUSED) continue;
827
828 const struct dzn_attachment *attachment = &pass->attachments[idx];
829
830 desc.RTVFormats[i] =
831 dzn_image_get_dxgi_format(attachment->format,
832 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
833 VK_IMAGE_ASPECT_COLOR_BIT);
834 }
835
836 if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) {
837 const struct dzn_attachment *attachment =
838 &pass->attachments[subpass->zs.idx];
839
840 desc.DSVFormat =
841 dzn_image_get_dxgi_format(attachment->format,
842 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
843 VK_IMAGE_ASPECT_DEPTH_BIT |
844 VK_IMAGE_ASPECT_STENCIL_BIT);
845 }
846
847 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
848 stage_mask |= pCreateInfo->pStages[i].stage;
849
850 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
851 if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT &&
852 pCreateInfo->pRasterizationState &&
853 (pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
854 pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
855 /* Disable rasterization (AKA leave fragment shader NULL) when
856 * front+back culling or discard is set.
857 */
858 continue;
859 }
860
861 D3D12_SHADER_BYTECODE *slot =
862 dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage);
863 enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
864 uint16_t y_flip_mask = 0, z_flip_mask = 0;
865
866 if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT ||
867 (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_VERTEX_BIT &&
868 !(stage_mask & VK_SHADER_STAGE_GEOMETRY_BIT))) {
869 if (pipeline->vp.dynamic) {
870 yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
871 } else if (pCreateInfo->pViewportState) {
872 const VkPipelineViewportStateCreateInfo *vp_info =
873 pCreateInfo->pViewportState;
874
875 for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
876 if (vp_info->pViewports[i].height > 0)
877 y_flip_mask |= BITFIELD_BIT(i);
878
879 if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
880 z_flip_mask |= BITFIELD_BIT(i);
881 }
882
883 if (y_flip_mask && z_flip_mask)
884 yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
885 else if (z_flip_mask)
886 yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
887 else if (y_flip_mask)
888 yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
889 }
890 }
891
892 ret = dzn_pipeline_compile_shader(device, pAllocator,
893 layout, &pCreateInfo->pStages[i],
894 yz_flip_mode, y_flip_mask, z_flip_mask, slot);
895 if (ret != VK_SUCCESS)
896 goto out;
897 }
898
899
900 hres = device->dev->CreateGraphicsPipelineState(&desc,
901 IID_PPV_ARGS(&pipeline->base.state));
902 if (FAILED(hres)) {
903 ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
904 goto out;
905 }
906
907 ret = VK_SUCCESS;
908
909 out:
910 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
911 D3D12_SHADER_BYTECODE *slot =
912 dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage);
913 free((void *)slot->pShaderBytecode);
914 }
915
916 vk_free2(&device->vk.alloc, pAllocator, inputs);
917 if (ret != VK_SUCCESS)
918 dzn_graphics_pipeline_destroy(pipeline, pAllocator);
919 else
920 *out = dzn_graphics_pipeline_to_handle(pipeline);
921
922 return ret;
923 }
924
#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 3

/**
 * Get (creating and caching it on first use) the ID3D12CommandSignature
 * used to execute indirect draws of the given @type on this pipeline.
 *
 * Returns NULL if D3D12 command-signature creation fails.
 */
ID3D12CommandSignature *
dzn_graphics_pipeline_get_indirect_cmd_sig(dzn_graphics_pipeline *pipeline,
                                           enum dzn_indirect_draw_cmd_sig_type type)
{
   assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);

   dzn_device *device =
      container_of(pipeline->base.base.device, dzn_device, vk);
   ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type];

   /* Command signatures are created lazily and cached on the pipeline. */
   if (cmdsig)
      return cmdsig;

   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG;
   /* Triangle-fan draws are always indexed draws here. */
   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan;

   uint32_t cmd_arg_count = 0;
   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];

   /* Triangle-fan draws start by binding an index buffer view taken from
    * the command stream.
    */
   if (triangle_fan) {
      cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
      };
   }

   /* Write two consecutive 32-bit values from the command stream into the
    * sysval CBV root constants, starting at first_vertex (offset 0, as
    * asserted below). NOTE(review): the second value is presumably
    * base_instance -- confirm against struct dxil_spirv_vertex_runtime_data.
    */
   cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
         .Num32BitValuesToSet = 2,
      },
   };

   /* Finally the draw packet itself, indexed or not. */
   cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
      .Type = indexed ?
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
   };

   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
   assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);

   /* ByteStride must match the exec-params struct the driver packs for
    * each draw variant.
    */
   D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
      .ByteStride =
         triangle_fan ?
         sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
         sizeof(struct dzn_indirect_draw_exec_params),
      .NumArgumentDescs = cmd_arg_count,
      .pArgumentDescs = cmd_args,
   };
   HRESULT hres =
      device->dev->CreateCommandSignature(&cmd_sig_desc,
                                          pipeline->base.root.sig,
                                          IID_PPV_ARGS(&cmdsig));
   if (FAILED(hres))
      return NULL;

   pipeline->indirect_cmd_sigs[type] = cmdsig;
   return cmdsig;
}
988
989 VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateGraphicsPipelines(VkDevice dev,VkPipelineCache pipelineCache,uint32_t count,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)990 dzn_CreateGraphicsPipelines(VkDevice dev,
991 VkPipelineCache pipelineCache,
992 uint32_t count,
993 const VkGraphicsPipelineCreateInfo *pCreateInfos,
994 const VkAllocationCallbacks *pAllocator,
995 VkPipeline *pPipelines)
996 {
997 VK_FROM_HANDLE(dzn_device, device, dev);
998 VkResult result = VK_SUCCESS;
999
1000 unsigned i;
1001 for (i = 0; i < count; i++) {
1002 result = dzn_graphics_pipeline_create(device,
1003 pipelineCache,
1004 &pCreateInfos[i],
1005 pAllocator,
1006 &pPipelines[i]);
1007 if (result != VK_SUCCESS) {
1008 pPipelines[i] = VK_NULL_HANDLE;
1009
1010 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED_EX as it
1011 * is not obvious what error should be report upon 2 different failures.
1012 */
1013 if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
1014 break;
1015
1016 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
1017 break;
1018 }
1019 }
1020
1021 for (; i < count; i++)
1022 pPipelines[i] = VK_NULL_HANDLE;
1023
1024 return result;
1025 }
1026
1027 static void
dzn_compute_pipeline_destroy(dzn_compute_pipeline * pipeline,const VkAllocationCallbacks * alloc)1028 dzn_compute_pipeline_destroy(dzn_compute_pipeline *pipeline,
1029 const VkAllocationCallbacks *alloc)
1030 {
1031 if (!pipeline)
1032 return;
1033
1034 if (pipeline->indirect_cmd_sig)
1035 pipeline->indirect_cmd_sig->Release();
1036
1037 dzn_pipeline_finish(&pipeline->base);
1038 vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
1039 }
1040
1041 static VkResult
dzn_compute_pipeline_create(dzn_device * device,VkPipelineCache cache,const VkComputePipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * out)1042 dzn_compute_pipeline_create(dzn_device *device,
1043 VkPipelineCache cache,
1044 const VkComputePipelineCreateInfo *pCreateInfo,
1045 const VkAllocationCallbacks *pAllocator,
1046 VkPipeline *out)
1047 {
1048 VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
1049
1050 dzn_compute_pipeline *pipeline = (dzn_compute_pipeline *)
1051 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1052 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1053 if (!pipeline)
1054 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1055
1056 dzn_pipeline_init(&pipeline->base, device,
1057 VK_PIPELINE_BIND_POINT_COMPUTE,
1058 layout);
1059
1060 D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
1061 .pRootSignature = pipeline->base.root.sig,
1062 .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
1063 };
1064
1065 VkResult ret =
1066 dzn_pipeline_compile_shader(device, pAllocator, layout,
1067 &pCreateInfo->stage,
1068 DXIL_SPIRV_YZ_FLIP_NONE, 0, 0,
1069 &desc.CS);
1070 if (ret != VK_SUCCESS)
1071 goto out;
1072
1073 if (FAILED(device->dev->CreateComputePipelineState(&desc,
1074 IID_PPV_ARGS(&pipeline->base.state)))) {
1075 ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1076 goto out;
1077 }
1078
1079 out:
1080 free((void *)desc.CS.pShaderBytecode);
1081 if (ret != VK_SUCCESS)
1082 dzn_compute_pipeline_destroy(pipeline, pAllocator);
1083 else
1084 *out = dzn_compute_pipeline_to_handle(pipeline);
1085
1086 return ret;
1087 }
1088
/**
 * Get (creating and caching it on first use) the ID3D12CommandSignature
 * used to execute indirect dispatches on this compute pipeline.
 *
 * Returns NULL if D3D12 command-signature creation fails.
 */
ID3D12CommandSignature *
dzn_compute_pipeline_get_indirect_cmd_sig(dzn_compute_pipeline *pipeline)
{
   /* The command signature is created lazily and cached on the pipeline. */
   if (pipeline->indirect_cmd_sig)
      return pipeline->indirect_cmd_sig;

   dzn_device *device =
      container_of(pipeline->base.base.device, dzn_device, vk);

   /* The exec stream first loads three 32-bit values (the thread-group
    * counts) into the sysval CBV root constants, then issues the dispatch
    * itself.
    */
   D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
         .Constant = {
            .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
            .DestOffsetIn32BitValues = 0,
            .Num32BitValuesToSet = 3,
         },
      },
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
      },
   };

   /* Stride covers the 3 root constants plus the D3D12_DISPATCH_ARGUMENTS
    * payload -- both are three 32-bit values, hence twice the struct size.
    * NOTE(review): presumably matches the exec-params struct the driver
    * packs for indirect dispatch -- confirm against dzn_private.h.
    */
   D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
      .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
      .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
      .pArgumentDescs = indirect_dispatch_args,
   };

   HRESULT hres =
      device->dev->CreateCommandSignature(&indirect_dispatch_desc,
                                          pipeline->base.root.sig,
                                          IID_PPV_ARGS(&pipeline->indirect_cmd_sig));
   if (FAILED(hres))
      return NULL;

   return pipeline->indirect_cmd_sig;
}
1127
1128 VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateComputePipelines(VkDevice dev,VkPipelineCache pipelineCache,uint32_t count,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)1129 dzn_CreateComputePipelines(VkDevice dev,
1130 VkPipelineCache pipelineCache,
1131 uint32_t count,
1132 const VkComputePipelineCreateInfo *pCreateInfos,
1133 const VkAllocationCallbacks *pAllocator,
1134 VkPipeline *pPipelines)
1135 {
1136 VK_FROM_HANDLE(dzn_device, device, dev);
1137 VkResult result = VK_SUCCESS;
1138
1139 unsigned i;
1140 for (i = 0; i < count; i++) {
1141 result = dzn_compute_pipeline_create(device,
1142 pipelineCache,
1143 &pCreateInfos[i],
1144 pAllocator,
1145 &pPipelines[i]);
1146 if (result != VK_SUCCESS) {
1147 pPipelines[i] = VK_NULL_HANDLE;
1148
1149 /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED_EX as it
1150 * is not obvious what error should be report upon 2 different failures.
1151 */
1152 if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
1153 break;
1154
1155 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
1156 break;
1157 }
1158 }
1159
1160 for (; i < count; i++)
1161 pPipelines[i] = VK_NULL_HANDLE;
1162
1163 return result;
1164 }
1165
1166 VKAPI_ATTR void VKAPI_CALL
dzn_DestroyPipeline(VkDevice device,VkPipeline pipeline,const VkAllocationCallbacks * pAllocator)1167 dzn_DestroyPipeline(VkDevice device,
1168 VkPipeline pipeline,
1169 const VkAllocationCallbacks *pAllocator)
1170 {
1171 VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);
1172
1173 if (!pipe)
1174 return;
1175
1176 if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
1177 dzn_graphics_pipeline *gfx = container_of(pipe, dzn_graphics_pipeline, base);
1178 dzn_graphics_pipeline_destroy(gfx, pAllocator);
1179 } else {
1180 assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
1181 dzn_compute_pipeline *compute = container_of(pipe, dzn_compute_pipeline, base);
1182 dzn_compute_pipeline_destroy(compute, pAllocator);
1183 }
1184 }
1185