/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_private.h"

#include "spirv_to_dxil.h"

#include "vk_alloc.h"
#include "vk_util.h"
#include "vk_format.h"

#include <directx/d3d12.h>
#include <dxguids/dxguids.h>

#include <dxcapi.h>
#include <wrl/client.h>

#include "util/u_debug.h"

using Microsoft::WRL::ComPtr;

static dxil_spirv_shader_stage
to_dxil_shader_stage(VkShaderStageFlagBits in)
{
   switch (in) {
   case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
   case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
   case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
   case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
   default: unreachable("Unsupported stage");
   }
}

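/* Compile one shader stage: lower the SPIR-V module to DXIL with
 * spirv_to_dxil() (applying specialization constants and the requested
 * YZ-flip configuration), validate the result with DXC, and store the
 * bytecode in *slot. The caller owns the returned bytecode buffer.
 */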
static VkResult
dzn_pipeline_compile_shader(dzn_device *device,
                            const VkAllocationCallbacks *alloc,
                            dzn_pipeline_layout *layout,
                            const VkPipelineShaderStageCreateInfo *stage_info,
                            enum dxil_spirv_yz_flip_mode yz_flip_mode,
                            uint16_t y_flip_mask, uint16_t z_flip_mask,
                            D3D12_SHADER_BYTECODE *slot)
{
   dzn_instance *instance =
      container_of(device->vk.physical->instance, dzn_instance, vk);
   IDxcValidator *validator = instance->dxc.validator;
   IDxcLibrary *library = instance->dxc.library;
   IDxcCompiler *compiler = instance->dxc.compiler;
   const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct dxil_spirv_object dxil_object;

   /* convert VkSpecializationInfo */
   struct dxil_spirv_specialization *spec = NULL;
   uint32_t num_spec = 0;

   if (spec_info && spec_info->mapEntryCount) {
      spec = (struct dxil_spirv_specialization *)
         vk_alloc2(&device->vk.alloc, alloc,
                   spec_info->mapEntryCount * sizeof(*spec), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!spec)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
         const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
         const uint8_t *data = (const uint8_t *)spec_info->pData + entry->offset;
         assert(data + entry->size <= (const uint8_t *)spec_info->pData + spec_info->dataSize);
         spec[i].id = entry->constantID;
         switch (entry->size) {
         case 8:
            spec[i].value.u64 = *(const uint64_t *)data;
            break;
         case 4:
            spec[i].value.u32 = *(const uint32_t *)data;
            break;
         case 2:
            spec[i].value.u16 = *(const uint16_t *)data;
            break;
         case 1:
            spec[i].value.u8 = *(const uint8_t *)data;
            break;
         default:
            assert(!"Invalid spec constant size");
            break;
         }

         spec[i].defined_on_module = false;
      }

      num_spec = spec_info->mapEntryCount;
   }

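   /* The sysvals CBV (runtime data such as first_vertex/base_instance) and
    * the push-constant CBV live in dedicated register spaces so they can
    * never collide with bindings translated from the descriptor set layouts.
    */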
   struct dxil_spirv_runtime_conf conf = {
      .runtime_data_cbv = {
         .register_space = DZN_REGISTER_SPACE_SYSVALS,
         .base_shader_register = 0,
      },
      .push_constant_cbv = {
         .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
         .base_shader_register = 0,
      },
      .descriptor_set_count = layout->set_count,
      .descriptor_sets = layout->binding_translation,
      .zero_based_vertex_instance_id = false,
      .yz_flip = {
         .mode = yz_flip_mode,
         .y_mask = y_flip_mask,
         .z_mask = z_flip_mask,
      },
      .read_only_images_as_srvs = true,
   };

   struct dxil_spirv_debug_options dbg_opts = {
      .dump_nir = !!(instance->debug_flags & DZN_DEBUG_NIR),
   };

   /* TODO: Extend spirv_to_dxil() to allow passing a custom allocator */
   bool success =
      spirv_to_dxil((uint32_t *)module->data, module->size / sizeof(uint32_t),
                    spec, num_spec,
                    to_dxil_shader_stage(stage_info->stage),
                    stage_info->pName, &dbg_opts, &conf, &dxil_object);

   vk_free2(&device->vk.alloc, alloc, spec);

   if (!success)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

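   /* D3D12 only accepts DXIL that has been signed by the DXIL validator,
    * so validate (and sign) the fresh container in place before handing the
    * bytecode to the runtime.
    */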
   dzn_shader_blob blob(dxil_object.binary.buffer, dxil_object.binary.size);
   ComPtr<IDxcOperationResult> result;
   validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit, &result);

   if (instance->debug_flags & DZN_DEBUG_DXIL) {
      IDxcBlobEncoding *disassembly;
      compiler->Disassemble(&blob, &disassembly);
      ComPtr<IDxcBlobEncoding> blobUtf8;
      library->GetBlobAsUtf8(disassembly, blobUtf8.GetAddressOf());
      char *disasm = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
      disasm[blobUtf8->GetBufferSize() - 1] = 0;
      fprintf(stderr, "== BEGIN SHADER ============================================\n"
              "%s\n"
              "== END SHADER ==============================================\n",
              disasm);
      disassembly->Release();
   }

   HRESULT validationStatus;
   result->GetStatus(&validationStatus);
   if (FAILED(validationStatus)) {
      if (instance->debug_flags & DZN_DEBUG_DXIL) {
         ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
         result->GetErrorBuffer(&printBlob);
         library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());

         char *errorString;
         if (printBlobUtf8) {
            errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());

            errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
            fprintf(stderr,
                    "== VALIDATION ERROR =============================================\n"
                    "%s\n"
                    "== END ==========================================================\n",
                    errorString);
         }
      }

      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   slot->pShaderBytecode = dxil_object.binary.buffer;
   slot->BytecodeLength = dxil_object.binary.size;
   return VK_SUCCESS;
}


static D3D12_SHADER_BYTECODE *
dzn_pipeline_get_gfx_shader_slot(D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc,
                                 VkShaderStageFlagBits in)
{
   switch (in) {
   case VK_SHADER_STAGE_VERTEX_BIT: return &desc->VS;
   /* In D3D12, the hull shader is the tessellation control stage, and the
    * domain shader is the tessellation evaluation stage.
    */
   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return &desc->HS;
   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return &desc->DS;
   case VK_SHADER_STAGE_GEOMETRY_BIT: return &desc->GS;
   case VK_SHADER_STAGE_FRAGMENT_BIT: return &desc->PS;
   default: unreachable("Unsupported stage");
   }
}

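/* Translate the vertex-input state into a D3D12 input layout. On success,
 * *input_elems points at a command-scope allocation that must stay alive
 * until CreateGraphicsPipelineState() has been called; the caller is
 * responsible for freeing it.
 */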
static VkResult
dzn_graphics_pipeline_translate_vi(dzn_graphics_pipeline *pipeline,
                                   const VkAllocationCallbacks *alloc,
                                   D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in,
                                   D3D12_INPUT_ELEMENT_DESC **input_elems)
{
   dzn_device *device =
      container_of(pipeline->base.base.device, dzn_device, vk);
   const VkPipelineVertexInputStateCreateInfo *in_vi =
      in->pVertexInputState;

   if (!in_vi->vertexAttributeDescriptionCount) {
      out->InputLayout.pInputElementDescs = NULL;
      out->InputLayout.NumElements = 0;
      *input_elems = NULL;
      return VK_SUCCESS;
   }

   *input_elems = (D3D12_INPUT_ELEMENT_DESC *)
      vk_alloc2(&device->vk.alloc, alloc,
                sizeof(**input_elems) * in_vi->vertexAttributeDescriptionCount, 8,
                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!*input_elems)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   D3D12_INPUT_ELEMENT_DESC *inputs = *input_elems;
   D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];

   pipeline->vb.count = 0;
   for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
      const struct VkVertexInputBindingDescription *bdesc =
         &in_vi->pVertexBindingDescriptions[i];

      pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
      pipeline->vb.strides[bdesc->binding] = bdesc->stride;
      if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
      } else {
         assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
      }
   }

   for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *attr =
         &in_vi->pVertexAttributeDescriptions[i];

      /* nir_to_dxil() names all vertex inputs TEXCOORDx, with x being the
       * Vulkan attribute location.
       */
      inputs[i].SemanticName = "TEXCOORD";
      inputs[i].SemanticIndex = attr->location;
      inputs[i].Format = dzn_buffer_get_dxgi_format(attr->format);
      inputs[i].InputSlot = attr->binding;
      inputs[i].InputSlotClass = slot_class[attr->binding];
      inputs[i].InstanceDataStepRate =
         inputs[i].InputSlotClass == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0;
      inputs[i].AlignedByteOffset = attr->offset;
   }

   out->InputLayout.pInputElementDescs = inputs;
   out->InputLayout.NumElements = in_vi->vertexAttributeDescriptionCount;
   return VK_SUCCESS;
}

static D3D12_PRIMITIVE_TOPOLOGY_TYPE
to_prim_topology_type(VkPrimitiveTopology in)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
   default: unreachable("Invalid primitive topology");
   }
}

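/* Unlike Vulkan, D3D12 encodes the patch control-point count directly in
 * the primitive topology enum, hence the extra patch_control_points
 * parameter below.
 */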
static D3D12_PRIMITIVE_TOPOLOGY
to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
   /* Triangle fans are emulated using an intermediate index buffer. */
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      assert(patch_control_points);
      return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
   default: unreachable("Invalid primitive topology");
   }
}

static void
dzn_graphics_pipeline_translate_ia(dzn_graphics_pipeline *pipeline,
                                   D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineInputAssemblyStateCreateInfo *in_ia =
      in->pInputAssemblyState;
   const VkPipelineTessellationStateCreateInfo *in_tes =
      (out->DS.pShaderBytecode && out->HS.pShaderBytecode) ?
      in->pTessellationState : NULL;

   out->PrimitiveTopologyType = to_prim_topology_type(in_ia->topology);
   pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
   pipeline->ia.topology =
      to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0);

   /* FIXME: the strip-cut value must match the index-buffer format, so this
    * is wrong for 16-bit index buffers (which need 0xFFFF).
    */
   if (in_ia->primitiveRestartEnable)
      out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
   else
      out->IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
}


static D3D12_FILL_MODE
translate_polygon_mode(VkPolygonMode in)
{
   switch (in) {
   case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
   case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
   default: unreachable("Unsupported polygon mode");
   }
}

static D3D12_CULL_MODE
translate_cull_mode(VkCullModeFlags in)
{
   switch (in) {
   case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
   case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
   case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
   /* Front+back face culling is equivalent to 'rasterization disabled' */
   case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
   default: unreachable("Unsupported cull mode");
   }
}

static void
dzn_graphics_pipeline_translate_rast(dzn_graphics_pipeline *pipeline,
                                     D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
                                     const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineViewportStateCreateInfo *in_vp =
      in->pViewportState;

   if (in_vp) {
      pipeline->vp.count = in_vp->viewportCount;
      if (in_vp->pViewports) {
         for (uint32_t i = 0; i < in_vp->viewportCount; i++)
            dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
      }

      pipeline->scissor.count = in_vp->scissorCount;
      if (in_vp->pScissors) {
         for (uint32_t i = 0; i < in_vp->scissorCount; i++)
            dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
      }
   }

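   /* Vulkan's depthClampEnable is the inverse of D3D12's DepthClipEnable:
    * clamping to the depth range is what happens when clipping is disabled.
    */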
   out->RasterizerState.DepthClipEnable = !in_rast->depthClampEnable;
   out->RasterizerState.FillMode = translate_polygon_mode(in_rast->polygonMode);
   out->RasterizerState.CullMode = translate_cull_mode(in_rast->cullMode);
   out->RasterizerState.FrontCounterClockwise =
      in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
   if (in_rast->depthBiasEnable) {
      out->RasterizerState.DepthBias = in_rast->depthBiasConstantFactor;
      out->RasterizerState.SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
      out->RasterizerState.DepthBiasClamp = in_rast->depthBiasClamp;
   }

   assert(in_rast->lineWidth == 1.0f);
}

static void
dzn_graphics_pipeline_translate_ms(dzn_graphics_pipeline *pipeline,
                                   D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineMultisampleStateCreateInfo *in_ms =
      in->pMultisampleState;

   /* TODO: sampleShadingEnable, minSampleShading,
    *       alphaToOneEnable
    */
   out->SampleDesc.Count = in_ms ? in_ms->rasterizationSamples : 1;
   out->SampleDesc.Quality = 0;
   out->SampleMask = in_ms && in_ms->pSampleMask ?
                     *in_ms->pSampleMask :
                     (1 << out->SampleDesc.Count) - 1;
}


static D3D12_STENCIL_OP
translate_stencil_op(VkStencilOp in)
{
   switch (in) {
   case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
   case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
   default: unreachable("Invalid stencil op");
   }
}

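/* D3D12 shares a single stencil reference value and a single read/write
 * mask pair between the front and back faces, while Vulkan allows fully
 * independent front/back state. Work out which faces actually use the
 * reference and masks (accounting for culling and the compare/stencil ops),
 * pick compatible values, and flag pipelines that genuinely need
 * independent front/back state, which is not supported yet (see the assert
 * at the end).
 */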
static void
translate_stencil_test(dzn_graphics_pipeline *pipeline,
                       D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
                       const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineDepthStencilStateCreateInfo *in_zsa =
      in->pDepthStencilState;

   bool front_test_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
      in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
      in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       in_zsa->front.compareMask != 0);
   bool back_test_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
      in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
      in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       in_zsa->back.compareMask != 0);

   if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
      pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
   else if (front_test_uses_ref)
      pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
   else
      pipeline->zsa.stencil_test.front.compare_mask = 0;

   if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
      pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
   else if (back_test_uses_ref)
      pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
   else
      pipeline->zsa.stencil_test.back.compare_mask = 0;

   bool diff_wr_mask =
      in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       in_zsa->back.writeMask != in_zsa->front.writeMask);
   bool diff_ref =
      in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
      (pipeline->zsa.stencil_test.dynamic_ref ||
       in_zsa->back.reference != in_zsa->front.reference);
   bool diff_cmp_mask =
      back_test_uses_ref && front_test_uses_ref &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask);

   if (diff_cmp_mask || diff_wr_mask)
      pipeline->zsa.stencil_test.independent_front_back = true;

   bool back_wr_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
      ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
        in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->depthTestEnable &&
        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
   bool front_wr_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
      ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
        in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->depthTestEnable &&
        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));

   pipeline->zsa.stencil_test.front.write_mask =
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
      0 : in_zsa->front.writeMask;
   pipeline->zsa.stencil_test.back.write_mask =
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
      0 : in_zsa->back.writeMask;

   pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
   pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;

   if (diff_ref &&
       pipeline->zsa.stencil_test.front.uses_ref &&
       pipeline->zsa.stencil_test.back.uses_ref)
      pipeline->zsa.stencil_test.independent_front_back = true;

   pipeline->zsa.stencil_test.front.ref =
      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
   pipeline->zsa.stencil_test.back.ref =
      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;

   /* FIXME: We don't support independent {compare,write}_mask and stencil
    * reference. Until we have proper support for independent front/back
    * stencil test, let's prioritize the front setup when both are active.
    */
   out->DepthStencilState.StencilReadMask =
      front_test_uses_ref ?
      pipeline->zsa.stencil_test.front.compare_mask :
      back_test_uses_ref ?
      pipeline->zsa.stencil_test.back.compare_mask : 0;
   out->DepthStencilState.StencilWriteMask =
      pipeline->zsa.stencil_test.front.write_mask ?
      pipeline->zsa.stencil_test.front.write_mask :
      pipeline->zsa.stencil_test.back.write_mask;

   assert(!pipeline->zsa.stencil_test.independent_front_back);
}

static void
dzn_graphics_pipeline_translate_zsa(dzn_graphics_pipeline *pipeline,
                                    D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
                                    const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineDepthStencilStateCreateInfo *in_zsa =
      in->pDepthStencilState;

   if (!in_zsa)
      return;

   /* TODO: depthBoundsTestEnable */

   out->DepthStencilState.DepthEnable = in_zsa->depthTestEnable;
   out->DepthStencilState.DepthWriteMask =
      in_zsa->depthWriteEnable ?
      D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
   out->DepthStencilState.DepthFunc =
      dzn_translate_compare_op(in_zsa->depthCompareOp);
   out->DepthStencilState.StencilEnable = in_zsa->stencilTestEnable;
   if (in_zsa->stencilTestEnable) {
      out->DepthStencilState.FrontFace.StencilFailOp =
        translate_stencil_op(in_zsa->front.failOp);
      out->DepthStencilState.FrontFace.StencilDepthFailOp =
        translate_stencil_op(in_zsa->front.depthFailOp);
      out->DepthStencilState.FrontFace.StencilPassOp =
        translate_stencil_op(in_zsa->front.passOp);
      out->DepthStencilState.FrontFace.StencilFunc =
        dzn_translate_compare_op(in_zsa->front.compareOp);
      out->DepthStencilState.BackFace.StencilFailOp =
        translate_stencil_op(in_zsa->back.failOp);
      out->DepthStencilState.BackFace.StencilDepthFailOp =
        translate_stencil_op(in_zsa->back.depthFailOp);
      out->DepthStencilState.BackFace.StencilPassOp =
        translate_stencil_op(in_zsa->back.passOp);
      out->DepthStencilState.BackFace.StencilFunc =
        dzn_translate_compare_op(in_zsa->back.compareOp);

      pipeline->zsa.stencil_test.enable = true;

      translate_stencil_test(pipeline, out, in);
   }
}

static D3D12_BLEND
translate_blend_factor(VkBlendFactor in)
{
   switch (in) {
   case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
   case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
   case VK_BLEND_FACTOR_SRC_COLOR: return D3D12_BLEND_SRC_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR;
   case VK_BLEND_FACTOR_DST_COLOR: return D3D12_BLEND_DEST_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR;
   case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
   case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
   /* FIXME: no way to isolate the alpha and color constants */
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
      return D3D12_BLEND_BLEND_FACTOR;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      return D3D12_BLEND_INV_BLEND_FACTOR;
   case VK_BLEND_FACTOR_SRC1_COLOR: return D3D12_BLEND_SRC1_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: return D3D12_BLEND_INV_SRC1_COLOR;
   case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
   default: unreachable("Invalid blend factor");
   }
}

static D3D12_BLEND_OP
translate_blend_op(VkBlendOp in)
{
   switch (in) {
   case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
   case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
   case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
   case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
   case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
   default: unreachable("Invalid blend op");
   }
}

static D3D12_LOGIC_OP
translate_logic_op(VkLogicOp in)
{
   switch (in) {
   case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
   case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
   case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
   case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
   case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
   case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
   case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
   case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
   case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
   case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
   case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
   case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
   case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
   case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
   case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
   case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
   default: unreachable("Invalid logic op");
   }
}

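/* Translate the color-blend state. IndependentBlendEnable is set as soon as
 * two attachments disagree on their blend state, and Vulkan's pipeline-wide
 * logicOpEnable is applied to every render target through D3D12's
 * per-render-target LogicOpEnable.
 */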
static void
dzn_graphics_pipeline_translate_blend(dzn_graphics_pipeline *pipeline,
                                      D3D12_GRAPHICS_PIPELINE_STATE_DESC *out,
                                      const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineColorBlendStateCreateInfo *in_blend =
      in->pColorBlendState;
   const VkPipelineMultisampleStateCreateInfo *in_ms =
      in->pMultisampleState;

   if (!in_blend || !in_ms)
      return;

   D3D12_LOGIC_OP logicop =
      in_blend->logicOpEnable ?
      translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
   out->BlendState.AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
   for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
      if (i > 0 &&
          memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
                 sizeof(*in_blend->pAttachments)) != 0)
         out->BlendState.IndependentBlendEnable = true;

      out->BlendState.RenderTarget[i].BlendEnable =
         in_blend->pAttachments[i].blendEnable;
      out->BlendState.RenderTarget[i].RenderTargetWriteMask =
         in_blend->pAttachments[i].colorWriteMask;
      if (in_blend->logicOpEnable) {
         out->BlendState.RenderTarget[i].LogicOpEnable = true;
         out->BlendState.RenderTarget[i].LogicOp = logicop;
      } else {
         out->BlendState.RenderTarget[i].SrcBlend =
            translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor);
         out->BlendState.RenderTarget[i].DestBlend =
            translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor);
         out->BlendState.RenderTarget[i].BlendOp =
            translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
         out->BlendState.RenderTarget[i].SrcBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor);
         out->BlendState.RenderTarget[i].DestBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor);
         out->BlendState.RenderTarget[i].BlendOpAlpha =
            translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
      }
   }
}

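/* Common initialization for graphics and compute pipelines: copy the
 * root-signature information (parameter indices, descriptor counts, set
 * layouts) out of the pipeline layout and take a reference on the root
 * signature, so the pipeline remains valid even if the VkPipelineLayout is
 * destroyed before it.
 */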
static void
dzn_pipeline_init(dzn_pipeline *pipeline,
                  dzn_device *device,
                  VkPipelineBindPoint type,
                  dzn_pipeline_layout *layout)
{
   pipeline->type = type;
   pipeline->root.sets_param_count = layout->root.sets_param_count;
   pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
   pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
   STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
   memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
   pipeline->root.sig = layout->root.sig;
   pipeline->root.sig->AddRef();

   STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
   memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));

   STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
   memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
}

static void
dzn_pipeline_finish(dzn_pipeline *pipeline)
{
   if (pipeline->state)
      pipeline->state->Release();
   if (pipeline->root.sig)
      pipeline->root.sig->Release();

   vk_object_base_finish(&pipeline->base);
}

static void
dzn_graphics_pipeline_destroy(dzn_graphics_pipeline *pipeline,
                              const VkAllocationCallbacks *alloc)
{
   if (!pipeline)
      return;

   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
      if (pipeline->indirect_cmd_sigs[i])
         pipeline->indirect_cmd_sigs[i]->Release();
   }

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

static VkResult
dzn_graphics_pipeline_create(dzn_device *device,
                             VkPipelineCache cache,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *out)
{
   VK_FROM_HANDLE(dzn_render_pass, pass, pCreateInfo->renderPass);
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   const dzn_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
   uint32_t stage_mask = 0;
   VkResult ret;
   HRESULT hres = 0;

   dzn_graphics_pipeline *pipeline = (dzn_graphics_pipeline *)
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_GRAPHICS,
                     layout);
   D3D12_INPUT_ELEMENT_DESC *inputs = NULL;
   D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {
      .pRootSignature = pipeline->base.root.sig,
      .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
   };

   ret = dzn_graphics_pipeline_translate_vi(pipeline, pAllocator, &desc, pCreateInfo, &inputs);
   if (ret != VK_SUCCESS)
      goto out;

   if (pCreateInfo->pDynamicState) {
      for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
         switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
         case VK_DYNAMIC_STATE_VIEWPORT:
            pipeline->vp.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_SCISSOR:
            pipeline->scissor.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
            pipeline->zsa.stencil_test.dynamic_ref = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
            pipeline->zsa.stencil_test.dynamic_compare_mask = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
            pipeline->zsa.stencil_test.dynamic_write_mask = true;
            break;
         default: unreachable("Unsupported dynamic state");
         }
      }
   }

   dzn_graphics_pipeline_translate_ia(pipeline, &desc, pCreateInfo);
   dzn_graphics_pipeline_translate_rast(pipeline, &desc, pCreateInfo);
   dzn_graphics_pipeline_translate_ms(pipeline, &desc, pCreateInfo);
   dzn_graphics_pipeline_translate_zsa(pipeline, &desc, pCreateInfo);
   dzn_graphics_pipeline_translate_blend(pipeline, &desc, pCreateInfo);

   desc.NumRenderTargets = subpass->color_count;
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      uint32_t idx = subpass->colors[i].idx;

      if (idx == VK_ATTACHMENT_UNUSED) continue;

      const struct dzn_attachment *attachment = &pass->attachments[idx];

      desc.RTVFormats[i] =
         dzn_image_get_dxgi_format(attachment->format,
                                   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
                                   VK_IMAGE_ASPECT_COLOR_BIT);
   }

   if (subpass->zs.idx != VK_ATTACHMENT_UNUSED) {
      const struct dzn_attachment *attachment =
         &pass->attachments[subpass->zs.idx];

      desc.DSVFormat =
         dzn_image_get_dxgi_format(attachment->format,
                                   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                   VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT);
   }

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
      stage_mask |= pCreateInfo->pStages[i].stage;

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT &&
          pCreateInfo->pRasterizationState &&
          (pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
           pCreateInfo->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
         /* Disable rasterization (i.e. leave the fragment shader slot NULL)
          * when rasterizer discard or front+back culling is requested.
          */
         continue;
      }

      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage);
      enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
      uint16_t y_flip_mask = 0, z_flip_mask = 0;

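      /* Vulkan NDC has Y pointing down while D3D12 has Y pointing up, and
       * Vulkan allows inverted depth ranges (minDepth > maxDepth) that a
       * D3D12 viewport can't express. Compensate by patching a Y/Z flip
       * into the last pre-rasterization stage (the geometry shader if
       * present, the vertex shader otherwise). With dynamic viewports the
       * decision is deferred to draw time (conditional mode); otherwise the
       * per-viewport flip masks are baked into the shader.
       */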
      if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT ||
          (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_VERTEX_BIT &&
          !(stage_mask & VK_SHADER_STAGE_GEOMETRY_BIT))) {
         if (pipeline->vp.dynamic) {
            yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
         } else if (pCreateInfo->pViewportState) {
            const VkPipelineViewportStateCreateInfo *vp_info =
               pCreateInfo->pViewportState;

            for (uint32_t j = 0; vp_info->pViewports && j < vp_info->viewportCount; j++) {
               if (vp_info->pViewports[j].height > 0)
                  y_flip_mask |= BITFIELD_BIT(j);

               if (vp_info->pViewports[j].minDepth > vp_info->pViewports[j].maxDepth)
                  z_flip_mask |= BITFIELD_BIT(j);
            }

            if (y_flip_mask && z_flip_mask)
               yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
            else if (z_flip_mask)
               yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
            else if (y_flip_mask)
               yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
         }
      }

      ret = dzn_pipeline_compile_shader(device, pAllocator,
                                        layout, &pCreateInfo->pStages[i],
                                        yz_flip_mode, y_flip_mask, z_flip_mask, slot);
      if (ret != VK_SUCCESS)
         goto out;
   }

   hres = device->dev->CreateGraphicsPipelineState(&desc,
                                                   IID_PPV_ARGS(&pipeline->base.state));
   if (FAILED(hres)) {
      ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   ret = VK_SUCCESS;

out:
   /* The DXIL blobs are owned by this function; release them now that the
    * PSO (which keeps its own copy of the bytecode) has been created.
    */
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(&desc, pCreateInfo->pStages[i].stage);
      free((void *)slot->pShaderBytecode);
   }

   vk_free2(&device->vk.alloc, pAllocator, inputs);
   if (ret != VK_SUCCESS)
      dzn_graphics_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_graphics_pipeline_to_handle(pipeline);

   return ret;
}

#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 3

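/* Lazily create and cache the ID3D12CommandSignature used to execute
 * indirect draws of the given type. The signature is made of up to three
 * arguments: an optional index-buffer-view rebind (used by the triangle-fan
 * emulation to switch to the fan-to-list index buffer), a root-constant
 * update feeding draw parameters to the sysvals CBV, and the final
 * draw/draw-indexed argument.
 */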
ID3D12CommandSignature *
dzn_graphics_pipeline_get_indirect_cmd_sig(dzn_graphics_pipeline *pipeline,
                                           enum dzn_indirect_draw_cmd_sig_type type)
{
   assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);

   dzn_device *device =
      container_of(pipeline->base.base.device, dzn_device, vk);
   ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type];

   if (cmdsig)
      return cmdsig;

   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG;
   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan;

   uint32_t cmd_arg_count = 0;
   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];

   if (triangle_fan) {
      cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
      };
   }

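   /* Push two 32-bit values (first_vertex and, assuming the runtime-data
    * layout keeps them adjacent, base_instance) into the sysvals CBV so
    * lowered shaders can reconstruct the Vulkan vertex/instance index
    * built-ins for indirect draws.
    */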
   cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
         .Num32BitValuesToSet = 2,
      },
   };

   cmd_args[cmd_arg_count++] = D3D12_INDIRECT_ARGUMENT_DESC {
      .Type = indexed ?
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
   };

   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
   assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);

   D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
      .ByteStride =
         triangle_fan ?
         sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
         sizeof(struct dzn_indirect_draw_exec_params),
      .NumArgumentDescs = cmd_arg_count,
      .pArgumentDescs = cmd_args,
   };
   HRESULT hres =
      device->dev->CreateCommandSignature(&cmd_sig_desc,
                                          pipeline->base.root.sig,
                                          IID_PPV_ARGS(&cmdsig));
   if (FAILED(hres))
      return NULL;

   pipeline->indirect_cmd_sigs[type] = cmdsig;
   return cmdsig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateGraphicsPipelines(VkDevice dev,
                            VkPipelineCache pipelineCache,
                            uint32_t count,
                            const VkGraphicsPipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_graphics_pipeline_create(device,
                                            pipelineCache,
                                            &pCreateInfos[i],
                                            pAllocator,
                                            &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error that is not
          * VK_PIPELINE_COMPILE_REQUIRED_EXT, as it is not obvious which
          * error should be reported when two different failures occur.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

static void
dzn_compute_pipeline_destroy(dzn_compute_pipeline *pipeline,
                             const VkAllocationCallbacks *alloc)
{
   if (!pipeline)
      return;

   if (pipeline->indirect_cmd_sig)
      pipeline->indirect_cmd_sig->Release();

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

static VkResult
dzn_compute_pipeline_create(dzn_device *device,
                            VkPipelineCache cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *out)
{
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);

   dzn_compute_pipeline *pipeline = (dzn_compute_pipeline *)
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_COMPUTE,
                     layout);

   D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
      .pRootSignature = pipeline->base.root.sig,
      .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
   };

   VkResult ret =
      dzn_pipeline_compile_shader(device, pAllocator, layout,
                                  &pCreateInfo->stage,
                                  DXIL_SPIRV_YZ_FLIP_NONE, 0, 0,
                                  &desc.CS);
   if (ret != VK_SUCCESS)
      goto out;

   if (FAILED(device->dev->CreateComputePipelineState(&desc,
                                                      IID_PPV_ARGS(&pipeline->base.state)))) {
      ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

out:
   free((void *)desc.CS.pShaderBytecode);
   if (ret != VK_SUCCESS)
      dzn_compute_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_compute_pipeline_to_handle(pipeline);

   return ret;
}

ID3D12CommandSignature *
dzn_compute_pipeline_get_indirect_cmd_sig(dzn_compute_pipeline *pipeline)
{
   if (pipeline->indirect_cmd_sig)
      return pipeline->indirect_cmd_sig;

   dzn_device *device =
      container_of(pipeline->base.base.device, dzn_device, vk);

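   /* The indirect dispatch parameters are laid out twice per command in the
    * exec buffer: the first copy is pushed as three 32-bit root constants
    * (exposing the workgroup counts to the shader through the sysvals CBV),
    * the second feeds the actual dispatch argument. Hence the
    * 2 * sizeof(D3D12_DISPATCH_ARGUMENTS) stride below.
    */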
   D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
         .Constant = {
            .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
            .DestOffsetIn32BitValues = 0,
            .Num32BitValuesToSet = 3,
         },
      },
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
      },
   };

   D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
      .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
      .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
      .pArgumentDescs = indirect_dispatch_args,
   };

   HRESULT hres =
      device->dev->CreateCommandSignature(&indirect_dispatch_desc,
                                          pipeline->base.root.sig,
                                          IID_PPV_ARGS(&pipeline->indirect_cmd_sig));
   if (FAILED(hres))
      return NULL;

   return pipeline->indirect_cmd_sig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateComputePipelines(VkDevice dev,
                           VkPipelineCache pipelineCache,
                           uint32_t count,
                           const VkComputePipelineCreateInfo *pCreateInfos,
                           const VkAllocationCallbacks *pAllocator,
                           VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_compute_pipeline_create(device,
                                           pipelineCache,
                                           &pCreateInfos[i],
                                           pAllocator,
                                           &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error that is not
          * VK_PIPELINE_COMPILE_REQUIRED_EXT, as it is not obvious which
          * error should be reported when two different failures occur.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED_EXT)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

VKAPI_ATTR void VKAPI_CALL
dzn_DestroyPipeline(VkDevice device,
                    VkPipeline pipeline,
                    const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);

   if (!pipe)
      return;

   if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      dzn_graphics_pipeline *gfx = container_of(pipe, dzn_graphics_pipeline, base);
      dzn_graphics_pipeline_destroy(gfx, pAllocator);
   } else {
      assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
      dzn_compute_pipeline *compute = container_of(pipe, dzn_compute_pipeline, base);
      dzn_compute_pipeline_destroy(compute, pAllocator);
   }
}