1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 #include "vulkan/vk_format.h"
26 #include "common/amd_family.h"
27 #include <stdio.h>
28 #include <sstream>
29 #include <llvm-c/Target.h>
30 #include <mutex>
31 
32 using namespace aco;
33 
34 extern "C" {
35 PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
36 	VkInstance                                  instance,
37 	const char*                                 pName);
38 }
39 
40 ac_shader_config config;
41 radv_shader_info info;
42 std::unique_ptr<Program> program;
43 Builder bld(NULL);
44 Temp inputs[16];
45 
46 static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
47 static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
48 static std::mutex create_device_mutex;
49 
50 #define FUNCTION_LIST\
51    ITEM(CreateInstance)\
52    ITEM(DestroyInstance)\
53    ITEM(EnumeratePhysicalDevices)\
54    ITEM(GetPhysicalDeviceProperties2)\
55    ITEM(CreateDevice)\
56    ITEM(DestroyDevice)\
57    ITEM(CreateShaderModule)\
58    ITEM(DestroyShaderModule)\
59    ITEM(CreateGraphicsPipelines)\
60    ITEM(CreateComputePipelines)\
61    ITEM(DestroyPipeline)\
62    ITEM(CreateDescriptorSetLayout)\
63    ITEM(DestroyDescriptorSetLayout)\
64    ITEM(CreatePipelineLayout)\
65    ITEM(DestroyPipelineLayout)\
66    ITEM(CreateRenderPass)\
67    ITEM(DestroyRenderPass)\
68    ITEM(GetPipelineExecutablePropertiesKHR)\
69    ITEM(GetPipelineExecutableInternalRepresentationsKHR)
70 
71 #define ITEM(n) PFN_vk##n n;
72 FUNCTION_LIST
73 #undef ITEM
74 
create_program(enum chip_class chip_class,Stage stage,unsigned wave_size,enum radeon_family family)75 void create_program(enum chip_class chip_class, Stage stage, unsigned wave_size, enum radeon_family family)
76 {
77    memset(&config, 0, sizeof(config));
78    info.wave_size = wave_size;
79 
80    program.reset(new Program);
81    aco::init_program(program.get(), stage, &info, chip_class, family, false, &config);
82    program->workgroup_size = UINT_MAX;
83    calc_min_waves(program.get());
84 
85    program->debug.func = nullptr;
86    program->debug.private_data = nullptr;
87 
88    program->debug.output = output;
89    program->debug.shorten_messages = true;
90    program->debug.func = nullptr;
91    program->debug.private_data = nullptr;
92 
93    Block *block = program->create_and_insert_block();
94    block->kind = block_kind_top_level;
95 
96    bld = Builder(program.get(), &program->blocks[0]);
97 
98    config.float_mode = program->blocks[0].fp_mode.val;
99 }
100 
setup_cs(const char * input_spec,enum chip_class chip_class,enum radeon_family family,const char * subvariant,unsigned wave_size)101 bool setup_cs(const char *input_spec, enum chip_class chip_class,
102               enum radeon_family family, const char* subvariant,
103               unsigned wave_size)
104 {
105    if (!set_variant(chip_class, subvariant))
106       return false;
107 
108    memset(&info, 0, sizeof(info));
109    info.cs.block_size[0] = 1;
110    info.cs.block_size[1] = 1;
111    info.cs.block_size[2] = 1;
112 
113    create_program(chip_class, compute_cs, wave_size, family);
114 
115    if (input_spec) {
116       unsigned num_inputs = DIV_ROUND_UP(strlen(input_spec), 3u);
117       aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(aco_opcode::p_startpgm, Format::PSEUDO, 0, num_inputs)};
118       for (unsigned i = 0; i < num_inputs; i++) {
119          RegClass cls(input_spec[i * 3] == 'v' ? RegType::vgpr : RegType::sgpr, input_spec[i * 3 + 1] - '0');
120          inputs[i] = bld.tmp(cls);
121          startpgm->definitions[i] = Definition(inputs[i]);
122       }
123       bld.insert(std::move(startpgm));
124    }
125 
126    return true;
127 }
128 
finish_program(Program * prog)129 void finish_program(Program *prog)
130 {
131    for (Block& BB : prog->blocks) {
132       for (unsigned idx : BB.linear_preds)
133          prog->blocks[idx].linear_succs.emplace_back(BB.index);
134       for (unsigned idx : BB.logical_preds)
135          prog->blocks[idx].logical_succs.emplace_back(BB.index);
136    }
137 
138    for (Block& block : prog->blocks) {
139       if (block.linear_succs.size() == 0) {
140          block.kind |= block_kind_uniform;
141          Builder(prog, &block).sopp(aco_opcode::s_endpgm);
142       }
143    }
144 }
145 
finish_validator_test()146 void finish_validator_test()
147 {
148    finish_program(program.get());
149    aco_print_program(program.get(), output);
150    fprintf(output, "Validation results:\n");
151    if (aco::validate_ir(program.get()))
152       fprintf(output, "Validation passed\n");
153    else
154       fprintf(output, "Validation failed\n");
155 }
156 
finish_opt_test()157 void finish_opt_test()
158 {
159    finish_program(program.get());
160    if (!aco::validate_ir(program.get())) {
161       fail_test("Validation before optimization failed");
162       return;
163    }
164    aco::optimize(program.get());
165    if (!aco::validate_ir(program.get())) {
166       fail_test("Validation after optimization failed");
167       return;
168    }
169    aco_print_program(program.get(), output);
170 }
171 
finish_ra_test(ra_test_policy policy,bool lower)172 void finish_ra_test(ra_test_policy policy, bool lower)
173 {
174    finish_program(program.get());
175    if (!aco::validate_ir(program.get())) {
176       fail_test("Validation before register allocation failed");
177       return;
178    }
179 
180    program->workgroup_size = program->wave_size;
181    aco::live live_vars = aco::live_var_analysis(program.get());
182    aco::register_allocation(program.get(), live_vars.live_out, policy);
183 
184    if (aco::validate_ra(program.get())) {
185       fail_test("Validation after register allocation failed");
186       return;
187    }
188 
189    if (lower) {
190       aco::ssa_elimination(program.get());
191       aco::lower_to_hw_instr(program.get());
192    }
193 
194    aco_print_program(program.get(), output);
195 }
196 
finish_optimizer_postRA_test()197 void finish_optimizer_postRA_test()
198 {
199    finish_program(program.get());
200    aco::optimize_postRA(program.get());
201    aco_print_program(program.get(), output);
202 }
203 
finish_to_hw_instr_test()204 void finish_to_hw_instr_test()
205 {
206    finish_program(program.get());
207    aco::lower_to_hw_instr(program.get());
208    aco_print_program(program.get(), output);
209 }
210 
finish_insert_nops_test()211 void finish_insert_nops_test()
212 {
213    finish_program(program.get());
214    aco::insert_NOPs(program.get());
215    aco_print_program(program.get(), output);
216 }
217 
finish_form_hard_clause_test()218 void finish_form_hard_clause_test()
219 {
220    finish_program(program.get());
221    aco::form_hard_clauses(program.get());
222    aco_print_program(program.get(), output);
223 }
224 
finish_assembler_test()225 void finish_assembler_test()
226 {
227    finish_program(program.get());
228    std::vector<uint32_t> binary;
229    unsigned exec_size = emit_program(program.get(), binary);
230 
231    /* we could use CLRX for disassembly but that would require it to be
232     * installed */
233    if (program->chip_class >= GFX8) {
234       print_asm(program.get(), binary, exec_size / 4u, output);
235    } else {
236       //TODO: maybe we should use CLRX and skip this test if it's not available?
237       for (uint32_t dword : binary)
238          fprintf(output, "%.8x\n", dword);
239    }
240 }
241 
writeout(unsigned i,Temp tmp)242 void writeout(unsigned i, Temp tmp)
243 {
244    if (tmp.id())
245       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
246    else
247       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
248 }
249 
writeout(unsigned i,aco::Builder::Result res)250 void writeout(unsigned i, aco::Builder::Result res)
251 {
252    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
253 }
254 
writeout(unsigned i,Operand op)255 void writeout(unsigned i, Operand op)
256 {
257    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
258 }
259 
writeout(unsigned i,Operand op0,Operand op1)260 void writeout(unsigned i, Operand op0, Operand op1)
261 {
262    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
263 }
264 
fneg(Temp src)265 Temp fneg(Temp src)
266 {
267    return bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), src);
268 }
269 
fabs(Temp src)270 Temp fabs(Temp src)
271 {
272    Builder::Result res =
273       bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), src);
274    res.instr->vop3().abs[1] = true;
275    return res;
276 }
277 
get_vk_device(enum chip_class chip_class)278 VkDevice get_vk_device(enum chip_class chip_class)
279 {
280    enum radeon_family family;
281    switch (chip_class) {
282    case GFX6:
283       family = CHIP_TAHITI;
284       break;
285    case GFX7:
286       family = CHIP_BONAIRE;
287       break;
288    case GFX8:
289       family = CHIP_POLARIS10;
290       break;
291    case GFX9:
292       family = CHIP_VEGA10;
293       break;
294    case GFX10:
295       family = CHIP_NAVI10;
296       break;
297    case GFX10_3:
298       family = CHIP_SIENNA_CICHLID;
299       break;
300    default:
301       family = CHIP_UNKNOWN;
302       break;
303    }
304    return get_vk_device(family);
305 }
306 
get_vk_device(enum radeon_family family)307 VkDevice get_vk_device(enum radeon_family family)
308 {
309    assert(family != CHIP_UNKNOWN);
310 
311    std::lock_guard<std::mutex> guard(create_device_mutex);
312 
313    if (device_cache[family])
314       return device_cache[family];
315 
316    setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1);
317 
318    VkApplicationInfo app_info = {};
319    app_info.pApplicationName = "aco_tests";
320    app_info.apiVersion = VK_API_VERSION_1_2;
321    VkInstanceCreateInfo instance_create_info = {};
322    instance_create_info.pApplicationInfo = &app_info;
323    instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
324    ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
325    assert(result == VK_SUCCESS);
326 
327    #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
328    FUNCTION_LIST
329    #undef ITEM
330 
331    uint32_t device_count = 1;
332    VkPhysicalDevice device = VK_NULL_HANDLE;
333    result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device);
334    assert(result == VK_SUCCESS);
335    assert(device != VK_NULL_HANDLE);
336 
337    VkDeviceCreateInfo device_create_info = {};
338    device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
339    static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"};
340    device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
341    device_create_info.ppEnabledExtensionNames = extensions;
342    result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
343 
344    return device_cache[family];
345 }
346 
347 static struct DestroyDevices {
~DestroyDevicesDestroyDevices348    ~DestroyDevices() {
349       for (unsigned i = 0; i < CHIP_LAST; i++) {
350          if (!device_cache[i])
351             continue;
352          DestroyDevice(device_cache[i], NULL);
353          DestroyInstance(instance_cache[i], NULL);
354       }
355    }
356 } destroy_devices;
357 
print_pipeline_ir(VkDevice device,VkPipeline pipeline,VkShaderStageFlagBits stages,const char * name,bool remove_encoding)358 void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
359                        const char *name, bool remove_encoding)
360 {
361    uint32_t executable_count = 16;
362    VkPipelineExecutablePropertiesKHR executables[16];
363    VkPipelineInfoKHR pipeline_info;
364    pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
365    pipeline_info.pNext = NULL;
366    pipeline_info.pipeline = pipeline;
367    ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
368    assert(result == VK_SUCCESS);
369 
370    uint32_t executable = 0;
371    for (; executable < executable_count; executable++) {
372       if (executables[executable].stages == stages)
373          break;
374    }
375    assert(executable != executable_count);
376 
377    VkPipelineExecutableInfoKHR exec_info;
378    exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
379    exec_info.pNext = NULL;
380    exec_info.pipeline = pipeline;
381    exec_info.executableIndex = executable;
382 
383    uint32_t ir_count = 16;
384    VkPipelineExecutableInternalRepresentationKHR ir[16];
385    memset(ir, 0, sizeof(ir));
386    result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
387    assert(result == VK_SUCCESS);
388 
389    VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr;
390    for (unsigned i = 0; i < ir_count; ++i) {
391       if (strcmp(ir[i].name, name) == 0) {
392          requested_ir = &ir[i];
393          break;
394       }
395    }
396    assert(requested_ir && "Could not find requested IR");
397 
398    char *data = (char*)malloc(requested_ir->dataSize);
399    requested_ir->pData = data;
400    result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
401    assert(result == VK_SUCCESS);
402 
403    if (remove_encoding) {
404       for (char *c = data; *c; c++) {
405          if (*c == ';') {
406             for (; *c && *c != '\n'; c++)
407                *c = ' ';
408          }
409       }
410    }
411 
412    fprintf(output, "%s", data);
413    free(data);
414 }
415 
__qoCreateShaderModule(VkDevice dev,const QoShaderModuleCreateInfo * module_info)416 VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
417 {
418     VkShaderModuleCreateInfo vk_module_info;
419     vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
420     vk_module_info.pNext = NULL;
421     vk_module_info.flags = 0;
422     vk_module_info.codeSize = module_info->spirvSize;
423     vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
424 
425     VkShaderModule module;
426     ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
427     assert(result == VK_SUCCESS);
428 
429     return module;
430 }
431 
PipelineBuilder(VkDevice dev)432 PipelineBuilder::PipelineBuilder(VkDevice dev) {
433    memset(this, 0, sizeof(*this));
434    topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
435    device = dev;
436 }
437 
~PipelineBuilder()438 PipelineBuilder::~PipelineBuilder()
439 {
440    DestroyPipeline(device, pipeline, NULL);
441 
442    for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
443       VkPipelineShaderStageCreateInfo *stage_info = &stages[i];
444       if (owned_stages & stage_info->stage)
445          DestroyShaderModule(device, stage_info->module, NULL);
446    }
447 
448    DestroyPipelineLayout(device, pipeline_layout, NULL);
449 
450    for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++)
451       DestroyDescriptorSetLayout(device, desc_layouts[i], NULL);
452 
453    DestroyRenderPass(device, render_pass, NULL);
454 }
455 
add_desc_binding(VkShaderStageFlags stage_flags,uint32_t layout,uint32_t binding,VkDescriptorType type,uint32_t count)456 void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
457                                        uint32_t binding, VkDescriptorType type, uint32_t count)
458 {
459    desc_layouts_used |= 1ull << layout;
460    desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
461 }
462 
add_vertex_binding(uint32_t binding,uint32_t stride,VkVertexInputRate rate)463 void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
464 {
465    vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
466 }
467 
add_vertex_attribute(uint32_t location,uint32_t binding,VkFormat format,uint32_t offset)468 void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
469 {
470    vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
471 }
472 
add_resource_decls(QoShaderModuleCreateInfo * module)473 void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
474 {
475    for (unsigned i = 0; i < module->declarationCount; i++) {
476       const QoShaderDecl *decl = &module->pDeclarations[i];
477       switch (decl->decl_type) {
478       case QoShaderDeclType_ubo:
479          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
480          break;
481       case QoShaderDeclType_ssbo:
482          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
483          break;
484       case QoShaderDeclType_img_buf:
485          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
486          break;
487       case QoShaderDeclType_img:
488          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
489          break;
490       case QoShaderDeclType_tex_buf:
491          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
492          break;
493       case QoShaderDeclType_combined:
494          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
495          break;
496       case QoShaderDeclType_tex:
497          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
498          break;
499       case QoShaderDeclType_samp:
500          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
501          break;
502       default:
503          break;
504       }
505    }
506 }
507 
add_io_decls(QoShaderModuleCreateInfo * module)508 void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
509 {
510    unsigned next_vtx_offset = 0;
511    for (unsigned i = 0; i < module->declarationCount; i++) {
512       const QoShaderDecl *decl = &module->pDeclarations[i];
513       switch (decl->decl_type) {
514       case QoShaderDeclType_in:
515          if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
516             if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
517                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
518             else if (decl->type[0] == 'u')
519                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
520             else if (decl->type[0] == 'i')
521                add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
522             next_vtx_offset += 16;
523          }
524          break;
525       case QoShaderDeclType_out:
526          if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
527             if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
528                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT;
529             else if (decl->type[0] == 'u')
530                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT;
531             else if (decl->type[0] == 'i')
532                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
533          }
534          break;
535       default:
536          break;
537       }
538    }
539    if (next_vtx_offset)
540       add_vertex_binding(0, next_vtx_offset);
541 }
542 
add_stage(VkShaderStageFlagBits stage,VkShaderModule module,const char * name)543 void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name)
544 {
545    VkPipelineShaderStageCreateInfo *stage_info;
546    if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
547       stage_info = &stages[0];
548    else
549       stage_info = &stages[gfx_pipeline_info.stageCount++];
550    stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
551    stage_info->pNext = NULL;
552    stage_info->flags = 0;
553    stage_info->stage = stage;
554    stage_info->module = module;
555    stage_info->pName = name;
556    stage_info->pSpecializationInfo = NULL;
557    owned_stages |= stage;
558 }
559 
add_stage(VkShaderStageFlagBits stage,QoShaderModuleCreateInfo module,const char * name)560 void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
561 {
562    add_stage(stage, __qoCreateShaderModule(device, &module), name);
563    add_resource_decls(&module);
564    add_io_decls(&module);
565 }
566 
add_vsfs(VkShaderModule vs,VkShaderModule fs)567 void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
568 {
569    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
570    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
571 }
572 
add_vsfs(QoShaderModuleCreateInfo vs,QoShaderModuleCreateInfo fs)573 void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
574 {
575    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
576    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
577 }
578 
add_cs(VkShaderModule cs)579 void PipelineBuilder::add_cs(VkShaderModule cs)
580 {
581    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
582 }
583 
add_cs(QoShaderModuleCreateInfo cs)584 void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
585 {
586    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
587 }
588 
is_compute()589 bool PipelineBuilder::is_compute() {
590    return gfx_pipeline_info.stageCount == 0;
591 }
592 
create_compute_pipeline()593 void PipelineBuilder::create_compute_pipeline() {
594    VkComputePipelineCreateInfo create_info;
595    create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
596    create_info.pNext = NULL;
597    create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
598    create_info.stage = stages[0];
599    create_info.layout = pipeline_layout;
600    create_info.basePipelineHandle = VK_NULL_HANDLE;
601    create_info.basePipelineIndex = 0;
602 
603    ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
604    assert(result == VK_SUCCESS);
605 }
606 
create_graphics_pipeline()607 void PipelineBuilder::create_graphics_pipeline() {
608    /* create the create infos */
609    if (!samples)
610       samples = VK_SAMPLE_COUNT_1_BIT;
611 
612    unsigned num_color_attachments = 0;
613    VkPipelineColorBlendAttachmentState blend_attachment_states[16];
614    VkAttachmentReference color_attachments[16];
615    VkAttachmentDescription attachment_descs[17];
616    for (unsigned i = 0; i < 16; i++) {
617       if (color_outputs[i] == VK_FORMAT_UNDEFINED)
618          continue;
619 
620       VkAttachmentDescription *desc = &attachment_descs[num_color_attachments];
621       desc->flags = 0;
622       desc->format = color_outputs[i];
623       desc->samples = samples;
624       desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
625       desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
626       desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
627       desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
628       desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
629       desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
630 
631       VkAttachmentReference *ref = &color_attachments[num_color_attachments];
632       ref->attachment = num_color_attachments;
633       ref->layout = VK_IMAGE_LAYOUT_GENERAL;
634 
635       VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments];
636       blend->blendEnable = false;
637       blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
638                               VK_COLOR_COMPONENT_G_BIT |
639                               VK_COLOR_COMPONENT_B_BIT |
640                               VK_COLOR_COMPONENT_A_BIT;
641 
642       num_color_attachments++;
643    }
644 
645    unsigned num_attachments = num_color_attachments;
646    VkAttachmentReference ds_attachment;
647    if (ds_output != VK_FORMAT_UNDEFINED) {
648       VkAttachmentDescription *desc = &attachment_descs[num_attachments];
649       desc->flags = 0;
650       desc->format = ds_output;
651       desc->samples = samples;
652       desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
653       desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
654       desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
655       desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
656       desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
657       desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
658 
659       ds_attachment.attachment = num_color_attachments;
660       ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL;
661 
662       num_attachments++;
663    }
664 
665    vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
666    vs_input.pNext = NULL;
667    vs_input.flags = 0;
668    vs_input.pVertexBindingDescriptions = vs_bindings;
669    vs_input.pVertexAttributeDescriptions = vs_attributes;
670 
671    VkPipelineInputAssemblyStateCreateInfo assembly_state;
672    assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
673    assembly_state.pNext = NULL;
674    assembly_state.flags = 0;
675    assembly_state.topology = topology;
676    assembly_state.primitiveRestartEnable = false;
677 
678    VkPipelineTessellationStateCreateInfo tess_state;
679    tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
680    tess_state.pNext = NULL;
681    tess_state.flags = 0;
682    tess_state.patchControlPoints = patch_size;
683 
684    VkPipelineViewportStateCreateInfo viewport_state;
685    viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
686    viewport_state.pNext = NULL;
687    viewport_state.flags = 0;
688    viewport_state.viewportCount = 1;
689    viewport_state.pViewports = NULL;
690    viewport_state.scissorCount = 1;
691    viewport_state.pScissors = NULL;
692 
693    VkPipelineRasterizationStateCreateInfo rasterization_state;
694    rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
695    rasterization_state.pNext = NULL;
696    rasterization_state.flags = 0;
697    rasterization_state.depthClampEnable = false;
698    rasterization_state.rasterizerDiscardEnable = false;
699    rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
700    rasterization_state.cullMode = VK_CULL_MODE_NONE;
701    rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
702    rasterization_state.depthBiasEnable = false;
703    rasterization_state.lineWidth = 1.0;
704 
705    VkPipelineMultisampleStateCreateInfo ms_state;
706    ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
707    ms_state.pNext = NULL;
708    ms_state.flags = 0;
709    ms_state.rasterizationSamples = samples;
710    ms_state.sampleShadingEnable = sample_shading_enable;
711    ms_state.minSampleShading = min_sample_shading;
712    VkSampleMask sample_mask = 0xffffffff;
713    ms_state.pSampleMask = &sample_mask;
714    ms_state.alphaToCoverageEnable = false;
715    ms_state.alphaToOneEnable = false;
716 
717    VkPipelineDepthStencilStateCreateInfo ds_state;
718    ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
719    ds_state.pNext = NULL;
720    ds_state.flags = 0;
721    ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED;
722    ds_state.depthWriteEnable = true;
723    ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
724    ds_state.depthBoundsTestEnable = false;
725    ds_state.stencilTestEnable = true;
726    ds_state.front.failOp = VK_STENCIL_OP_KEEP;
727    ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
728    ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
729    ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
730    ds_state.front.compareMask = 0xffffffff,
731    ds_state.front.writeMask = 0;
732    ds_state.front.reference = 0;
733    ds_state.back = ds_state.front;
734 
735    VkPipelineColorBlendStateCreateInfo color_blend_state;
736    color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
737    color_blend_state.pNext = NULL;
738    color_blend_state.flags = 0;
739    color_blend_state.logicOpEnable = false;
740    color_blend_state.attachmentCount = num_color_attachments;
741    color_blend_state.pAttachments = blend_attachment_states;
742 
743    VkDynamicState dynamic_states[9] = {
744       VK_DYNAMIC_STATE_VIEWPORT,
745       VK_DYNAMIC_STATE_SCISSOR,
746       VK_DYNAMIC_STATE_LINE_WIDTH,
747       VK_DYNAMIC_STATE_DEPTH_BIAS,
748       VK_DYNAMIC_STATE_BLEND_CONSTANTS,
749       VK_DYNAMIC_STATE_DEPTH_BOUNDS,
750       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
751       VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
752       VK_DYNAMIC_STATE_STENCIL_REFERENCE
753    };
754 
755    VkPipelineDynamicStateCreateInfo dynamic_state;
756    dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
757    dynamic_state.pNext = NULL;
758    dynamic_state.flags = 0;
759    dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState);
760    dynamic_state.pDynamicStates = dynamic_states;
761 
762    gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
763    gfx_pipeline_info.pNext = NULL;
764    gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
765    gfx_pipeline_info.pVertexInputState = &vs_input;
766    gfx_pipeline_info.pInputAssemblyState = &assembly_state;
767    gfx_pipeline_info.pTessellationState = &tess_state;
768    gfx_pipeline_info.pViewportState = &viewport_state;
769    gfx_pipeline_info.pRasterizationState = &rasterization_state;
770    gfx_pipeline_info.pMultisampleState = &ms_state;
771    gfx_pipeline_info.pDepthStencilState = &ds_state;
772    gfx_pipeline_info.pColorBlendState = &color_blend_state;
773    gfx_pipeline_info.pDynamicState = &dynamic_state;
774    gfx_pipeline_info.subpass = 0;
775 
776    /* create the objects used to create the pipeline */
777    VkSubpassDescription subpass;
778    subpass.flags = 0;
779    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
780    subpass.inputAttachmentCount = 0;
781    subpass.pInputAttachments = NULL;
782    subpass.colorAttachmentCount = num_color_attachments;
783    subpass.pColorAttachments = color_attachments;
784    subpass.pResolveAttachments = NULL;
785    subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment;
786    subpass.preserveAttachmentCount = 0;
787    subpass.pPreserveAttachments = NULL;
788 
789    VkRenderPassCreateInfo renderpass_info;
790    renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
791    renderpass_info.pNext = NULL;
792    renderpass_info.flags = 0;
793    renderpass_info.attachmentCount = num_attachments;
794    renderpass_info.pAttachments = attachment_descs;
795    renderpass_info.subpassCount = 1;
796    renderpass_info.pSubpasses = &subpass;
797    renderpass_info.dependencyCount = 0;
798    renderpass_info.pDependencies = NULL;
799 
800    ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass);
801    assert(result == VK_SUCCESS);
802 
803    gfx_pipeline_info.layout = pipeline_layout;
804    gfx_pipeline_info.renderPass = render_pass;
805 
806    /* create the pipeline */
807    gfx_pipeline_info.pStages = stages;
808 
809    result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline);
810    assert(result == VK_SUCCESS);
811 }
812 
create_pipeline()813 void PipelineBuilder::create_pipeline() {
814    unsigned num_desc_layouts = 0;
815    for (unsigned i = 0; i < 64; i++) {
816       if (!(desc_layouts_used & (1ull << i)))
817          continue;
818 
819       VkDescriptorSetLayoutCreateInfo desc_layout_info;
820       desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
821       desc_layout_info.pNext = NULL;
822       desc_layout_info.flags = 0;
823       desc_layout_info.bindingCount = num_desc_bindings[i];
824       desc_layout_info.pBindings = desc_bindings[i];
825 
826       ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]);
827       assert(result == VK_SUCCESS);
828       num_desc_layouts++;
829    }
830 
831    VkPipelineLayoutCreateInfo pipeline_layout_info;
832    pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
833    pipeline_layout_info.pNext = NULL;
834    pipeline_layout_info.flags = 0;
835    pipeline_layout_info.pushConstantRangeCount = 1;
836    pipeline_layout_info.pPushConstantRanges = &push_constant_range;
837    pipeline_layout_info.setLayoutCount = num_desc_layouts;
838    pipeline_layout_info.pSetLayouts = desc_layouts;
839 
840    ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
841    assert(result == VK_SUCCESS);
842 
843    if (is_compute())
844       create_compute_pipeline();
845    else
846       create_graphics_pipeline();
847 }
848 
print_ir(VkShaderStageFlagBits stage_flags,const char * name,bool remove_encoding)849 void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding)
850 {
851    if (!pipeline)
852       create_pipeline();
853    print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding);
854 }
855