1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 * Copyright (c) 2016 The Android Open Source Project
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Indirect Compute Dispatch tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeIndirectComputeDispatchTests.hpp"
26 #include "vktComputeTestsUtil.hpp"
27
28 #include <string>
29 #include <map>
30 #include <vector>
31
32 #include "vkDefs.hpp"
33 #include "vkRef.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vktTestCase.hpp"
36 #include "vktTestCaseUtil.hpp"
37 #include "vkPlatform.hpp"
38 #include "vkPrograms.hpp"
39 #include "vkMemUtil.hpp"
40 #include "vkBarrierUtil.hpp"
41 #include "vkBuilderUtil.hpp"
42 #include "vkQueryUtil.hpp"
43 #include "vkCmdUtil.hpp"
44 #include "vkObjUtil.hpp"
45
46 #include "tcuVector.hpp"
47 #include "tcuVectorUtil.hpp"
48 #include "tcuTestLog.hpp"
49 #include "tcuRGBA.hpp"
50 #include "tcuStringTemplate.hpp"
51
52 #include "deUniquePtr.hpp"
53 #include "deSharedPtr.hpp"
54 #include "deStringUtil.hpp"
55 #include "deArrayUtil.hpp"
56
57 #include "gluShaderUtil.hpp"
58
59 namespace vkt
60 {
61 namespace compute
62 {
63 namespace
64 {
65
66 enum
67 {
68 RESULT_BLOCK_BASE_SIZE = 4 * (int)sizeof(deUint32), // uvec3 + uint
69 RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(deUint32),
70 INDIRECT_COMMAND_OFFSET = 3 * (int)sizeof(deUint32),
71 };
72
//! Round \p baseSize up to the next multiple of the device's
//! minStorageBufferOffsetAlignment limit.
//!
//! \param instance_interface	Instance interface used to query the physical device properties.
//! \param physicalDevice		Physical device whose limits are queried.
//! \param baseSize				Unaligned size in bytes.
//! \return \p baseSize unchanged when the alignment is zero or already satisfied,
//!         otherwise the smallest multiple of the alignment that is larger than \p baseSize.
vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&	instance_interface,
											const vk::VkPhysicalDevice		physicalDevice,
											const vk::VkDeviceSize			baseSize)
{
	// TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
	vk::VkPhysicalDeviceProperties properties;
	instance_interface.getPhysicalDeviceProperties(physicalDevice, &properties);

	const vk::VkDeviceSize offsetAlignment = properties.limits.minStorageBufferOffsetAlignment;

	// Guard the modulo below against a zero divisor.
	if (offsetAlignment == 0)
		return baseSize;

	const vk::VkDeviceSize remainder = baseSize % offsetAlignment;

	if (remainder == 0)
		return baseSize;

	// Drop the partial block and advance by one full alignment step.
	return baseSize - remainder + offsetAlignment;
}
87
88 struct DispatchCommand
89 {
DispatchCommandvkt::compute::__anonf8ad28100111::DispatchCommand90 DispatchCommand (const deIntptr offset,
91 const tcu::UVec3& numWorkGroups)
92 : m_offset (offset)
93 , m_numWorkGroups (numWorkGroups) {}
94
95 deIntptr m_offset;
96 tcu::UVec3 m_numWorkGroups;
97 };
98
99 typedef std::vector<DispatchCommand> DispatchCommandsVec;
100
101 struct DispatchCaseDesc
102 {
DispatchCaseDescvkt::compute::__anonf8ad28100111::DispatchCaseDesc103 DispatchCaseDesc (const char* name,
104 const char* description,
105 const deUintptr bufferSize,
106 const tcu::UVec3 workGroupSize,
107 const DispatchCommandsVec& dispatchCommands)
108 : m_name (name)
109 , m_description (description)
110 , m_bufferSize (bufferSize)
111 , m_workGroupSize (workGroupSize)
112 , m_dispatchCommands (dispatchCommands) {}
113
114 const char* m_name;
115 const char* m_description;
116 const deUintptr m_bufferSize;
117 const tcu::UVec3 m_workGroupSize;
118 const DispatchCommandsVec m_dispatchCommands;
119 };
120
121 class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
122 {
123 public:
124 IndirectDispatchInstanceBufferUpload (Context& context,
125 const std::string& name,
126 const deUintptr bufferSize,
127 const tcu::UVec3& workGroupSize,
128 const DispatchCommandsVec& dispatchCommands);
129
~IndirectDispatchInstanceBufferUpload(void)130 virtual ~IndirectDispatchInstanceBufferUpload (void) {}
131
132 virtual tcu::TestStatus iterate (void);
133
134 protected:
135 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
136 const Buffer& indirectBuffer);
137
138 deBool verifyResultBuffer (const Buffer& resultBuffer,
139 const vk::VkDeviceSize resultBlockSize) const;
140
141 Context& m_context;
142 const std::string m_name;
143
144 const vk::DeviceInterface& m_device_interface;
145 const vk::VkDevice m_device;
146
147 const vk::VkQueue m_queue;
148 const deUint32 m_queueFamilyIndex;
149
150 const deUintptr m_bufferSize;
151 const tcu::UVec3 m_workGroupSize;
152 const DispatchCommandsVec m_dispatchCommands;
153
154 vk::Allocator& m_allocator;
155
156 private:
157 IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
158 IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
159 };
160
IndirectDispatchInstanceBufferUpload(Context & context,const std::string & name,const deUintptr bufferSize,const tcu::UVec3 & workGroupSize,const DispatchCommandsVec & dispatchCommands)161 IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context& context,
162 const std::string& name,
163 const deUintptr bufferSize,
164 const tcu::UVec3& workGroupSize,
165 const DispatchCommandsVec& dispatchCommands)
166 : vkt::TestInstance (context)
167 , m_context (context)
168 , m_name (name)
169 , m_device_interface (context.getDeviceInterface())
170 , m_device (context.getDevice())
171 , m_queue (context.getUniversalQueue())
172 , m_queueFamilyIndex (context.getUniversalQueueFamilyIndex())
173 , m_bufferSize (bufferSize)
174 , m_workGroupSize (workGroupSize)
175 , m_dispatchCommands (dispatchCommands)
176 , m_allocator (context.getDefaultAllocator())
177 {
178 }
179
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const Buffer & indirectBuffer)180 void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
181 {
182 DE_UNREF(commandBuffer);
183
184 const vk::Allocation& alloc = indirectBuffer.getAllocation();
185 deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
186
187 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
188 {
189 DE_ASSERT(cmdIter->m_offset >= 0);
190 DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
191 DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
192
193 deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
194
195 dstPtr[0] = cmdIter->m_numWorkGroups[0];
196 dstPtr[1] = cmdIter->m_numWorkGroups[1];
197 dstPtr[2] = cmdIter->m_numWorkGroups[2];
198 }
199
200 vk::flushAlloc(m_device_interface, m_device, alloc);
201 }
202
iterate(void)203 tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
204 {
205 tcu::TestContext& testCtx = m_context.getTestContext();
206
207 testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
208 {
209 tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
210
211 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
212 {
213 testCtx.getLog()
214 << tcu::TestLog::Message
215 << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
216 << tcu::TestLog::EndMessage;
217 }
218 }
219
220 // Create result buffer
221 const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
222 const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
223
224 Buffer resultBuffer(
225 m_device_interface, m_device, m_allocator,
226 vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
227 vk::MemoryRequirement::HostVisible);
228
229 {
230 const vk::Allocation& alloc = resultBuffer.getAllocation();
231 deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
232
233 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
234 {
235 deUint8* const dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
236
237 *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
238 *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
239 *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
240 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
241 }
242
243 vk::flushAlloc(m_device_interface, m_device, alloc);
244 }
245
246 // Create verify compute shader
247 const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
248 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
249
250 // Create descriptorSetLayout
251 vk::DescriptorSetLayoutBuilder layoutBuilder;
252 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
253 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
254
255 // Create compute pipeline
256 const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
257 const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
258
259 // Create descriptor pool
260 const vk::Unique<vk::VkDescriptorPool> descriptorPool(
261 vk::DescriptorPoolBuilder()
262 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
263 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
264
265 const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
266 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
267
268 // Create command buffer
269 const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
270 const vk::Unique<vk::VkCommandBuffer> cmdBuffer(allocateCommandBuffer(m_device_interface, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
271
272 // Begin recording commands
273 beginCommandBuffer(m_device_interface, *cmdBuffer);
274
275 // Create indirect buffer
276 Buffer indirectBuffer(
277 m_device_interface, m_device, m_allocator,
278 vk::makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
279 vk::MemoryRequirement::HostVisible);
280 fillIndirectBufferData(*cmdBuffer, indirectBuffer);
281
282 // Bind compute pipeline
283 m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
284
285 // Allocate descriptor sets
286 typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
287 std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());
288
289 vk::VkDeviceSize curOffset = 0;
290
291 // Create descriptor sets
292 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
293 {
294 descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
295 makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout)));
296
297 const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
298
299 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
300 descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
301 descriptorSetBuilder.update(m_device_interface, m_device);
302
303 // Bind descriptor set
304 m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);
305
306 // Dispatch indirect compute command
307 m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
308
309 curOffset += resultBlockSize;
310 }
311
312 // Insert memory barrier
313 m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
314 0, (const vk::VkMemoryBarrier*)DE_NULL,
315 1, &ssboPostBarrier,
316 0, (const vk::VkImageMemoryBarrier*)DE_NULL);
317
318 // End recording commands
319 endCommandBuffer(m_device_interface, *cmdBuffer);
320
321 // Wait for command buffer execution finish
322 submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
323
324 // Check if result buffer contains valid values
325 if (verifyResultBuffer(resultBuffer, resultBlockSize))
326 return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
327 else
328 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
329 }
330
verifyResultBuffer(const Buffer & resultBuffer,const vk::VkDeviceSize resultBlockSize) const331 deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer& resultBuffer,
332 const vk::VkDeviceSize resultBlockSize) const
333 {
334 deBool allOk = true;
335 const vk::Allocation& alloc = resultBuffer.getAllocation();
336 vk::invalidateAlloc(m_device_interface, m_device, alloc);
337
338 const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
339
340 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
341 {
342 const DispatchCommand& cmd = m_dispatchCommands[cmdNdx];
343 const deUint8* const srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
344 const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
345 const deUint32 numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
346 const deUint32 numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
347 const deUint32 expectedCount = numInvocationsPerGroup * numGroups;
348
349 if (numPassed != expectedCount)
350 {
351 tcu::TestContext& testCtx = m_context.getTestContext();
352
353 testCtx.getLog()
354 << tcu::TestLog::Message
355 << "ERROR: got invalid result for invocation " << cmdNdx
356 << ": got numPassed = " << numPassed << ", expected " << expectedCount
357 << tcu::TestLog::EndMessage;
358
359 allOk = false;
360 }
361 }
362
363 return allOk;
364 }
365
366 class IndirectDispatchCaseBufferUpload : public vkt::TestCase
367 {
368 public:
369 IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
370 const DispatchCaseDesc& caseDesc,
371 const glu::GLSLVersion glslVersion);
372
~IndirectDispatchCaseBufferUpload(void)373 virtual ~IndirectDispatchCaseBufferUpload (void) {}
374
375 virtual void initPrograms (vk::SourceCollections& programCollection) const;
376 virtual TestInstance* createInstance (Context& context) const;
377
378 protected:
379 const deUintptr m_bufferSize;
380 const tcu::UVec3 m_workGroupSize;
381 const DispatchCommandsVec m_dispatchCommands;
382 const glu::GLSLVersion m_glslVersion;
383
384 private:
385 IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
386 IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
387 };
388
IndirectDispatchCaseBufferUpload(tcu::TestContext & testCtx,const DispatchCaseDesc & caseDesc,const glu::GLSLVersion glslVersion)389 IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx,
390 const DispatchCaseDesc& caseDesc,
391 const glu::GLSLVersion glslVersion)
392 : vkt::TestCase (testCtx, caseDesc.m_name, caseDesc.m_description)
393 , m_bufferSize (caseDesc.m_bufferSize)
394 , m_workGroupSize (caseDesc.m_workGroupSize)
395 , m_dispatchCommands (caseDesc.m_dispatchCommands)
396 , m_glslVersion (glslVersion)
397 {
398 }
399
initPrograms(vk::SourceCollections & programCollection) const400 void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
401 {
402 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
403
404 std::ostringstream verifyBuffer;
405
406 verifyBuffer
407 << versionDecl << "\n"
408 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
409 << "layout(set = 0, binding = 0, std430) buffer Result\n"
410 << "{\n"
411 << " uvec3 expectedGroupCount;\n"
412 << " coherent uint numPassed;\n"
413 << "} result;\n"
414 << "void main (void)\n"
415 << "{\n"
416 << " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
417 << " atomicAdd(result.numPassed, 1u);\n"
418 << "}\n";
419
420 std::map<std::string, std::string> args;
421
422 args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
423 args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
424 args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
425
426 std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
427
428 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
429 }
430
createInstance(Context & context) const431 TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
432 {
433 return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
434 }
435
436 class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
437 {
438 public:
IndirectDispatchInstanceBufferGenerate(Context & context,const std::string & name,const deUintptr bufferSize,const tcu::UVec3 & workGroupSize,const DispatchCommandsVec & dispatchCommands)439 IndirectDispatchInstanceBufferGenerate (Context& context,
440 const std::string& name,
441 const deUintptr bufferSize,
442 const tcu::UVec3& workGroupSize,
443 const DispatchCommandsVec& dispatchCommands)
444 : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
445
~IndirectDispatchInstanceBufferGenerate(void)446 virtual ~IndirectDispatchInstanceBufferGenerate (void) {}
447
448 protected:
449 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer,
450 const Buffer& indirectBuffer);
451
452 vk::Move<vk::VkDescriptorSetLayout> m_descriptorSetLayout;
453 vk::Move<vk::VkDescriptorPool> m_descriptorPool;
454 vk::Move<vk::VkDescriptorSet> m_descriptorSet;
455 vk::Move<vk::VkPipelineLayout> m_pipelineLayout;
456 vk::Move<vk::VkPipeline> m_computePipeline;
457
458 private:
459 IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
460 IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
461 };
462
fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,const Buffer & indirectBuffer)463 void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
464 {
465 // Create compute shader that generates data for indirect buffer
466 const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
467 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
468
469 // Create descriptorSetLayout
470 m_descriptorSetLayout = vk::DescriptorSetLayoutBuilder()
471 .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
472 .build(m_device_interface, m_device);
473
474 // Create compute pipeline
475 m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *m_descriptorSetLayout);
476 m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
477
478 // Create descriptor pool
479 m_descriptorPool = vk::DescriptorPoolBuilder()
480 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
481 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
482
483 // Create descriptor set
484 m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *m_descriptorSetLayout);
485
486 const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
487
488 vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
489 descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
490 descriptorSetBuilder.update(m_device_interface, m_device);
491
492 const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
493 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
494
495 // Bind compute pipeline
496 m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
497
498 // Bind descriptor set
499 m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
500
501 // Dispatch compute command
502 m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
503
504 // Insert memory barrier
505 m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
506 0, (const vk::VkMemoryBarrier*)DE_NULL,
507 1, &bufferBarrier,
508 0, (const vk::VkImageMemoryBarrier*)DE_NULL);
509 }
510
511 class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
512 {
513 public:
IndirectDispatchCaseBufferGenerate(tcu::TestContext & testCtx,const DispatchCaseDesc & caseDesc,const glu::GLSLVersion glslVersion)514 IndirectDispatchCaseBufferGenerate (tcu::TestContext& testCtx,
515 const DispatchCaseDesc& caseDesc,
516 const glu::GLSLVersion glslVersion)
517 : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
518
~IndirectDispatchCaseBufferGenerate(void)519 virtual ~IndirectDispatchCaseBufferGenerate (void) {}
520
521 virtual void initPrograms (vk::SourceCollections& programCollection) const;
522 virtual TestInstance* createInstance (Context& context) const;
523
524 private:
525 IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
526 IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
527 };
528
initPrograms(vk::SourceCollections & programCollection) const529 void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
530 {
531 IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
532
533 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
534
535 std::ostringstream computeBuffer;
536
537 // Header
538 computeBuffer
539 << versionDecl << "\n"
540 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
541 << "layout(set = 0, binding = 0, std430) buffer Out\n"
542 << "{\n"
543 << " highp uint data[];\n"
544 << "};\n"
545 << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
546 << "{\n"
547 << " data[offset+0u] = numWorkGroups.x;\n"
548 << " data[offset+1u] = numWorkGroups.y;\n"
549 << " data[offset+2u] = numWorkGroups.z;\n"
550 << "}\n"
551 << "void main (void)\n"
552 << "{\n";
553
554 // Dispatch commands
555 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
556 {
557 const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
558 DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
559
560 computeBuffer
561 << "\twriteCmd(" << offs << "u, uvec3("
562 << cmdIter->m_numWorkGroups.x() << "u, "
563 << cmdIter->m_numWorkGroups.y() << "u, "
564 << cmdIter->m_numWorkGroups.z() << "u));\n";
565 }
566
567 // Ending
568 computeBuffer << "}\n";
569
570 std::string computeString = computeBuffer.str();
571
572 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
573 }
574
createInstance(Context & context) const575 TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
576 {
577 return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
578 }
579
commandsVec(const DispatchCommand & cmd)580 DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
581 {
582 DispatchCommandsVec vec;
583 vec.push_back(cmd);
584 return vec;
585 }
586
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4)587 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
588 const DispatchCommand& cmd1,
589 const DispatchCommand& cmd2,
590 const DispatchCommand& cmd3,
591 const DispatchCommand& cmd4)
592 {
593 DispatchCommandsVec vec;
594 vec.push_back(cmd0);
595 vec.push_back(cmd1);
596 vec.push_back(cmd2);
597 vec.push_back(cmd3);
598 vec.push_back(cmd4);
599 return vec;
600 }
601
commandsVec(const DispatchCommand & cmd0,const DispatchCommand & cmd1,const DispatchCommand & cmd2,const DispatchCommand & cmd3,const DispatchCommand & cmd4,const DispatchCommand & cmd5,const DispatchCommand & cmd6)602 DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
603 const DispatchCommand& cmd1,
604 const DispatchCommand& cmd2,
605 const DispatchCommand& cmd3,
606 const DispatchCommand& cmd4,
607 const DispatchCommand& cmd5,
608 const DispatchCommand& cmd6)
609 {
610 DispatchCommandsVec vec;
611 vec.push_back(cmd0);
612 vec.push_back(cmd1);
613 vec.push_back(cmd2);
614 vec.push_back(cmd3);
615 vec.push_back(cmd4);
616 vec.push_back(cmd5);
617 vec.push_back(cmd6);
618 return vec;
619 }
620
621 } // anonymous ns
622
createIndirectComputeDispatchTests(tcu::TestContext & testCtx)623 tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
624 {
625 static const DispatchCaseDesc s_dispatchCases[] =
626 {
627 DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
628 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
629 ),
630 DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
631 commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
632 ),
633 DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
634 commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
635 ),
636 DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
637 commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
638 ),
639 DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
640 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
641 ),
642 DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
643 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
644 ),
645 DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
646 commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
647 ),
648 DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
649 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
650 DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
651 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
652 DispatchCommand(40, tcu::UVec3(1, 1, 7)),
653 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
654 ),
655 DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
656 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
657 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
658 DispatchCommand(0, tcu::UVec3(1, 1, 1)),
659 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
660 DispatchCommand(104, tcu::UVec3(1, 3, 1)),
661 DispatchCommand(52, tcu::UVec3(1, 1, 4)),
662 DispatchCommand(52, tcu::UVec3(1, 1, 4)))
663 ),
664 };
665
666 de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
667
668 tcu::TestCaseGroup* const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
669 indirectComputeDispatchTests->addChild(groupBufferUpload);
670
671 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
672 {
673 groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
674 }
675
676 tcu::TestCaseGroup* const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
677 indirectComputeDispatchTests->addChild(groupBufferGenerate);
678
679 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
680 {
681 groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
682 }
683
684 return indirectComputeDispatchTests.release();
685 }
686
687 } // compute
688 } // vkt
689