1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Experimental crash postmortem shader timeout tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktPostmortemTests.hpp"
25 #include "vktPostmortemShaderTimeoutTests.hpp"
26 #include "vktTestGroupUtil.hpp"
27 #include "vktTestCase.hpp"
28 #include "vkBarrierUtil.hpp"
29 #include "vkBufferWithMemory.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkDefs.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "deUniquePtr.hpp"
36 #include "tcuCommandLine.hpp"
37 #include "vktCustomInstancesDevices.hpp"
38 #include "vktPostmortemUtil.hpp"
39 
40 using namespace vk;
41 
42 namespace vkt
43 {
44 namespace postmortem
45 {
46 namespace
47 {
48 
49 class ShaderTimeoutCase : public vkt::TestCase
50 {
51 public:
ShaderTimeoutCase(tcu::TestContext & testCtx,const std::string & name,deUint32 iterations)52 	ShaderTimeoutCase(tcu::TestContext& testCtx, const std::string& name, deUint32 iterations) : TestCase(testCtx, name, "Long-running compute shader"), m_iterations(iterations) {}
53 
54 	TestInstance* createInstance(Context& context) const override;
55 	void initPrograms(vk::SourceCollections& programCollection) const override;
56 
57 private:
58 	deUint32 m_iterations;
59 };
60 
61 class ShaderTimeoutInstance : public PostmortemTestInstance
62 {
63 public:
64 	ShaderTimeoutInstance(Context& context, deUint32 iterations);
65 
66 	tcu::TestStatus		iterate(void) override;
67 
68 private:
69 	deUint32			m_iterations;
70 };
71 
72 
makeComputePipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkPipelineCreateFlags pipelineFlags,const VkShaderModule shaderModule,const VkPipelineShaderStageCreateFlags shaderFlags)73 Move<VkPipeline> makeComputePipeline(const DeviceInterface&					vk,
74 									 const VkDevice							device,
75 									 const VkPipelineLayout					pipelineLayout,
76 									 const VkPipelineCreateFlags			pipelineFlags,
77 									 const VkShaderModule					shaderModule,
78 									 const VkPipelineShaderStageCreateFlags	shaderFlags)
79 {
80 	const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
81 	{
82 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
83 		DE_NULL,												// const void*							pNext;
84 		shaderFlags,											// VkPipelineShaderStageCreateFlags		flags;
85 		VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits				stage;
86 		shaderModule,											// VkShaderModule						module;
87 		"main",													// const char*							pName;
88 		DE_NULL,												// const VkSpecializationInfo*			pSpecializationInfo;
89 	};
90 	const VkComputePipelineCreateInfo pipelineCreateInfo =
91 	{
92 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType					sType;
93 		DE_NULL,											// const void*						pNext;
94 		pipelineFlags,										// VkPipelineCreateFlags			flags;
95 		pipelineShaderStageParams,							// VkPipelineShaderStageCreateInfo	stage;
96 		pipelineLayout,										// VkPipelineLayout					layout;
97 		DE_NULL,											// VkPipeline						basePipelineHandle;
98 		0,													// deInt32							basePipelineIndex;
99 	};
100 	return createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo);
101 }
102 
makeComputePipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule)103 Move<VkPipeline> makeComputePipeline(const DeviceInterface&		vk,
104 									 const VkDevice				device,
105 									 const VkPipelineLayout		pipelineLayout,
106 									 const VkShaderModule		shaderModule)
107 {
108 	return makeComputePipeline(vk, device, pipelineLayout, static_cast<VkPipelineCreateFlags>(0u), shaderModule, static_cast<VkPipelineShaderStageCreateFlags>(0u));
109 }
110 
ShaderTimeoutInstance(Context & context,deUint32 iterations)111 ShaderTimeoutInstance::ShaderTimeoutInstance(Context& context, deUint32 iterations)
112 	: PostmortemTestInstance(context), m_iterations(iterations)
113 {
114 
115 }
116 
createInstance(Context & context) const117 TestInstance* ShaderTimeoutCase::createInstance(Context& context) const
118 {
119 	return new ShaderTimeoutInstance(context, m_iterations);
120 }
121 
initPrograms(vk::SourceCollections & programCollection) const122 void ShaderTimeoutCase::initPrograms(vk::SourceCollections& programCollection) const
123 {
124 	std::ostringstream src;
125 	src << "#version 320 es\n"
126 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1)\n"
127 		<< "layout(binding = 0) uniform Params {\n"
128 		<< "  int x;\n"
129 		<< "  int y;\n"
130 		<< "} bounds;\n"
131 		<< "layout(std430, binding = 1) buffer  Output {\n"
132 		<< "  uint values[];\n"
133 		<< "} sb_out;\n"
134 		<< "\n"
135 		<< "void main()\n"
136 		<< "{\n"
137 		<< "  uint localSize = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n"
138 		<< "  uint globalNdx = gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupID.z + gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
139 		<< "  uint globalOffs = localSize * globalNdx;\n"
140 		<< "  uint localOffs = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_LocalInvocationID.z + gl_WorkGroupSize.x * gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
141 		<< "  uint sum = uint(0);\n"
142 		<< "  for (int y = 0; y < bounds.y; ++y) {\n"
143 		<< "    for (int x = 0; x < bounds.x; ++x) {\n"
144 		<< "	  sb_out.values[globalOffs + localOffs] = sb_out.values[globalOffs + localOffs] + uint(1);\n"
145 		<< "      memoryBarrierBuffer();\n"
146 		<< "      barrier();\n"
147 		<< "    }\n"
148 		<< "  }\n"
149 		<< "}\n";
150 
151 	programCollection.glslSources.add("comp") << glu::ComputeSource(src.str());
152 }
153 
iterate(void)154 tcu::TestStatus	ShaderTimeoutInstance::iterate(void)
155 {
156 	const VkDevice			device				= *m_logicalDevice;
157 	const DeviceInterface&	vk					= m_deviceDriver;
158 	const VkQueue			queue				= m_queue;
159 	const deUint32			queueFamilyIndex	= m_queueFamilyIndex;
160 	Allocator&				allocator			= m_allocator;
161 
162 	const int workSize = 1024;
163 	const VkDeviceSize storageSizeInBytes = sizeof(deUint32) * workSize;
164 	const VkDeviceSize uniformSizeInBytes = sizeof(deUint32) * 2;
165 
166 	// Create storage and uniform buffers
167 	BufferWithMemory storageBuffer(vk, device, allocator,
168 		makeBufferCreateInfo(storageSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
169 		MemoryRequirement::HostVisible);
170 	BufferWithMemory uniformBuffer(vk, device, allocator,
171 		makeBufferCreateInfo(uniformSizeInBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
172 		MemoryRequirement::HostVisible);
173 
174 	// Fill storage buffer with sequentially increasing values
175 	{
176 		const Allocation& storageBufferAllocation = storageBuffer.getAllocation();
177 		deUint32* storageBufferPtr = static_cast<deUint32*>(storageBufferAllocation.getHostPtr());
178 		for (int i = 0; i < workSize; ++i)
179 			storageBufferPtr[i] = i;
180 
181 		flushAlloc(vk, device, storageBufferAllocation);
182 	}
183 
184 	// Set uniforms for shader loop bounds to m_iterations
185 	{
186 		const Allocation& uniformBufferAllocation = uniformBuffer.getAllocation();
187 		deUint32* uniformBufferPtr = static_cast<deUint32*>(uniformBufferAllocation.getHostPtr());
188 		uniformBufferPtr[0] = m_iterations;
189 		uniformBufferPtr[1] = m_iterations;
190 
191 		flushAlloc(vk, device, uniformBufferAllocation);
192 	}
193 
194 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
195 		DescriptorSetLayoutBuilder()
196 		.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
197 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
198 		.build(vk, device));
199 
200 	const Unique<VkDescriptorPool> descriptorPool(
201 		DescriptorPoolBuilder()
202 		.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
203 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
204 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
205 
206 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
207 
208 	const VkDescriptorBufferInfo uniformDescriptorInfo = makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformSizeInBytes);
209 	const VkDescriptorBufferInfo storageDescriptorInfo = makeDescriptorBufferInfo(*storageBuffer, 0ull, storageSizeInBytes);
210 	DescriptorSetUpdateBuilder()
211 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformDescriptorInfo)
212 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &storageDescriptorInfo)
213 		.update(vk, device);
214 
215 	// Create pipelines
216 	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
217 	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
218 	const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
219 
220 	const VkBufferMemoryBarrier hostWriteBarriers[2] =
221 	{
222 		makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *storageBuffer, 0ull, storageSizeInBytes),
223 		makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformSizeInBytes)
224 	};
225 	const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *storageBuffer, 0ull, storageSizeInBytes);
226 
227 	// Create command buffer and launch dispatch,
228 	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
229 	const Unique<VkCommandBuffer> cmdBuffer(allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
230 
231 	beginCommandBuffer(vk, *cmdBuffer);
232 	vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
233 	vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
234 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 2u, hostWriteBarriers, 0, (const VkImageMemoryBarrier*)DE_NULL);
235 	vk.cmdDispatch(*cmdBuffer, workSize, 1, 1);
236 	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*) DE_NULL, 1u, &computeFinishBarrier, 0, (const VkImageMemoryBarrier*) DE_NULL);
237 	endCommandBuffer(vk, *cmdBuffer);
238 
239 	submitCommandsAndWait(vk, device, queue, *cmdBuffer);
240 
241 	// Verify output
242 	const Allocation& storageAllocation = storageBuffer.getAllocation();
243 	invalidateAlloc(vk, device, storageAllocation);
244 
245 	const deUint32* bufferPtr = static_cast<deUint32*>(storageAllocation.getHostPtr());
246 	for (int i = 0; i < workSize; ++i)
247 	{
248 		const deUint32	res = bufferPtr[i];
249 		const deUint32	ref = i + m_iterations * m_iterations;
250 		if (res != ref)
251 		{
252 			std::ostringstream msg;
253 			msg << "Comparison failed for sb_out.values[" << i << "] ref:" << ref << " res:" << res;
254 			return tcu::TestStatus::fail(msg.str());
255 		}
256 	}
257 
258 	return tcu::TestStatus::pass("Test succeeded without device loss");
259 }
260 
261 }
262 
createShaderTimeoutTests(tcu::TestContext & testCtx)263 tcu::TestCaseGroup* createShaderTimeoutTests(tcu::TestContext& testCtx)
264 {
265 	de::MovePtr<tcu::TestCaseGroup> timeoutGroup(new tcu::TestCaseGroup(testCtx, "shader_timeout", "Shader timeout tests."));
266 	for (int i = 0; i < 16; ++i)
267 	{
268 		deUint32 iterations = 0x1u << i;
269 		std::stringstream name;
270 		name << "compute_" << iterations << "x" << iterations;
271 		timeoutGroup->addChild(new ShaderTimeoutCase(testCtx, name.str(), iterations));
272 	}
273 
274 	return timeoutGroup.release();
275 }
276 
277 } // postmortem
278 } // vkt
279