1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuCommandLine.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "vkBarrierUtil.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
36 using namespace tcu;
37 using namespace std;
38 using namespace vk;
39 using namespace vkt;
40 
41 namespace
42 {
43 
getMaxWidth()44 deUint32 getMaxWidth ()
45 {
46 	return 1024u;
47 }
48 
// Returns the width that follows 'width': widths below 128 advance by one,
// widths of 128 and above are doubled.
deUint32 getNextWidth (const deUint32 width)
{
	// 128 is the max subgroup size; every value up to it is tested.
	const deUint32 exhaustiveLimit = 128u;

	// Once the limit is reached only power of 2's are visited, which reduces
	// testing time.
	return (width >= exhaustiveLimit) ? (width * 2) : (width + 1);
}
62 
getFormatSizeInBytes(const VkFormat format)63 deUint32 getFormatSizeInBytes(const VkFormat format)
64 {
65 	switch (format)
66 	{
67 		default:
68 			DE_FATAL("Unhandled format!");
69 			return 0;
70 		case VK_FORMAT_R8_SINT:
71 		case VK_FORMAT_R8_UINT:
72 			return static_cast<deUint32>(sizeof(deInt8));
73 		case VK_FORMAT_R8G8_SINT:
74 		case VK_FORMAT_R8G8_UINT:
75 			return static_cast<deUint32>(sizeof(deInt8) * 2);
76 		case VK_FORMAT_R8G8B8_SINT:
77 		case VK_FORMAT_R8G8B8_UINT:
78 		case VK_FORMAT_R8G8B8A8_SINT:
79 		case VK_FORMAT_R8G8B8A8_UINT:
80 			return static_cast<deUint32>(sizeof(deInt8) * 4);
81 		case VK_FORMAT_R16_SINT:
82 		case VK_FORMAT_R16_UINT:
83 		case VK_FORMAT_R16_SFLOAT:
84 			return static_cast<deUint32>(sizeof(deInt16));
85 		case VK_FORMAT_R16G16_SINT:
86 		case VK_FORMAT_R16G16_UINT:
87 		case VK_FORMAT_R16G16_SFLOAT:
88 			return static_cast<deUint32>(sizeof(deInt16) * 2);
89 		case VK_FORMAT_R16G16B16_UINT:
90 		case VK_FORMAT_R16G16B16_SINT:
91 		case VK_FORMAT_R16G16B16_SFLOAT:
92 		case VK_FORMAT_R16G16B16A16_SINT:
93 		case VK_FORMAT_R16G16B16A16_UINT:
94 		case VK_FORMAT_R16G16B16A16_SFLOAT:
95 			return static_cast<deUint32>(sizeof(deInt16) * 4);
96 		case VK_FORMAT_R32_SINT:
97 		case VK_FORMAT_R32_UINT:
98 		case VK_FORMAT_R32_SFLOAT:
99 			return static_cast<deUint32>(sizeof(deInt32));
100 		case VK_FORMAT_R32G32_SINT:
101 		case VK_FORMAT_R32G32_UINT:
102 		case VK_FORMAT_R32G32_SFLOAT:
103 			return static_cast<deUint32>(sizeof(deInt32) * 2);
104 		case VK_FORMAT_R32G32B32_SINT:
105 		case VK_FORMAT_R32G32B32_UINT:
106 		case VK_FORMAT_R32G32B32_SFLOAT:
107 		case VK_FORMAT_R32G32B32A32_SINT:
108 		case VK_FORMAT_R32G32B32A32_UINT:
109 		case VK_FORMAT_R32G32B32A32_SFLOAT:
110 			return static_cast<deUint32>(sizeof(deInt32) * 4);
111 		case VK_FORMAT_R64_SINT:
112 		case VK_FORMAT_R64_UINT:
113 		case VK_FORMAT_R64_SFLOAT:
114 			return static_cast<deUint32>(sizeof(deInt64));
115 		case VK_FORMAT_R64G64_SINT:
116 		case VK_FORMAT_R64G64_UINT:
117 		case VK_FORMAT_R64G64_SFLOAT:
118 			return static_cast<deUint32>(sizeof(deInt64) * 2);
119 		case VK_FORMAT_R64G64B64_SINT:
120 		case VK_FORMAT_R64G64B64_UINT:
121 		case VK_FORMAT_R64G64B64_SFLOAT:
122 		case VK_FORMAT_R64G64B64A64_SINT:
123 		case VK_FORMAT_R64G64B64A64_UINT:
124 		case VK_FORMAT_R64G64B64A64_SFLOAT:
125 			return static_cast<deUint32>(sizeof(deInt64) * 4);
126 		// The below formats are used to represent bool and bvec* types. These
127 		// types are passed to the shader as int and ivec* types, before the
128 		// calculations are done as booleans. We need a distinct type here so
129 		// that the shader generators can switch on it and generate the correct
130 		// shader source for testing.
131 		case VK_FORMAT_R8_USCALED:
132 			return static_cast<deUint32>(sizeof(deInt32));
133 		case VK_FORMAT_R8G8_USCALED:
134 			return static_cast<deUint32>(sizeof(deInt32) * 2);
135 		case VK_FORMAT_R8G8B8_USCALED:
136 		case VK_FORMAT_R8G8B8A8_USCALED:
137 			return static_cast<deUint32>(sizeof(deInt32) * 4);
138 	}
139 }
140 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)141 deUint32 getElementSizeInBytes(
142 	const VkFormat format,
143 	const subgroups::SSBOData::InputDataLayoutType layout)
144 {
145 	deUint32 bytes = getFormatSizeInBytes(format);
146 	if (layout == subgroups::SSBOData::LayoutStd140)
147 		return bytes < 16 ? 16 : bytes;
148 	else
149 		return bytes;
150 }
151 
makeRenderPass(Context & context,VkFormat format)152 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
153 {
154 	VkAttachmentReference colorReference = {
155 		0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
156 	};
157 
158 	const VkSubpassDescription subpassDescription = {0u,
159 													 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
160 													 DE_NULL, DE_NULL, 0, DE_NULL
161 													};
162 
163 	const VkSubpassDependency subpassDependencies[2] = {
164 		{   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
165 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
166 			VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
167 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
168 			VK_DEPENDENCY_BY_REGION_BIT
169 		},
170 		{   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
171 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
172 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
174 			VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
175 		},
176 	};
177 
178 	VkAttachmentDescription attachmentDescription = {0u, format,
179 													 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
180 													 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
181 													 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
182 													 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
183 													};
184 
185 	const VkRenderPassCreateInfo renderPassCreateInfo = {
186 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
187 		&attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
188 	};
189 
190 	return createRenderPass(context.getDeviceInterface(), context.getDevice(),
191 							&renderPassCreateInfo);
192 }
193 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])194 Move<VkPipeline> makeGraphicsPipeline(const DeviceInterface&						vk,
195 									  const VkDevice								device,
196 									  const VkPipelineLayout						pipelineLayout,
197 									  const VkShaderModule							vertexShaderModule,
198 									  const VkShaderModule							tessellationControlShaderModule,
199 									  const VkShaderModule							tessellationEvalShaderModule,
200 									  const VkShaderModule							geometryShaderModule,
201 									  const VkShaderModule							fragmentShaderModule,
202 									  const VkRenderPass							renderPass,
203 									  const std::vector<VkViewport>&				viewports,
204 									  const std::vector<VkRect2D>&					scissors,
205 									  const VkPrimitiveTopology						topology,
206 									  const deUint32								subpass,
207 									  const deUint32								patchControlPoints,
208 									  const VkPipelineVertexInputStateCreateInfo*	vertexInputStateCreateInfo,
209 									  const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo,
210 									  const VkPipelineMultisampleStateCreateInfo*	multisampleStateCreateInfo,
211 									  const VkPipelineDepthStencilStateCreateInfo*	depthStencilStateCreateInfo,
212 									  const VkPipelineColorBlendStateCreateInfo*	colorBlendStateCreateInfo,
213 									  const VkPipelineDynamicStateCreateInfo*		dynamicStateCreateInfo,
214 									  const deUint32								vertexShaderStageCreateFlags,
215 									  const deUint32								tessellationControlShaderStageCreateFlags,
216 									  const deUint32								tessellationEvalShaderStageCreateFlags,
217 									  const deUint32								geometryShaderStageCreateFlags,
218 									  const deUint32								fragmentShaderStageCreateFlags,
219 									  const deUint32								requiredSubgroupSize[5])
220 {
221 	const VkBool32									disableRasterization				= (fragmentShaderModule == DE_NULL);
222 	const bool										hasTessellation						= (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
223 
224 	VkPipelineShaderStageCreateInfo					stageCreateInfo						=
225 	{
226 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType
227 		DE_NULL,												// const void*                         pNext
228 		0u,														// VkPipelineShaderStageCreateFlags    flags
229 		VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits               stage
230 		DE_NULL,												// VkShaderModule                      module
231 		"main",													// const char*                         pName
232 		DE_NULL													// const VkSpecializationInfo*         pSpecializationInfo
233 	};
234 
235 	std::vector<VkPipelineShaderStageCreateInfo>	pipelineShaderStageParams;
236 
237 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
238 		{
239 			{
240 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
241 				DE_NULL,
242 				requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
243 			},
244 			{
245 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
246 				DE_NULL,
247 				requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
248 			},
249 			{
250 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
251 				DE_NULL,
252 				requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
253 			},
254 			{
255 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
256 				DE_NULL,
257 				requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
258 			},
259 			{
260 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
261 				DE_NULL,
262 				requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
263 			},
264 		};
265 	{
266 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
267 		stageCreateInfo.flags	= vertexShaderStageCreateFlags;
268 		stageCreateInfo.stage	= VK_SHADER_STAGE_VERTEX_BIT;
269 		stageCreateInfo.module	= vertexShaderModule;
270 		pipelineShaderStageParams.push_back(stageCreateInfo);
271 	}
272 
273 	if (tessellationControlShaderModule != DE_NULL)
274 	{
275 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
276 		stageCreateInfo.flags	= tessellationControlShaderStageCreateFlags;
277 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
278 		stageCreateInfo.module	= tessellationControlShaderModule;
279 		pipelineShaderStageParams.push_back(stageCreateInfo);
280 	}
281 
282 	if (tessellationEvalShaderModule != DE_NULL)
283 	{
284 		stageCreateInfo.pNext	= (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
285 		stageCreateInfo.flags	= tessellationEvalShaderStageCreateFlags;
286 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
287 		stageCreateInfo.module	= tessellationEvalShaderModule;
288 		pipelineShaderStageParams.push_back(stageCreateInfo);
289 	}
290 
291 	if (geometryShaderModule != DE_NULL)
292 	{
293 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
294 		stageCreateInfo.flags	= geometryShaderStageCreateFlags;
295 		stageCreateInfo.stage	= VK_SHADER_STAGE_GEOMETRY_BIT;
296 		stageCreateInfo.module	= geometryShaderModule;
297 		pipelineShaderStageParams.push_back(stageCreateInfo);
298 	}
299 
300 	if (fragmentShaderModule != DE_NULL)
301 	{
302 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
303 		stageCreateInfo.flags	= fragmentShaderStageCreateFlags;
304 		stageCreateInfo.stage	= VK_SHADER_STAGE_FRAGMENT_BIT;
305 		stageCreateInfo.module	= fragmentShaderModule;
306 		pipelineShaderStageParams.push_back(stageCreateInfo);
307 	}
308 
309 	const VkVertexInputBindingDescription			vertexInputBindingDescription		=
310 	{
311 		0u,								// deUint32             binding
312 		sizeof(tcu::Vec4),				// deUint32             stride
313 		VK_VERTEX_INPUT_RATE_VERTEX,	// VkVertexInputRate    inputRate
314 	};
315 
316 	const VkVertexInputAttributeDescription			vertexInputAttributeDescription		=
317 	{
318 		0u,								// deUint32    location
319 		0u,								// deUint32    binding
320 		VK_FORMAT_R32G32B32A32_SFLOAT,	// VkFormat    format
321 		0u								// deUint32    offset
322 	};
323 
324 	const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfoDefault	=
325 	{
326 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType                             sType
327 		DE_NULL,													// const void*                                 pNext
328 		(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags       flags
329 		1u,															// deUint32                                    vertexBindingDescriptionCount
330 		&vertexInputBindingDescription,								// const VkVertexInputBindingDescription*      pVertexBindingDescriptions
331 		1u,															// deUint32                                    vertexAttributeDescriptionCount
332 		&vertexInputAttributeDescription							// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
333 	};
334 
335 	const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo		=
336 	{
337 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                            sType
338 		DE_NULL,														// const void*                                pNext
339 		0u,																// VkPipelineInputAssemblyStateCreateFlags    flags
340 		topology,														// VkPrimitiveTopology                        topology
341 		VK_FALSE														// VkBool32                                   primitiveRestartEnable
342 	};
343 
344 	const VkPipelineTessellationStateCreateInfo		tessStateCreateInfo					=
345 	{
346 		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,	// VkStructureType                           sType
347 		DE_NULL,													// const void*                               pNext
348 		0u,															// VkPipelineTessellationStateCreateFlags    flags
349 		patchControlPoints											// deUint32                                  patchControlPoints
350 	};
351 
352 	const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
353 	{
354 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType                             sType
355 		DE_NULL,												// const void*                                 pNext
356 		(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags          flags
357 		viewports.empty() ? 1u : (deUint32)viewports.size(),	// deUint32                                    viewportCount
358 		viewports.empty() ? DE_NULL : &viewports[0],			// const VkViewport*                           pViewports
359 		viewports.empty() ? 1u : (deUint32)scissors.size(),		// deUint32                                    scissorCount
360 		scissors.empty() ? DE_NULL : &scissors[0]				// const VkRect2D*                             pScissors
361 	};
362 
363 	const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfoDefault	=
364 	{
365 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType                            sType
366 		DE_NULL,													// const void*                                pNext
367 		0u,															// VkPipelineRasterizationStateCreateFlags    flags
368 		VK_FALSE,													// VkBool32                                   depthClampEnable
369 		disableRasterization,										// VkBool32                                   rasterizerDiscardEnable
370 		VK_POLYGON_MODE_FILL,										// VkPolygonMode                              polygonMode
371 		VK_CULL_MODE_NONE,											// VkCullModeFlags                            cullMode
372 		VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace                                frontFace
373 		VK_FALSE,													// VkBool32                                   depthBiasEnable
374 		0.0f,														// float                                      depthBiasConstantFactor
375 		0.0f,														// float                                      depthBiasClamp
376 		0.0f,														// float                                      depthBiasSlopeFactor
377 		1.0f														// float                                      lineWidth
378 	};
379 
380 	const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfoDefault	=
381 	{
382 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType                          sType
383 		DE_NULL,													// const void*                              pNext
384 		0u,															// VkPipelineMultisampleStateCreateFlags    flags
385 		VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits                    rasterizationSamples
386 		VK_FALSE,													// VkBool32                                 sampleShadingEnable
387 		1.0f,														// float                                    minSampleShading
388 		DE_NULL,													// const VkSampleMask*                      pSampleMask
389 		VK_FALSE,													// VkBool32                                 alphaToCoverageEnable
390 		VK_FALSE													// VkBool32                                 alphaToOneEnable
391 	};
392 
393 	const VkStencilOpState							stencilOpState						=
394 	{
395 		VK_STENCIL_OP_KEEP,		// VkStencilOp    failOp
396 		VK_STENCIL_OP_KEEP,		// VkStencilOp    passOp
397 		VK_STENCIL_OP_KEEP,		// VkStencilOp    depthFailOp
398 		VK_COMPARE_OP_NEVER,	// VkCompareOp    compareOp
399 		0,						// deUint32       compareMask
400 		0,						// deUint32       writeMask
401 		0						// deUint32       reference
402 	};
403 
404 	const VkPipelineDepthStencilStateCreateInfo		depthStencilStateCreateInfoDefault	=
405 	{
406 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	// VkStructureType                          sType
407 		DE_NULL,													// const void*                              pNext
408 		0u,															// VkPipelineDepthStencilStateCreateFlags   flags
409 		VK_FALSE,													// VkBool32                                 depthTestEnable
410 		VK_FALSE,													// VkBool32                                 depthWriteEnable
411 		VK_COMPARE_OP_LESS_OR_EQUAL,								// VkCompareOp                              depthCompareOp
412 		VK_FALSE,													// VkBool32                                 depthBoundsTestEnable
413 		VK_FALSE,													// VkBool32                                 stencilTestEnable
414 		stencilOpState,												// VkStencilOpState                         front
415 		stencilOpState,												// VkStencilOpState                         back
416 		0.0f,														// float                                    minDepthBounds
417 		1.0f,														// float                                    maxDepthBounds
418 	};
419 
420 	const VkPipelineColorBlendAttachmentState		colorBlendAttachmentState			=
421 	{
422 		VK_FALSE,					// VkBool32                 blendEnable
423 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcColorBlendFactor
424 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstColorBlendFactor
425 		VK_BLEND_OP_ADD,			// VkBlendOp                colorBlendOp
426 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcAlphaBlendFactor
427 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstAlphaBlendFactor
428 		VK_BLEND_OP_ADD,			// VkBlendOp                alphaBlendOp
429 		VK_COLOR_COMPONENT_R_BIT	// VkColorComponentFlags    colorWriteMask
430 		| VK_COLOR_COMPONENT_G_BIT
431 		| VK_COLOR_COMPONENT_B_BIT
432 		| VK_COLOR_COMPONENT_A_BIT
433 	};
434 
435 	const VkPipelineColorBlendStateCreateInfo		colorBlendStateCreateInfoDefault	=
436 	{
437 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType                               sType
438 		DE_NULL,													// const void*                                   pNext
439 		0u,															// VkPipelineColorBlendStateCreateFlags          flags
440 		VK_FALSE,													// VkBool32                                      logicOpEnable
441 		VK_LOGIC_OP_CLEAR,											// VkLogicOp                                     logicOp
442 		1u,															// deUint32                                      attachmentCount
443 		&colorBlendAttachmentState,									// const VkPipelineColorBlendAttachmentState*    pAttachments
444 		{ 0.0f, 0.0f, 0.0f, 0.0f }									// float                                         blendConstants[4]
445 	};
446 
447 	std::vector<VkDynamicState>						dynamicStates;
448 
449 	if (viewports.empty())
450 		dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
451 	if (scissors.empty())
452 		dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
453 
454 	const VkPipelineDynamicStateCreateInfo			dynamicStateCreateInfoDefault		=
455 	{
456 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType                      sType
457 		DE_NULL,												// const void*                          pNext
458 		0u,														// VkPipelineDynamicStateCreateFlags    flags
459 		(deUint32)dynamicStates.size(),							// deUint32                             dynamicStateCount
460 		dynamicStates.empty() ? DE_NULL : &dynamicStates[0]		// const VkDynamicState*                pDynamicStates
461 	};
462 
463 	const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfoDefaultPtr	= dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
464 
465 	const VkGraphicsPipelineCreateInfo				pipelineCreateInfo					=
466 	{
467 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,														// VkStructureType                                  sType
468 		DE_NULL,																								// const void*                                      pNext
469 		0u,																										// VkPipelineCreateFlags                            flags
470 		(deUint32)pipelineShaderStageParams.size(),																// deUint32                                         stageCount
471 		&pipelineShaderStageParams[0],																			// const VkPipelineShaderStageCreateInfo*           pStages
472 		vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault,			// const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
473 		&inputAssemblyStateCreateInfo,																			// const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
474 		hasTessellation ? &tessStateCreateInfo : DE_NULL,														// const VkPipelineTessellationStateCreateInfo*     pTessellationState
475 		&viewportStateCreateInfo,																				// const VkPipelineViewportStateCreateInfo*         pViewportState
476 		rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault,		// const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
477 		multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault,			// const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
478 		depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault,		// const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
479 		colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault,				// const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
480 		dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr,						// const VkPipelineDynamicStateCreateInfo*          pDynamicState
481 		pipelineLayout,																							// VkPipelineLayout                                 layout
482 		renderPass,																								// VkRenderPass                                     renderPass
483 		subpass,																								// deUint32                                         subpass
484 		DE_NULL,																								// VkPipeline                                       basePipelineHandle
485 		0																										// deInt32                                          basePipelineIndex;
486 	};
487 
488 	return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
489 }
490 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const deUint32 vertexShaderStageCreateFlags=0u,const deUint32 tessellationControlShaderStageCreateFlags=0u,const deUint32 tessellationEvalShaderStageCreateFlags=0u,const deUint32 geometryShaderStageCreateFlags=0u,const deUint32 fragmentShaderStageCreateFlags=0u,const deUint32 requiredSubgroupSize[5]=DE_NULL)491 Move<VkPipeline> makeGraphicsPipeline(Context&									context,
492 									  const VkPipelineLayout					pipelineLayout,
493 									  const VkShaderStageFlags					stages,
494 									  const VkShaderModule						vertexShaderModule,
495 									  const VkShaderModule						fragmentShaderModule,
496 									  const VkShaderModule						geometryShaderModule,
497 									  const VkShaderModule						tessellationControlModule,
498 									  const VkShaderModule						tessellationEvaluationModule,
499 									  const VkRenderPass						renderPass,
500 									  const VkPrimitiveTopology					topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
501 									  const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
502 									  const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
503 									  const bool								frameBufferTests = false,
504 									  const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
505 									  const deUint32							vertexShaderStageCreateFlags = 0u,
506 									  const deUint32							tessellationControlShaderStageCreateFlags = 0u,
507 									  const deUint32							tessellationEvalShaderStageCreateFlags = 0u,
508 									  const deUint32							geometryShaderStageCreateFlags = 0u,
509 									  const deUint32							fragmentShaderStageCreateFlags = 0u,
510 									  const deUint32							requiredSubgroupSize[5] = DE_NULL)
511 {
512 	std::vector<VkViewport>	noViewports;
513 	std::vector<VkRect2D>	noScissors;
514 
515 	const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
516 	{
517 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
518 		DE_NULL,													// const void*									pNext;
519 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
520 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
521 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
522 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
523 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
524 	};
525 
526 	const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
527 	const VkColorComponentFlags colorComponent =
528 												numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
529 												numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
530 												numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
531 												VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
532 
533 	const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
534 	{
535 		VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
536 		VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
537 		colorComponent
538 	};
539 
540 	const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
541 	{
542 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
543 		VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
544 		{ 0.0f, 0.0f, 0.0f, 0.0f }
545 	};
546 
547 	const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
548 
549 	return makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&                        vk
550 								context.getDevice(),			// const VkDevice                                device
551 								pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
552 								vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
553 								tessellationControlModule,		// const VkShaderModule                          tessellationControlShaderModule
554 								tessellationEvaluationModule,	// const VkShaderModule                          tessellationEvalShaderModule
555 								geometryShaderModule,			// const VkShaderModule                          geometryShaderModule
556 								fragmentShaderModule,			// const VkShaderModule                          fragmentShaderModule
557 								renderPass,						// const VkRenderPass                            renderPass
558 								noViewports,					// const std::vector<VkViewport>&                viewports
559 								noScissors,						// const std::vector<VkRect2D>&                  scissors
560 								topology,						// const VkPrimitiveTopology                     topology
561 								0u,								// const deUint32                                subpass
562 								patchControlPoints,				// const deUint32                                patchControlPoints
563 								&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
564 								DE_NULL,						// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
565 								DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
566 								DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
567 								&colorBlendStateCreateInfo,		// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
568 								DE_NULL,						// const VkPipelineDynamicStateCreateInfo*
569 								vertexShaderStageCreateFlags,	// const deUint32								 vertexShaderStageCreateFlags,
570 								tessellationControlShaderStageCreateFlags,	// const deUint32					 tessellationControlShaderStageCreateFlags
571 								tessellationEvalShaderStageCreateFlags,		// const deUint32					 tessellationEvalShaderStageCreateFlags
572 								geometryShaderStageCreateFlags,	// const deUint32								 geometryShaderStageCreateFlags
573 								fragmentShaderStageCreateFlags,	// const deUint32								 fragmentShaderStageCreateFlags
574 								requiredSubgroupSize);			// const deUint32								 requiredSubgroupSize[5]
575 }
576 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)577 Move<VkCommandBuffer> makeCommandBuffer(
578 	Context& context, const VkCommandPool commandPool)
579 {
580 	const VkCommandBufferAllocateInfo bufferAllocateParams =
581 	{
582 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
583 		DE_NULL,										// const void*			pNext;
584 		commandPool,									// VkCommandPool		commandPool;
585 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
586 		1u,												// deUint32				bufferCount;
587 	};
588 	return allocateCommandBuffer(context.getDeviceInterface(),
589 								 context.getDevice(), &bufferAllocateParams);
590 }
591 
592 struct Buffer;
593 struct Image;
594 
595 struct BufferOrImage
596 {
isImage__anone2f77d5c0111::BufferOrImage597 	bool isImage() const
598 	{
599 		return m_isImage;
600 	}
601 
getAsBuffer__anone2f77d5c0111::BufferOrImage602 	Buffer* getAsBuffer()
603 	{
604 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
605 		return reinterpret_cast<Buffer* >(this);
606 	}
607 
getAsImage__anone2f77d5c0111::BufferOrImage608 	Image* getAsImage()
609 	{
610 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
611 		return reinterpret_cast<Image*>(this);
612 	}
613 
getType__anone2f77d5c0111::BufferOrImage614 	virtual VkDescriptorType getType() const
615 	{
616 		if (m_isImage)
617 		{
618 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
619 		}
620 		else
621 		{
622 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
623 		}
624 	}
625 
getAllocation__anone2f77d5c0111::BufferOrImage626 	Allocation& getAllocation() const
627 	{
628 		return *m_allocation;
629 	}
630 
~BufferOrImage__anone2f77d5c0111::BufferOrImage631 	virtual ~BufferOrImage() {}
632 
633 protected:
BufferOrImage__anone2f77d5c0111::BufferOrImage634 	explicit BufferOrImage(bool image) : m_isImage(image) {}
635 
636 	bool m_isImage;
637 	de::details::MovePtr<Allocation> m_allocation;
638 };
639 
640 struct Buffer : public BufferOrImage
641 {
Buffer__anone2f77d5c0111::Buffer642 	explicit Buffer(
643 		Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
644 		: BufferOrImage		(false)
645 		, m_sizeInBytes		(sizeInBytes)
646 		, m_usage			(usage)
647 	{
648 		const DeviceInterface&			vkd					= context.getDeviceInterface();
649 		const VkDevice					device				= context.getDevice();
650 
651 		const vk::VkBufferCreateInfo	bufferCreateInfo	=
652 		{
653 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
654 			DE_NULL,
655 			0u,
656 			m_sizeInBytes,
657 			m_usage,
658 			VK_SHARING_MODE_EXCLUSIVE,
659 			0u,
660 			DE_NULL,
661 		};
662 		m_buffer		= createBuffer(vkd, device, &bufferCreateInfo);
663 
664 		VkMemoryRequirements			req					= getBufferMemoryRequirements(vkd, device, *m_buffer);
665 
666 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
667 		VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
668 	}
669 
getType__anone2f77d5c0111::Buffer670 	virtual VkDescriptorType getType() const
671 	{
672 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
673 		{
674 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
675 		}
676 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
677 	}
678 
getBuffer__anone2f77d5c0111::Buffer679 	VkBuffer getBuffer () const
680 	{
681 		return *m_buffer;
682 	}
683 
getBufferPtr__anone2f77d5c0111::Buffer684 	const VkBuffer* getBufferPtr () const
685 	{
686 		return &(*m_buffer);
687 	}
688 
getSize__anone2f77d5c0111::Buffer689 	VkDeviceSize getSize () const
690 	{
691 		return m_sizeInBytes;
692 	}
693 
694 private:
695 	Move<VkBuffer>				m_buffer;
696 	VkDeviceSize				m_sizeInBytes;
697 	const VkBufferUsageFlags	m_usage;
698 };
699 
700 struct Image : public BufferOrImage
701 {
Image__anone2f77d5c0111::Image702 	explicit Image(Context& context, deUint32 width, deUint32 height,
703 				   VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
704 		: BufferOrImage(true)
705 	{
706 		const DeviceInterface&			vk					= context.getDeviceInterface();
707 		const VkDevice					device				= context.getDevice();
708 		const deUint32					queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
709 
710 		const VkImageCreateInfo			imageCreateInfo		=
711 		{
712 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
713 			format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
714 			VK_IMAGE_TILING_OPTIMAL, usage,
715 			VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
716 			VK_IMAGE_LAYOUT_UNDEFINED
717 		};
718 
719 		const VkComponentMapping		componentMapping	=
720 		{
721 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
722 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
723 		};
724 
725 		const VkImageSubresourceRange	subresourceRange	=
726 		{
727 			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
728 			0u,							//deUint32				baseMipLevel
729 			1u,							//deUint32				levelCount
730 			0u,							//deUint32				baseArrayLayer
731 			1u							//deUint32				layerCount
732 		};
733 
734 		const VkSamplerCreateInfo		samplerCreateInfo	=
735 		{
736 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
737 			DE_NULL,
738 			0u,
739 			VK_FILTER_NEAREST,
740 			VK_FILTER_NEAREST,
741 			VK_SAMPLER_MIPMAP_MODE_NEAREST,
742 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
743 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
744 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
745 			0.0f,
746 			VK_FALSE,
747 			1.0f,
748 			DE_FALSE,
749 			VK_COMPARE_OP_ALWAYS,
750 			0.0f,
751 			0.0f,
752 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
753 			VK_FALSE,
754 		};
755 
756 		m_image			= createImage(vk, device, &imageCreateInfo);
757 
758 		VkMemoryRequirements			req					= getImageMemoryRequirements(vk, device, *m_image);
759 
760 		req.size		*= 2;
761 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
762 
763 		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
764 
765 		const VkImageViewCreateInfo		imageViewCreateInfo	=
766 		{
767 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
768 			VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
769 			subresourceRange
770 		};
771 
772 		m_imageView		= createImageView(vk, device, &imageViewCreateInfo);
773 		m_sampler		= createSampler(vk, device, &samplerCreateInfo);
774 
775 		// Transition input image layouts
776 		{
777 			const Unique<VkCommandPool>		cmdPool			(makeCommandPool(vk, device, queueFamilyIndex));
778 			const Unique<VkCommandBuffer>	cmdBuffer		(makeCommandBuffer(context, *cmdPool));
779 
780 			beginCommandBuffer(vk, *cmdBuffer);
781 
782 			const VkImageMemoryBarrier		imageBarrier	= makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
783 																	VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
784 
785 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
786 				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
787 
788 			endCommandBuffer(vk, *cmdBuffer);
789 			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
790 		}
791 	}
792 
getImage__anone2f77d5c0111::Image793 	VkImage getImage () const
794 	{
795 		return *m_image;
796 	}
797 
getImageView__anone2f77d5c0111::Image798 	VkImageView getImageView () const
799 	{
800 		return *m_imageView;
801 	}
802 
getSampler__anone2f77d5c0111::Image803 	VkSampler getSampler () const
804 	{
805 		return *m_sampler;
806 	}
807 
808 private:
809 	Move<VkImage> m_image;
810 	Move<VkImageView> m_imageView;
811 	Move<VkSampler> m_sampler;
812 };
813 }
814 
getSharedMemoryBallotHelper()815 std::string vkt::subgroups::getSharedMemoryBallotHelper()
816 {
817 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
818 			"uvec4 sharedMemoryBallot(bool vote)\n"
819 			"{\n"
820 			"  uint groupOffset = gl_SubgroupID;\n"
821 			"  // One invocation in the group 0's the whole group's data\n"
822 			"  if (subgroupElect())\n"
823 			"  {\n"
824 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
825 			"  }\n"
826 			"  subgroupMemoryBarrierShared();\n"
827 			"  if (vote)\n"
828 			"  {\n"
829 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
830 			"    const highp uint bitToSet = 1u << invocationId;\n"
831 			"    switch (gl_SubgroupInvocationID / 32)\n"
832 			"    {\n"
833 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
834 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
835 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
836 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
837 			"    }\n"
838 			"  }\n"
839 			"  subgroupMemoryBarrierShared();\n"
840 			"  return superSecretComputeShaderHelper[groupOffset];\n"
841 			"}\n";
842 }
843 
getSharedMemoryBallotHelperARB()844 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
845 {
846 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
847 			"uint64_t sharedMemoryBallot(bool vote)\n"
848 			"{\n"
849 			"  uint groupOffset = gl_SubgroupID;\n"
850 			"  // One invocation in the group 0's the whole group's data\n"
851 			"  if (subgroupElect())\n"
852 			"  {\n"
853 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
854 			"  }\n"
855 			"  subgroupMemoryBarrierShared();\n"
856 			"  if (vote)\n"
857 			"  {\n"
858 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
859 			"    const highp uint bitToSet = 1u << invocationId;\n"
860 			"    switch (gl_SubgroupInvocationID / 32)\n"
861 			"    {\n"
862 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
863 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
864 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
865 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
866 			"    }\n"
867 			"  }\n"
868 			"  subgroupMemoryBarrierShared();\n"
869 			"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
870 			"}\n";
871 }
872 
getSubgroupSize(Context & context)873 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
874 {
875 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
876 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
877 	subgroupProperties.pNext = DE_NULL;
878 
879 	VkPhysicalDeviceProperties2 properties;
880 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
881 	properties.pNext = &subgroupProperties;
882 
883 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
884 
885 	return subgroupProperties.subgroupSize;
886 }
887 
maxSupportedSubgroupSize()888 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
889 	return 128u;
890 }
891 
getShaderStageName(VkShaderStageFlags stage)892 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
893 {
894 	switch (stage)
895 	{
896 		default:
897 			DE_FATAL("Unhandled stage!");
898 			return "";
899 		case VK_SHADER_STAGE_COMPUTE_BIT:
900 			return "compute";
901 		case VK_SHADER_STAGE_FRAGMENT_BIT:
902 			return "fragment";
903 		case VK_SHADER_STAGE_VERTEX_BIT:
904 			return "vertex";
905 		case VK_SHADER_STAGE_GEOMETRY_BIT:
906 			return "geometry";
907 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
908 			return "tess_control";
909 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
910 			return "tess_eval";
911 	}
912 }
913 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)914 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
915 {
916 	switch (bit)
917 	{
918 		default:
919 			DE_FATAL("Unknown subgroup feature category!");
920 			return "";
921 		case VK_SUBGROUP_FEATURE_BASIC_BIT:
922 			return "VK_SUBGROUP_FEATURE_BASIC_BIT";
923 		case VK_SUBGROUP_FEATURE_VOTE_BIT:
924 			return "VK_SUBGROUP_FEATURE_VOTE_BIT";
925 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
926 			return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
927 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:
928 			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
929 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
930 			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
931 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
932 			return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
933 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
934 			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
935 		case VK_SUBGROUP_FEATURE_QUAD_BIT:
936 			return "VK_SUBGROUP_FEATURE_QUAD_BIT";
937 	}
938 }
939 
addNoSubgroupShader(SourceCollections & programCollection)940 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
941 {
942 	{
943 	/*
944 		"#version 450\n"
945 		"void main (void)\n"
946 		"{\n"
947 		"  float pixelSize = 2.0f/1024.0f;\n"
948 		"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
949 		"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
950 		"  gl_PointSize = 1.0f;\n"
951 		"}\n"
952 	*/
953 		const std::string vertNoSubgroup =
954 			"; SPIR-V\n"
955 			"; Version: 1.3\n"
956 			"; Generator: Khronos Glslang Reference Front End; 1\n"
957 			"; Bound: 37\n"
958 			"; Schema: 0\n"
959 			"OpCapability Shader\n"
960 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
961 			"OpMemoryModel Logical GLSL450\n"
962 			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
963 			"OpMemberDecorate %20 0 BuiltIn Position\n"
964 			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
965 			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
966 			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
967 			"OpDecorate %20 Block\n"
968 			"OpDecorate %26 BuiltIn VertexIndex\n"
969 			"%2 = OpTypeVoid\n"
970 			"%3 = OpTypeFunction %2\n"
971 			"%6 = OpTypeFloat 32\n"
972 			"%7 = OpTypePointer Function %6\n"
973 			"%9 = OpConstant %6 0.00195313\n"
974 			"%12 = OpConstant %6 2\n"
975 			"%14 = OpConstant %6 1\n"
976 			"%16 = OpTypeVector %6 4\n"
977 			"%17 = OpTypeInt 32 0\n"
978 			"%18 = OpConstant %17 1\n"
979 			"%19 = OpTypeArray %6 %18\n"
980 			"%20 = OpTypeStruct %16 %6 %19 %19\n"
981 			"%21 = OpTypePointer Output %20\n"
982 			"%22 = OpVariable %21 Output\n"
983 			"%23 = OpTypeInt 32 1\n"
984 			"%24 = OpConstant %23 0\n"
985 			"%25 = OpTypePointer Input %23\n"
986 			"%26 = OpVariable %25 Input\n"
987 			"%33 = OpConstant %6 0\n"
988 			"%35 = OpTypePointer Output %16\n"
989 			"%37 = OpConstant %23 1\n"
990 			"%38 = OpTypePointer Output %6\n"
991 			"%4 = OpFunction %2 None %3\n"
992 			"%5 = OpLabel\n"
993 			"%8 = OpVariable %7 Function\n"
994 			"%10 = OpVariable %7 Function\n"
995 			"OpStore %8 %9\n"
996 			"%11 = OpLoad %6 %8\n"
997 			"%13 = OpFDiv %6 %11 %12\n"
998 			"%15 = OpFSub %6 %13 %14\n"
999 			"OpStore %10 %15\n"
1000 			"%27 = OpLoad %23 %26\n"
1001 			"%28 = OpConvertSToF %6 %27\n"
1002 			"%29 = OpLoad %6 %8\n"
1003 			"%30 = OpFMul %6 %28 %29\n"
1004 			"%31 = OpLoad %6 %10\n"
1005 			"%32 = OpFAdd %6 %30 %31\n"
1006 			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1007 			"%36 = OpAccessChain %35 %22 %24\n"
1008 			"OpStore %36 %34\n"
1009 			"%39 = OpAccessChain %38 %22 %37\n"
1010 			"OpStore %39 %14\n"
1011 			"OpReturn\n"
1012 			"OpFunctionEnd\n";
1013 		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1014 	}
1015 
1016 	{
1017 	/*
1018 		"#version 450\n"
1019 		"layout(vertices=1) out;\n"
1020 		"\n"
1021 		"void main (void)\n"
1022 		"{\n"
1023 		"  if (gl_InvocationID == 0)\n"
1024 		"  {\n"
1025 		"    gl_TessLevelOuter[0] = 1.0f;\n"
1026 		"    gl_TessLevelOuter[1] = 1.0f;\n"
1027 		"  }\n"
1028 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1029 		"}\n"
1030 	*/
1031 		const std::string tescNoSubgroup =
1032 			"; SPIR-V\n"
1033 			"; Version: 1.3\n"
1034 			"; Generator: Khronos Glslang Reference Front End; 1\n"
1035 			"; Bound: 45\n"
1036 			"; Schema: 0\n"
1037 			"OpCapability Tessellation\n"
1038 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1039 			"OpMemoryModel Logical GLSL450\n"
1040 			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1041 			"OpExecutionMode %4 OutputVertices 1\n"
1042 			"OpDecorate %8 BuiltIn InvocationId\n"
1043 			"OpDecorate %20 Patch\n"
1044 			"OpDecorate %20 BuiltIn TessLevelOuter\n"
1045 			"OpMemberDecorate %29 0 BuiltIn Position\n"
1046 			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
1047 			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1048 			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1049 			"OpDecorate %29 Block\n"
1050 			"OpMemberDecorate %34 0 BuiltIn Position\n"
1051 			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
1052 			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1053 			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1054 			"OpDecorate %34 Block\n"
1055 			"%2 = OpTypeVoid\n"
1056 			"%3 = OpTypeFunction %2\n"
1057 			"%6 = OpTypeInt 32 1\n"
1058 			"%7 = OpTypePointer Input %6\n"
1059 			"%8 = OpVariable %7 Input\n"
1060 			"%10 = OpConstant %6 0\n"
1061 			"%11 = OpTypeBool\n"
1062 			"%15 = OpTypeFloat 32\n"
1063 			"%16 = OpTypeInt 32 0\n"
1064 			"%17 = OpConstant %16 4\n"
1065 			"%18 = OpTypeArray %15 %17\n"
1066 			"%19 = OpTypePointer Output %18\n"
1067 			"%20 = OpVariable %19 Output\n"
1068 			"%21 = OpConstant %15 1\n"
1069 			"%22 = OpTypePointer Output %15\n"
1070 			"%24 = OpConstant %6 1\n"
1071 			"%26 = OpTypeVector %15 4\n"
1072 			"%27 = OpConstant %16 1\n"
1073 			"%28 = OpTypeArray %15 %27\n"
1074 			"%29 = OpTypeStruct %26 %15 %28 %28\n"
1075 			"%30 = OpTypeArray %29 %27\n"
1076 			"%31 = OpTypePointer Output %30\n"
1077 			"%32 = OpVariable %31 Output\n"
1078 			"%34 = OpTypeStruct %26 %15 %28 %28\n"
1079 			"%35 = OpConstant %16 32\n"
1080 			"%36 = OpTypeArray %34 %35\n"
1081 			"%37 = OpTypePointer Input %36\n"
1082 			"%38 = OpVariable %37 Input\n"
1083 			"%40 = OpTypePointer Input %26\n"
1084 			"%43 = OpTypePointer Output %26\n"
1085 			"%4 = OpFunction %2 None %3\n"
1086 			"%5 = OpLabel\n"
1087 			"%9 = OpLoad %6 %8\n"
1088 			"%12 = OpIEqual %11 %9 %10\n"
1089 			"OpSelectionMerge %14 None\n"
1090 			"OpBranchConditional %12 %13 %14\n"
1091 			"%13 = OpLabel\n"
1092 			"%23 = OpAccessChain %22 %20 %10\n"
1093 			"OpStore %23 %21\n"
1094 			"%25 = OpAccessChain %22 %20 %24\n"
1095 			"OpStore %25 %21\n"
1096 			"OpBranch %14\n"
1097 			"%14 = OpLabel\n"
1098 			"%33 = OpLoad %6 %8\n"
1099 			"%39 = OpLoad %6 %8\n"
1100 			"%41 = OpAccessChain %40 %38 %39 %10\n"
1101 			"%42 = OpLoad %26 %41\n"
1102 			"%44 = OpAccessChain %43 %32 %33 %10\n"
1103 			"OpStore %44 %42\n"
1104 			"OpReturn\n"
1105 			"OpFunctionEnd\n";
1106 		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1107 	}
1108 
1109 	{
1110 	/*
1111 		"#version 450\n"
1112 		"layout(isolines) in;\n"
1113 		"\n"
1114 		"void main (void)\n"
1115 		"{\n"
1116 		"  float pixelSize = 2.0f/1024.0f;\n"
1117 		"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1118 		"}\n";
1119 	*/
1120 		const std::string teseNoSubgroup =
1121 			"; SPIR-V\n"
1122 			"; Version: 1.3\n"
1123 			"; Generator: Khronos Glslang Reference Front End; 2\n"
1124 			"; Bound: 42\n"
1125 			"; Schema: 0\n"
1126 			"OpCapability Tessellation\n"
1127 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1128 			"OpMemoryModel Logical GLSL450\n"
1129 			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1130 			"OpExecutionMode %4 Isolines\n"
1131 			"OpExecutionMode %4 SpacingEqual\n"
1132 			"OpExecutionMode %4 VertexOrderCcw\n"
1133 			"OpMemberDecorate %14 0 BuiltIn Position\n"
1134 			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
1135 			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1136 			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1137 			"OpDecorate %14 Block\n"
1138 			"OpMemberDecorate %19 0 BuiltIn Position\n"
1139 			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
1140 			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1141 			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1142 			"OpDecorate %19 Block\n"
1143 			"OpDecorate %29 BuiltIn TessCoord\n"
1144 			"%2 = OpTypeVoid\n"
1145 			"%3 = OpTypeFunction %2\n"
1146 			"%6 = OpTypeFloat 32\n"
1147 			"%7 = OpTypePointer Function %6\n"
1148 			"%9 = OpConstant %6 0.00195313\n"
1149 			"%10 = OpTypeVector %6 4\n"
1150 			"%11 = OpTypeInt 32 0\n"
1151 			"%12 = OpConstant %11 1\n"
1152 			"%13 = OpTypeArray %6 %12\n"
1153 			"%14 = OpTypeStruct %10 %6 %13 %13\n"
1154 			"%15 = OpTypePointer Output %14\n"
1155 			"%16 = OpVariable %15 Output\n"
1156 			"%17 = OpTypeInt 32 1\n"
1157 			"%18 = OpConstant %17 0\n"
1158 			"%19 = OpTypeStruct %10 %6 %13 %13\n"
1159 			"%20 = OpConstant %11 32\n"
1160 			"%21 = OpTypeArray %19 %20\n"
1161 			"%22 = OpTypePointer Input %21\n"
1162 			"%23 = OpVariable %22 Input\n"
1163 			"%24 = OpTypePointer Input %10\n"
1164 			"%27 = OpTypeVector %6 3\n"
1165 			"%28 = OpTypePointer Input %27\n"
1166 			"%29 = OpVariable %28 Input\n"
1167 			"%30 = OpConstant %11 0\n"
1168 			"%31 = OpTypePointer Input %6\n"
1169 			"%36 = OpConstant %6 2\n"
1170 			"%40 = OpTypePointer Output %10\n"
1171 			"%4 = OpFunction %2 None %3\n"
1172 			"%5 = OpLabel\n"
1173 			"%8 = OpVariable %7 Function\n"
1174 			"OpStore %8 %9\n"
1175 			"%25 = OpAccessChain %24 %23 %18 %18\n"
1176 			"%26 = OpLoad %10 %25\n"
1177 			"%32 = OpAccessChain %31 %29 %30\n"
1178 			"%33 = OpLoad %6 %32\n"
1179 			"%34 = OpLoad %6 %8\n"
1180 			"%35 = OpFMul %6 %33 %34\n"
1181 			"%37 = OpFDiv %6 %35 %36\n"
1182 			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1183 			"%39 = OpFAdd %10 %26 %38\n"
1184 			"%41 = OpAccessChain %40 %16 %18\n"
1185 			"OpStore %41 %39\n"
1186 			"OpReturn\n"
1187 			"OpFunctionEnd\n";
1188 		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1189 	}
1190 
1191 }
1192 
1193 
getVertShaderForStage(vk::VkShaderStageFlags stage)1194 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
1195 {
1196 	switch (stage)
1197 	{
1198 		default:
1199 			DE_FATAL("Unhandled stage!");
1200 			return "";
1201 		case VK_SHADER_STAGE_FRAGMENT_BIT:
1202 			return
1203 				"#version 450\n"
1204 				"void main (void)\n"
1205 				"{\n"
1206 				"  float pixelSize = 2.0f/1024.0f;\n"
1207 				"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1208 				"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1209 				"}\n";
1210 		case VK_SHADER_STAGE_GEOMETRY_BIT:
1211 			return
1212 				"#version 450\n"
1213 				"void main (void)\n"
1214 				"{\n"
1215 				"}\n";
1216 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1217 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1218 			return
1219 				"#version 450\n"
1220 				"void main (void)\n"
1221 				"{\n"
1222 				"}\n";
1223 	}
1224 }
1225 
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,std::string extHeader,std::string testSrc,std::string helperStr)1226 void vkt::subgroups::initStdFrameBufferPrograms(	SourceCollections&				programCollection,
1227 													const vk::ShaderBuildOptions&	buildOptions,
1228 													VkShaderStageFlags				shaderStage,
1229 													VkFormat						format,
1230 													bool							gsPointSize,
1231 													std::string						extHeader,
1232 													std::string						testSrc,
1233 													std::string						helperStr)
1234 {
1235 	subgroups::setFragmentShaderFrameBuffer(programCollection);
1236 
1237 	if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1238 		subgroups::setVertexShaderFrameBuffer(programCollection);
1239 
1240 	if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1241 	{
1242 		std::ostringstream vertex;
1243 		vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1244 			<< extHeader.c_str()
1245 			<< "layout(location = 0) in highp vec4 in_position;\n"
1246 			<< "layout(location = 0) out float result;\n"
1247 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1248 			<< "{\n"
1249 			<< "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1250 			<< "};\n"
1251 			<< "\n"
1252 			<< helperStr.c_str()
1253 			<< "void main (void)\n"
1254 			<< "{\n"
1255 			<< "  uint tempRes;\n"
1256 			<< testSrc
1257 			<< "  result = float(tempRes);\n"
1258 			<< "  gl_Position = in_position;\n"
1259 			<< "  gl_PointSize = 1.0f;\n"
1260 			<< "}\n";
1261 		programCollection.glslSources.add("vert")
1262 			<< glu::VertexSource(vertex.str()) << buildOptions;
1263 	}
1264 	else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1265 	{
1266 		std::ostringstream geometry;
1267 
1268 		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1269 			<< extHeader.c_str()
1270 			<< "layout(points) in;\n"
1271 			<< "layout(points, max_vertices = 1) out;\n"
1272 			<< "layout(location = 0) out float out_color;\n"
1273 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1274 			<< "{\n"
1275 			<< "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1276 			<< "};\n"
1277 			<< "\n"
1278 			<< helperStr.c_str()
1279 			<< "void main (void)\n"
1280 			<< "{\n"
1281 			<< "  uint tempRes;\n"
1282 			<< testSrc
1283 			<< "  out_color = float(tempRes);\n"
1284 			<< "  gl_Position = gl_in[0].gl_Position;\n"
1285 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1286 			<< "  EmitVertex();\n"
1287 			<< "  EndPrimitive();\n"
1288 			<< "}\n";
1289 
1290 		programCollection.glslSources.add("geometry")
1291 			<< glu::GeometrySource(geometry.str()) << buildOptions;
1292 	}
1293 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1294 	{
1295 		std::ostringstream controlSource;
1296 		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1297 			<< extHeader.c_str()
1298 			<< "layout(vertices = 2) out;\n"
1299 			<< "layout(location = 0) out float out_color[];\n"
1300 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1301 			<< "{\n"
1302 			<< "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1303 			<< "};\n"
1304 			<< "\n"
1305 			<< helperStr.c_str()
1306 			<< "void main (void)\n"
1307 			<< "{\n"
1308 			<< "  if (gl_InvocationID == 0)\n"
1309 			<< "  {\n"
1310 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
1311 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
1312 			<< "  }\n"
1313 			<< "  uint tempRes;\n"
1314 			<< testSrc
1315 			<< "  out_color[gl_InvocationID] = float(tempRes);\n"
1316 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1317 			<< (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1318 			<< "}\n";
1319 
1320 		programCollection.glslSources.add("tesc")
1321 			<< glu::TessellationControlSource(controlSource.str()) << buildOptions;
1322 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
1323 	}
1324 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1325 	{
1326 		ostringstream evaluationSource;
1327 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1328 			<< extHeader.c_str()
1329 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
1330 			<< "layout(location = 0) out float out_color;\n"
1331 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1332 			<< "{\n"
1333 			<< "  " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1334 			<< "};\n"
1335 			<< "\n"
1336 			<< helperStr.c_str()
1337 			<< "void main (void)\n"
1338 			<< "{\n"
1339 			<< "  uint tempRes;\n"
1340 			<< testSrc
1341 			<< "  out_color = float(tempRes);\n"
1342 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1343 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1344 			<< "}\n";
1345 
1346 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1347 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1348 	}
1349 	else
1350 	{
1351 		DE_FATAL("Unsupported shader stage");
1352 	}
1353 }
1354 
// Registers the GLSL sources shared by the "standard" subgroup test cases.
//
// When shaderStage is exactly VK_SHADER_STAGE_COMPUTE_BIT a single "comp" shader is
// added. For any other value a complete graphics set is added: "vert", "tesc", "tese",
// the geometry shaders produced by addGeometryShadersFromTemplate, "fragment", plus
// whatever addNoSubgroupShader registers.
//
// programCollection	Receives the generated shader sources.
// buildOptions			Build options attached to every GLSL source added here.
// shaderStage			Selects the compute path or the graphics path (see above).
// format				Element type of the input "data" buffer (via getFormatNameForGLSL).
// gsPointSize			When true, the tesc/tese/geometry shaders also forward gl_PointSize.
// extHeader			Text emitted right after "#version 450" (presumably #extension lines).
// testSrc				GLSL statements placed inside main(); expected to assign "tempRes",
//						which every shader writes to its result afterwards.
// helperStr			GLSL text emitted just before main().
void vkt::subgroups::initStdPrograms(	vk::SourceCollections&			programCollection,
										const vk::ShaderBuildOptions&	buildOptions,
										vk::VkShaderStageFlags			shaderStage,
										vk::VkFormat					format,
										bool							gsPointSize,
										std::string						extHeader,
										std::string						testSrc,
										std::string						helperStr)
{
	if (shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
	{
		// Compute path: workgroup size comes from specialization constants 0..2,
		// results go to binding 0 and the input data is read from binding 1.
		std::ostringstream src;

		// "offset" linearizes the 3D global invocation ID so that every invocation
		// writes its tempRes to a distinct element of result[].
		src << "#version 450\n"
			<< extHeader.c_str()
			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
			"local_size_z_id = 2) in;\n"
			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
			<< "{\n"
			<< "  uint result[];\n"
			<< "};\n"
			<< "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
			<< "{\n"
			<< "  " << subgroups::getFormatNameForGLSL(format) << " data[];\n"
			<< "};\n"
			<< "\n"
			<< helperStr.c_str()
			<< "void main (void)\n"
			<< "{\n"
			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
			"gl_GlobalInvocationID.x;\n"
			<< "  uint tempRes;\n"
			<< testSrc
			<< "  result[offset] = tempRes;\n"
			<< "}\n";

		programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
	}
	else
	{
		// Graphics path: each stage owns its own result buffer (vert = binding 0,
		// tesc = 1, tese = 2, geometry = 3) while the read-only input data is shared
		// by all stages at binding 4. The fragment stage has no result buffer and
		// writes tempRes to color output location 0 instead.

		// Vertex: result indexed by gl_VertexIndex; each vertex is placed at the
		// center of its own pixel column, using a pixel size of 2/1024 in NDC
		// (presumably matching a 1024-pixel-wide render area - confirm with callers).
		const string vertex =
			"#version 450\n"
			+ extHeader +
			"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
			"{\n"
			"  uint result[];\n"
			"};\n"
			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
			"{\n"
			"  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
			"};\n"
			"\n"
			+ helperStr +
			"void main (void)\n"
			"{\n"
			"  uint tempRes;\n"
			+ testSrc +
			"  result[gl_VertexIndex] = tempRes;\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
			"  gl_PointSize = 1.0f;\n"
			"}\n";

		// Tessellation control: one output vertex per patch, result indexed by
		// gl_PrimitiveID; invocation 0 sets both outer tessellation levels to 1.
		const string tesc =
			"#version 450\n"
			+ extHeader +
			"layout(vertices=1) out;\n"
			"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
			"{\n"
			"  uint result[];\n"
			"};\n"
			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
			"{\n"
			"  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
			"};\n"
			"\n"
			+ helperStr +
			"void main (void)\n"
			"{\n"
			"  uint tempRes;\n"
			+ testSrc +
			"  result[gl_PrimitiveID] = tempRes;\n"
			"  if (gl_InvocationID == 0)\n"
			"  {\n"
			"    gl_TessLevelOuter[0] = 1.0f;\n"
			"    gl_TessLevelOuter[1] = 1.0f;\n"
			"  }\n"
			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
			+ (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
			"}\n";

		// Tessellation evaluation (isolines): two result slots per primitive, selected
		// by rounding gl_TessCoord.x; the position is nudged by up to half a pixel.
		const string tese =
			"#version 450\n"
			+ extHeader +
			"layout(isolines) in;\n"
			"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
			"{\n"
			"  uint result[];\n"
			"};\n"
			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
			"{\n"
			"  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
			"};\n"
			"\n"
			+ helperStr +
			"void main (void)\n"
			"{\n"
			"  uint tempRes;\n"
			+ testSrc +
			"  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
			"}\n";

		// Geometry: a template whose ${TOPOLOGY} placeholder is left unresolved here and
		// handed to addGeometryShadersFromTemplate; result indexed by gl_PrimitiveIDIn,
		// one point emitted per input primitive.
		const string geometry =
			"#version 450\n"
			+ extHeader +
			"layout(${TOPOLOGY}) in;\n"
			"layout(points, max_vertices = 1) out;\n"
			"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
			"{\n"
			"  uint result[];\n"
			"};\n"
			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
			"{\n"
			"  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
			"};\n"
			"\n"
			+ helperStr +
			"void main (void)\n"
			"{\n"
			"  uint tempRes;\n"
			+ testSrc +
			"  result[gl_PrimitiveIDIn] = tempRes;\n"
			"  gl_Position = gl_in[0].gl_Position;\n"
			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
			"  EmitVertex();\n"
			"  EndPrimitive();\n"
			"}\n";

		// Fragment: tempRes is written to the uint color output at location 0.
		// Note that the data block is named Buffer1 here (Buffer2 in the other stages).
		const string fragment =
			"#version 450\n"
			+ extHeader +
			"layout(location = 0) out uint result;\n"
			"layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
			"{\n"
			"  " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
			"};\n"
			+ helperStr +
			"void main (void)\n"
			"{\n"
			"  uint tempRes;\n"
			+ testSrc +
			"  result = tempRes;\n"
			"}\n";

		subgroups::addNoSubgroupShader(programCollection);

		programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
		subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
		programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
	}
}
1524 
isSubgroupSupported(Context & context)1525 bool vkt::subgroups::isSubgroupSupported(Context& context)
1526 {
1527 	return context.contextSupports(vk::ApiVersion(1, 1, 0));
1528 }
1529 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1530 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
1531 	Context& context, const VkShaderStageFlags stage)
1532 {
1533 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
1534 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1535 	subgroupProperties.pNext = DE_NULL;
1536 
1537 	VkPhysicalDeviceProperties2 properties;
1538 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1539 	properties.pNext = &subgroupProperties;
1540 
1541 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1542 
1543 	return (stage & subgroupProperties.supportedStages) ? true : false;
1544 }
1545 
areSubgroupOperationsRequiredForStage(VkShaderStageFlags stage)1546 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
1547 	VkShaderStageFlags stage)
1548 {
1549 	switch (stage)
1550 	{
1551 		default:
1552 			return false;
1553 		case VK_SHADER_STAGE_COMPUTE_BIT:
1554 			return true;
1555 	}
1556 }
1557 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1558 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
1559 	Context& context,
1560 	VkSubgroupFeatureFlagBits bit) {
1561 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
1562 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1563 	subgroupProperties.pNext = DE_NULL;
1564 
1565 	VkPhysicalDeviceProperties2 properties;
1566 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1567 	properties.pNext = &subgroupProperties;
1568 
1569 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1570 
1571 	return (bit & subgroupProperties.supportedOperations) ? true : false;
1572 }
1573 
isFragmentSSBOSupportedForDevice(Context & context)1574 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1575 {
1576 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1577 				context.getInstanceInterface(), context.getPhysicalDevice());
1578 	return features.fragmentStoresAndAtomics ? true : false;
1579 }
1580 
isVertexSSBOSupportedForDevice(Context & context)1581 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1582 {
1583 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1584 				context.getInstanceInterface(), context.getPhysicalDevice());
1585 	return features.vertexPipelineStoresAndAtomics ? true : false;
1586 }
1587 
isInt64SupportedForDevice(Context & context)1588 bool vkt::subgroups::isInt64SupportedForDevice(Context& context)
1589 {
1590 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1591 				context.getInstanceInterface(), context.getPhysicalDevice());
1592 	return features.shaderInt64 ? true : false;
1593 }
1594 
isTessellationAndGeometryPointSizeSupported(Context & context)1595 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1596 {
1597 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1598 		context.getInstanceInterface(), context.getPhysicalDevice());
1599 	return features.shaderTessellationAndGeometryPointSize ? true : false;
1600 }
1601 
is16BitUBOStorageSupported(Context & context)1602 bool vkt::subgroups::is16BitUBOStorageSupported(Context& context) {
1603 	VkPhysicalDevice16BitStorageFeatures storage16bit;
1604 	deMemset(&storage16bit, 0, sizeof(storage16bit));
1605 	storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1606 	storage16bit.pNext = DE_NULL;
1607 
1608 	VkPhysicalDeviceFeatures2 features2;
1609 	deMemset(&features2, 0, sizeof(features2));
1610 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1611 	features2.pNext = &storage16bit;
1612 
1613 	const PlatformInterface&		platformInterface = context.getPlatformInterface();
1614 	const VkInstance				instance = context.getInstance();
1615 	const InstanceDriver			instanceDriver(platformInterface, instance);
1616 
1617 	instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1618 	return bool(storage16bit.uniformAndStorageBuffer16BitAccess);
1619 }
1620 
1621 
is8BitUBOStorageSupported(Context & context)1622 bool vkt::subgroups::is8BitUBOStorageSupported(Context& context) {
1623 
1624 	VkPhysicalDevice8BitStorageFeatures storage8bit;
1625 	deMemset(&storage8bit, 0, sizeof(storage8bit));
1626 	storage8bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR;
1627 	storage8bit.pNext = DE_NULL;
1628 
1629 	VkPhysicalDeviceFeatures2 features2;
1630 	deMemset(&features2, 0, sizeof(features2));
1631 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1632 	features2.pNext = &storage8bit;
1633 
1634 
1635 	const PlatformInterface&		platformInterface = context.getPlatformInterface();
1636 	const VkInstance				instance = context.getInstance();
1637 	const InstanceDriver			instanceDriver(platformInterface, instance);
1638 
1639 	instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1640 	return bool(storage8bit.uniformAndStorageBuffer8BitAccess);
1641 }
1642 
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1643 bool vkt::subgroups::isFormatSupportedForDevice(Context& context, vk::VkFormat format)
1644 {
1645 	VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures subgroupExtendedTypesFeatures;
1646 	deMemset(&subgroupExtendedTypesFeatures, 0, sizeof(subgroupExtendedTypesFeatures));
1647 	subgroupExtendedTypesFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES;
1648 	subgroupExtendedTypesFeatures.pNext = DE_NULL;
1649 
1650 	VkPhysicalDeviceShaderFloat16Int8Features float16Int8Features;
1651 	deMemset(&float16Int8Features, 0, sizeof(float16Int8Features));
1652 	float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
1653 	float16Int8Features.pNext = DE_NULL;
1654 
1655 	VkPhysicalDeviceFeatures2 features2;
1656 	deMemset(&features2, 0, sizeof(features2));
1657 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1658 	features2.pNext = DE_NULL;
1659 
1660 	VkPhysicalDevice16BitStorageFeatures storage16bit;
1661 	deMemset(&storage16bit, 0, sizeof(storage16bit));
1662 	storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1663 	storage16bit.pNext = DE_NULL;
1664 	bool is16bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage");
1665 
1666 	VkPhysicalDevice8BitStorageFeatures storage8bit;
1667 	deMemset(&storage8bit, 0, sizeof(storage8bit));
1668 	storage8bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR;
1669 	storage8bit.pNext = DE_NULL;
1670 	bool is8bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage");
1671 
1672 	if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1673 		context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1674 	{
1675 		features2.pNext = &subgroupExtendedTypesFeatures;
1676 		subgroupExtendedTypesFeatures.pNext = &float16Int8Features;
1677 		if ( is16bitStorageSupported )
1678 		{
1679 			float16Int8Features.pNext = &storage16bit;
1680 			if (is8bitStorageSupported)
1681 			{
1682 				storage16bit.pNext = &storage8bit;
1683 			}
1684 		}
1685 		else
1686 		{
1687 			if (is8bitStorageSupported)
1688 			{
1689 				float16Int8Features.pNext = &storage8bit;
1690 			}
1691 
1692 		}
1693 	}
1694 
1695 	const PlatformInterface&		platformInterface		= context.getPlatformInterface();
1696 	const VkInstance				instance				= context.getInstance();
1697 	const InstanceDriver			instanceDriver			(platformInterface, instance);
1698 
1699 	instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1700 
1701 	switch (format)
1702 	{
1703 		default:
1704 			return true;
1705 		case VK_FORMAT_R16_SFLOAT:
1706 		case VK_FORMAT_R16G16_SFLOAT:
1707 		case VK_FORMAT_R16G16B16_SFLOAT:
1708 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1709 			return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderFloat16 & storage16bit.storageBuffer16BitAccess ? true : false;
1710 		case VK_FORMAT_R64_SFLOAT:
1711 		case VK_FORMAT_R64G64_SFLOAT:
1712 		case VK_FORMAT_R64G64B64_SFLOAT:
1713 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1714 			return features2.features.shaderFloat64 ? true : false;
1715 		case VK_FORMAT_R8_SINT:
1716 		case VK_FORMAT_R8G8_SINT:
1717 		case VK_FORMAT_R8G8B8_SINT:
1718 		case VK_FORMAT_R8G8B8A8_SINT:
1719 		case VK_FORMAT_R8_UINT:
1720 		case VK_FORMAT_R8G8_UINT:
1721 		case VK_FORMAT_R8G8B8_UINT:
1722 		case VK_FORMAT_R8G8B8A8_UINT:
1723 			return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderInt8 & storage8bit.storageBuffer8BitAccess ? true : false;
1724 		case VK_FORMAT_R16_SINT:
1725 		case VK_FORMAT_R16G16_SINT:
1726 		case VK_FORMAT_R16G16B16_SINT:
1727 		case VK_FORMAT_R16G16B16A16_SINT:
1728 		case VK_FORMAT_R16_UINT:
1729 		case VK_FORMAT_R16G16_UINT:
1730 		case VK_FORMAT_R16G16B16_UINT:
1731 		case VK_FORMAT_R16G16B16A16_UINT:
1732 			return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt16 & storage16bit.storageBuffer16BitAccess ? true : false;
1733 		case VK_FORMAT_R64_SINT:
1734 		case VK_FORMAT_R64G64_SINT:
1735 		case VK_FORMAT_R64G64B64_SINT:
1736 		case VK_FORMAT_R64G64B64A64_SINT:
1737 		case VK_FORMAT_R64_UINT:
1738 		case VK_FORMAT_R64G64_UINT:
1739 		case VK_FORMAT_R64G64B64_UINT:
1740 		case VK_FORMAT_R64G64B64A64_UINT:
1741 			return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt64 ? true : false;
1742 	}
1743 }
1744 
isSubgroupBroadcastDynamicIdSupported(Context & context)1745 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1746 {
1747 	return context.contextSupports(vk::ApiVersion(1, 2, 0)) &&
1748 		vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1749 }
1750 
getFormatNameForGLSL(VkFormat format)1751 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1752 {
1753 	switch (format)
1754 	{
1755 		default:
1756 			DE_FATAL("Unhandled format!");
1757 			return "";
1758 		case VK_FORMAT_R8_SINT:
1759 			return "int8_t";
1760 		case VK_FORMAT_R8G8_SINT:
1761 			return "i8vec2";
1762 		case VK_FORMAT_R8G8B8_SINT:
1763 			return "i8vec3";
1764 		case VK_FORMAT_R8G8B8A8_SINT:
1765 			return "i8vec4";
1766 		case VK_FORMAT_R8_UINT:
1767 			return "uint8_t";
1768 		case VK_FORMAT_R8G8_UINT:
1769 			return "u8vec2";
1770 		case VK_FORMAT_R8G8B8_UINT:
1771 			return "u8vec3";
1772 		case VK_FORMAT_R8G8B8A8_UINT:
1773 			return "u8vec4";
1774 		case VK_FORMAT_R16_SINT:
1775 			return "int16_t";
1776 		case VK_FORMAT_R16G16_SINT:
1777 			return "i16vec2";
1778 		case VK_FORMAT_R16G16B16_SINT:
1779 			return "i16vec3";
1780 		case VK_FORMAT_R16G16B16A16_SINT:
1781 			return "i16vec4";
1782 		case VK_FORMAT_R16_UINT:
1783 			return "uint16_t";
1784 		case VK_FORMAT_R16G16_UINT:
1785 			return "u16vec2";
1786 		case VK_FORMAT_R16G16B16_UINT:
1787 			return "u16vec3";
1788 		case VK_FORMAT_R16G16B16A16_UINT:
1789 			return "u16vec4";
1790 		case VK_FORMAT_R32_SINT:
1791 			return "int";
1792 		case VK_FORMAT_R32G32_SINT:
1793 			return "ivec2";
1794 		case VK_FORMAT_R32G32B32_SINT:
1795 			return "ivec3";
1796 		case VK_FORMAT_R32G32B32A32_SINT:
1797 			return "ivec4";
1798 		case VK_FORMAT_R32_UINT:
1799 			return "uint";
1800 		case VK_FORMAT_R32G32_UINT:
1801 			return "uvec2";
1802 		case VK_FORMAT_R32G32B32_UINT:
1803 			return "uvec3";
1804 		case VK_FORMAT_R32G32B32A32_UINT:
1805 			return "uvec4";
1806 		case VK_FORMAT_R64_SINT:
1807 			return "int64_t";
1808 		case VK_FORMAT_R64G64_SINT:
1809 			return "i64vec2";
1810 		case VK_FORMAT_R64G64B64_SINT:
1811 			return "i64vec3";
1812 		case VK_FORMAT_R64G64B64A64_SINT:
1813 			return "i64vec4";
1814 		case VK_FORMAT_R64_UINT:
1815 			return "uint64_t";
1816 		case VK_FORMAT_R64G64_UINT:
1817 			return "u64vec2";
1818 		case VK_FORMAT_R64G64B64_UINT:
1819 			return "u64vec3";
1820 		case VK_FORMAT_R64G64B64A64_UINT:
1821 			return "u64vec4";
1822 		case VK_FORMAT_R16_SFLOAT:
1823 			return "float16_t";
1824 		case VK_FORMAT_R16G16_SFLOAT:
1825 			return "f16vec2";
1826 		case VK_FORMAT_R16G16B16_SFLOAT:
1827 			return "f16vec3";
1828 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1829 			return "f16vec4";
1830 		case VK_FORMAT_R32_SFLOAT:
1831 			return "float";
1832 		case VK_FORMAT_R32G32_SFLOAT:
1833 			return "vec2";
1834 		case VK_FORMAT_R32G32B32_SFLOAT:
1835 			return "vec3";
1836 		case VK_FORMAT_R32G32B32A32_SFLOAT:
1837 			return "vec4";
1838 		case VK_FORMAT_R64_SFLOAT:
1839 			return "double";
1840 		case VK_FORMAT_R64G64_SFLOAT:
1841 			return "dvec2";
1842 		case VK_FORMAT_R64G64B64_SFLOAT:
1843 			return "dvec3";
1844 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1845 			return "dvec4";
1846 		case VK_FORMAT_R8_USCALED:
1847 			return "bool";
1848 		case VK_FORMAT_R8G8_USCALED:
1849 			return "bvec2";
1850 		case VK_FORMAT_R8G8B8_USCALED:
1851 			return "bvec3";
1852 		case VK_FORMAT_R8G8B8A8_USCALED:
1853 			return "bvec4";
1854 	}
1855 }
1856 
getAdditionalExtensionForFormat(vk::VkFormat format)1857 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1858 {
1859 	switch (format)
1860 	{
1861 		default:
1862 			return "";
1863 		case VK_FORMAT_R8_SINT:
1864 		case VK_FORMAT_R8G8_SINT:
1865 		case VK_FORMAT_R8G8B8_SINT:
1866 		case VK_FORMAT_R8G8B8A8_SINT:
1867 		case VK_FORMAT_R8_UINT:
1868 		case VK_FORMAT_R8G8_UINT:
1869 		case VK_FORMAT_R8G8B8_UINT:
1870 		case VK_FORMAT_R8G8B8A8_UINT:
1871 			return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1872 		case VK_FORMAT_R16_SINT:
1873 		case VK_FORMAT_R16G16_SINT:
1874 		case VK_FORMAT_R16G16B16_SINT:
1875 		case VK_FORMAT_R16G16B16A16_SINT:
1876 		case VK_FORMAT_R16_UINT:
1877 		case VK_FORMAT_R16G16_UINT:
1878 		case VK_FORMAT_R16G16B16_UINT:
1879 		case VK_FORMAT_R16G16B16A16_UINT:
1880 			return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1881 		case VK_FORMAT_R64_SINT:
1882 		case VK_FORMAT_R64G64_SINT:
1883 		case VK_FORMAT_R64G64B64_SINT:
1884 		case VK_FORMAT_R64G64B64A64_SINT:
1885 		case VK_FORMAT_R64_UINT:
1886 		case VK_FORMAT_R64G64_UINT:
1887 		case VK_FORMAT_R64G64B64_UINT:
1888 		case VK_FORMAT_R64G64B64A64_UINT:
1889 			return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1890 		case VK_FORMAT_R16_SFLOAT:
1891 		case VK_FORMAT_R16G16_SFLOAT:
1892 		case VK_FORMAT_R16G16B16_SFLOAT:
1893 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1894 			return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1895 	}
1896 }
1897 
getAllFormats()1898 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1899 {
1900 	std::vector<VkFormat> formats;
1901 
1902 	formats.push_back(VK_FORMAT_R8_SINT);
1903 	formats.push_back(VK_FORMAT_R8G8_SINT);
1904 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
1905 	formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1906 	formats.push_back(VK_FORMAT_R8_UINT);
1907 	formats.push_back(VK_FORMAT_R8G8_UINT);
1908 	formats.push_back(VK_FORMAT_R8G8B8_UINT);
1909 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1910 	formats.push_back(VK_FORMAT_R16_SINT);
1911 	formats.push_back(VK_FORMAT_R16G16_SINT);
1912 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
1913 	formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1914 	formats.push_back(VK_FORMAT_R16_UINT);
1915 	formats.push_back(VK_FORMAT_R16G16_UINT);
1916 	formats.push_back(VK_FORMAT_R16G16B16_UINT);
1917 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1918 	formats.push_back(VK_FORMAT_R32_SINT);
1919 	formats.push_back(VK_FORMAT_R32G32_SINT);
1920 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
1921 	formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1922 	formats.push_back(VK_FORMAT_R32_UINT);
1923 	formats.push_back(VK_FORMAT_R32G32_UINT);
1924 	formats.push_back(VK_FORMAT_R32G32B32_UINT);
1925 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1926 	formats.push_back(VK_FORMAT_R64_SINT);
1927 	formats.push_back(VK_FORMAT_R64G64_SINT);
1928 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
1929 	formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1930 	formats.push_back(VK_FORMAT_R64_UINT);
1931 	formats.push_back(VK_FORMAT_R64G64_UINT);
1932 	formats.push_back(VK_FORMAT_R64G64B64_UINT);
1933 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1934 	formats.push_back(VK_FORMAT_R16_SFLOAT);
1935 	formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1936 	formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1937 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1938 	formats.push_back(VK_FORMAT_R32_SFLOAT);
1939 	formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1940 	formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1941 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1942 	formats.push_back(VK_FORMAT_R64_SFLOAT);
1943 	formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1944 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1945 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1946 	formats.push_back(VK_FORMAT_R8_USCALED);
1947 	formats.push_back(VK_FORMAT_R8G8_USCALED);
1948 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1949 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1950 
1951 	return formats;
1952 }
1953 
isFormatSigned(VkFormat format)1954 bool vkt::subgroups::isFormatSigned (VkFormat format)
1955 {
1956 	switch (format)
1957 	{
1958 		default:
1959 			return false;
1960 		case VK_FORMAT_R8_SINT:
1961 		case VK_FORMAT_R8G8_SINT:
1962 		case VK_FORMAT_R8G8B8_SINT:
1963 		case VK_FORMAT_R8G8B8A8_SINT:
1964 		case VK_FORMAT_R16_SINT:
1965 		case VK_FORMAT_R16G16_SINT:
1966 		case VK_FORMAT_R16G16B16_SINT:
1967 		case VK_FORMAT_R16G16B16A16_SINT:
1968 		case VK_FORMAT_R32_SINT:
1969 		case VK_FORMAT_R32G32_SINT:
1970 		case VK_FORMAT_R32G32B32_SINT:
1971 		case VK_FORMAT_R32G32B32A32_SINT:
1972 		case VK_FORMAT_R64_SINT:
1973 		case VK_FORMAT_R64G64_SINT:
1974 		case VK_FORMAT_R64G64B64_SINT:
1975 		case VK_FORMAT_R64G64B64A64_SINT:
1976 			return true;
1977 	}
1978 }
1979 
isFormatUnsigned(VkFormat format)1980 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1981 {
1982 	switch (format)
1983 	{
1984 		default:
1985 			return false;
1986 		case VK_FORMAT_R8_UINT:
1987 		case VK_FORMAT_R8G8_UINT:
1988 		case VK_FORMAT_R8G8B8_UINT:
1989 		case VK_FORMAT_R8G8B8A8_UINT:
1990 		case VK_FORMAT_R16_UINT:
1991 		case VK_FORMAT_R16G16_UINT:
1992 		case VK_FORMAT_R16G16B16_UINT:
1993 		case VK_FORMAT_R16G16B16A16_UINT:
1994 		case VK_FORMAT_R32_UINT:
1995 		case VK_FORMAT_R32G32_UINT:
1996 		case VK_FORMAT_R32G32B32_UINT:
1997 		case VK_FORMAT_R32G32B32A32_UINT:
1998 		case VK_FORMAT_R64_UINT:
1999 		case VK_FORMAT_R64G64_UINT:
2000 		case VK_FORMAT_R64G64B64_UINT:
2001 		case VK_FORMAT_R64G64B64A64_UINT:
2002 			return true;
2003 	}
2004 }
2005 
isFormatFloat(VkFormat format)2006 bool vkt::subgroups::isFormatFloat (VkFormat format)
2007 {
2008 	switch (format)
2009 	{
2010 		default:
2011 			return false;
2012 		case VK_FORMAT_R16_SFLOAT:
2013 		case VK_FORMAT_R16G16_SFLOAT:
2014 		case VK_FORMAT_R16G16B16_SFLOAT:
2015 		case VK_FORMAT_R16G16B16A16_SFLOAT:
2016 		case VK_FORMAT_R32_SFLOAT:
2017 		case VK_FORMAT_R32G32_SFLOAT:
2018 		case VK_FORMAT_R32G32B32_SFLOAT:
2019 		case VK_FORMAT_R32G32B32A32_SFLOAT:
2020 		case VK_FORMAT_R64_SFLOAT:
2021 		case VK_FORMAT_R64G64_SFLOAT:
2022 		case VK_FORMAT_R64G64B64_SFLOAT:
2023 		case VK_FORMAT_R64G64B64A64_SFLOAT:
2024 			return true;
2025 	}
2026 }
2027 
isFormatBool(VkFormat format)2028 bool vkt::subgroups::isFormatBool (VkFormat format)
2029 {
2030 	switch (format)
2031 	{
2032 		default:
2033 			return false;
2034 		case VK_FORMAT_R8_USCALED:
2035 		case VK_FORMAT_R8G8_USCALED:
2036 		case VK_FORMAT_R8G8B8_USCALED:
2037 		case VK_FORMAT_R8G8B8A8_USCALED:
2038 			return true;
2039 	}
2040 }
2041 
isFormat8bitTy(VkFormat format)2042 bool vkt::subgroups::isFormat8bitTy(VkFormat format)
2043 {
2044 	switch (format)
2045 	{
2046 	default:
2047 		return false;
2048 	case VK_FORMAT_R8_SINT:
2049 	case VK_FORMAT_R8G8_SINT:
2050 	case VK_FORMAT_R8G8B8_SINT:
2051 	case VK_FORMAT_R8G8B8A8_SINT:
2052 	case VK_FORMAT_R8_UINT:
2053 	case VK_FORMAT_R8G8_UINT:
2054 	case VK_FORMAT_R8G8B8_UINT:
2055 	case VK_FORMAT_R8G8B8A8_UINT:
2056 		return true;
2057 	}
2058 }
2059 
isFormat16BitTy(VkFormat format)2060 bool vkt::subgroups::isFormat16BitTy(VkFormat format)
2061 {
2062 	switch (format)
2063 	{
2064 	default:
2065 		return false;
2066 	case VK_FORMAT_R16_SFLOAT:
2067 	case VK_FORMAT_R16G16_SFLOAT:
2068 	case VK_FORMAT_R16G16B16_SFLOAT:
2069 	case VK_FORMAT_R16G16B16A16_SFLOAT:
2070 	case VK_FORMAT_R16_SINT:
2071 	case VK_FORMAT_R16G16_SINT:
2072 	case VK_FORMAT_R16G16B16_SINT:
2073 	case VK_FORMAT_R16G16B16A16_SINT:
2074 	case VK_FORMAT_R16_UINT:
2075 	case VK_FORMAT_R16G16_UINT:
2076 	case VK_FORMAT_R16G16B16_UINT:
2077 	case VK_FORMAT_R16G16B16A16_UINT:
2078 		return true;
2079 	}
2080 }
2081 
// Adds a pass-through vertex shader named "vert" to the SPIR-V assembly sources.
// The assembly loads the vec4 input at location 0, stores it to the Position
// built-in and stores the constant 1.0 to the PointSize built-in. The GLSL it was
// generated from is kept in the comment below for reference.
void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
{
	/*
		"layout(location = 0) in highp vec4 in_position;\n"
		"void main (void)\n"
		"{\n"
		"  gl_Position = in_position;\n"
		"  gl_PointSize = 1.0f;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("vert") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 7\n"
		"; Bound: 25\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
		"OpMemberDecorate %11 0 BuiltIn Position\n"
		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
		"OpDecorate %11 Block\n"
		"OpDecorate %17 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeFloat 32\n"
		"%7 = OpTypeVector %6 4\n"
		"%8 = OpTypeInt 32 0\n"
		"%9 = OpConstant %8 1\n"
		"%10 = OpTypeArray %6 %9\n"
		"%11 = OpTypeStruct %7 %6 %10 %10\n"
		"%12 = OpTypePointer Output %11\n"
		"%13 = OpVariable %12 Output\n"
		"%14 = OpTypeInt 32 1\n"
		"%15 = OpConstant %14 0\n"
		"%16 = OpTypePointer Input %7\n"
		"%17 = OpVariable %16 Input\n"
		"%19 = OpTypePointer Output %7\n"
		"%21 = OpConstant %14 1\n"
		"%22 = OpConstant %6 1\n"
		"%23 = OpTypePointer Output %6\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%18 = OpLoad %7 %17\n"
		"%20 = OpAccessChain %19 %13 %15\n"
		"OpStore %20 %18\n"
		"%24 = OpAccessChain %23 %13 %21\n"
		"OpStore %24 %22\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2136 
// Adds a fragment shader named "fragment" to the SPIR-V assembly sources.
// The assembly loads the float input at location 0, converts it to an unsigned
// integer (OpConvertFToU) and stores it to the uint output at location 0. The GLSL
// it was generated from is kept in the comment below for reference.
void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"layout(location = 0) in float in_color;\n"
		"layout(location = 0) out uint out_color;\n"
		"void main()\n"
		"{\n"
		"	out_color = uint(in_color);\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("fragment") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 14\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
		"OpExecutionMode %4 OriginUpperLeft\n"
		"OpDecorate %8 Location 0\n"
		"OpDecorate %11 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 0\n"
		"%7 = OpTypePointer Output %6\n"
		"%8 = OpVariable %7 Output\n"
		"%9 = OpTypeFloat 32\n"
		"%10 = OpTypePointer Input %9\n"
		"%11 = OpVariable %10 Input\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%12 = OpLoad %9 %11\n"
		"%13 = OpConvertFToU %6 %12\n"
		"OpStore %8 %13\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2176 
// Adds a pass-through tessellation control shader named "tesc" to the SPIR-V
// assembly sources. The assembly declares two output vertices per patch; when the
// invocation ID is 0 it stores 1.0 to TessLevelOuter[0] and TessLevelOuter[1], and
// every invocation copies the Position of its matching input vertex to its output
// vertex. The GLSL it was generated from is kept in the comment below for reference.
void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"#extension GL_KHR_shader_subgroup_basic: enable\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"layout(vertices = 2) out;\n"
		"void main (void)\n"
		"{\n"
		"  if (gl_InvocationID == 0)\n"
		"  {\n"
		"    gl_TessLevelOuter[0] = 1.0f;\n"
		"    gl_TessLevelOuter[1] = 1.0f;\n"
		"  }\n"
		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("tesc") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 46\n"
		"; Schema: 0\n"
		"OpCapability Tessellation\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
		"OpExecutionMode %4 OutputVertices 2\n"
		"OpDecorate %8 BuiltIn InvocationId\n"
		"OpDecorate %20 Patch\n"
		"OpDecorate %20 BuiltIn TessLevelOuter\n"
		"OpMemberDecorate %29 0 BuiltIn Position\n"
		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
		"OpDecorate %29 Block\n"
		"OpMemberDecorate %35 0 BuiltIn Position\n"
		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
		"OpDecorate %35 Block\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 1\n"
		"%7 = OpTypePointer Input %6\n"
		"%8 = OpVariable %7 Input\n"
		"%10 = OpConstant %6 0\n"
		"%11 = OpTypeBool\n"
		"%15 = OpTypeFloat 32\n"
		"%16 = OpTypeInt 32 0\n"
		"%17 = OpConstant %16 4\n"
		"%18 = OpTypeArray %15 %17\n"
		"%19 = OpTypePointer Output %18\n"
		"%20 = OpVariable %19 Output\n"
		"%21 = OpConstant %15 1\n"
		"%22 = OpTypePointer Output %15\n"
		"%24 = OpConstant %6 1\n"
		"%26 = OpTypeVector %15 4\n"
		"%27 = OpConstant %16 1\n"
		"%28 = OpTypeArray %15 %27\n"
		"%29 = OpTypeStruct %26 %15 %28 %28\n"
		"%30 = OpConstant %16 2\n"
		"%31 = OpTypeArray %29 %30\n"
		"%32 = OpTypePointer Output %31\n"
		"%33 = OpVariable %32 Output\n"
		"%35 = OpTypeStruct %26 %15 %28 %28\n"
		"%36 = OpConstant %16 32\n"
		"%37 = OpTypeArray %35 %36\n"
		"%38 = OpTypePointer Input %37\n"
		"%39 = OpVariable %38 Input\n"
		"%41 = OpTypePointer Input %26\n"
		"%44 = OpTypePointer Output %26\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%9 = OpLoad %6 %8\n"
		"%12 = OpIEqual %11 %9 %10\n"
		"OpSelectionMerge %14 None\n"
		"OpBranchConditional %12 %13 %14\n"
		"%13 = OpLabel\n"
		"%23 = OpAccessChain %22 %20 %10\n"
		"OpStore %23 %21\n"
		"%25 = OpAccessChain %22 %20 %24\n"
		"OpStore %25 %21\n"
		"OpBranch %14\n"
		"%14 = OpLabel\n"
		"%34 = OpLoad %6 %8\n"
		"%40 = OpLoad %6 %8\n"
		"%42 = OpAccessChain %41 %39 %40 %10\n"
		"%43 = OpLoad %26 %42\n"
		"%45 = OpAccessChain %44 %33 %34 %10\n"
		"OpStore %45 %43\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2270 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2271 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2272 {
2273 	/*
2274 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
2275 		"#extension GL_EXT_tessellation_shader : require\n"
2276 		"layout(isolines, equal_spacing, ccw ) in;\n"
2277 		"layout(location = 0) in float in_color[];\n"
2278 		"layout(location = 0) out float out_color;\n"
2279 		"\n"
2280 		"void main (void)\n"
2281 		"{\n"
2282 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2283 		"  out_color = in_color[0];\n"
2284 		"}\n";
2285 	*/
2286 	programCollection.spirvAsmSources.add("tese") <<
2287 		"; SPIR-V\n"
2288 		"; Version: 1.3\n"
2289 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2290 		"; Bound: 45\n"
2291 		"; Schema: 0\n"
2292 		"OpCapability Tessellation\n"
2293 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2294 		"OpMemoryModel Logical GLSL450\n"
2295 		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2296 		"OpExecutionMode %4 Isolines\n"
2297 		"OpExecutionMode %4 SpacingEqual\n"
2298 		"OpExecutionMode %4 VertexOrderCcw\n"
2299 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2300 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2301 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2302 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2303 		"OpDecorate %11 Block\n"
2304 		"OpMemberDecorate %16 0 BuiltIn Position\n"
2305 		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
2306 		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2307 		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2308 		"OpDecorate %16 Block\n"
2309 		"OpDecorate %29 BuiltIn TessCoord\n"
2310 		"OpDecorate %39 Location 0\n"
2311 		"OpDecorate %42 Location 0\n"
2312 		"%2 = OpTypeVoid\n"
2313 		"%3 = OpTypeFunction %2\n"
2314 		"%6 = OpTypeFloat 32\n"
2315 		"%7 = OpTypeVector %6 4\n"
2316 		"%8 = OpTypeInt 32 0\n"
2317 		"%9 = OpConstant %8 1\n"
2318 		"%10 = OpTypeArray %6 %9\n"
2319 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2320 		"%12 = OpTypePointer Output %11\n"
2321 		"%13 = OpVariable %12 Output\n"
2322 		"%14 = OpTypeInt 32 1\n"
2323 		"%15 = OpConstant %14 0\n"
2324 		"%16 = OpTypeStruct %7 %6 %10 %10\n"
2325 		"%17 = OpConstant %8 32\n"
2326 		"%18 = OpTypeArray %16 %17\n"
2327 		"%19 = OpTypePointer Input %18\n"
2328 		"%20 = OpVariable %19 Input\n"
2329 		"%21 = OpTypePointer Input %7\n"
2330 		"%24 = OpConstant %14 1\n"
2331 		"%27 = OpTypeVector %6 3\n"
2332 		"%28 = OpTypePointer Input %27\n"
2333 		"%29 = OpVariable %28 Input\n"
2334 		"%30 = OpConstant %8 0\n"
2335 		"%31 = OpTypePointer Input %6\n"
2336 		"%36 = OpTypePointer Output %7\n"
2337 		"%38 = OpTypePointer Output %6\n"
2338 		"%39 = OpVariable %38 Output\n"
2339 		"%40 = OpTypeArray %6 %17\n"
2340 		"%41 = OpTypePointer Input %40\n"
2341 		"%42 = OpVariable %41 Input\n"
2342 		"%4 = OpFunction %2 None %3\n"
2343 		"%5 = OpLabel\n"
2344 		"%22 = OpAccessChain %21 %20 %15 %15\n"
2345 		"%23 = OpLoad %7 %22\n"
2346 		"%25 = OpAccessChain %21 %20 %24 %15\n"
2347 		"%26 = OpLoad %7 %25\n"
2348 		"%32 = OpAccessChain %31 %29 %30\n"
2349 		"%33 = OpLoad %6 %32\n"
2350 		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2351 		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2352 		"%37 = OpAccessChain %36 %13 %15\n"
2353 		"OpStore %37 %35\n"
2354 		"%43 = OpAccessChain %31 %42 %15\n"
2355 		"%44 = OpLoad %6 %43\n"
2356 		"OpStore %39 %44\n"
2357 		"OpReturn\n"
2358 		"OpFunctionEnd\n";
2359 }
2360 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2361 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
2362 {
2363 	tcu::StringTemplate geometryTemplate(glslTemplate);
2364 
2365 	map<string, string>		linesParams;
2366 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2367 
2368 	map<string, string>		pointsParams;
2369 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2370 
2371 	collection.add("geometry_lines")	<< glu::GeometrySource(geometryTemplate.specialize(linesParams))	<< options;
2372 	collection.add("geometry_points")	<< glu::GeometrySource(geometryTemplate.specialize(pointsParams))	<< options;
2373 }
2374 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2375 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2376 {
2377 	tcu::StringTemplate geometryTemplate(spirvTemplate);
2378 
2379 	map<string, string>		linesParams;
2380 	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2381 
2382 	map<string, string>		pointsParams;
2383 	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2384 
2385 	collection.add("geometry_lines")	<< geometryTemplate.specialize(linesParams)		<< options;
2386 	collection.add("geometry_points")	<< geometryTemplate.specialize(pointsParams)	<< options;
2387 }
2388 
initializeMemory(Context & context,const Allocation & alloc,subgroups::SSBOData & data)2389 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
2390 {
2391 	const vk::VkFormat format = data.format;
2392 	const vk::VkDeviceSize size = data.numElements *
2393 		(data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2394 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2395 	{
2396 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2397 
2398 		switch (format)
2399 		{
2400 			default:
2401 				DE_FATAL("Illegal buffer format");
2402 				break;
2403 			case VK_FORMAT_R8_SINT:
2404 			case VK_FORMAT_R8G8_SINT:
2405 			case VK_FORMAT_R8G8B8_SINT:
2406 			case VK_FORMAT_R8G8B8A8_SINT:
2407 			case VK_FORMAT_R8_UINT:
2408 			case VK_FORMAT_R8G8_UINT:
2409 			case VK_FORMAT_R8G8B8_UINT:
2410 			case VK_FORMAT_R8G8B8A8_UINT:
2411 			{
2412 				deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2413 
2414 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2415 				{
2416 					ptr[k] = rnd.getUint8();
2417 				}
2418 			}
2419 			break;
2420 			case VK_FORMAT_R16_SINT:
2421 			case VK_FORMAT_R16G16_SINT:
2422 			case VK_FORMAT_R16G16B16_SINT:
2423 			case VK_FORMAT_R16G16B16A16_SINT:
2424 			case VK_FORMAT_R16_UINT:
2425 			case VK_FORMAT_R16G16_UINT:
2426 			case VK_FORMAT_R16G16B16_UINT:
2427 			case VK_FORMAT_R16G16B16A16_UINT:
2428 			{
2429 				deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2430 
2431 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2432 				{
2433 					ptr[k] = rnd.getUint16();
2434 				}
2435 			}
2436 			break;
2437 			case VK_FORMAT_R8_USCALED:
2438 			case VK_FORMAT_R8G8_USCALED:
2439 			case VK_FORMAT_R8G8B8_USCALED:
2440 			case VK_FORMAT_R8G8B8A8_USCALED:
2441 			{
2442 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2443 
2444 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2445 				{
2446 					deUint32 r = rnd.getUint32();
2447 					ptr[k] = (r & 1) ? r : 0;
2448 				}
2449 			}
2450 			break;
2451 			case VK_FORMAT_R32_SINT:
2452 			case VK_FORMAT_R32G32_SINT:
2453 			case VK_FORMAT_R32G32B32_SINT:
2454 			case VK_FORMAT_R32G32B32A32_SINT:
2455 			case VK_FORMAT_R32_UINT:
2456 			case VK_FORMAT_R32G32_UINT:
2457 			case VK_FORMAT_R32G32B32_UINT:
2458 			case VK_FORMAT_R32G32B32A32_UINT:
2459 			{
2460 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2461 
2462 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2463 				{
2464 					ptr[k] = rnd.getUint32();
2465 				}
2466 			}
2467 			break;
2468 			case VK_FORMAT_R64_SINT:
2469 			case VK_FORMAT_R64G64_SINT:
2470 			case VK_FORMAT_R64G64B64_SINT:
2471 			case VK_FORMAT_R64G64B64A64_SINT:
2472 			case VK_FORMAT_R64_UINT:
2473 			case VK_FORMAT_R64G64_UINT:
2474 			case VK_FORMAT_R64G64B64_UINT:
2475 			case VK_FORMAT_R64G64B64A64_UINT:
2476 			{
2477 				deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2478 
2479 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2480 				{
2481 					ptr[k] = rnd.getUint64();
2482 				}
2483 			}
2484 			break;
2485 			case VK_FORMAT_R16_SFLOAT:
2486 			case VK_FORMAT_R16G16_SFLOAT:
2487 			case VK_FORMAT_R16G16B16_SFLOAT:
2488 			case VK_FORMAT_R16G16B16A16_SFLOAT:
2489 			{
2490 				deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2491 
2492 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2493 				{
2494 					ptr[k] = deFloat32To16(rnd.getFloat());
2495 				}
2496 			}
2497 			break;
2498 			case VK_FORMAT_R32_SFLOAT:
2499 			case VK_FORMAT_R32G32_SFLOAT:
2500 			case VK_FORMAT_R32G32B32_SFLOAT:
2501 			case VK_FORMAT_R32G32B32A32_SFLOAT:
2502 			{
2503 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2504 
2505 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2506 				{
2507 					ptr[k] = rnd.getFloat();
2508 				}
2509 			}
2510 			break;
2511 			case VK_FORMAT_R64_SFLOAT:
2512 			case VK_FORMAT_R64G64_SFLOAT:
2513 			case VK_FORMAT_R64G64B64_SFLOAT:
2514 			case VK_FORMAT_R64G64B64A64_SFLOAT:
2515 			{
2516 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2517 
2518 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2519 				{
2520 					ptr[k] = rnd.getDouble();
2521 				}
2522 			}
2523 			break;
2524 		}
2525 	}
2526 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2527 	{
2528 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2529 
2530 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2531 		{
2532 			ptr[k] = 0;
2533 		}
2534 	}
2535 
2536 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
2537 	{
2538 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2539 	}
2540 }
2541 
getResultBinding(const VkShaderStageFlagBits shaderStage)2542 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2543 {
2544 	switch(shaderStage)
2545 	{
2546 		case VK_SHADER_STAGE_VERTEX_BIT:
2547 			return 0u;
2548 			break;
2549 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2550 			return 1u;
2551 			break;
2552 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2553 			return 2u;
2554 			break;
2555 		case VK_SHADER_STAGE_GEOMETRY_BIT:
2556 			return 3u;
2557 			break;
2558 		default:
2559 			DE_ASSERT(0);
2560 			return -1;
2561 	}
2562 	DE_ASSERT(0);
2563 	return -1;
2564 }
2565 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const VkShaderStageFlags shaderStage)2566 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2567 	Context& context, VkFormat format, SSBOData* extraData,
2568 	deUint32 extraDataCount, const void* internalData,
2569 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2570 	const VkShaderStageFlags shaderStage)
2571 {
2572 	return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2573 }
2574 
makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const VkShaderStageFlags shaderStage,const deUint32 tessShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2575 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2576 	Context& context, VkFormat format, SSBOData* extraData,
2577 	deUint32 extraDataCount, const void* internalData,
2578 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2579 	const VkShaderStageFlags shaderStage, const deUint32 tessShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
2580 {
2581 	const DeviceInterface&					vk						= context.getDeviceInterface();
2582 	const VkDevice							device					= context.getDevice();
2583 	const deUint32							maxWidth				= getMaxWidth();
2584 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2585 	DescriptorSetLayoutBuilder				layoutBuilder;
2586 	DescriptorPoolBuilder					poolBuilder;
2587 	DescriptorSetUpdateBuilder				updateBuilder;
2588 	Move <VkDescriptorPool>					descriptorPool;
2589 	Move <VkDescriptorSet>					descriptorSet;
2590 
2591 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device,
2592 																		context.getBinaryCollection().get("vert"), 0u));
2593 	const Unique<VkShaderModule>			teCtrlShaderModule		(createShaderModule(vk, device,
2594 																		context.getBinaryCollection().get("tesc"), 0u));
2595 	const Unique<VkShaderModule>			teEvalShaderModule		(createShaderModule(vk, device,
2596 																		context.getBinaryCollection().get("tese"), 0u));
2597 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device,
2598 																	context.getBinaryCollection().get("fragment"), 0u));
2599 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2600 
2601 	const VkVertexInputBindingDescription	vertexInputBinding		=
2602 	{
2603 		0u,											// binding;
2604 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
2605 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
2606 	};
2607 
2608 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2609 	{
2610 		0u,
2611 		0u,
2612 		VK_FORMAT_R32G32B32A32_SFLOAT,
2613 		0u
2614 	};
2615 
2616 	for (deUint32 i = 0u; i < extraDataCount; i++)
2617 	{
2618 		if (extraData[i].isImage)
2619 		{
2620 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2621 		}
2622 		else
2623 		{
2624 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2625 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2626 		}
2627 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2628 		initializeMemory(context, alloc, extraData[i]);
2629 	}
2630 
2631 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2632 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2633 
2634 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2635 
2636 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2637 
2638 	const deUint32 requiredSubgroupSizes[5] = {0u,
2639 											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2640 											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
2641 											   0u,
2642 											   0u};
2643 
2644 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2645 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2646 																						  VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2647 																						  *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2648 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2649 																						  0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2650 																						  ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
2651 																						  0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2652 
2653 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2654 		poolBuilder.addType(inputBuffers[ndx]->getType());
2655 
2656 	if (extraDataCount > 0)
2657 	{
2658 		descriptorPool = poolBuilder.build(vk, device,
2659 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2660 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2661 	}
2662 
2663 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2664 	{
2665 		if (inputBuffers[buffersNdx]->isImage())
2666 		{
2667 			VkDescriptorImageInfo info =
2668 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2669 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2670 
2671 			updateBuilder.writeSingle(*descriptorSet,
2672 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2673 										inputBuffers[buffersNdx]->getType(), &info);
2674 		}
2675 		else
2676 		{
2677 			VkDescriptorBufferInfo info =
2678 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2679 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2680 
2681 			updateBuilder.writeSingle(*descriptorSet,
2682 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2683 										inputBuffers[buffersNdx]->getType(), &info);
2684 		}
2685 	}
2686 
2687 	updateBuilder.update(vk, device);
2688 
2689 	const VkQueue							queue					= context.getUniversalQueue();
2690 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
2691 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
2692 	const deUint32							subgroupSize			= getSubgroupSize(context);
2693 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2694 	const vk::VkDeviceSize					vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
2695 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2696 	unsigned								totalIterations			= 0u;
2697 	unsigned								failedIterations		= 0u;
2698 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2699 
2700 	{
2701 		const Allocation&		alloc				= vertexBuffer.getAllocation();
2702 		std::vector<tcu::Vec4>	data				(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2703 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
2704 		float					leftHandPosition	= -1.0f;
2705 
2706 		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2707 		{
2708 			data[ndx][0] = leftHandPosition;
2709 			leftHandPosition += pixelSize;
2710 			data[ndx+1][0] = leftHandPosition;
2711 		}
2712 
2713 		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2714 		flushAlloc(vk, device, alloc);
2715 	}
2716 
2717 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2718 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
2719 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
2720 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2721 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2722 	const VkDeviceSize			vertexBufferOffset	= 0u;
2723 
2724 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2725 	{
2726 		totalIterations++;
2727 
2728 		beginCommandBuffer(vk, *cmdBuffer);
2729 		{
2730 
2731 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2732 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2733 
2734 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2735 
2736 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2737 
2738 			if (extraDataCount > 0)
2739 			{
2740 				vk.cmdBindDescriptorSets(*cmdBuffer,
2741 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2742 					&descriptorSet.get(), 0u, DE_NULL);
2743 			}
2744 
2745 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2746 			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2747 
2748 			endRenderPass(vk, *cmdBuffer);
2749 
2750 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2751 			endCommandBuffer(vk, *cmdBuffer);
2752 
2753 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2754 		}
2755 
2756 		{
2757 			const Allocation& allocResult = imageBufferResult.getAllocation();
2758 			invalidateAlloc(vk, device, allocResult);
2759 
2760 			std::vector<const void*> datas;
2761 			datas.push_back(allocResult.getHostPtr());
2762 			if (!checkResult(internalData, datas, width/2u, subgroupSize))
2763 				failedIterations++;
2764 		}
2765 	}
2766 
2767 	if (0 < failedIterations)
2768 	{
2769 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2770 
2771 		context.getTestContext().getLog()
2772 				<< TestLog::Message << valuesPassed << " / "
2773 				<< totalIterations << " values passed" << TestLog::EndMessage;
2774 		return tcu::TestStatus::fail("Failed!");
2775 	}
2776 
2777 	return tcu::TestStatus::pass("OK");
2778 }
2779 
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2780 bool vkt::subgroups::check(std::vector<const void*> datas,
2781 	deUint32 width, deUint32 ref)
2782 {
2783 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2784 
2785 	for (deUint32 n = 0; n < width; ++n)
2786 	{
2787 		if (data[n] != ref)
2788 		{
2789 			return false;
2790 		}
2791 	}
2792 
2793 	return true;
2794 }
2795 
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2796 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
2797 	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2798 	deUint32 ref)
2799 {
2800 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2801 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2802 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2803 
2804 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2805 }
2806 
makeGeometryFrameBufferTest(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))2807 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
2808 	Context& context, VkFormat format, SSBOData* extraData,
2809 	deUint32 extraDataCount, const void* internalData,
2810 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2811 {
2812 	return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
2813 														   0u, 0u);
2814 }
2815 
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const deUint32 geometryShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2816 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
2817 	Context& context, VkFormat format, SSBOData* extraData,
2818 	deUint32 extraDataCount, const void* internalData,
2819 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2820 	const deUint32 geometryShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
2821 {
2822 	const DeviceInterface&					vk						= context.getDeviceInterface();
2823 	const VkDevice							device					= context.getDevice();
2824 	const deUint32							maxWidth				= getMaxWidth();
2825 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2826 	DescriptorSetLayoutBuilder				layoutBuilder;
2827 	DescriptorPoolBuilder					poolBuilder;
2828 	DescriptorSetUpdateBuilder				updateBuilder;
2829 	Move <VkDescriptorPool>					descriptorPool;
2830 	Move <VkDescriptorSet>					descriptorSet;
2831 
2832 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2833 	const Unique<VkShaderModule>			geometryShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2834 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2835 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2836 	const VkVertexInputBindingDescription	vertexInputBinding		=
2837 	{
2838 		0u,											// binding;
2839 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
2840 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
2841 	};
2842 
2843 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2844 	{
2845 		0u,
2846 		0u,
2847 		VK_FORMAT_R32G32B32A32_SFLOAT,
2848 		0u
2849 	};
2850 
2851 	for (deUint32 i = 0u; i < extraDataCount; i++)
2852 	{
2853 		if (extraData[i].isImage)
2854 		{
2855 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2856 		}
2857 		else
2858 		{
2859 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2860 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2861 		}
2862 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2863 		initializeMemory(context, alloc, extraData[i]);
2864 	}
2865 
2866 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2867 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2868 
2869 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2870 
2871 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2872 
2873 	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2874 
2875 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2876 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2877 																						  *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2878 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2879 																						  0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2880 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2881 
2882 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2883 		poolBuilder.addType(inputBuffers[ndx]->getType());
2884 
2885 	if (extraDataCount > 0)
2886 	{
2887 		descriptorPool = poolBuilder.build(vk, device,
2888 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2889 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2890 	}
2891 
2892 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2893 	{
2894 		if (inputBuffers[buffersNdx]->isImage())
2895 		{
2896 			VkDescriptorImageInfo info =
2897 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2898 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2899 
2900 			updateBuilder.writeSingle(*descriptorSet,
2901 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2902 										inputBuffers[buffersNdx]->getType(), &info);
2903 		}
2904 		else
2905 		{
2906 			VkDescriptorBufferInfo info =
2907 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2908 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2909 
2910 			updateBuilder.writeSingle(*descriptorSet,
2911 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2912 										inputBuffers[buffersNdx]->getType(), &info);
2913 		}
2914 	}
2915 
2916 	updateBuilder.update(vk, device);
2917 
2918 	const VkQueue							queue					= context.getUniversalQueue();
2919 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
2920 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
2921 	const deUint32							subgroupSize			= getSubgroupSize(context);
2922 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2923 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
2924 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2925 	unsigned								totalIterations			= 0u;
2926 	unsigned								failedIterations		= 0u;
2927 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2928 
2929 	{
2930 		const Allocation&		alloc				= vertexBuffer.getAllocation();
2931 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2932 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
2933 		float					leftHandPosition	= -1.0f;
2934 
2935 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2936 		{
2937 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2938 			leftHandPosition += pixelSize;
2939 		}
2940 
2941 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2942 		flushAlloc(vk, device, alloc);
2943 	}
2944 
2945 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2946 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
2947 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
2948 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2949 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2950 	const VkDeviceSize			vertexBufferOffset	= 0u;
2951 
2952 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2953 	{
2954 		totalIterations++;
2955 
2956 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2957 		{
2958 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2959 			initializeMemory(context, alloc, extraData[ndx]);
2960 		}
2961 
2962 		beginCommandBuffer(vk, *cmdBuffer);
2963 		{
2964 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2965 
2966 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2967 
2968 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2969 
2970 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2971 
2972 			if (extraDataCount > 0)
2973 			{
2974 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2975 					&descriptorSet.get(), 0u, DE_NULL);
2976 			}
2977 
2978 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2979 
2980 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2981 
2982 			endRenderPass(vk, *cmdBuffer);
2983 
2984 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2985 
2986 			endCommandBuffer(vk, *cmdBuffer);
2987 
2988 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2989 		}
2990 
2991 		{
2992 			const Allocation& allocResult = imageBufferResult.getAllocation();
2993 			invalidateAlloc(vk, device, allocResult);
2994 
2995 			std::vector<const void*> datas;
2996 			datas.push_back(allocResult.getHostPtr());
2997 			if (!checkResult(internalData, datas, width, subgroupSize))
2998 				failedIterations++;
2999 		}
3000 	}
3001 
3002 	if (0 < failedIterations)
3003 	{
3004 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3005 
3006 		context.getTestContext().getLog()
3007 				<< TestLog::Message << valuesPassed << " / "
3008 				<< totalIterations << " values passed" << TestLog::EndMessage;
3009 
3010 		return tcu::TestStatus::fail("Failed!");
3011 	}
3012 
3013 	return tcu::TestStatus::pass("OK");
3014 }
3015 
allStages(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3016 tcu::TestStatus vkt::subgroups::allStages(
3017 	Context& context, VkFormat format, SSBOData* extraData,
3018 	deUint32 extraDataCount, const void* internalData,
3019 	const VerificationFunctor& checkResult,
3020 	const vk::VkShaderStageFlags shaderStage)
3021 {
3022 	return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3023 														 0u, 0u, 0u, 0u, 0u, DE_NULL);
3024 }
3025 
allStagesRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])3026 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
3027 	Context& context, VkFormat format, SSBOData* extraDatas,
3028 	deUint32 extraDatasCount, const void* internalData,
3029 	const VerificationFunctor& checkResult,
3030 	const VkShaderStageFlags shaderStageTested,
3031 	const deUint32 vertexShaderStageCreateFlags,
3032 	const deUint32 tessellationControlShaderStageCreateFlags,
3033 	const deUint32 tessellationEvalShaderStageCreateFlags,
3034 	const deUint32 geometryShaderStageCreateFlags,
3035 	const deUint32 fragmentShaderStageCreateFlags,
3036 	const deUint32 requiredSubgroupSize[5])
3037 {
3038 	const DeviceInterface&			vk					= context.getDeviceInterface();
3039 	const VkDevice					device				= context.getDevice();
3040 	const deUint32					maxWidth			= getMaxWidth();
3041 	vector<VkShaderStageFlagBits>	stagesVector;
3042 	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
3043 
3044 	Move<VkShaderModule>			vertexShaderModule;
3045 	Move<VkShaderModule>			teCtrlShaderModule;
3046 	Move<VkShaderModule>			teEvalShaderModule;
3047 	Move<VkShaderModule>			geometryShaderModule;
3048 	Move<VkShaderModule>			fragmentShaderModule;
3049 
3050 	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3051 	{
3052 		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3053 	}
3054 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3055 	{
3056 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3057 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3058 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3059 	}
3060 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3061 	{
3062 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3063 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3064 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3065 	}
3066 	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3067 	{
3068 		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3069 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3070 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3071 	}
3072 	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3073 	{
3074 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3075 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3076 	}
3077 
3078 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
3079 	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
3080 	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
3081 	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
3082 
3083 	shaderStageRequired = shaderStageTested | shaderStageRequired;
3084 
3085 	vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3086 	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3087 	{
3088 		teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3089 		teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3090 	}
3091 	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3092 	{
3093 		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3094 		{
3095 			// tessellation shaders output line primitives
3096 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3097 		}
3098 		else
3099 		{
3100 			// otherwise points are processed by geometry shader
3101 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3102 		}
3103 	}
3104 	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3105 		fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3106 
3107 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3108 
3109 	DescriptorSetLayoutBuilder layoutBuilder;
3110 	// The implicit result SSBO we use to store our outputs from the shader
3111 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3112 	{
3113 		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3114 		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3115 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3116 
3117 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3118 	}
3119 
3120 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3121 	{
3122 		const deUint32 datasNdx = ndx - stagesCount;
3123 		if (extraDatas[datasNdx].isImage)
3124 		{
3125 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3126 		}
3127 		else
3128 		{
3129 			const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3130 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3131 		}
3132 
3133 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3134 		initializeMemory(context, alloc, extraDatas[datasNdx]);
3135 
3136 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3137 								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3138 	}
3139 
3140 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3141 
3142 	const Unique<VkPipelineLayout> pipelineLayout(
3143 		makePipelineLayout(vk, device, *descriptorSetLayout));
3144 
3145 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3146 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3147 														   shaderStageRequired,
3148 														   *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3149 														   *renderPass,
3150 														   (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3151 														   DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3152 														   vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3153 														   geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3154 
3155 	Move <VkDescriptorPool>	descriptorPool;
3156 	Move <VkDescriptorSet>	descriptorSet;
3157 
3158 	if (inputBuffers.size() > 0)
3159 	{
3160 		DescriptorPoolBuilder poolBuilder;
3161 
3162 		for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3163 		{
3164 			poolBuilder.addType(inputBuffers[ndx]->getType());
3165 		}
3166 
3167 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3168 
3169 		// Create descriptor set
3170 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3171 
3172 		DescriptorSetUpdateBuilder updateBuilder;
3173 
3174 		for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3175 		{
3176 			deUint32 binding;
3177 			if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3178 			else binding = extraDatas[ndx -stagesCount].binding;
3179 
3180 			if (inputBuffers[ndx]->isImage())
3181 			{
3182 				VkDescriptorImageInfo info =
3183 					makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3184 											inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3185 
3186 				updateBuilder.writeSingle(	*descriptorSet,
3187 											DescriptorSetUpdateBuilder::Location::binding(binding),
3188 											inputBuffers[ndx]->getType(), &info);
3189 			}
3190 			else
3191 			{
3192 				VkDescriptorBufferInfo info =
3193 					makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3194 							0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3195 
3196 				updateBuilder.writeSingle(	*descriptorSet,
3197 													DescriptorSetUpdateBuilder::Location::binding(binding),
3198 													inputBuffers[ndx]->getType(), &info);
3199 			}
3200 		}
3201 
3202 		updateBuilder.update(vk, device);
3203 	}
3204 
3205 	{
3206 		const VkQueue					queue					= context.getUniversalQueue();
3207 		const deUint32					queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3208 		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3209 		const deUint32					subgroupSize			= getSubgroupSize(context);
3210 		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3211 		unsigned						totalIterations			= 0u;
3212 		unsigned						failedIterations		= 0u;
3213 		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3214 		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3215 		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
3216 		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
3217 		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3218 		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3219 		const VkImageSubresourceRange	subresourceRange		=
3220 		{
3221 			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
3222 			0u,																	//deUint32				baseMipLevel
3223 			1u,																	//deUint32				levelCount
3224 			0u,																	//deUint32				baseArrayLayer
3225 			1u																	//deUint32				layerCount
3226 		};
3227 
3228 		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
3229 			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3230 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3231 			resultImage.getImage(), subresourceRange);
3232 
3233 		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3234 		{
3235 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3236 			{
3237 				// re-init the data
3238 				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3239 				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3240 			}
3241 
3242 			totalIterations++;
3243 
3244 			beginCommandBuffer(vk, *cmdBuffer);
3245 
3246 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3247 
3248 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3249 
3250 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3251 
3252 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3253 
3254 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3255 
3256 			if (stagesCount + extraDatasCount > 0)
3257 				vk.cmdBindDescriptorSets(*cmdBuffer,
3258 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3259 						&descriptorSet.get(), 0u, DE_NULL);
3260 
3261 			vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3262 
3263 			endRenderPass(vk, *cmdBuffer);
3264 
3265 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3266 
3267 			endCommandBuffer(vk, *cmdBuffer);
3268 
3269 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3270 
3271 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3272 			{
3273 				std::vector<const void*> datas;
3274 				if (!inputBuffers[ndx]->isImage())
3275 				{
3276 					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3277 					invalidateAlloc(vk, device, resultAlloc);
3278 					// we always have our result data first
3279 					datas.push_back(resultAlloc.getHostPtr());
3280 				}
3281 
3282 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3283 				{
3284 					const deUint32 datasNdx = index - stagesCount;
3285 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3286 					{
3287 						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3288 						invalidateAlloc(vk, device, resultAlloc);
3289 						// we always have our result data first
3290 						datas.push_back(resultAlloc.getHostPtr());
3291 					}
3292 				}
3293 
3294 				// Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3295 				const bool		multiCall	= (	stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT						||
3296 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT		||
3297 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT	||
3298 												stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT					);
3299 				const deUint32	usedWidth	= ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3300 
3301 				if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3302 					failedIterations++;
3303 			}
3304 			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3305 			{
3306 				std::vector<const void*> datas;
3307 				const Allocation& resultAlloc = imageBufferResult.getAllocation();
3308 				invalidateAlloc(vk, device, resultAlloc);
3309 
3310 				// we always have our result data first
3311 				datas.push_back(resultAlloc.getHostPtr());
3312 
3313 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3314 				{
3315 					const deUint32 datasNdx = index - stagesCount;
3316 					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3317 					{
3318 						const Allocation& alloc = inputBuffers[index]->getAllocation();
3319 						invalidateAlloc(vk, device, alloc);
3320 						// we always have our result data first
3321 						datas.push_back(alloc.getHostPtr());
3322 					}
3323 				}
3324 
3325 				if (!checkResult(internalData, datas, width, subgroupSize, false))
3326 					failedIterations++;
3327 			}
3328 
3329 			vk.resetCommandBuffer(*cmdBuffer, 0);
3330 		}
3331 
3332 		if (0 < failedIterations)
3333 		{
3334 			unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3335 
3336 			context.getTestContext().getLog()
3337 				<< TestLog::Message << valuesPassed << " / "
3338 				<< totalIterations << " values passed" << TestLog::EndMessage;
3339 
3340 			return tcu::TestStatus::fail("Failed!");
3341 		}
3342 	}
3343 
3344 	return tcu::TestStatus::pass("OK");
3345 }
3346 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))3347 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
3348 	SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3349 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
3350 {
3351 	return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
3352 														 0u, 0u);
3353 }
3354 
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const deUint32 vertexShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3355 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
3356 	SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3357 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
3358 	const deUint32 vertexShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3359 {
3360 	const DeviceInterface&					vk						= context.getDeviceInterface();
3361 	const VkDevice							device					= context.getDevice();
3362 	const VkQueue							queue					= context.getUniversalQueue();
3363 	const deUint32							maxWidth				= getMaxWidth();
3364 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3365 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
3366 	DescriptorSetLayoutBuilder				layoutBuilder;
3367 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3368 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3369 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
3370 
3371 	const VkVertexInputBindingDescription	vertexInputBinding		=
3372 	{
3373 		0u,											// binding;
3374 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
3375 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
3376 	};
3377 
3378 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
3379 	{
3380 		0u,
3381 		0u,
3382 		VK_FORMAT_R32G32B32A32_SFLOAT,
3383 		0u
3384 	};
3385 
3386 	for (deUint32 i = 0u; i < extraDataCount; i++)
3387 	{
3388 		if (extraData[i].isImage)
3389 		{
3390 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3391 		}
3392 		else
3393 		{
3394 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3395 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3396 		}
3397 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3398 		initializeMemory(context, alloc, extraData[i]);
3399 	}
3400 
3401 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3402 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3403 
3404 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
3405 
3406 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
3407 
3408 	const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3409 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
3410 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3411 																						  *vertexShaderModule, *fragmentShaderModule,
3412 																						  DE_NULL, DE_NULL, DE_NULL,
3413 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3414 																						  &vertexInputBinding, &vertexInputAttribute, true, format,
3415 																						  vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3416 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3417 	DescriptorPoolBuilder					poolBuilder;
3418 	DescriptorSetUpdateBuilder				updateBuilder;
3419 
3420 
3421 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3422 		poolBuilder.addType(inputBuffers[ndx]->getType());
3423 
3424 	Move <VkDescriptorPool>					descriptorPool;
3425 	Move <VkDescriptorSet>					descriptorSet;
3426 
3427 	if (extraDataCount > 0)
3428 	{
3429 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3430 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3431 	}
3432 
3433 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3434 	{
3435 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3436 		initializeMemory(context, alloc, extraData[ndx]);
3437 	}
3438 
3439 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3440 	{
3441 		if (inputBuffers[buffersNdx]->isImage())
3442 		{
3443 			VkDescriptorImageInfo info =
3444 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3445 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3446 
3447 			updateBuilder.writeSingle(*descriptorSet,
3448 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3449 										inputBuffers[buffersNdx]->getType(), &info);
3450 		}
3451 		else
3452 		{
3453 			VkDescriptorBufferInfo info =
3454 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3455 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3456 
3457 			updateBuilder.writeSingle(*descriptorSet,
3458 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3459 										inputBuffers[buffersNdx]->getType(), &info);
3460 		}
3461 	}
3462 	updateBuilder.update(vk, device);
3463 
3464 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3465 
3466 	const deUint32							subgroupSize			= getSubgroupSize(context);
3467 
3468 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3469 
3470 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
3471 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3472 
3473 	unsigned								totalIterations			= 0u;
3474 	unsigned								failedIterations		= 0u;
3475 
3476 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3477 
3478 	{
3479 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3480 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3481 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3482 		float					leftHandPosition	= -1.0f;
3483 
3484 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3485 		{
3486 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3487 			leftHandPosition += pixelSize;
3488 		}
3489 
3490 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3491 		flushAlloc(vk, device, alloc);
3492 	}
3493 
3494 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3495 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3496 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3497 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3498 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3499 	const VkDeviceSize			vertexBufferOffset	= 0u;
3500 
3501 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3502 	{
3503 		totalIterations++;
3504 
3505 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3506 		{
3507 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3508 			initializeMemory(context, alloc, extraData[ndx]);
3509 		}
3510 
3511 		beginCommandBuffer(vk, *cmdBuffer);
3512 		{
3513 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3514 
3515 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3516 
3517 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3518 
3519 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3520 
3521 			if (extraDataCount > 0)
3522 			{
3523 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3524 					&descriptorSet.get(), 0u, DE_NULL);
3525 			}
3526 
3527 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3528 
3529 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3530 
3531 			endRenderPass(vk, *cmdBuffer);
3532 
3533 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3534 
3535 			endCommandBuffer(vk, *cmdBuffer);
3536 
3537 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3538 		}
3539 
3540 		{
3541 			const Allocation& allocResult = imageBufferResult.getAllocation();
3542 			invalidateAlloc(vk, device, allocResult);
3543 
3544 			std::vector<const void*> datas;
3545 			datas.push_back(allocResult.getHostPtr());
3546 			if (!checkResult(internalData, datas, width, subgroupSize))
3547 				failedIterations++;
3548 		}
3549 	}
3550 
3551 	if (0 < failedIterations)
3552 	{
3553 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3554 
3555 		context.getTestContext().getLog()
3556 			<< TestLog::Message << valuesPassed << " / "
3557 			<< totalIterations << " values passed" << TestLog::EndMessage;
3558 
3559 		return tcu::TestStatus::fail("Failed!");
3560 	}
3561 
3562 	return tcu::TestStatus::pass("OK");
3563 }
3564 
makeFragmentFrameBufferTest(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 height,deUint32 subgroupSize))3565 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(
3566 	Context& context, VkFormat format, SSBOData* extraDatas,
3567 	deUint32 extraDatasCount, const void* internalData,
3568 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3569 						deUint32 height, deUint32 subgroupSize))
3570 {
3571 	return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult,
3572 														   0u, 0u);
3573 }
3574 
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 height,deUint32 subgroupSize),const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3575 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3576 	Context& context, VkFormat format, SSBOData* extraDatas,
3577 	deUint32 extraDatasCount, const void* internalData,
3578 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3579 						deUint32 height, deUint32 subgroupSize),
3580 	const deUint32 fragmentShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3581 {
3582 	const DeviceInterface&					vk						= context.getDeviceInterface();
3583 	const VkDevice							device					= context.getDevice();
3584 	const VkQueue							queue					= context.getUniversalQueue();
3585 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3586 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule
3587 																		(vk, device, context.getBinaryCollection().get("vert"), 0u));
3588 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule
3589 																		(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3590 
3591 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
3592 
3593 	for (deUint32 i = 0; i < extraDatasCount; i++)
3594 	{
3595 		if (extraDatas[i].isImage)
3596 		{
3597 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3598 										static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3599 		}
3600 		else
3601 		{
3602 			vk::VkDeviceSize size =
3603 				getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3604 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3605 		}
3606 
3607 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3608 		initializeMemory(context, alloc, extraDatas[i]);
3609 	}
3610 
3611 	DescriptorSetLayoutBuilder layoutBuilder;
3612 
3613 	for (deUint32 i = 0; i < extraDatasCount; i++)
3614 	{
3615 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
3616 								 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3617 	}
3618 
3619 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3620 		layoutBuilder.build(vk, device));
3621 
3622 	const Unique<VkPipelineLayout> pipelineLayout(
3623 		makePipelineLayout(vk, device, *descriptorSetLayout));
3624 
3625 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3626 
3627 	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3628 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3629 														   VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3630 														   *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3631 														   DE_NULL, DE_NULL, true, VK_FORMAT_R32G32B32A32_SFLOAT,
3632 														   0u, 0u, 0u, 0u, fragmentShaderStageCreateFlags, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3633 
3634 	DescriptorPoolBuilder poolBuilder;
3635 
3636 	// To stop validation complaining, always add at least one type to pool.
3637 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3638 	for (deUint32 i = 0; i < extraDatasCount; i++)
3639 	{
3640 		poolBuilder.addType(inputBuffers[i]->getType());
3641 	}
3642 
3643 	Move<VkDescriptorPool> descriptorPool;
3644 	// Create descriptor set
3645 	Move<VkDescriptorSet> descriptorSet;
3646 
3647 	if (extraDatasCount > 0)
3648 	{
3649 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3650 
3651 		descriptorSet	= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3652 	}
3653 
3654 	DescriptorSetUpdateBuilder updateBuilder;
3655 
3656 	for (deUint32 i = 0; i < extraDatasCount; i++)
3657 	{
3658 		if (inputBuffers[i]->isImage())
3659 		{
3660 			VkDescriptorImageInfo info =
3661 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3662 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3663 
3664 			updateBuilder.writeSingle(*descriptorSet,
3665 									  DescriptorSetUpdateBuilder::Location::binding(i),
3666 									  inputBuffers[i]->getType(), &info);
3667 		}
3668 		else
3669 		{
3670 			VkDescriptorBufferInfo info =
3671 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
3672 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3673 
3674 			updateBuilder.writeSingle(*descriptorSet,
3675 									  DescriptorSetUpdateBuilder::Location::binding(i),
3676 									  inputBuffers[i]->getType(), &info);
3677 		}
3678 	}
3679 
3680 	if (extraDatasCount > 0)
3681 		updateBuilder.update(vk, device);
3682 
3683 	const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
3684 
3685 	const deUint32					subgroupSize		= getSubgroupSize(context);
3686 
3687 	const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));
3688 
3689 	unsigned totalIterations = 0;
3690 	unsigned failedIterations = 0;
3691 
3692 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3693 	{
3694 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3695 		{
3696 			totalIterations++;
3697 
3698 			// re-init the data
3699 			for (deUint32 i = 0; i < extraDatasCount; i++)
3700 			{
3701 				const Allocation& alloc = inputBuffers[i]->getAllocation();
3702 				initializeMemory(context, alloc, extraDatas[i]);
3703 			}
3704 
3705 			VkDeviceSize formatSize = getFormatSizeInBytes(format);
3706 			const VkDeviceSize resultImageSizeInBytes =
3707 				width * height * formatSize;
3708 
3709 			Image resultImage(context, width, height, format,
3710 							  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
3711 							  VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3712 
3713 			Buffer resultBuffer(context, resultImageSizeInBytes,
3714 								VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3715 
3716 			const Unique<VkFramebuffer> framebuffer(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3717 
3718 			beginCommandBuffer(vk, *cmdBuffer);
3719 
3720 			VkViewport viewport = makeViewport(width, height);
3721 
3722 			vk.cmdSetViewport(
3723 				*cmdBuffer, 0, 1, &viewport);
3724 
3725 			VkRect2D scissor = {{0, 0}, {width, height}};
3726 
3727 			vk.cmdSetScissor(
3728 				*cmdBuffer, 0, 1, &scissor);
3729 
3730 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3731 
3732 			vk.cmdBindPipeline(
3733 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3734 
3735 			if (extraDatasCount > 0)
3736 			{
3737 				vk.cmdBindDescriptorSets(*cmdBuffer,
3738 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3739 						&descriptorSet.get(), 0u, DE_NULL);
3740 			}
3741 
3742 			vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3743 
3744 			endRenderPass(vk, *cmdBuffer);
3745 
3746 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3747 
3748 			endCommandBuffer(vk, *cmdBuffer);
3749 
3750 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3751 
3752 			std::vector<const void*> datas;
3753 			{
3754 				const Allocation& resultAlloc = resultBuffer.getAllocation();
3755 				invalidateAlloc(vk, device, resultAlloc);
3756 
3757 				// we always have our result data first
3758 				datas.push_back(resultAlloc.getHostPtr());
3759 			}
3760 
3761 			if (!checkResult(internalData, datas, width, height, subgroupSize))
3762 			{
3763 				failedIterations++;
3764 			}
3765 
3766 			vk.resetCommandBuffer(*cmdBuffer, 0);
3767 		}
3768 	}
3769 
3770 	if (0 < failedIterations)
3771 	{
3772 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3773 
3774 		context.getTestContext().getLog()
3775 			<< TestLog::Message << valuesPassed << " / "
3776 			<< totalIterations << " values passed" << TestLog::EndMessage;
3777 
3778 		return tcu::TestStatus::fail("Failed!");
3779 	}
3780 
3781 	return tcu::TestStatus::pass("OK");
3782 }
3783 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3784 Move<VkPipeline> makeComputePipeline(Context& context,
3785 									 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
3786 									 const deUint32 pipelineShaderStageFlags, const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
3787 									 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ, deUint32 requiredSubgroupSize)
3788 {
3789 	const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3790 
3791 	const vk::VkSpecializationMapEntry entries[3] =
3792 	{
3793 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3794 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3795 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3796 	};
3797 
3798 	const vk::VkSpecializationInfo info =
3799 	{
3800 		/* mapEntryCount = */ 3,
3801 		/* pMapEntries   = */ entries,
3802 		/* dataSize      = */ sizeof(localSize),
3803 		/* pData         = */ localSize
3804 	};
3805 
3806 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3807 	{
3808 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3809 		DE_NULL,																		// void*              pNext;
3810 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3811 	};
3812 
3813 	const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3814 	{
3815 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,				// VkStructureType					sType;
3816 		(requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL),	// const void*						pNext;
3817 		pipelineShaderStageFlags,											// VkPipelineShaderStageCreateFlags	flags;
3818 		VK_SHADER_STAGE_COMPUTE_BIT,										// VkShaderStageFlagBits			stage;
3819 		shaderModule,														// VkShaderModule					module;
3820 		"main",																// const char*						pName;
3821 		&info,																// const VkSpecializationInfo*		pSpecializationInfo;
3822 	};
3823 
3824 	const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3825 	{
3826 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
3827 		DE_NULL,										// const void*						pNext;
3828 		pipelineCreateFlags,							// VkPipelineCreateFlags			flags;
3829 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
3830 		pipelineLayout,									// VkPipelineLayout					layout;
3831 		basePipelineHandle,								// VkPipeline						basePipelineHandle;
3832 		-1,												// deInt32							basePipelineIndex;
3833 	};
3834 
3835 	return createComputePipeline(context.getDeviceInterface(),
3836 								 context.getDevice(), DE_NULL, &pipelineCreateInfo);
3837 }
3838 
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * inputs,deUint32 inputsCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 subgroupSize),const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)3839 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
3840 	Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
3841 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
3842 						const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3843 						deUint32 subgroupSize),
3844 	const deUint32 pipelineShaderStageCreateFlags, const deUint32 numWorkgroups[3],
3845 	const deBool isRequiredSubgroupSize, const deUint32 subgroupSize, const deUint32 localSizesToTest[][3], const deUint32 localSizesToTestCount)
3846 {
3847 	const DeviceInterface&					vk						= context.getDeviceInterface();
3848 	const VkDevice							device					= context.getDevice();
3849 	const VkQueue							queue					= context.getUniversalQueue();
3850 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3851 	VkDeviceSize							elementSize				= getFormatSizeInBytes(format);
3852 
3853 	VkDeviceSize maxSubgroupSize = maxSupportedSubgroupSize();
3854 
3855 	if (isRequiredSubgroupSize)
3856 	{
3857 		VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
3858 		subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
3859 		subgroupSizeControlProperties.pNext = DE_NULL;
3860 
3861 		VkPhysicalDeviceProperties2 properties2;
3862 		properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3863 		properties2.pNext = &subgroupSizeControlProperties;
3864 		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties2);
3865 		maxSubgroupSize = deMax32(subgroupSizeControlProperties.maxSubgroupSize, static_cast<deUint32>(maxSubgroupSize));
3866 	}
3867 
3868 	const VkDeviceSize resultBufferSize = maxSubgroupSize *
3869 										  maxSubgroupSize *
3870 										  maxSubgroupSize;
3871 
3872 	const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3873 
3874 	Buffer resultBuffer(
3875 		context, resultBufferSizeInBytes);
3876 
3877 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
3878 
3879 	for (deUint32 i = 0; i < inputsCount; i++)
3880 	{
3881 		if (inputs[i].isImage)
3882 		{
3883 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3884 										static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3885 		}
3886 		else
3887 		{
3888 			vk::VkDeviceSize size =
3889 				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3890 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3891 		}
3892 
3893 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3894 		initializeMemory(context, alloc, inputs[i]);
3895 	}
3896 
3897 	DescriptorSetLayoutBuilder layoutBuilder;
3898 	layoutBuilder.addBinding(
3899 		resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3900 
3901 	for (deUint32 i = 0; i < inputsCount; i++)
3902 	{
3903 		layoutBuilder.addBinding(
3904 			inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3905 	}
3906 
3907 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3908 		layoutBuilder.build(vk, device));
3909 
3910 	const Unique<VkShaderModule> shaderModule(
3911 		createShaderModule(vk, device,
3912 						   context.getBinaryCollection().get("comp"), 0u));
3913 	const Unique<VkPipelineLayout> pipelineLayout(
3914 		makePipelineLayout(vk, device, *descriptorSetLayout));
3915 
3916 	DescriptorPoolBuilder poolBuilder;
3917 
3918 	poolBuilder.addType(resultBuffer.getType());
3919 
3920 	for (deUint32 i = 0; i < inputsCount; i++)
3921 	{
3922 		poolBuilder.addType(inputBuffers[i]->getType());
3923 	}
3924 
3925 	const Unique<VkDescriptorPool> descriptorPool(
3926 		poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3927 
3928 	// Create descriptor set
3929 	const Unique<VkDescriptorSet> descriptorSet(
3930 		makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3931 
3932 	DescriptorSetUpdateBuilder updateBuilder;
3933 
3934 	const VkDescriptorBufferInfo resultDescriptorInfo =
3935 		makeDescriptorBufferInfo(
3936 			resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3937 
3938 	updateBuilder.writeSingle(*descriptorSet,
3939 							  DescriptorSetUpdateBuilder::Location::binding(0u),
3940 							  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
3941 
3942 	for (deUint32 i = 0; i < inputsCount; i++)
3943 	{
3944 		if (inputBuffers[i]->isImage())
3945 		{
3946 			VkDescriptorImageInfo info =
3947 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3948 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3949 
3950 			updateBuilder.writeSingle(*descriptorSet,
3951 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
3952 									  inputBuffers[i]->getType(), &info);
3953 		}
3954 		else
3955 		{
3956 			vk::VkDeviceSize size =
3957 				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3958 			VkDescriptorBufferInfo info =
3959 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3960 
3961 			updateBuilder.writeSingle(*descriptorSet,
3962 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
3963 									  inputBuffers[i]->getType(), &info);
3964 		}
3965 	}
3966 
3967 	updateBuilder.update(vk, device);
3968 
3969 	const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
3970 
3971 	unsigned totalIterations = 0;
3972 	unsigned failedIterations = 0;
3973 
3974 	const Unique<VkCommandBuffer> cmdBuffer(
3975 		makeCommandBuffer(context, *cmdPool));
3976 
3977 	std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines(localSizesToTestCount);
3978 
3979 	context.getTestContext().touchWatchdog();
3980 	pipelines[0] =
3981 		de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
3982 			makeComputePipeline(context, *pipelineLayout, *shaderModule,
3983 								pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
3984 								localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2],
3985 								isRequiredSubgroupSize ? subgroupSize : 0u)));
3986 	context.getTestContext().touchWatchdog();
3987 
3988 	for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3989 	{
3990 		const deUint32 nextX = localSizesToTest[index][0];
3991 		const deUint32 nextY = localSizesToTest[index][1];
3992 		const deUint32 nextZ = localSizesToTest[index][2];
3993 
3994 		context.getTestContext().touchWatchdog();
3995 		pipelines[index] =
3996 			de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
3997 				makeComputePipeline(context, *pipelineLayout, *shaderModule,
3998 									pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_DERIVATIVE_BIT, **pipelines[0],
3999 									nextX, nextY, nextZ,
4000 									isRequiredSubgroupSize ? subgroupSize : 0u)));
4001 		context.getTestContext().touchWatchdog();
4002 	}
4003 
4004 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4005 	{
4006 
4007 		// we are running one test
4008 		totalIterations++;
4009 
4010 		beginCommandBuffer(vk, *cmdBuffer);
4011 
4012 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, **pipelines[index]);
4013 
4014 		vk.cmdBindDescriptorSets(*cmdBuffer,
4015 				VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
4016 				&descriptorSet.get(), 0u, DE_NULL);
4017 
4018 		vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4019 
4020 		endCommandBuffer(vk, *cmdBuffer);
4021 
4022 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4023 
4024 		std::vector<const void*> datas;
4025 
4026 		{
4027 			const Allocation& resultAlloc = resultBuffer.getAllocation();
4028 			invalidateAlloc(vk, device, resultAlloc);
4029 
4030 			// we always have our result data first
4031 			datas.push_back(resultAlloc.getHostPtr());
4032 		}
4033 
4034 		for (deUint32 i = 0; i < inputsCount; i++)
4035 		{
4036 			if (!inputBuffers[i]->isImage())
4037 			{
4038 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4039 				invalidateAlloc(vk, device, resultAlloc);
4040 
4041 				// we always have our result data first
4042 				datas.push_back(resultAlloc.getHostPtr());
4043 			}
4044 		}
4045 
4046 		if (!checkResult(internalData, datas, numWorkgroups, localSizesToTest[index], subgroupSize))
4047 		{
4048 			failedIterations++;
4049 		}
4050 
4051 		vk.resetCommandBuffer(*cmdBuffer, 0);
4052 	}
4053 
4054 	if (0 < failedIterations)
4055 	{
4056 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4057 
4058 		context.getTestContext().getLog()
4059 			<< TestLog::Message << valuesPassed << " / "
4060 			<< totalIterations << " values passed" << TestLog::EndMessage;
4061 
4062 		return tcu::TestStatus::fail("Failed!");
4063 	}
4064 
4065 	return tcu::TestStatus::pass("OK");
4066 }
4067 
makeComputeTest(Context & context,VkFormat format,SSBOData * inputs,deUint32 inputsCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 subgroupSize),deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4068 tcu::TestStatus vkt::subgroups::makeComputeTest(
4069 	Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
4070 	bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
4071 						const deUint32 numWorkgroups[3], const deUint32 localSize[3],
4072 						deUint32 subgroupSize),
4073 	deUint32 requiredSubgroupSize, const deUint32 pipelineShaderStageCreateFlags)
4074 {
4075 	const deUint32 numWorkgroups[3] = {4, 2, 2};
4076 	deUint32 subgroupSize = requiredSubgroupSize;
4077 
4078 	if(requiredSubgroupSize == 0)
4079 		subgroupSize = vkt::subgroups::getSubgroupSize(context);
4080 
4081 	const deUint32 localSizesToTestCount = 8;
4082 	deUint32 localSizesToTest[localSizesToTestCount][3] =
4083 	{
4084 		{1, 1, 1},
4085 		{subgroupSize, 1, 1},
4086 		{1, subgroupSize, 1},
4087 		{1, 1, subgroupSize},
4088 		{32, 4, 1},
4089 		{1, 4, 32},
4090 		{3, 5, 7},
4091 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
4092 	};
4093 
4094 	return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4095 											   numWorkgroups, requiredSubgroupSize != 0u, subgroupSize, localSizesToTest, localSizesToTestCount);
4096 }
4097