1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuCommandLine.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "vkBarrierUtil.hpp"
32 #include "vkImageUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
36 using namespace tcu;
37 using namespace std;
38 using namespace vk;
39 using namespace vkt;
40
41 namespace
42 {
43
getMaxWidth()44 deUint32 getMaxWidth ()
45 {
46 return 1024u;
47 }
48
// Advances the test width: every value up to 128 is visited so that each
// possible subgroup size (max 128) is covered exactly once; past that only
// powers of two are visited to keep total test time reasonable.
deUint32 getNextWidth (const deUint32 width)
{
	return (width < 128u) ? (width + 1u) : (width * 2u);
}
62
getFormatSizeInBytes(const VkFormat format)63 deUint32 getFormatSizeInBytes(const VkFormat format)
64 {
65 switch (format)
66 {
67 default:
68 DE_FATAL("Unhandled format!");
69 return 0;
70 case VK_FORMAT_R8_SINT:
71 case VK_FORMAT_R8_UINT:
72 return static_cast<deUint32>(sizeof(deInt8));
73 case VK_FORMAT_R8G8_SINT:
74 case VK_FORMAT_R8G8_UINT:
75 return static_cast<deUint32>(sizeof(deInt8) * 2);
76 case VK_FORMAT_R8G8B8_SINT:
77 case VK_FORMAT_R8G8B8_UINT:
78 case VK_FORMAT_R8G8B8A8_SINT:
79 case VK_FORMAT_R8G8B8A8_UINT:
80 return static_cast<deUint32>(sizeof(deInt8) * 4);
81 case VK_FORMAT_R16_SINT:
82 case VK_FORMAT_R16_UINT:
83 case VK_FORMAT_R16_SFLOAT:
84 return static_cast<deUint32>(sizeof(deInt16));
85 case VK_FORMAT_R16G16_SINT:
86 case VK_FORMAT_R16G16_UINT:
87 case VK_FORMAT_R16G16_SFLOAT:
88 return static_cast<deUint32>(sizeof(deInt16) * 2);
89 case VK_FORMAT_R16G16B16_UINT:
90 case VK_FORMAT_R16G16B16_SINT:
91 case VK_FORMAT_R16G16B16_SFLOAT:
92 case VK_FORMAT_R16G16B16A16_SINT:
93 case VK_FORMAT_R16G16B16A16_UINT:
94 case VK_FORMAT_R16G16B16A16_SFLOAT:
95 return static_cast<deUint32>(sizeof(deInt16) * 4);
96 case VK_FORMAT_R32_SINT:
97 case VK_FORMAT_R32_UINT:
98 case VK_FORMAT_R32_SFLOAT:
99 return static_cast<deUint32>(sizeof(deInt32));
100 case VK_FORMAT_R32G32_SINT:
101 case VK_FORMAT_R32G32_UINT:
102 case VK_FORMAT_R32G32_SFLOAT:
103 return static_cast<deUint32>(sizeof(deInt32) * 2);
104 case VK_FORMAT_R32G32B32_SINT:
105 case VK_FORMAT_R32G32B32_UINT:
106 case VK_FORMAT_R32G32B32_SFLOAT:
107 case VK_FORMAT_R32G32B32A32_SINT:
108 case VK_FORMAT_R32G32B32A32_UINT:
109 case VK_FORMAT_R32G32B32A32_SFLOAT:
110 return static_cast<deUint32>(sizeof(deInt32) * 4);
111 case VK_FORMAT_R64_SINT:
112 case VK_FORMAT_R64_UINT:
113 case VK_FORMAT_R64_SFLOAT:
114 return static_cast<deUint32>(sizeof(deInt64));
115 case VK_FORMAT_R64G64_SINT:
116 case VK_FORMAT_R64G64_UINT:
117 case VK_FORMAT_R64G64_SFLOAT:
118 return static_cast<deUint32>(sizeof(deInt64) * 2);
119 case VK_FORMAT_R64G64B64_SINT:
120 case VK_FORMAT_R64G64B64_UINT:
121 case VK_FORMAT_R64G64B64_SFLOAT:
122 case VK_FORMAT_R64G64B64A64_SINT:
123 case VK_FORMAT_R64G64B64A64_UINT:
124 case VK_FORMAT_R64G64B64A64_SFLOAT:
125 return static_cast<deUint32>(sizeof(deInt64) * 4);
126 // The below formats are used to represent bool and bvec* types. These
127 // types are passed to the shader as int and ivec* types, before the
128 // calculations are done as booleans. We need a distinct type here so
129 // that the shader generators can switch on it and generate the correct
130 // shader source for testing.
131 case VK_FORMAT_R8_USCALED:
132 return static_cast<deUint32>(sizeof(deInt32));
133 case VK_FORMAT_R8G8_USCALED:
134 return static_cast<deUint32>(sizeof(deInt32) * 2);
135 case VK_FORMAT_R8G8B8_USCALED:
136 case VK_FORMAT_R8G8B8A8_USCALED:
137 return static_cast<deUint32>(sizeof(deInt32) * 4);
138 }
139 }
140
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)141 deUint32 getElementSizeInBytes(
142 const VkFormat format,
143 const subgroups::SSBOData::InputDataLayoutType layout)
144 {
145 deUint32 bytes = getFormatSizeInBytes(format);
146 if (layout == subgroups::SSBOData::LayoutStd140)
147 return bytes < 16 ? 16 : bytes;
148 else
149 return bytes;
150 }
151
makeRenderPass(Context & context,VkFormat format)152 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
153 {
154 VkAttachmentReference colorReference = {
155 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
156 };
157
158 const VkSubpassDescription subpassDescription = {0u,
159 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
160 DE_NULL, DE_NULL, 0, DE_NULL
161 };
162
163 const VkSubpassDependency subpassDependencies[2] = {
164 { VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
165 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
166 VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
167 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
168 VK_DEPENDENCY_BY_REGION_BIT
169 },
170 { 0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
171 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
172 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
173 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
174 VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
175 },
176 };
177
178 VkAttachmentDescription attachmentDescription = {0u, format,
179 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
180 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
181 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
182 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
183 };
184
185 const VkRenderPassCreateInfo renderPassCreateInfo = {
186 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
187 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
188 };
189
190 return createRenderPass(context.getDeviceInterface(), context.getDevice(),
191 &renderPassCreateInfo);
192 }
193
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])194 Move<VkPipeline> makeGraphicsPipeline(const DeviceInterface& vk,
195 const VkDevice device,
196 const VkPipelineLayout pipelineLayout,
197 const VkShaderModule vertexShaderModule,
198 const VkShaderModule tessellationControlShaderModule,
199 const VkShaderModule tessellationEvalShaderModule,
200 const VkShaderModule geometryShaderModule,
201 const VkShaderModule fragmentShaderModule,
202 const VkRenderPass renderPass,
203 const std::vector<VkViewport>& viewports,
204 const std::vector<VkRect2D>& scissors,
205 const VkPrimitiveTopology topology,
206 const deUint32 subpass,
207 const deUint32 patchControlPoints,
208 const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo,
209 const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
210 const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo,
211 const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo,
212 const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo,
213 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfo,
214 const deUint32 vertexShaderStageCreateFlags,
215 const deUint32 tessellationControlShaderStageCreateFlags,
216 const deUint32 tessellationEvalShaderStageCreateFlags,
217 const deUint32 geometryShaderStageCreateFlags,
218 const deUint32 fragmentShaderStageCreateFlags,
219 const deUint32 requiredSubgroupSize[5])
220 {
221 const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
222 const bool hasTessellation = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
223
224 VkPipelineShaderStageCreateInfo stageCreateInfo =
225 {
226 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
227 DE_NULL, // const void* pNext
228 0u, // VkPipelineShaderStageCreateFlags flags
229 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
230 DE_NULL, // VkShaderModule module
231 "main", // const char* pName
232 DE_NULL // const VkSpecializationInfo* pSpecializationInfo
233 };
234
235 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
236
237 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
238 {
239 {
240 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
241 DE_NULL,
242 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
243 },
244 {
245 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
246 DE_NULL,
247 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
248 },
249 {
250 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
251 DE_NULL,
252 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
253 },
254 {
255 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
256 DE_NULL,
257 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
258 },
259 {
260 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
261 DE_NULL,
262 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
263 },
264 };
265 {
266 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
267 stageCreateInfo.flags = vertexShaderStageCreateFlags;
268 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
269 stageCreateInfo.module = vertexShaderModule;
270 pipelineShaderStageParams.push_back(stageCreateInfo);
271 }
272
273 if (tessellationControlShaderModule != DE_NULL)
274 {
275 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
276 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
277 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
278 stageCreateInfo.module = tessellationControlShaderModule;
279 pipelineShaderStageParams.push_back(stageCreateInfo);
280 }
281
282 if (tessellationEvalShaderModule != DE_NULL)
283 {
284 stageCreateInfo.pNext = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
285 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
286 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
287 stageCreateInfo.module = tessellationEvalShaderModule;
288 pipelineShaderStageParams.push_back(stageCreateInfo);
289 }
290
291 if (geometryShaderModule != DE_NULL)
292 {
293 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
294 stageCreateInfo.flags = geometryShaderStageCreateFlags;
295 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
296 stageCreateInfo.module = geometryShaderModule;
297 pipelineShaderStageParams.push_back(stageCreateInfo);
298 }
299
300 if (fragmentShaderModule != DE_NULL)
301 {
302 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
303 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
304 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
305 stageCreateInfo.module = fragmentShaderModule;
306 pipelineShaderStageParams.push_back(stageCreateInfo);
307 }
308
309 const VkVertexInputBindingDescription vertexInputBindingDescription =
310 {
311 0u, // deUint32 binding
312 sizeof(tcu::Vec4), // deUint32 stride
313 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
314 };
315
316 const VkVertexInputAttributeDescription vertexInputAttributeDescription =
317 {
318 0u, // deUint32 location
319 0u, // deUint32 binding
320 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
321 0u // deUint32 offset
322 };
323
324 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault =
325 {
326 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
327 DE_NULL, // const void* pNext
328 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
329 1u, // deUint32 vertexBindingDescriptionCount
330 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
331 1u, // deUint32 vertexAttributeDescriptionCount
332 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
333 };
334
335 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
336 {
337 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
338 DE_NULL, // const void* pNext
339 0u, // VkPipelineInputAssemblyStateCreateFlags flags
340 topology, // VkPrimitiveTopology topology
341 VK_FALSE // VkBool32 primitiveRestartEnable
342 };
343
344 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo =
345 {
346 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
347 DE_NULL, // const void* pNext
348 0u, // VkPipelineTessellationStateCreateFlags flags
349 patchControlPoints // deUint32 patchControlPoints
350 };
351
352 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
353 {
354 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
355 DE_NULL, // const void* pNext
356 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
357 viewports.empty() ? 1u : (deUint32)viewports.size(), // deUint32 viewportCount
358 viewports.empty() ? DE_NULL : &viewports[0], // const VkViewport* pViewports
359 viewports.empty() ? 1u : (deUint32)scissors.size(), // deUint32 scissorCount
360 scissors.empty() ? DE_NULL : &scissors[0] // const VkRect2D* pScissors
361 };
362
363 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault =
364 {
365 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
366 DE_NULL, // const void* pNext
367 0u, // VkPipelineRasterizationStateCreateFlags flags
368 VK_FALSE, // VkBool32 depthClampEnable
369 disableRasterization, // VkBool32 rasterizerDiscardEnable
370 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
371 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
372 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
373 VK_FALSE, // VkBool32 depthBiasEnable
374 0.0f, // float depthBiasConstantFactor
375 0.0f, // float depthBiasClamp
376 0.0f, // float depthBiasSlopeFactor
377 1.0f // float lineWidth
378 };
379
380 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault =
381 {
382 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
383 DE_NULL, // const void* pNext
384 0u, // VkPipelineMultisampleStateCreateFlags flags
385 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
386 VK_FALSE, // VkBool32 sampleShadingEnable
387 1.0f, // float minSampleShading
388 DE_NULL, // const VkSampleMask* pSampleMask
389 VK_FALSE, // VkBool32 alphaToCoverageEnable
390 VK_FALSE // VkBool32 alphaToOneEnable
391 };
392
393 const VkStencilOpState stencilOpState =
394 {
395 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
396 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
397 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
398 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
399 0, // deUint32 compareMask
400 0, // deUint32 writeMask
401 0 // deUint32 reference
402 };
403
404 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault =
405 {
406 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
407 DE_NULL, // const void* pNext
408 0u, // VkPipelineDepthStencilStateCreateFlags flags
409 VK_FALSE, // VkBool32 depthTestEnable
410 VK_FALSE, // VkBool32 depthWriteEnable
411 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
412 VK_FALSE, // VkBool32 depthBoundsTestEnable
413 VK_FALSE, // VkBool32 stencilTestEnable
414 stencilOpState, // VkStencilOpState front
415 stencilOpState, // VkStencilOpState back
416 0.0f, // float minDepthBounds
417 1.0f, // float maxDepthBounds
418 };
419
420 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
421 {
422 VK_FALSE, // VkBool32 blendEnable
423 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
424 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
425 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
426 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
427 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
428 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
429 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
430 | VK_COLOR_COMPONENT_G_BIT
431 | VK_COLOR_COMPONENT_B_BIT
432 | VK_COLOR_COMPONENT_A_BIT
433 };
434
435 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault =
436 {
437 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
438 DE_NULL, // const void* pNext
439 0u, // VkPipelineColorBlendStateCreateFlags flags
440 VK_FALSE, // VkBool32 logicOpEnable
441 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
442 1u, // deUint32 attachmentCount
443 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
444 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4]
445 };
446
447 std::vector<VkDynamicState> dynamicStates;
448
449 if (viewports.empty())
450 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
451 if (scissors.empty())
452 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
453
454 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault =
455 {
456 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
457 DE_NULL, // const void* pNext
458 0u, // VkPipelineDynamicStateCreateFlags flags
459 (deUint32)dynamicStates.size(), // deUint32 dynamicStateCount
460 dynamicStates.empty() ? DE_NULL : &dynamicStates[0] // const VkDynamicState* pDynamicStates
461 };
462
463 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfoDefaultPtr = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
464
465 const VkGraphicsPipelineCreateInfo pipelineCreateInfo =
466 {
467 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
468 DE_NULL, // const void* pNext
469 0u, // VkPipelineCreateFlags flags
470 (deUint32)pipelineShaderStageParams.size(), // deUint32 stageCount
471 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
472 vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
473 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
474 hasTessellation ? &tessStateCreateInfo : DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState
475 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
476 rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
477 multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
478 depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
479 colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
480 dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
481 pipelineLayout, // VkPipelineLayout layout
482 renderPass, // VkRenderPass renderPass
483 subpass, // deUint32 subpass
484 DE_NULL, // VkPipeline basePipelineHandle
485 0 // deInt32 basePipelineIndex;
486 };
487
488 return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
489 }
490
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const deUint32 vertexShaderStageCreateFlags=0u,const deUint32 tessellationControlShaderStageCreateFlags=0u,const deUint32 tessellationEvalShaderStageCreateFlags=0u,const deUint32 geometryShaderStageCreateFlags=0u,const deUint32 fragmentShaderStageCreateFlags=0u,const deUint32 requiredSubgroupSize[5]=DE_NULL)491 Move<VkPipeline> makeGraphicsPipeline(Context& context,
492 const VkPipelineLayout pipelineLayout,
493 const VkShaderStageFlags stages,
494 const VkShaderModule vertexShaderModule,
495 const VkShaderModule fragmentShaderModule,
496 const VkShaderModule geometryShaderModule,
497 const VkShaderModule tessellationControlModule,
498 const VkShaderModule tessellationEvaluationModule,
499 const VkRenderPass renderPass,
500 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
501 const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
502 const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
503 const bool frameBufferTests = false,
504 const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
505 const deUint32 vertexShaderStageCreateFlags = 0u,
506 const deUint32 tessellationControlShaderStageCreateFlags = 0u,
507 const deUint32 tessellationEvalShaderStageCreateFlags = 0u,
508 const deUint32 geometryShaderStageCreateFlags = 0u,
509 const deUint32 fragmentShaderStageCreateFlags = 0u,
510 const deUint32 requiredSubgroupSize[5] = DE_NULL)
511 {
512 std::vector<VkViewport> noViewports;
513 std::vector<VkRect2D> noScissors;
514
515 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
516 {
517 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
518 DE_NULL, // const void* pNext;
519 0u, // VkPipelineVertexInputStateCreateFlags flags;
520 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // deUint32 vertexBindingDescriptionCount;
521 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
522 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
523 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
524 };
525
526 const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
527 const VkColorComponentFlags colorComponent =
528 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
529 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
530 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
531 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
532
533 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
534 {
535 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
536 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
537 colorComponent
538 };
539
540 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
541 {
542 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
543 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
544 { 0.0f, 0.0f, 0.0f, 0.0f }
545 };
546
547 const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
548
549 return makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
550 context.getDevice(), // const VkDevice device
551 pipelineLayout, // const VkPipelineLayout pipelineLayout
552 vertexShaderModule, // const VkShaderModule vertexShaderModule
553 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
554 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
555 geometryShaderModule, // const VkShaderModule geometryShaderModule
556 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
557 renderPass, // const VkRenderPass renderPass
558 noViewports, // const std::vector<VkViewport>& viewports
559 noScissors, // const std::vector<VkRect2D>& scissors
560 topology, // const VkPrimitiveTopology topology
561 0u, // const deUint32 subpass
562 patchControlPoints, // const deUint32 patchControlPoints
563 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
564 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
565 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
566 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
567 &colorBlendStateCreateInfo, // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
568 DE_NULL, // const VkPipelineDynamicStateCreateInfo*
569 vertexShaderStageCreateFlags, // const deUint32 vertexShaderStageCreateFlags,
570 tessellationControlShaderStageCreateFlags, // const deUint32 tessellationControlShaderStageCreateFlags
571 tessellationEvalShaderStageCreateFlags, // const deUint32 tessellationEvalShaderStageCreateFlags
572 geometryShaderStageCreateFlags, // const deUint32 geometryShaderStageCreateFlags
573 fragmentShaderStageCreateFlags, // const deUint32 fragmentShaderStageCreateFlags
574 requiredSubgroupSize); // const deUint32 requiredSubgroupSize[5]
575 }
576
makeCommandBuffer(Context & context,const VkCommandPool commandPool)577 Move<VkCommandBuffer> makeCommandBuffer(
578 Context& context, const VkCommandPool commandPool)
579 {
580 const VkCommandBufferAllocateInfo bufferAllocateParams =
581 {
582 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
583 DE_NULL, // const void* pNext;
584 commandPool, // VkCommandPool commandPool;
585 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
586 1u, // deUint32 bufferCount;
587 };
588 return allocateCommandBuffer(context.getDeviceInterface(),
589 context.getDevice(), &bufferAllocateParams);
590 }
591
592 struct Buffer;
593 struct Image;
594
595 struct BufferOrImage
596 {
isImage__anone2f77d5c0111::BufferOrImage597 bool isImage() const
598 {
599 return m_isImage;
600 }
601
getAsBuffer__anone2f77d5c0111::BufferOrImage602 Buffer* getAsBuffer()
603 {
604 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
605 return reinterpret_cast<Buffer* >(this);
606 }
607
getAsImage__anone2f77d5c0111::BufferOrImage608 Image* getAsImage()
609 {
610 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
611 return reinterpret_cast<Image*>(this);
612 }
613
getType__anone2f77d5c0111::BufferOrImage614 virtual VkDescriptorType getType() const
615 {
616 if (m_isImage)
617 {
618 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
619 }
620 else
621 {
622 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
623 }
624 }
625
getAllocation__anone2f77d5c0111::BufferOrImage626 Allocation& getAllocation() const
627 {
628 return *m_allocation;
629 }
630
~BufferOrImage__anone2f77d5c0111::BufferOrImage631 virtual ~BufferOrImage() {}
632
633 protected:
BufferOrImage__anone2f77d5c0111::BufferOrImage634 explicit BufferOrImage(bool image) : m_isImage(image) {}
635
636 bool m_isImage;
637 de::details::MovePtr<Allocation> m_allocation;
638 };
639
640 struct Buffer : public BufferOrImage
641 {
Buffer__anone2f77d5c0111::Buffer642 explicit Buffer(
643 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
644 : BufferOrImage (false)
645 , m_sizeInBytes (sizeInBytes)
646 , m_usage (usage)
647 {
648 const DeviceInterface& vkd = context.getDeviceInterface();
649 const VkDevice device = context.getDevice();
650
651 const vk::VkBufferCreateInfo bufferCreateInfo =
652 {
653 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
654 DE_NULL,
655 0u,
656 m_sizeInBytes,
657 m_usage,
658 VK_SHARING_MODE_EXCLUSIVE,
659 0u,
660 DE_NULL,
661 };
662 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
663
664 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
665
666 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
667 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
668 }
669
getType__anone2f77d5c0111::Buffer670 virtual VkDescriptorType getType() const
671 {
672 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
673 {
674 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
675 }
676 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
677 }
678
getBuffer__anone2f77d5c0111::Buffer679 VkBuffer getBuffer () const
680 {
681 return *m_buffer;
682 }
683
getBufferPtr__anone2f77d5c0111::Buffer684 const VkBuffer* getBufferPtr () const
685 {
686 return &(*m_buffer);
687 }
688
getSize__anone2f77d5c0111::Buffer689 VkDeviceSize getSize () const
690 {
691 return m_sizeInBytes;
692 }
693
694 private:
695 Move<VkBuffer> m_buffer;
696 VkDeviceSize m_sizeInBytes;
697 const VkBufferUsageFlags m_usage;
698 };
699
700 struct Image : public BufferOrImage
701 {
Image__anone2f77d5c0111::Image702 explicit Image(Context& context, deUint32 width, deUint32 height,
703 VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
704 : BufferOrImage(true)
705 {
706 const DeviceInterface& vk = context.getDeviceInterface();
707 const VkDevice device = context.getDevice();
708 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
709
710 const VkImageCreateInfo imageCreateInfo =
711 {
712 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
713 format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
714 VK_IMAGE_TILING_OPTIMAL, usage,
715 VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
716 VK_IMAGE_LAYOUT_UNDEFINED
717 };
718
719 const VkComponentMapping componentMapping =
720 {
721 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
722 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
723 };
724
725 const VkImageSubresourceRange subresourceRange =
726 {
727 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
728 0u, //deUint32 baseMipLevel
729 1u, //deUint32 levelCount
730 0u, //deUint32 baseArrayLayer
731 1u //deUint32 layerCount
732 };
733
734 const VkSamplerCreateInfo samplerCreateInfo =
735 {
736 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
737 DE_NULL,
738 0u,
739 VK_FILTER_NEAREST,
740 VK_FILTER_NEAREST,
741 VK_SAMPLER_MIPMAP_MODE_NEAREST,
742 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
743 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
744 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
745 0.0f,
746 VK_FALSE,
747 1.0f,
748 DE_FALSE,
749 VK_COMPARE_OP_ALWAYS,
750 0.0f,
751 0.0f,
752 VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
753 VK_FALSE,
754 };
755
756 m_image = createImage(vk, device, &imageCreateInfo);
757
758 VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);
759
760 req.size *= 2;
761 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
762
763 VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
764
765 const VkImageViewCreateInfo imageViewCreateInfo =
766 {
767 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
768 VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
769 subresourceRange
770 };
771
772 m_imageView = createImageView(vk, device, &imageViewCreateInfo);
773 m_sampler = createSampler(vk, device, &samplerCreateInfo);
774
775 // Transition input image layouts
776 {
777 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
778 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
779
780 beginCommandBuffer(vk, *cmdBuffer);
781
782 const VkImageMemoryBarrier imageBarrier = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
783 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
784
785 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
786 (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
787
788 endCommandBuffer(vk, *cmdBuffer);
789 submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
790 }
791 }
792
getImage__anone2f77d5c0111::Image793 VkImage getImage () const
794 {
795 return *m_image;
796 }
797
getImageView__anone2f77d5c0111::Image798 VkImageView getImageView () const
799 {
800 return *m_imageView;
801 }
802
getSampler__anone2f77d5c0111::Image803 VkSampler getSampler () const
804 {
805 return *m_sampler;
806 }
807
808 private:
809 Move<VkImage> m_image;
810 Move<VkImageView> m_imageView;
811 Move<VkSampler> m_sampler;
812 };
813 }
814
getSharedMemoryBallotHelper()815 std::string vkt::subgroups::getSharedMemoryBallotHelper()
816 {
817 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
818 "uvec4 sharedMemoryBallot(bool vote)\n"
819 "{\n"
820 " uint groupOffset = gl_SubgroupID;\n"
821 " // One invocation in the group 0's the whole group's data\n"
822 " if (subgroupElect())\n"
823 " {\n"
824 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
825 " }\n"
826 " subgroupMemoryBarrierShared();\n"
827 " if (vote)\n"
828 " {\n"
829 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
830 " const highp uint bitToSet = 1u << invocationId;\n"
831 " switch (gl_SubgroupInvocationID / 32)\n"
832 " {\n"
833 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
834 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
835 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
836 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
837 " }\n"
838 " }\n"
839 " subgroupMemoryBarrierShared();\n"
840 " return superSecretComputeShaderHelper[groupOffset];\n"
841 "}\n";
842 }
843
getSharedMemoryBallotHelperARB()844 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
845 {
846 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
847 "uint64_t sharedMemoryBallot(bool vote)\n"
848 "{\n"
849 " uint groupOffset = gl_SubgroupID;\n"
850 " // One invocation in the group 0's the whole group's data\n"
851 " if (subgroupElect())\n"
852 " {\n"
853 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
854 " }\n"
855 " subgroupMemoryBarrierShared();\n"
856 " if (vote)\n"
857 " {\n"
858 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
859 " const highp uint bitToSet = 1u << invocationId;\n"
860 " switch (gl_SubgroupInvocationID / 32)\n"
861 " {\n"
862 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
863 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
864 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
865 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
866 " }\n"
867 " }\n"
868 " subgroupMemoryBarrierShared();\n"
869 " return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
870 "}\n";
871 }
872
getSubgroupSize(Context & context)873 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
874 {
875 VkPhysicalDeviceSubgroupProperties subgroupProperties;
876 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
877 subgroupProperties.pNext = DE_NULL;
878
879 VkPhysicalDeviceProperties2 properties;
880 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
881 properties.pNext = &subgroupProperties;
882
883 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
884
885 return subgroupProperties.subgroupSize;
886 }
887
maxSupportedSubgroupSize()888 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
889 return 128u;
890 }
891
getShaderStageName(VkShaderStageFlags stage)892 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
893 {
894 switch (stage)
895 {
896 default:
897 DE_FATAL("Unhandled stage!");
898 return "";
899 case VK_SHADER_STAGE_COMPUTE_BIT:
900 return "compute";
901 case VK_SHADER_STAGE_FRAGMENT_BIT:
902 return "fragment";
903 case VK_SHADER_STAGE_VERTEX_BIT:
904 return "vertex";
905 case VK_SHADER_STAGE_GEOMETRY_BIT:
906 return "geometry";
907 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
908 return "tess_control";
909 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
910 return "tess_eval";
911 }
912 }
913
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)914 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
915 {
916 switch (bit)
917 {
918 default:
919 DE_FATAL("Unknown subgroup feature category!");
920 return "";
921 case VK_SUBGROUP_FEATURE_BASIC_BIT:
922 return "VK_SUBGROUP_FEATURE_BASIC_BIT";
923 case VK_SUBGROUP_FEATURE_VOTE_BIT:
924 return "VK_SUBGROUP_FEATURE_VOTE_BIT";
925 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
926 return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
927 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
928 return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
929 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
930 return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
931 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
932 return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
933 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
934 return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
935 case VK_SUBGROUP_FEATURE_QUAD_BIT:
936 return "VK_SUBGROUP_FEATURE_QUAD_BIT";
937 }
938 }
939
addNoSubgroupShader(SourceCollections & programCollection)940 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
941 {
942 {
943 /*
944 "#version 450\n"
945 "void main (void)\n"
946 "{\n"
947 " float pixelSize = 2.0f/1024.0f;\n"
948 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
949 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
950 " gl_PointSize = 1.0f;\n"
951 "}\n"
952 */
953 const std::string vertNoSubgroup =
954 "; SPIR-V\n"
955 "; Version: 1.3\n"
956 "; Generator: Khronos Glslang Reference Front End; 1\n"
957 "; Bound: 37\n"
958 "; Schema: 0\n"
959 "OpCapability Shader\n"
960 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
961 "OpMemoryModel Logical GLSL450\n"
962 "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
963 "OpMemberDecorate %20 0 BuiltIn Position\n"
964 "OpMemberDecorate %20 1 BuiltIn PointSize\n"
965 "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
966 "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
967 "OpDecorate %20 Block\n"
968 "OpDecorate %26 BuiltIn VertexIndex\n"
969 "%2 = OpTypeVoid\n"
970 "%3 = OpTypeFunction %2\n"
971 "%6 = OpTypeFloat 32\n"
972 "%7 = OpTypePointer Function %6\n"
973 "%9 = OpConstant %6 0.00195313\n"
974 "%12 = OpConstant %6 2\n"
975 "%14 = OpConstant %6 1\n"
976 "%16 = OpTypeVector %6 4\n"
977 "%17 = OpTypeInt 32 0\n"
978 "%18 = OpConstant %17 1\n"
979 "%19 = OpTypeArray %6 %18\n"
980 "%20 = OpTypeStruct %16 %6 %19 %19\n"
981 "%21 = OpTypePointer Output %20\n"
982 "%22 = OpVariable %21 Output\n"
983 "%23 = OpTypeInt 32 1\n"
984 "%24 = OpConstant %23 0\n"
985 "%25 = OpTypePointer Input %23\n"
986 "%26 = OpVariable %25 Input\n"
987 "%33 = OpConstant %6 0\n"
988 "%35 = OpTypePointer Output %16\n"
989 "%37 = OpConstant %23 1\n"
990 "%38 = OpTypePointer Output %6\n"
991 "%4 = OpFunction %2 None %3\n"
992 "%5 = OpLabel\n"
993 "%8 = OpVariable %7 Function\n"
994 "%10 = OpVariable %7 Function\n"
995 "OpStore %8 %9\n"
996 "%11 = OpLoad %6 %8\n"
997 "%13 = OpFDiv %6 %11 %12\n"
998 "%15 = OpFSub %6 %13 %14\n"
999 "OpStore %10 %15\n"
1000 "%27 = OpLoad %23 %26\n"
1001 "%28 = OpConvertSToF %6 %27\n"
1002 "%29 = OpLoad %6 %8\n"
1003 "%30 = OpFMul %6 %28 %29\n"
1004 "%31 = OpLoad %6 %10\n"
1005 "%32 = OpFAdd %6 %30 %31\n"
1006 "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1007 "%36 = OpAccessChain %35 %22 %24\n"
1008 "OpStore %36 %34\n"
1009 "%39 = OpAccessChain %38 %22 %37\n"
1010 "OpStore %39 %14\n"
1011 "OpReturn\n"
1012 "OpFunctionEnd\n";
1013 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1014 }
1015
1016 {
1017 /*
1018 "#version 450\n"
1019 "layout(vertices=1) out;\n"
1020 "\n"
1021 "void main (void)\n"
1022 "{\n"
1023 " if (gl_InvocationID == 0)\n"
1024 " {\n"
1025 " gl_TessLevelOuter[0] = 1.0f;\n"
1026 " gl_TessLevelOuter[1] = 1.0f;\n"
1027 " }\n"
1028 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1029 "}\n"
1030 */
1031 const std::string tescNoSubgroup =
1032 "; SPIR-V\n"
1033 "; Version: 1.3\n"
1034 "; Generator: Khronos Glslang Reference Front End; 1\n"
1035 "; Bound: 45\n"
1036 "; Schema: 0\n"
1037 "OpCapability Tessellation\n"
1038 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1039 "OpMemoryModel Logical GLSL450\n"
1040 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1041 "OpExecutionMode %4 OutputVertices 1\n"
1042 "OpDecorate %8 BuiltIn InvocationId\n"
1043 "OpDecorate %20 Patch\n"
1044 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1045 "OpMemberDecorate %29 0 BuiltIn Position\n"
1046 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1047 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1048 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1049 "OpDecorate %29 Block\n"
1050 "OpMemberDecorate %34 0 BuiltIn Position\n"
1051 "OpMemberDecorate %34 1 BuiltIn PointSize\n"
1052 "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1053 "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1054 "OpDecorate %34 Block\n"
1055 "%2 = OpTypeVoid\n"
1056 "%3 = OpTypeFunction %2\n"
1057 "%6 = OpTypeInt 32 1\n"
1058 "%7 = OpTypePointer Input %6\n"
1059 "%8 = OpVariable %7 Input\n"
1060 "%10 = OpConstant %6 0\n"
1061 "%11 = OpTypeBool\n"
1062 "%15 = OpTypeFloat 32\n"
1063 "%16 = OpTypeInt 32 0\n"
1064 "%17 = OpConstant %16 4\n"
1065 "%18 = OpTypeArray %15 %17\n"
1066 "%19 = OpTypePointer Output %18\n"
1067 "%20 = OpVariable %19 Output\n"
1068 "%21 = OpConstant %15 1\n"
1069 "%22 = OpTypePointer Output %15\n"
1070 "%24 = OpConstant %6 1\n"
1071 "%26 = OpTypeVector %15 4\n"
1072 "%27 = OpConstant %16 1\n"
1073 "%28 = OpTypeArray %15 %27\n"
1074 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1075 "%30 = OpTypeArray %29 %27\n"
1076 "%31 = OpTypePointer Output %30\n"
1077 "%32 = OpVariable %31 Output\n"
1078 "%34 = OpTypeStruct %26 %15 %28 %28\n"
1079 "%35 = OpConstant %16 32\n"
1080 "%36 = OpTypeArray %34 %35\n"
1081 "%37 = OpTypePointer Input %36\n"
1082 "%38 = OpVariable %37 Input\n"
1083 "%40 = OpTypePointer Input %26\n"
1084 "%43 = OpTypePointer Output %26\n"
1085 "%4 = OpFunction %2 None %3\n"
1086 "%5 = OpLabel\n"
1087 "%9 = OpLoad %6 %8\n"
1088 "%12 = OpIEqual %11 %9 %10\n"
1089 "OpSelectionMerge %14 None\n"
1090 "OpBranchConditional %12 %13 %14\n"
1091 "%13 = OpLabel\n"
1092 "%23 = OpAccessChain %22 %20 %10\n"
1093 "OpStore %23 %21\n"
1094 "%25 = OpAccessChain %22 %20 %24\n"
1095 "OpStore %25 %21\n"
1096 "OpBranch %14\n"
1097 "%14 = OpLabel\n"
1098 "%33 = OpLoad %6 %8\n"
1099 "%39 = OpLoad %6 %8\n"
1100 "%41 = OpAccessChain %40 %38 %39 %10\n"
1101 "%42 = OpLoad %26 %41\n"
1102 "%44 = OpAccessChain %43 %32 %33 %10\n"
1103 "OpStore %44 %42\n"
1104 "OpReturn\n"
1105 "OpFunctionEnd\n";
1106 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1107 }
1108
1109 {
1110 /*
1111 "#version 450\n"
1112 "layout(isolines) in;\n"
1113 "\n"
1114 "void main (void)\n"
1115 "{\n"
1116 " float pixelSize = 2.0f/1024.0f;\n"
1117 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1118 "}\n";
1119 */
1120 const std::string teseNoSubgroup =
1121 "; SPIR-V\n"
1122 "; Version: 1.3\n"
1123 "; Generator: Khronos Glslang Reference Front End; 2\n"
1124 "; Bound: 42\n"
1125 "; Schema: 0\n"
1126 "OpCapability Tessellation\n"
1127 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1128 "OpMemoryModel Logical GLSL450\n"
1129 "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1130 "OpExecutionMode %4 Isolines\n"
1131 "OpExecutionMode %4 SpacingEqual\n"
1132 "OpExecutionMode %4 VertexOrderCcw\n"
1133 "OpMemberDecorate %14 0 BuiltIn Position\n"
1134 "OpMemberDecorate %14 1 BuiltIn PointSize\n"
1135 "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1136 "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1137 "OpDecorate %14 Block\n"
1138 "OpMemberDecorate %19 0 BuiltIn Position\n"
1139 "OpMemberDecorate %19 1 BuiltIn PointSize\n"
1140 "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1141 "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1142 "OpDecorate %19 Block\n"
1143 "OpDecorate %29 BuiltIn TessCoord\n"
1144 "%2 = OpTypeVoid\n"
1145 "%3 = OpTypeFunction %2\n"
1146 "%6 = OpTypeFloat 32\n"
1147 "%7 = OpTypePointer Function %6\n"
1148 "%9 = OpConstant %6 0.00195313\n"
1149 "%10 = OpTypeVector %6 4\n"
1150 "%11 = OpTypeInt 32 0\n"
1151 "%12 = OpConstant %11 1\n"
1152 "%13 = OpTypeArray %6 %12\n"
1153 "%14 = OpTypeStruct %10 %6 %13 %13\n"
1154 "%15 = OpTypePointer Output %14\n"
1155 "%16 = OpVariable %15 Output\n"
1156 "%17 = OpTypeInt 32 1\n"
1157 "%18 = OpConstant %17 0\n"
1158 "%19 = OpTypeStruct %10 %6 %13 %13\n"
1159 "%20 = OpConstant %11 32\n"
1160 "%21 = OpTypeArray %19 %20\n"
1161 "%22 = OpTypePointer Input %21\n"
1162 "%23 = OpVariable %22 Input\n"
1163 "%24 = OpTypePointer Input %10\n"
1164 "%27 = OpTypeVector %6 3\n"
1165 "%28 = OpTypePointer Input %27\n"
1166 "%29 = OpVariable %28 Input\n"
1167 "%30 = OpConstant %11 0\n"
1168 "%31 = OpTypePointer Input %6\n"
1169 "%36 = OpConstant %6 2\n"
1170 "%40 = OpTypePointer Output %10\n"
1171 "%4 = OpFunction %2 None %3\n"
1172 "%5 = OpLabel\n"
1173 "%8 = OpVariable %7 Function\n"
1174 "OpStore %8 %9\n"
1175 "%25 = OpAccessChain %24 %23 %18 %18\n"
1176 "%26 = OpLoad %10 %25\n"
1177 "%32 = OpAccessChain %31 %29 %30\n"
1178 "%33 = OpLoad %6 %32\n"
1179 "%34 = OpLoad %6 %8\n"
1180 "%35 = OpFMul %6 %33 %34\n"
1181 "%37 = OpFDiv %6 %35 %36\n"
1182 "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1183 "%39 = OpFAdd %10 %26 %38\n"
1184 "%41 = OpAccessChain %40 %16 %18\n"
1185 "OpStore %41 %39\n"
1186 "OpReturn\n"
1187 "OpFunctionEnd\n";
1188 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1189 }
1190
1191 }
1192
1193
getVertShaderForStage(vk::VkShaderStageFlags stage)1194 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
1195 {
1196 switch (stage)
1197 {
1198 default:
1199 DE_FATAL("Unhandled stage!");
1200 return "";
1201 case VK_SHADER_STAGE_FRAGMENT_BIT:
1202 return
1203 "#version 450\n"
1204 "void main (void)\n"
1205 "{\n"
1206 " float pixelSize = 2.0f/1024.0f;\n"
1207 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1208 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1209 "}\n";
1210 case VK_SHADER_STAGE_GEOMETRY_BIT:
1211 return
1212 "#version 450\n"
1213 "void main (void)\n"
1214 "{\n"
1215 "}\n";
1216 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1217 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1218 return
1219 "#version 450\n"
1220 "void main (void)\n"
1221 "{\n"
1222 "}\n";
1223 }
1224 }
1225
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,std::string extHeader,std::string testSrc,std::string helperStr)1226 void vkt::subgroups::initStdFrameBufferPrograms( SourceCollections& programCollection,
1227 const vk::ShaderBuildOptions& buildOptions,
1228 VkShaderStageFlags shaderStage,
1229 VkFormat format,
1230 bool gsPointSize,
1231 std::string extHeader,
1232 std::string testSrc,
1233 std::string helperStr)
1234 {
1235 subgroups::setFragmentShaderFrameBuffer(programCollection);
1236
1237 if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1238 subgroups::setVertexShaderFrameBuffer(programCollection);
1239
1240 if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1241 {
1242 std::ostringstream vertex;
1243 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1244 << extHeader.c_str()
1245 << "layout(location = 0) in highp vec4 in_position;\n"
1246 << "layout(location = 0) out float result;\n"
1247 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1248 << "{\n"
1249 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1250 << "};\n"
1251 << "\n"
1252 << helperStr.c_str()
1253 << "void main (void)\n"
1254 << "{\n"
1255 << " uint tempRes;\n"
1256 << testSrc
1257 << " result = float(tempRes);\n"
1258 << " gl_Position = in_position;\n"
1259 << " gl_PointSize = 1.0f;\n"
1260 << "}\n";
1261 programCollection.glslSources.add("vert")
1262 << glu::VertexSource(vertex.str()) << buildOptions;
1263 }
1264 else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1265 {
1266 std::ostringstream geometry;
1267
1268 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1269 << extHeader.c_str()
1270 << "layout(points) in;\n"
1271 << "layout(points, max_vertices = 1) out;\n"
1272 << "layout(location = 0) out float out_color;\n"
1273 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1274 << "{\n"
1275 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1276 << "};\n"
1277 << "\n"
1278 << helperStr.c_str()
1279 << "void main (void)\n"
1280 << "{\n"
1281 << " uint tempRes;\n"
1282 << testSrc
1283 << " out_color = float(tempRes);\n"
1284 << " gl_Position = gl_in[0].gl_Position;\n"
1285 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1286 << " EmitVertex();\n"
1287 << " EndPrimitive();\n"
1288 << "}\n";
1289
1290 programCollection.glslSources.add("geometry")
1291 << glu::GeometrySource(geometry.str()) << buildOptions;
1292 }
1293 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1294 {
1295 std::ostringstream controlSource;
1296 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1297 << extHeader.c_str()
1298 << "layout(vertices = 2) out;\n"
1299 << "layout(location = 0) out float out_color[];\n"
1300 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1301 << "{\n"
1302 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1303 << "};\n"
1304 << "\n"
1305 << helperStr.c_str()
1306 << "void main (void)\n"
1307 << "{\n"
1308 << " if (gl_InvocationID == 0)\n"
1309 << " {\n"
1310 << " gl_TessLevelOuter[0] = 1.0f;\n"
1311 << " gl_TessLevelOuter[1] = 1.0f;\n"
1312 << " }\n"
1313 << " uint tempRes;\n"
1314 << testSrc
1315 << " out_color[gl_InvocationID] = float(tempRes);\n"
1316 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1317 << (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1318 << "}\n";
1319
1320 programCollection.glslSources.add("tesc")
1321 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1322 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1323 }
1324 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1325 {
1326 ostringstream evaluationSource;
1327 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1328 << extHeader.c_str()
1329 << "layout(isolines, equal_spacing, ccw ) in;\n"
1330 << "layout(location = 0) out float out_color;\n"
1331 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1332 << "{\n"
1333 << " " << subgroups::getFormatNameForGLSL(format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
1334 << "};\n"
1335 << "\n"
1336 << helperStr.c_str()
1337 << "void main (void)\n"
1338 << "{\n"
1339 << " uint tempRes;\n"
1340 << testSrc
1341 << " out_color = float(tempRes);\n"
1342 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1343 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1344 << "}\n";
1345
1346 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1347 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1348 }
1349 else
1350 {
1351 DE_FATAL("Unsupported shader stage");
1352 }
1353 }
1354
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,std::string extHeader,std::string testSrc,std::string helperStr)1355 void vkt::subgroups::initStdPrograms( vk::SourceCollections& programCollection,
1356 const vk::ShaderBuildOptions& buildOptions,
1357 vk::VkShaderStageFlags shaderStage,
1358 vk::VkFormat format,
1359 bool gsPointSize,
1360 std::string extHeader,
1361 std::string testSrc,
1362 std::string helperStr)
1363 {
1364 if (shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
1365 {
1366 std::ostringstream src;
1367
1368 src << "#version 450\n"
1369 << extHeader.c_str()
1370 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1371 "local_size_z_id = 2) in;\n"
1372 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1373 << "{\n"
1374 << " uint result[];\n"
1375 << "};\n"
1376 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
1377 << "{\n"
1378 << " " << subgroups::getFormatNameForGLSL(format) << " data[];\n"
1379 << "};\n"
1380 << "\n"
1381 << helperStr.c_str()
1382 << "void main (void)\n"
1383 << "{\n"
1384 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1385 << " highp uint offset = globalSize.x * ((globalSize.y * "
1386 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1387 "gl_GlobalInvocationID.x;\n"
1388 << " uint tempRes;\n"
1389 << testSrc
1390 << " result[offset] = tempRes;\n"
1391 << "}\n";
1392
1393 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1394 }
1395 else
1396 {
1397 const string vertex =
1398 "#version 450\n"
1399 + extHeader +
1400 "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1401 "{\n"
1402 " uint result[];\n"
1403 "};\n"
1404 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1405 "{\n"
1406 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1407 "};\n"
1408 "\n"
1409 + helperStr +
1410 "void main (void)\n"
1411 "{\n"
1412 " uint tempRes;\n"
1413 + testSrc +
1414 " result[gl_VertexIndex] = tempRes;\n"
1415 " float pixelSize = 2.0f/1024.0f;\n"
1416 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1417 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1418 " gl_PointSize = 1.0f;\n"
1419 "}\n";
1420
1421 const string tesc =
1422 "#version 450\n"
1423 + extHeader +
1424 "layout(vertices=1) out;\n"
1425 "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
1426 "{\n"
1427 " uint result[];\n"
1428 "};\n"
1429 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1430 "{\n"
1431 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1432 "};\n"
1433 "\n"
1434 + helperStr +
1435 "void main (void)\n"
1436 "{\n"
1437 " uint tempRes;\n"
1438 + testSrc +
1439 " result[gl_PrimitiveID] = tempRes;\n"
1440 " if (gl_InvocationID == 0)\n"
1441 " {\n"
1442 " gl_TessLevelOuter[0] = 1.0f;\n"
1443 " gl_TessLevelOuter[1] = 1.0f;\n"
1444 " }\n"
1445 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1446 + (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1447 "}\n";
1448
1449 const string tese =
1450 "#version 450\n"
1451 + extHeader +
1452 "layout(isolines) in;\n"
1453 "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
1454 "{\n"
1455 " uint result[];\n"
1456 "};\n"
1457 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1458 "{\n"
1459 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1460 "};\n"
1461 "\n"
1462 + helperStr +
1463 "void main (void)\n"
1464 "{\n"
1465 " uint tempRes;\n"
1466 + testSrc +
1467 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1468 " float pixelSize = 2.0f/1024.0f;\n"
1469 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1470 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1471 "}\n";
1472
1473 const string geometry =
1474 "#version 450\n"
1475 + extHeader +
1476 "layout(${TOPOLOGY}) in;\n"
1477 "layout(points, max_vertices = 1) out;\n"
1478 "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
1479 "{\n"
1480 " uint result[];\n"
1481 "};\n"
1482 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
1483 "{\n"
1484 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1485 "};\n"
1486 "\n"
1487 + helperStr +
1488 "void main (void)\n"
1489 "{\n"
1490 " uint tempRes;\n"
1491 + testSrc +
1492 " result[gl_PrimitiveIDIn] = tempRes;\n"
1493 " gl_Position = gl_in[0].gl_Position;\n"
1494 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1495 " EmitVertex();\n"
1496 " EndPrimitive();\n"
1497 "}\n";
1498
1499 const string fragment =
1500 "#version 450\n"
1501 + extHeader +
1502 "layout(location = 0) out uint result;\n"
1503 "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
1504 "{\n"
1505 " " + subgroups::getFormatNameForGLSL(format) + " data[];\n"
1506 "};\n"
1507 + helperStr +
1508 "void main (void)\n"
1509 "{\n"
1510 " uint tempRes;\n"
1511 + testSrc +
1512 " result = tempRes;\n"
1513 "}\n";
1514
1515 subgroups::addNoSubgroupShader(programCollection);
1516
1517 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1518 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1519 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1520 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1521 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1522 }
1523 }
1524
isSubgroupSupported(Context & context)1525 bool vkt::subgroups::isSubgroupSupported(Context& context)
1526 {
1527 return context.contextSupports(vk::ApiVersion(1, 1, 0));
1528 }
1529
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1530 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
1531 Context& context, const VkShaderStageFlags stage)
1532 {
1533 VkPhysicalDeviceSubgroupProperties subgroupProperties;
1534 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1535 subgroupProperties.pNext = DE_NULL;
1536
1537 VkPhysicalDeviceProperties2 properties;
1538 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1539 properties.pNext = &subgroupProperties;
1540
1541 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1542
1543 return (stage & subgroupProperties.supportedStages) ? true : false;
1544 }
1545
areSubgroupOperationsRequiredForStage(VkShaderStageFlags stage)1546 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
1547 VkShaderStageFlags stage)
1548 {
1549 switch (stage)
1550 {
1551 default:
1552 return false;
1553 case VK_SHADER_STAGE_COMPUTE_BIT:
1554 return true;
1555 }
1556 }
1557
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1558 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
1559 Context& context,
1560 VkSubgroupFeatureFlagBits bit) {
1561 VkPhysicalDeviceSubgroupProperties subgroupProperties;
1562 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1563 subgroupProperties.pNext = DE_NULL;
1564
1565 VkPhysicalDeviceProperties2 properties;
1566 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1567 properties.pNext = &subgroupProperties;
1568
1569 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1570
1571 return (bit & subgroupProperties.supportedOperations) ? true : false;
1572 }
1573
isFragmentSSBOSupportedForDevice(Context & context)1574 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1575 {
1576 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1577 context.getInstanceInterface(), context.getPhysicalDevice());
1578 return features.fragmentStoresAndAtomics ? true : false;
1579 }
1580
isVertexSSBOSupportedForDevice(Context & context)1581 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1582 {
1583 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1584 context.getInstanceInterface(), context.getPhysicalDevice());
1585 return features.vertexPipelineStoresAndAtomics ? true : false;
1586 }
1587
isInt64SupportedForDevice(Context & context)1588 bool vkt::subgroups::isInt64SupportedForDevice(Context& context)
1589 {
1590 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1591 context.getInstanceInterface(), context.getPhysicalDevice());
1592 return features.shaderInt64 ? true : false;
1593 }
1594
isTessellationAndGeometryPointSizeSupported(Context & context)1595 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1596 {
1597 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1598 context.getInstanceInterface(), context.getPhysicalDevice());
1599 return features.shaderTessellationAndGeometryPointSize ? true : false;
1600 }
1601
is16BitUBOStorageSupported(Context & context)1602 bool vkt::subgroups::is16BitUBOStorageSupported(Context& context) {
1603 VkPhysicalDevice16BitStorageFeatures storage16bit;
1604 deMemset(&storage16bit, 0, sizeof(storage16bit));
1605 storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1606 storage16bit.pNext = DE_NULL;
1607
1608 VkPhysicalDeviceFeatures2 features2;
1609 deMemset(&features2, 0, sizeof(features2));
1610 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1611 features2.pNext = &storage16bit;
1612
1613 const PlatformInterface& platformInterface = context.getPlatformInterface();
1614 const VkInstance instance = context.getInstance();
1615 const InstanceDriver instanceDriver(platformInterface, instance);
1616
1617 instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1618 return bool(storage16bit.uniformAndStorageBuffer16BitAccess);
1619 }
1620
1621
is8BitUBOStorageSupported(Context & context)1622 bool vkt::subgroups::is8BitUBOStorageSupported(Context& context) {
1623
1624 VkPhysicalDevice8BitStorageFeatures storage8bit;
1625 deMemset(&storage8bit, 0, sizeof(storage8bit));
1626 storage8bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR;
1627 storage8bit.pNext = DE_NULL;
1628
1629 VkPhysicalDeviceFeatures2 features2;
1630 deMemset(&features2, 0, sizeof(features2));
1631 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1632 features2.pNext = &storage8bit;
1633
1634
1635 const PlatformInterface& platformInterface = context.getPlatformInterface();
1636 const VkInstance instance = context.getInstance();
1637 const InstanceDriver instanceDriver(platformInterface, instance);
1638
1639 instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1640 return bool(storage8bit.uniformAndStorageBuffer8BitAccess);
1641 }
1642
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1643 bool vkt::subgroups::isFormatSupportedForDevice(Context& context, vk::VkFormat format)
1644 {
1645 VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures subgroupExtendedTypesFeatures;
1646 deMemset(&subgroupExtendedTypesFeatures, 0, sizeof(subgroupExtendedTypesFeatures));
1647 subgroupExtendedTypesFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES;
1648 subgroupExtendedTypesFeatures.pNext = DE_NULL;
1649
1650 VkPhysicalDeviceShaderFloat16Int8Features float16Int8Features;
1651 deMemset(&float16Int8Features, 0, sizeof(float16Int8Features));
1652 float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
1653 float16Int8Features.pNext = DE_NULL;
1654
1655 VkPhysicalDeviceFeatures2 features2;
1656 deMemset(&features2, 0, sizeof(features2));
1657 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1658 features2.pNext = DE_NULL;
1659
1660 VkPhysicalDevice16BitStorageFeatures storage16bit;
1661 deMemset(&storage16bit, 0, sizeof(storage16bit));
1662 storage16bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
1663 storage16bit.pNext = DE_NULL;
1664 bool is16bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage");
1665
1666 VkPhysicalDevice8BitStorageFeatures storage8bit;
1667 deMemset(&storage8bit, 0, sizeof(storage8bit));
1668 storage8bit.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR;
1669 storage8bit.pNext = DE_NULL;
1670 bool is8bitStorageSupported = context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage");
1671
1672 if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1673 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1674 {
1675 features2.pNext = &subgroupExtendedTypesFeatures;
1676 subgroupExtendedTypesFeatures.pNext = &float16Int8Features;
1677 if ( is16bitStorageSupported )
1678 {
1679 float16Int8Features.pNext = &storage16bit;
1680 if (is8bitStorageSupported)
1681 {
1682 storage16bit.pNext = &storage8bit;
1683 }
1684 }
1685 else
1686 {
1687 if (is8bitStorageSupported)
1688 {
1689 float16Int8Features.pNext = &storage8bit;
1690 }
1691
1692 }
1693 }
1694
1695 const PlatformInterface& platformInterface = context.getPlatformInterface();
1696 const VkInstance instance = context.getInstance();
1697 const InstanceDriver instanceDriver (platformInterface, instance);
1698
1699 instanceDriver.getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
1700
1701 switch (format)
1702 {
1703 default:
1704 return true;
1705 case VK_FORMAT_R16_SFLOAT:
1706 case VK_FORMAT_R16G16_SFLOAT:
1707 case VK_FORMAT_R16G16B16_SFLOAT:
1708 case VK_FORMAT_R16G16B16A16_SFLOAT:
1709 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderFloat16 & storage16bit.storageBuffer16BitAccess ? true : false;
1710 case VK_FORMAT_R64_SFLOAT:
1711 case VK_FORMAT_R64G64_SFLOAT:
1712 case VK_FORMAT_R64G64B64_SFLOAT:
1713 case VK_FORMAT_R64G64B64A64_SFLOAT:
1714 return features2.features.shaderFloat64 ? true : false;
1715 case VK_FORMAT_R8_SINT:
1716 case VK_FORMAT_R8G8_SINT:
1717 case VK_FORMAT_R8G8B8_SINT:
1718 case VK_FORMAT_R8G8B8A8_SINT:
1719 case VK_FORMAT_R8_UINT:
1720 case VK_FORMAT_R8G8_UINT:
1721 case VK_FORMAT_R8G8B8_UINT:
1722 case VK_FORMAT_R8G8B8A8_UINT:
1723 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & float16Int8Features.shaderInt8 & storage8bit.storageBuffer8BitAccess ? true : false;
1724 case VK_FORMAT_R16_SINT:
1725 case VK_FORMAT_R16G16_SINT:
1726 case VK_FORMAT_R16G16B16_SINT:
1727 case VK_FORMAT_R16G16B16A16_SINT:
1728 case VK_FORMAT_R16_UINT:
1729 case VK_FORMAT_R16G16_UINT:
1730 case VK_FORMAT_R16G16B16_UINT:
1731 case VK_FORMAT_R16G16B16A16_UINT:
1732 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt16 & storage16bit.storageBuffer16BitAccess ? true : false;
1733 case VK_FORMAT_R64_SINT:
1734 case VK_FORMAT_R64G64_SINT:
1735 case VK_FORMAT_R64G64B64_SINT:
1736 case VK_FORMAT_R64G64B64A64_SINT:
1737 case VK_FORMAT_R64_UINT:
1738 case VK_FORMAT_R64G64_UINT:
1739 case VK_FORMAT_R64G64B64_UINT:
1740 case VK_FORMAT_R64G64B64A64_UINT:
1741 return subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes & features2.features.shaderInt64 ? true : false;
1742 }
1743 }
1744
isSubgroupBroadcastDynamicIdSupported(Context & context)1745 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1746 {
1747 return context.contextSupports(vk::ApiVersion(1, 2, 0)) &&
1748 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1749 }
1750
getFormatNameForGLSL(VkFormat format)1751 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1752 {
1753 switch (format)
1754 {
1755 default:
1756 DE_FATAL("Unhandled format!");
1757 return "";
1758 case VK_FORMAT_R8_SINT:
1759 return "int8_t";
1760 case VK_FORMAT_R8G8_SINT:
1761 return "i8vec2";
1762 case VK_FORMAT_R8G8B8_SINT:
1763 return "i8vec3";
1764 case VK_FORMAT_R8G8B8A8_SINT:
1765 return "i8vec4";
1766 case VK_FORMAT_R8_UINT:
1767 return "uint8_t";
1768 case VK_FORMAT_R8G8_UINT:
1769 return "u8vec2";
1770 case VK_FORMAT_R8G8B8_UINT:
1771 return "u8vec3";
1772 case VK_FORMAT_R8G8B8A8_UINT:
1773 return "u8vec4";
1774 case VK_FORMAT_R16_SINT:
1775 return "int16_t";
1776 case VK_FORMAT_R16G16_SINT:
1777 return "i16vec2";
1778 case VK_FORMAT_R16G16B16_SINT:
1779 return "i16vec3";
1780 case VK_FORMAT_R16G16B16A16_SINT:
1781 return "i16vec4";
1782 case VK_FORMAT_R16_UINT:
1783 return "uint16_t";
1784 case VK_FORMAT_R16G16_UINT:
1785 return "u16vec2";
1786 case VK_FORMAT_R16G16B16_UINT:
1787 return "u16vec3";
1788 case VK_FORMAT_R16G16B16A16_UINT:
1789 return "u16vec4";
1790 case VK_FORMAT_R32_SINT:
1791 return "int";
1792 case VK_FORMAT_R32G32_SINT:
1793 return "ivec2";
1794 case VK_FORMAT_R32G32B32_SINT:
1795 return "ivec3";
1796 case VK_FORMAT_R32G32B32A32_SINT:
1797 return "ivec4";
1798 case VK_FORMAT_R32_UINT:
1799 return "uint";
1800 case VK_FORMAT_R32G32_UINT:
1801 return "uvec2";
1802 case VK_FORMAT_R32G32B32_UINT:
1803 return "uvec3";
1804 case VK_FORMAT_R32G32B32A32_UINT:
1805 return "uvec4";
1806 case VK_FORMAT_R64_SINT:
1807 return "int64_t";
1808 case VK_FORMAT_R64G64_SINT:
1809 return "i64vec2";
1810 case VK_FORMAT_R64G64B64_SINT:
1811 return "i64vec3";
1812 case VK_FORMAT_R64G64B64A64_SINT:
1813 return "i64vec4";
1814 case VK_FORMAT_R64_UINT:
1815 return "uint64_t";
1816 case VK_FORMAT_R64G64_UINT:
1817 return "u64vec2";
1818 case VK_FORMAT_R64G64B64_UINT:
1819 return "u64vec3";
1820 case VK_FORMAT_R64G64B64A64_UINT:
1821 return "u64vec4";
1822 case VK_FORMAT_R16_SFLOAT:
1823 return "float16_t";
1824 case VK_FORMAT_R16G16_SFLOAT:
1825 return "f16vec2";
1826 case VK_FORMAT_R16G16B16_SFLOAT:
1827 return "f16vec3";
1828 case VK_FORMAT_R16G16B16A16_SFLOAT:
1829 return "f16vec4";
1830 case VK_FORMAT_R32_SFLOAT:
1831 return "float";
1832 case VK_FORMAT_R32G32_SFLOAT:
1833 return "vec2";
1834 case VK_FORMAT_R32G32B32_SFLOAT:
1835 return "vec3";
1836 case VK_FORMAT_R32G32B32A32_SFLOAT:
1837 return "vec4";
1838 case VK_FORMAT_R64_SFLOAT:
1839 return "double";
1840 case VK_FORMAT_R64G64_SFLOAT:
1841 return "dvec2";
1842 case VK_FORMAT_R64G64B64_SFLOAT:
1843 return "dvec3";
1844 case VK_FORMAT_R64G64B64A64_SFLOAT:
1845 return "dvec4";
1846 case VK_FORMAT_R8_USCALED:
1847 return "bool";
1848 case VK_FORMAT_R8G8_USCALED:
1849 return "bvec2";
1850 case VK_FORMAT_R8G8B8_USCALED:
1851 return "bvec3";
1852 case VK_FORMAT_R8G8B8A8_USCALED:
1853 return "bvec4";
1854 }
1855 }
1856
getAdditionalExtensionForFormat(vk::VkFormat format)1857 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1858 {
1859 switch (format)
1860 {
1861 default:
1862 return "";
1863 case VK_FORMAT_R8_SINT:
1864 case VK_FORMAT_R8G8_SINT:
1865 case VK_FORMAT_R8G8B8_SINT:
1866 case VK_FORMAT_R8G8B8A8_SINT:
1867 case VK_FORMAT_R8_UINT:
1868 case VK_FORMAT_R8G8_UINT:
1869 case VK_FORMAT_R8G8B8_UINT:
1870 case VK_FORMAT_R8G8B8A8_UINT:
1871 return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1872 case VK_FORMAT_R16_SINT:
1873 case VK_FORMAT_R16G16_SINT:
1874 case VK_FORMAT_R16G16B16_SINT:
1875 case VK_FORMAT_R16G16B16A16_SINT:
1876 case VK_FORMAT_R16_UINT:
1877 case VK_FORMAT_R16G16_UINT:
1878 case VK_FORMAT_R16G16B16_UINT:
1879 case VK_FORMAT_R16G16B16A16_UINT:
1880 return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1881 case VK_FORMAT_R64_SINT:
1882 case VK_FORMAT_R64G64_SINT:
1883 case VK_FORMAT_R64G64B64_SINT:
1884 case VK_FORMAT_R64G64B64A64_SINT:
1885 case VK_FORMAT_R64_UINT:
1886 case VK_FORMAT_R64G64_UINT:
1887 case VK_FORMAT_R64G64B64_UINT:
1888 case VK_FORMAT_R64G64B64A64_UINT:
1889 return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1890 case VK_FORMAT_R16_SFLOAT:
1891 case VK_FORMAT_R16G16_SFLOAT:
1892 case VK_FORMAT_R16G16B16_SFLOAT:
1893 case VK_FORMAT_R16G16B16A16_SFLOAT:
1894 return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1895 }
1896 }
1897
getAllFormats()1898 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1899 {
1900 std::vector<VkFormat> formats;
1901
1902 formats.push_back(VK_FORMAT_R8_SINT);
1903 formats.push_back(VK_FORMAT_R8G8_SINT);
1904 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1905 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1906 formats.push_back(VK_FORMAT_R8_UINT);
1907 formats.push_back(VK_FORMAT_R8G8_UINT);
1908 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1909 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1910 formats.push_back(VK_FORMAT_R16_SINT);
1911 formats.push_back(VK_FORMAT_R16G16_SINT);
1912 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1913 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1914 formats.push_back(VK_FORMAT_R16_UINT);
1915 formats.push_back(VK_FORMAT_R16G16_UINT);
1916 formats.push_back(VK_FORMAT_R16G16B16_UINT);
1917 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1918 formats.push_back(VK_FORMAT_R32_SINT);
1919 formats.push_back(VK_FORMAT_R32G32_SINT);
1920 formats.push_back(VK_FORMAT_R32G32B32_SINT);
1921 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1922 formats.push_back(VK_FORMAT_R32_UINT);
1923 formats.push_back(VK_FORMAT_R32G32_UINT);
1924 formats.push_back(VK_FORMAT_R32G32B32_UINT);
1925 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1926 formats.push_back(VK_FORMAT_R64_SINT);
1927 formats.push_back(VK_FORMAT_R64G64_SINT);
1928 formats.push_back(VK_FORMAT_R64G64B64_SINT);
1929 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1930 formats.push_back(VK_FORMAT_R64_UINT);
1931 formats.push_back(VK_FORMAT_R64G64_UINT);
1932 formats.push_back(VK_FORMAT_R64G64B64_UINT);
1933 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1934 formats.push_back(VK_FORMAT_R16_SFLOAT);
1935 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1936 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1937 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1938 formats.push_back(VK_FORMAT_R32_SFLOAT);
1939 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1940 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1941 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1942 formats.push_back(VK_FORMAT_R64_SFLOAT);
1943 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1944 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1945 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1946 formats.push_back(VK_FORMAT_R8_USCALED);
1947 formats.push_back(VK_FORMAT_R8G8_USCALED);
1948 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1949 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1950
1951 return formats;
1952 }
1953
isFormatSigned(VkFormat format)1954 bool vkt::subgroups::isFormatSigned (VkFormat format)
1955 {
1956 switch (format)
1957 {
1958 default:
1959 return false;
1960 case VK_FORMAT_R8_SINT:
1961 case VK_FORMAT_R8G8_SINT:
1962 case VK_FORMAT_R8G8B8_SINT:
1963 case VK_FORMAT_R8G8B8A8_SINT:
1964 case VK_FORMAT_R16_SINT:
1965 case VK_FORMAT_R16G16_SINT:
1966 case VK_FORMAT_R16G16B16_SINT:
1967 case VK_FORMAT_R16G16B16A16_SINT:
1968 case VK_FORMAT_R32_SINT:
1969 case VK_FORMAT_R32G32_SINT:
1970 case VK_FORMAT_R32G32B32_SINT:
1971 case VK_FORMAT_R32G32B32A32_SINT:
1972 case VK_FORMAT_R64_SINT:
1973 case VK_FORMAT_R64G64_SINT:
1974 case VK_FORMAT_R64G64B64_SINT:
1975 case VK_FORMAT_R64G64B64A64_SINT:
1976 return true;
1977 }
1978 }
1979
isFormatUnsigned(VkFormat format)1980 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1981 {
1982 switch (format)
1983 {
1984 default:
1985 return false;
1986 case VK_FORMAT_R8_UINT:
1987 case VK_FORMAT_R8G8_UINT:
1988 case VK_FORMAT_R8G8B8_UINT:
1989 case VK_FORMAT_R8G8B8A8_UINT:
1990 case VK_FORMAT_R16_UINT:
1991 case VK_FORMAT_R16G16_UINT:
1992 case VK_FORMAT_R16G16B16_UINT:
1993 case VK_FORMAT_R16G16B16A16_UINT:
1994 case VK_FORMAT_R32_UINT:
1995 case VK_FORMAT_R32G32_UINT:
1996 case VK_FORMAT_R32G32B32_UINT:
1997 case VK_FORMAT_R32G32B32A32_UINT:
1998 case VK_FORMAT_R64_UINT:
1999 case VK_FORMAT_R64G64_UINT:
2000 case VK_FORMAT_R64G64B64_UINT:
2001 case VK_FORMAT_R64G64B64A64_UINT:
2002 return true;
2003 }
2004 }
2005
isFormatFloat(VkFormat format)2006 bool vkt::subgroups::isFormatFloat (VkFormat format)
2007 {
2008 switch (format)
2009 {
2010 default:
2011 return false;
2012 case VK_FORMAT_R16_SFLOAT:
2013 case VK_FORMAT_R16G16_SFLOAT:
2014 case VK_FORMAT_R16G16B16_SFLOAT:
2015 case VK_FORMAT_R16G16B16A16_SFLOAT:
2016 case VK_FORMAT_R32_SFLOAT:
2017 case VK_FORMAT_R32G32_SFLOAT:
2018 case VK_FORMAT_R32G32B32_SFLOAT:
2019 case VK_FORMAT_R32G32B32A32_SFLOAT:
2020 case VK_FORMAT_R64_SFLOAT:
2021 case VK_FORMAT_R64G64_SFLOAT:
2022 case VK_FORMAT_R64G64B64_SFLOAT:
2023 case VK_FORMAT_R64G64B64A64_SFLOAT:
2024 return true;
2025 }
2026 }
2027
isFormatBool(VkFormat format)2028 bool vkt::subgroups::isFormatBool (VkFormat format)
2029 {
2030 switch (format)
2031 {
2032 default:
2033 return false;
2034 case VK_FORMAT_R8_USCALED:
2035 case VK_FORMAT_R8G8_USCALED:
2036 case VK_FORMAT_R8G8B8_USCALED:
2037 case VK_FORMAT_R8G8B8A8_USCALED:
2038 return true;
2039 }
2040 }
2041
isFormat8bitTy(VkFormat format)2042 bool vkt::subgroups::isFormat8bitTy(VkFormat format)
2043 {
2044 switch (format)
2045 {
2046 default:
2047 return false;
2048 case VK_FORMAT_R8_SINT:
2049 case VK_FORMAT_R8G8_SINT:
2050 case VK_FORMAT_R8G8B8_SINT:
2051 case VK_FORMAT_R8G8B8A8_SINT:
2052 case VK_FORMAT_R8_UINT:
2053 case VK_FORMAT_R8G8_UINT:
2054 case VK_FORMAT_R8G8B8_UINT:
2055 case VK_FORMAT_R8G8B8A8_UINT:
2056 return true;
2057 }
2058 }
2059
isFormat16BitTy(VkFormat format)2060 bool vkt::subgroups::isFormat16BitTy(VkFormat format)
2061 {
2062 switch (format)
2063 {
2064 default:
2065 return false;
2066 case VK_FORMAT_R16_SFLOAT:
2067 case VK_FORMAT_R16G16_SFLOAT:
2068 case VK_FORMAT_R16G16B16_SFLOAT:
2069 case VK_FORMAT_R16G16B16A16_SFLOAT:
2070 case VK_FORMAT_R16_SINT:
2071 case VK_FORMAT_R16G16_SINT:
2072 case VK_FORMAT_R16G16B16_SINT:
2073 case VK_FORMAT_R16G16B16A16_SINT:
2074 case VK_FORMAT_R16_UINT:
2075 case VK_FORMAT_R16G16_UINT:
2076 case VK_FORMAT_R16G16B16_UINT:
2077 case VK_FORMAT_R16G16B16A16_UINT:
2078 return true;
2079 }
2080 }
2081
setVertexShaderFrameBuffer(SourceCollections & programCollection)2082 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
2083 {
2084 /*
2085 "layout(location = 0) in highp vec4 in_position;\n"
2086 "void main (void)\n"
2087 "{\n"
2088 " gl_Position = in_position;\n"
2089 " gl_PointSize = 1.0f;\n"
2090 "}\n";
2091 */
2092 programCollection.spirvAsmSources.add("vert") <<
2093 "; SPIR-V\n"
2094 "; Version: 1.3\n"
2095 "; Generator: Khronos Glslang Reference Front End; 7\n"
2096 "; Bound: 25\n"
2097 "; Schema: 0\n"
2098 "OpCapability Shader\n"
2099 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2100 "OpMemoryModel Logical GLSL450\n"
2101 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2102 "OpMemberDecorate %11 0 BuiltIn Position\n"
2103 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2104 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2105 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2106 "OpDecorate %11 Block\n"
2107 "OpDecorate %17 Location 0\n"
2108 "%2 = OpTypeVoid\n"
2109 "%3 = OpTypeFunction %2\n"
2110 "%6 = OpTypeFloat 32\n"
2111 "%7 = OpTypeVector %6 4\n"
2112 "%8 = OpTypeInt 32 0\n"
2113 "%9 = OpConstant %8 1\n"
2114 "%10 = OpTypeArray %6 %9\n"
2115 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2116 "%12 = OpTypePointer Output %11\n"
2117 "%13 = OpVariable %12 Output\n"
2118 "%14 = OpTypeInt 32 1\n"
2119 "%15 = OpConstant %14 0\n"
2120 "%16 = OpTypePointer Input %7\n"
2121 "%17 = OpVariable %16 Input\n"
2122 "%19 = OpTypePointer Output %7\n"
2123 "%21 = OpConstant %14 1\n"
2124 "%22 = OpConstant %6 1\n"
2125 "%23 = OpTypePointer Output %6\n"
2126 "%4 = OpFunction %2 None %3\n"
2127 "%5 = OpLabel\n"
2128 "%18 = OpLoad %7 %17\n"
2129 "%20 = OpAccessChain %19 %13 %15\n"
2130 "OpStore %20 %18\n"
2131 "%24 = OpAccessChain %23 %13 %21\n"
2132 "OpStore %24 %22\n"
2133 "OpReturn\n"
2134 "OpFunctionEnd\n";
2135 }
2136
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)2137 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2138 {
2139 /*
2140 "layout(location = 0) in float in_color;\n"
2141 "layout(location = 0) out uint out_color;\n"
2142 "void main()\n"
2143 {\n"
2144 " out_color = uint(in_color);\n"
2145 "}\n";
2146 */
2147 programCollection.spirvAsmSources.add("fragment") <<
2148 "; SPIR-V\n"
2149 "; Version: 1.3\n"
2150 "; Generator: Khronos Glslang Reference Front End; 2\n"
2151 "; Bound: 14\n"
2152 "; Schema: 0\n"
2153 "OpCapability Shader\n"
2154 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2155 "OpMemoryModel Logical GLSL450\n"
2156 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2157 "OpExecutionMode %4 OriginUpperLeft\n"
2158 "OpDecorate %8 Location 0\n"
2159 "OpDecorate %11 Location 0\n"
2160 "%2 = OpTypeVoid\n"
2161 "%3 = OpTypeFunction %2\n"
2162 "%6 = OpTypeInt 32 0\n"
2163 "%7 = OpTypePointer Output %6\n"
2164 "%8 = OpVariable %7 Output\n"
2165 "%9 = OpTypeFloat 32\n"
2166 "%10 = OpTypePointer Input %9\n"
2167 "%11 = OpVariable %10 Input\n"
2168 "%4 = OpFunction %2 None %3\n"
2169 "%5 = OpLabel\n"
2170 "%12 = OpLoad %9 %11\n"
2171 "%13 = OpConvertFToU %6 %12\n"
2172 "OpStore %8 %13\n"
2173 "OpReturn\n"
2174 "OpFunctionEnd\n";
2175 }
2176
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2177 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2178 {
2179 /*
2180 "#extension GL_KHR_shader_subgroup_basic: enable\n"
2181 "#extension GL_EXT_tessellation_shader : require\n"
2182 "layout(vertices = 2) out;\n"
2183 "void main (void)\n"
2184 "{\n"
2185 " if (gl_InvocationID == 0)\n"
2186 " {\n"
2187 " gl_TessLevelOuter[0] = 1.0f;\n"
2188 " gl_TessLevelOuter[1] = 1.0f;\n"
2189 " }\n"
2190 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2191 "}\n";
2192 */
2193 programCollection.spirvAsmSources.add("tesc") <<
2194 "; SPIR-V\n"
2195 "; Version: 1.3\n"
2196 "; Generator: Khronos Glslang Reference Front End; 2\n"
2197 "; Bound: 46\n"
2198 "; Schema: 0\n"
2199 "OpCapability Tessellation\n"
2200 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2201 "OpMemoryModel Logical GLSL450\n"
2202 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2203 "OpExecutionMode %4 OutputVertices 2\n"
2204 "OpDecorate %8 BuiltIn InvocationId\n"
2205 "OpDecorate %20 Patch\n"
2206 "OpDecorate %20 BuiltIn TessLevelOuter\n"
2207 "OpMemberDecorate %29 0 BuiltIn Position\n"
2208 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
2209 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2210 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2211 "OpDecorate %29 Block\n"
2212 "OpMemberDecorate %35 0 BuiltIn Position\n"
2213 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
2214 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2215 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2216 "OpDecorate %35 Block\n"
2217 "%2 = OpTypeVoid\n"
2218 "%3 = OpTypeFunction %2\n"
2219 "%6 = OpTypeInt 32 1\n"
2220 "%7 = OpTypePointer Input %6\n"
2221 "%8 = OpVariable %7 Input\n"
2222 "%10 = OpConstant %6 0\n"
2223 "%11 = OpTypeBool\n"
2224 "%15 = OpTypeFloat 32\n"
2225 "%16 = OpTypeInt 32 0\n"
2226 "%17 = OpConstant %16 4\n"
2227 "%18 = OpTypeArray %15 %17\n"
2228 "%19 = OpTypePointer Output %18\n"
2229 "%20 = OpVariable %19 Output\n"
2230 "%21 = OpConstant %15 1\n"
2231 "%22 = OpTypePointer Output %15\n"
2232 "%24 = OpConstant %6 1\n"
2233 "%26 = OpTypeVector %15 4\n"
2234 "%27 = OpConstant %16 1\n"
2235 "%28 = OpTypeArray %15 %27\n"
2236 "%29 = OpTypeStruct %26 %15 %28 %28\n"
2237 "%30 = OpConstant %16 2\n"
2238 "%31 = OpTypeArray %29 %30\n"
2239 "%32 = OpTypePointer Output %31\n"
2240 "%33 = OpVariable %32 Output\n"
2241 "%35 = OpTypeStruct %26 %15 %28 %28\n"
2242 "%36 = OpConstant %16 32\n"
2243 "%37 = OpTypeArray %35 %36\n"
2244 "%38 = OpTypePointer Input %37\n"
2245 "%39 = OpVariable %38 Input\n"
2246 "%41 = OpTypePointer Input %26\n"
2247 "%44 = OpTypePointer Output %26\n"
2248 "%4 = OpFunction %2 None %3\n"
2249 "%5 = OpLabel\n"
2250 "%9 = OpLoad %6 %8\n"
2251 "%12 = OpIEqual %11 %9 %10\n"
2252 "OpSelectionMerge %14 None\n"
2253 "OpBranchConditional %12 %13 %14\n"
2254 "%13 = OpLabel\n"
2255 "%23 = OpAccessChain %22 %20 %10\n"
2256 "OpStore %23 %21\n"
2257 "%25 = OpAccessChain %22 %20 %24\n"
2258 "OpStore %25 %21\n"
2259 "OpBranch %14\n"
2260 "%14 = OpLabel\n"
2261 "%34 = OpLoad %6 %8\n"
2262 "%40 = OpLoad %6 %8\n"
2263 "%42 = OpAccessChain %41 %39 %40 %10\n"
2264 "%43 = OpLoad %26 %42\n"
2265 "%45 = OpAccessChain %44 %33 %34 %10\n"
2266 "OpStore %45 %43\n"
2267 "OpReturn\n"
2268 "OpFunctionEnd\n";
2269 }
2270
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2271 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2272 {
2273 /*
2274 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
2275 "#extension GL_EXT_tessellation_shader : require\n"
2276 "layout(isolines, equal_spacing, ccw ) in;\n"
2277 "layout(location = 0) in float in_color[];\n"
2278 "layout(location = 0) out float out_color;\n"
2279 "\n"
2280 "void main (void)\n"
2281 "{\n"
2282 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2283 " out_color = in_color[0];\n"
2284 "}\n";
2285 */
2286 programCollection.spirvAsmSources.add("tese") <<
2287 "; SPIR-V\n"
2288 "; Version: 1.3\n"
2289 "; Generator: Khronos Glslang Reference Front End; 2\n"
2290 "; Bound: 45\n"
2291 "; Schema: 0\n"
2292 "OpCapability Tessellation\n"
2293 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2294 "OpMemoryModel Logical GLSL450\n"
2295 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2296 "OpExecutionMode %4 Isolines\n"
2297 "OpExecutionMode %4 SpacingEqual\n"
2298 "OpExecutionMode %4 VertexOrderCcw\n"
2299 "OpMemberDecorate %11 0 BuiltIn Position\n"
2300 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2301 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2302 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2303 "OpDecorate %11 Block\n"
2304 "OpMemberDecorate %16 0 BuiltIn Position\n"
2305 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
2306 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2307 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2308 "OpDecorate %16 Block\n"
2309 "OpDecorate %29 BuiltIn TessCoord\n"
2310 "OpDecorate %39 Location 0\n"
2311 "OpDecorate %42 Location 0\n"
2312 "%2 = OpTypeVoid\n"
2313 "%3 = OpTypeFunction %2\n"
2314 "%6 = OpTypeFloat 32\n"
2315 "%7 = OpTypeVector %6 4\n"
2316 "%8 = OpTypeInt 32 0\n"
2317 "%9 = OpConstant %8 1\n"
2318 "%10 = OpTypeArray %6 %9\n"
2319 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2320 "%12 = OpTypePointer Output %11\n"
2321 "%13 = OpVariable %12 Output\n"
2322 "%14 = OpTypeInt 32 1\n"
2323 "%15 = OpConstant %14 0\n"
2324 "%16 = OpTypeStruct %7 %6 %10 %10\n"
2325 "%17 = OpConstant %8 32\n"
2326 "%18 = OpTypeArray %16 %17\n"
2327 "%19 = OpTypePointer Input %18\n"
2328 "%20 = OpVariable %19 Input\n"
2329 "%21 = OpTypePointer Input %7\n"
2330 "%24 = OpConstant %14 1\n"
2331 "%27 = OpTypeVector %6 3\n"
2332 "%28 = OpTypePointer Input %27\n"
2333 "%29 = OpVariable %28 Input\n"
2334 "%30 = OpConstant %8 0\n"
2335 "%31 = OpTypePointer Input %6\n"
2336 "%36 = OpTypePointer Output %7\n"
2337 "%38 = OpTypePointer Output %6\n"
2338 "%39 = OpVariable %38 Output\n"
2339 "%40 = OpTypeArray %6 %17\n"
2340 "%41 = OpTypePointer Input %40\n"
2341 "%42 = OpVariable %41 Input\n"
2342 "%4 = OpFunction %2 None %3\n"
2343 "%5 = OpLabel\n"
2344 "%22 = OpAccessChain %21 %20 %15 %15\n"
2345 "%23 = OpLoad %7 %22\n"
2346 "%25 = OpAccessChain %21 %20 %24 %15\n"
2347 "%26 = OpLoad %7 %25\n"
2348 "%32 = OpAccessChain %31 %29 %30\n"
2349 "%33 = OpLoad %6 %32\n"
2350 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2351 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2352 "%37 = OpAccessChain %36 %13 %15\n"
2353 "OpStore %37 %35\n"
2354 "%43 = OpAccessChain %31 %42 %15\n"
2355 "%44 = OpLoad %6 %43\n"
2356 "OpStore %39 %44\n"
2357 "OpReturn\n"
2358 "OpFunctionEnd\n";
2359 }
2360
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2361 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
2362 {
2363 tcu::StringTemplate geometryTemplate(glslTemplate);
2364
2365 map<string, string> linesParams;
2366 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2367
2368 map<string, string> pointsParams;
2369 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2370
2371 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2372 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2373 }
2374
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2375 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2376 {
2377 tcu::StringTemplate geometryTemplate(spirvTemplate);
2378
2379 map<string, string> linesParams;
2380 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2381
2382 map<string, string> pointsParams;
2383 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2384
2385 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2386 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2387 }
2388
initializeMemory(Context & context,const Allocation & alloc,subgroups::SSBOData & data)2389 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
2390 {
2391 const vk::VkFormat format = data.format;
2392 const vk::VkDeviceSize size = data.numElements *
2393 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2394 if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2395 {
2396 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2397
2398 switch (format)
2399 {
2400 default:
2401 DE_FATAL("Illegal buffer format");
2402 break;
2403 case VK_FORMAT_R8_SINT:
2404 case VK_FORMAT_R8G8_SINT:
2405 case VK_FORMAT_R8G8B8_SINT:
2406 case VK_FORMAT_R8G8B8A8_SINT:
2407 case VK_FORMAT_R8_UINT:
2408 case VK_FORMAT_R8G8_UINT:
2409 case VK_FORMAT_R8G8B8_UINT:
2410 case VK_FORMAT_R8G8B8A8_UINT:
2411 {
2412 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2413
2414 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2415 {
2416 ptr[k] = rnd.getUint8();
2417 }
2418 }
2419 break;
2420 case VK_FORMAT_R16_SINT:
2421 case VK_FORMAT_R16G16_SINT:
2422 case VK_FORMAT_R16G16B16_SINT:
2423 case VK_FORMAT_R16G16B16A16_SINT:
2424 case VK_FORMAT_R16_UINT:
2425 case VK_FORMAT_R16G16_UINT:
2426 case VK_FORMAT_R16G16B16_UINT:
2427 case VK_FORMAT_R16G16B16A16_UINT:
2428 {
2429 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2430
2431 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2432 {
2433 ptr[k] = rnd.getUint16();
2434 }
2435 }
2436 break;
2437 case VK_FORMAT_R8_USCALED:
2438 case VK_FORMAT_R8G8_USCALED:
2439 case VK_FORMAT_R8G8B8_USCALED:
2440 case VK_FORMAT_R8G8B8A8_USCALED:
2441 {
2442 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2443
2444 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2445 {
2446 deUint32 r = rnd.getUint32();
2447 ptr[k] = (r & 1) ? r : 0;
2448 }
2449 }
2450 break;
2451 case VK_FORMAT_R32_SINT:
2452 case VK_FORMAT_R32G32_SINT:
2453 case VK_FORMAT_R32G32B32_SINT:
2454 case VK_FORMAT_R32G32B32A32_SINT:
2455 case VK_FORMAT_R32_UINT:
2456 case VK_FORMAT_R32G32_UINT:
2457 case VK_FORMAT_R32G32B32_UINT:
2458 case VK_FORMAT_R32G32B32A32_UINT:
2459 {
2460 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2461
2462 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2463 {
2464 ptr[k] = rnd.getUint32();
2465 }
2466 }
2467 break;
2468 case VK_FORMAT_R64_SINT:
2469 case VK_FORMAT_R64G64_SINT:
2470 case VK_FORMAT_R64G64B64_SINT:
2471 case VK_FORMAT_R64G64B64A64_SINT:
2472 case VK_FORMAT_R64_UINT:
2473 case VK_FORMAT_R64G64_UINT:
2474 case VK_FORMAT_R64G64B64_UINT:
2475 case VK_FORMAT_R64G64B64A64_UINT:
2476 {
2477 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2478
2479 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2480 {
2481 ptr[k] = rnd.getUint64();
2482 }
2483 }
2484 break;
2485 case VK_FORMAT_R16_SFLOAT:
2486 case VK_FORMAT_R16G16_SFLOAT:
2487 case VK_FORMAT_R16G16B16_SFLOAT:
2488 case VK_FORMAT_R16G16B16A16_SFLOAT:
2489 {
2490 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2491
2492 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2493 {
2494 ptr[k] = deFloat32To16(rnd.getFloat());
2495 }
2496 }
2497 break;
2498 case VK_FORMAT_R32_SFLOAT:
2499 case VK_FORMAT_R32G32_SFLOAT:
2500 case VK_FORMAT_R32G32B32_SFLOAT:
2501 case VK_FORMAT_R32G32B32A32_SFLOAT:
2502 {
2503 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2504
2505 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2506 {
2507 ptr[k] = rnd.getFloat();
2508 }
2509 }
2510 break;
2511 case VK_FORMAT_R64_SFLOAT:
2512 case VK_FORMAT_R64G64_SFLOAT:
2513 case VK_FORMAT_R64G64B64_SFLOAT:
2514 case VK_FORMAT_R64G64B64A64_SFLOAT:
2515 {
2516 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2517
2518 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2519 {
2520 ptr[k] = rnd.getDouble();
2521 }
2522 }
2523 break;
2524 }
2525 }
2526 else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2527 {
2528 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2529
2530 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2531 {
2532 ptr[k] = 0;
2533 }
2534 }
2535
2536 if (subgroups::SSBOData::InitializeNone != data.initializeType)
2537 {
2538 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2539 }
2540 }
2541
getResultBinding(const VkShaderStageFlagBits shaderStage)2542 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2543 {
2544 switch(shaderStage)
2545 {
2546 case VK_SHADER_STAGE_VERTEX_BIT:
2547 return 0u;
2548 break;
2549 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2550 return 1u;
2551 break;
2552 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2553 return 2u;
2554 break;
2555 case VK_SHADER_STAGE_GEOMETRY_BIT:
2556 return 3u;
2557 break;
2558 default:
2559 DE_ASSERT(0);
2560 return -1;
2561 }
2562 DE_ASSERT(0);
2563 return -1;
2564 }
2565
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const VkShaderStageFlags shaderStage)2566 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2567 Context& context, VkFormat format, SSBOData* extraData,
2568 deUint32 extraDataCount, const void* internalData,
2569 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
2570 const VkShaderStageFlags shaderStage)
2571 {
2572 return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2573 }
2574
// Runs a framebuffer-based subgroup test in the tessellation stages.
//
// Draws pairs of vertices as patches across a 1-pixel-high framebuffer at
// increasing widths (see getNextWidth), reads the rendered image back and
// hands it to the caller-supplied checkResult callback. extraData describes
// additional input buffers/images bound for the tested stage.
// tessShaderStageCreateFlags and requiredSubgroupSize are applied only to
// the tessellation stage(s) selected by shaderStage; requiredSubgroupSize
// of 0 means "no explicit required subgroup size".
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
	Context& context, VkFormat format, SSBOData* extraData,
	deUint32 extraDataCount, const void* internalData,
	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
	const VkShaderStageFlags shaderStage, const deUint32 tessShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
{
	const DeviceInterface&					vk = context.getDeviceInterface();
	const VkDevice							device = context.getDevice();
	const deUint32							maxWidth = getMaxWidth();
	vector<de::SharedPtr<BufferOrImage> >	inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder				layoutBuilder;
	DescriptorPoolBuilder					poolBuilder;
	DescriptorSetUpdateBuilder				updateBuilder;
	Move <VkDescriptorPool>					descriptorPool;
	Move <VkDescriptorSet>					descriptorSet;

	// Full vert/tesc/tese/frag pipeline is always built; the subgroup
	// operations under test live in the tessellation shaders.
	const Unique<VkShaderModule>			vertexShaderModule (createShaderModule(vk, device,
												context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule>			teCtrlShaderModule (createShaderModule(vk, device,
												context.getBinaryCollection().get("tesc"), 0u));
	const Unique<VkShaderModule>			teEvalShaderModule (createShaderModule(vk, device,
												context.getBinaryCollection().get("tese"), 0u));
	const Unique<VkShaderModule>			fragmentShaderModule (createShaderModule(vk, device,
												context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass>				renderPass (makeRenderPass(context, format));

	// One vec4 position per vertex, tightly packed.
	const VkVertexInputBindingDescription	vertexInputBinding =
	{
		0u,											// binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
	};

	const VkVertexInputAttributeDescription	vertexInputAttribute =
	{
		0u,
		0u,
		VK_FORMAT_R32G32B32A32_SFLOAT,
		0u
	};

	// Create and initialize the extra input resources (uniform buffers or
	// sampled images) described by the caller.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// Bindings follow the extraData order; all are visible to the tested stage.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

	const Unique<VkDescriptorSetLayout>		descriptorSetLayout (layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout>			pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));

	// Per-stage required subgroup sizes, indexed by pipeline stage order
	// (vert, tesc, tese, geom, frag); only the tessellation entries are set.
	const deUint32 requiredSubgroupSizes[5] = {0u,
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
											   0u,
											   0u};

	const Unique<VkPipeline>				pipeline (makeGraphicsPipeline(context, *pipelineLayout,
											VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
											VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
											*vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
											*renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
											0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
											((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
											0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there is something to bind.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Write each input resource into its binding (binding index == buffer index).
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue					queue = context.getUniversalQueue();
	const deUint32					queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool>		cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32					subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer>	cmdBuffer (makeCommandBuffer(context, *cmdPool));
	// Two vertices per patch, hence 2 * maxWidth positions.
	const vk::VkDeviceSize			vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
	Buffer							vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned						totalIterations = 0u;
	unsigned						failedIterations = 0u;
	Image							discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		// Fill the vertex buffer once: consecutive vertex pairs span one
		// pixel each in NDC x, covering [-1, 1] across maxWidth pixels.
		const Allocation&		alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4>	data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
		const float				pixelSize = 2.0f / static_cast<float>(maxWidth);
		float					leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
		{
			data[ndx][0] = leftHandPosition;
			leftHandPosition += pixelSize;
			data[ndx+1][0] = leftHandPosition;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer>	framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport			viewport = makeViewport(maxWidth, 1u);
	const VkRect2D				scissor = makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize		imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer						imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize			vertexBufferOffset = 0u;

	// Render at every width from 1 up to maxWidth and verify each result.
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		beginCommandBuffer(vk, *cmdBuffer);
		{

			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			// 2 vertices per patch -> `width` patches drawn this iteration.
			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

			endRenderPass(vk, *cmdBuffer);

			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}

		{
			// Read the rendered row back and let the caller validate it.
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			// NOTE(review): width/2u is reported to the callback — presumably
			// the per-patch count rather than the vertex count; confirm
			// against the checkResult implementations.
			if (!checkResult(internalData, datas, width/2u, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
2779
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2780 bool vkt::subgroups::check(std::vector<const void*> datas,
2781 deUint32 width, deUint32 ref)
2782 {
2783 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2784
2785 for (deUint32 n = 0; n < width; ++n)
2786 {
2787 if (data[n] != ref)
2788 {
2789 return false;
2790 }
2791 }
2792
2793 return true;
2794 }
2795
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2796 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
2797 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2798 deUint32 ref)
2799 {
2800 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2801 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2802 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2803
2804 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2805 }
2806
makeGeometryFrameBufferTest(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))2807 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
2808 Context& context, VkFormat format, SSBOData* extraData,
2809 deUint32 extraDataCount, const void* internalData,
2810 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2811 {
2812 return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
2813 0u, 0u);
2814 }
2815
// Runs a framebuffer-based subgroup test in the geometry stage.
//
// Draws `width` point primitives (one per pixel) into a 1-pixel-high
// framebuffer at increasing widths (see getNextWidth), reads the image back
// and hands it to the caller-supplied checkResult callback. extraData
// describes additional input buffers/images bound to the geometry stage.
// geometryShaderStageCreateFlags and requiredSubgroupSize apply to the
// geometry stage only; requiredSubgroupSize of 0 means "no explicit
// required subgroup size".
tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
	Context& context, VkFormat format, SSBOData* extraData,
	deUint32 extraDataCount, const void* internalData,
	bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
	const deUint32 geometryShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
{
	const DeviceInterface&					vk = context.getDeviceInterface();
	const VkDevice							device = context.getDevice();
	const deUint32							maxWidth = getMaxWidth();
	vector<de::SharedPtr<BufferOrImage> >	inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder				layoutBuilder;
	DescriptorPoolBuilder					poolBuilder;
	DescriptorSetUpdateBuilder				updateBuilder;
	Move <VkDescriptorPool>					descriptorPool;
	Move <VkDescriptorSet>					descriptorSet;

	// vert/geometry/frag pipeline; the subgroup operations under test live
	// in the geometry shader.
	const Unique<VkShaderModule>			vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule>			geometryShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
	const Unique<VkShaderModule>			fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass>				renderPass (makeRenderPass(context, format));
	// One vec4 position per vertex, tightly packed.
	const VkVertexInputBindingDescription	vertexInputBinding =
	{
		0u,											// binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
	};

	const VkVertexInputAttributeDescription	vertexInputAttribute =
	{
		0u,
		0u,
		VK_FORMAT_R32G32B32A32_SFLOAT,
		0u
	};

	// Create and initialize the extra input resources (uniform buffers or
	// sampled images) described by the caller.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// Bindings follow the extraData order; all are geometry-stage visible.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);

	const Unique<VkDescriptorSetLayout>		descriptorSetLayout (layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout>			pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));

	// Per-stage required subgroup sizes, indexed by pipeline stage order
	// (vert, tesc, tese, geom, frag); only the geometry entry is set.
	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};

	const Unique<VkPipeline>				pipeline (makeGraphicsPipeline(context, *pipelineLayout,
											VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
											*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
											*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
											0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
											requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there is something to bind.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Write each input resource into its binding (binding index == buffer index).
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue					queue = context.getUniversalQueue();
	const deUint32					queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool>		cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32					subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer>	cmdBuffer (makeCommandBuffer(context, *cmdPool));
	// One vertex (point) per pixel column.
	const vk::VkDeviceSize			vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
	Buffer							vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned						totalIterations = 0u;
	unsigned						failedIterations = 0u;
	Image							discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		// Fill the vertex buffer once: each point sits at the x center of
		// its pixel in NDC, covering [-1, 1] across maxWidth pixels.
		const Allocation&		alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4>	data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
		const float				pixelSize = 2.0f / static_cast<float>(maxWidth);
		float					leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
		{
			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
			leftHandPosition += pixelSize;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer>	framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport			viewport = makeViewport(maxWidth, 1u);
	const VkRect2D				scissor = makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize		imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer						imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize			vertexBufferOffset = 0u;

	// Render at every width from 1 up to maxWidth and verify each result.
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		// Re-initialize the extra inputs each iteration (unlike the
		// tessellation variant, which initializes them only once).
		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		{
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
			initializeMemory(context, alloc, extraData[ndx]);
		}

		beginCommandBuffer(vk, *cmdBuffer);
		{
			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

			endRenderPass(vk, *cmdBuffer);

			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}

		{
			// Read the rendered row back and let the caller validate it.
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(internalData, datas, width, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;

		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
3015
allStages(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3016 tcu::TestStatus vkt::subgroups::allStages(
3017 Context& context, VkFormat format, SSBOData* extraData,
3018 deUint32 extraDataCount, const void* internalData,
3019 const VerificationFunctor& checkResult,
3020 const vk::VkShaderStageFlags shaderStage)
3021 {
3022 return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3023 0u, 0u, 0u, 0u, 0u, DE_NULL);
3024 }
3025
allStagesRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])3026 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
3027 Context& context, VkFormat format, SSBOData* extraDatas,
3028 deUint32 extraDatasCount, const void* internalData,
3029 const VerificationFunctor& checkResult,
3030 const VkShaderStageFlags shaderStageTested,
3031 const deUint32 vertexShaderStageCreateFlags,
3032 const deUint32 tessellationControlShaderStageCreateFlags,
3033 const deUint32 tessellationEvalShaderStageCreateFlags,
3034 const deUint32 geometryShaderStageCreateFlags,
3035 const deUint32 fragmentShaderStageCreateFlags,
3036 const deUint32 requiredSubgroupSize[5])
3037 {
3038 const DeviceInterface& vk = context.getDeviceInterface();
3039 const VkDevice device = context.getDevice();
3040 const deUint32 maxWidth = getMaxWidth();
3041 vector<VkShaderStageFlagBits> stagesVector;
3042 VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
3043
3044 Move<VkShaderModule> vertexShaderModule;
3045 Move<VkShaderModule> teCtrlShaderModule;
3046 Move<VkShaderModule> teEvalShaderModule;
3047 Move<VkShaderModule> geometryShaderModule;
3048 Move<VkShaderModule> fragmentShaderModule;
3049
3050 if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3051 {
3052 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3053 }
3054 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3055 {
3056 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3057 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3058 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3059 }
3060 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3061 {
3062 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3063 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3064 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3065 }
3066 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3067 {
3068 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3069 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3070 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3071 }
3072 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3073 {
3074 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3075 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3076 }
3077
3078 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
3079 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
3080 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
3081 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
3082
3083 shaderStageRequired = shaderStageTested | shaderStageRequired;
3084
3085 vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3086 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3087 {
3088 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3089 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3090 }
3091 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3092 {
3093 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3094 {
3095 // tessellation shaders output line primitives
3096 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3097 }
3098 else
3099 {
3100 // otherwise points are processed by geometry shader
3101 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3102 }
3103 }
3104 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3105 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3106
3107 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3108
3109 DescriptorSetLayoutBuilder layoutBuilder;
3110 // The implicit result SSBO we use to store our outputs from the shader
3111 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3112 {
3113 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3114 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3115 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3116
3117 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3118 }
3119
3120 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3121 {
3122 const deUint32 datasNdx = ndx - stagesCount;
3123 if (extraDatas[datasNdx].isImage)
3124 {
3125 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3126 }
3127 else
3128 {
3129 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3130 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3131 }
3132
3133 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3134 initializeMemory(context, alloc, extraDatas[datasNdx]);
3135
3136 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3137 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3138 }
3139
3140 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3141
3142 const Unique<VkPipelineLayout> pipelineLayout(
3143 makePipelineLayout(vk, device, *descriptorSetLayout));
3144
3145 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3146 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3147 shaderStageRequired,
3148 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3149 *renderPass,
3150 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3151 DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3152 vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3153 geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3154
3155 Move <VkDescriptorPool> descriptorPool;
3156 Move <VkDescriptorSet> descriptorSet;
3157
3158 if (inputBuffers.size() > 0)
3159 {
3160 DescriptorPoolBuilder poolBuilder;
3161
3162 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3163 {
3164 poolBuilder.addType(inputBuffers[ndx]->getType());
3165 }
3166
3167 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3168
3169 // Create descriptor set
3170 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3171
3172 DescriptorSetUpdateBuilder updateBuilder;
3173
3174 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3175 {
3176 deUint32 binding;
3177 if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3178 else binding = extraDatas[ndx -stagesCount].binding;
3179
3180 if (inputBuffers[ndx]->isImage())
3181 {
3182 VkDescriptorImageInfo info =
3183 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3184 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3185
3186 updateBuilder.writeSingle( *descriptorSet,
3187 DescriptorSetUpdateBuilder::Location::binding(binding),
3188 inputBuffers[ndx]->getType(), &info);
3189 }
3190 else
3191 {
3192 VkDescriptorBufferInfo info =
3193 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3194 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3195
3196 updateBuilder.writeSingle( *descriptorSet,
3197 DescriptorSetUpdateBuilder::Location::binding(binding),
3198 inputBuffers[ndx]->getType(), &info);
3199 }
3200 }
3201
3202 updateBuilder.update(vk, device);
3203 }
3204
3205 {
3206 const VkQueue queue = context.getUniversalQueue();
3207 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3208 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3209 const deUint32 subgroupSize = getSubgroupSize(context);
3210 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3211 unsigned totalIterations = 0u;
3212 unsigned failedIterations = 0u;
3213 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3214 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3215 const VkViewport viewport = makeViewport(maxWidth, 1u);
3216 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3217 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3218 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3219 const VkImageSubresourceRange subresourceRange =
3220 {
3221 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
3222 0u, //deUint32 baseMipLevel
3223 1u, //deUint32 levelCount
3224 0u, //deUint32 baseArrayLayer
3225 1u //deUint32 layerCount
3226 };
3227
3228 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
3229 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3230 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3231 resultImage.getImage(), subresourceRange);
3232
3233 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3234 {
3235 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3236 {
3237 // re-init the data
3238 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3239 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3240 }
3241
3242 totalIterations++;
3243
3244 beginCommandBuffer(vk, *cmdBuffer);
3245
3246 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3247
3248 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3249
3250 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3251
3252 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3253
3254 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3255
3256 if (stagesCount + extraDatasCount > 0)
3257 vk.cmdBindDescriptorSets(*cmdBuffer,
3258 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3259 &descriptorSet.get(), 0u, DE_NULL);
3260
3261 vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3262
3263 endRenderPass(vk, *cmdBuffer);
3264
3265 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3266
3267 endCommandBuffer(vk, *cmdBuffer);
3268
3269 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3270
3271 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3272 {
3273 std::vector<const void*> datas;
3274 if (!inputBuffers[ndx]->isImage())
3275 {
3276 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3277 invalidateAlloc(vk, device, resultAlloc);
3278 // we always have our result data first
3279 datas.push_back(resultAlloc.getHostPtr());
3280 }
3281
3282 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3283 {
3284 const deUint32 datasNdx = index - stagesCount;
3285 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3286 {
3287 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3288 invalidateAlloc(vk, device, resultAlloc);
3289 // we always have our result data first
3290 datas.push_back(resultAlloc.getHostPtr());
3291 }
3292 }
3293
3294 // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3295 const bool multiCall = ( stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
3296 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
3297 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
3298 stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT );
3299 const deUint32 usedWidth = ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3300
3301 if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3302 failedIterations++;
3303 }
3304 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3305 {
3306 std::vector<const void*> datas;
3307 const Allocation& resultAlloc = imageBufferResult.getAllocation();
3308 invalidateAlloc(vk, device, resultAlloc);
3309
3310 // we always have our result data first
3311 datas.push_back(resultAlloc.getHostPtr());
3312
3313 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3314 {
3315 const deUint32 datasNdx = index - stagesCount;
3316 if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3317 {
3318 const Allocation& alloc = inputBuffers[index]->getAllocation();
3319 invalidateAlloc(vk, device, alloc);
3320 // we always have our result data first
3321 datas.push_back(alloc.getHostPtr());
3322 }
3323 }
3324
3325 if (!checkResult(internalData, datas, width, subgroupSize, false))
3326 failedIterations++;
3327 }
3328
3329 vk.resetCommandBuffer(*cmdBuffer, 0);
3330 }
3331
3332 if (0 < failedIterations)
3333 {
3334 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3335
3336 context.getTestContext().getLog()
3337 << TestLog::Message << valuesPassed << " / "
3338 << totalIterations << " values passed" << TestLog::EndMessage;
3339
3340 return tcu::TestStatus::fail("Failed!");
3341 }
3342 }
3343
3344 return tcu::TestStatus::pass("OK");
3345 }
3346
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))3347 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
3348 SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3349 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
3350 {
3351 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
3352 0u, 0u);
3353 }
3354
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,SSBOData * extraData,deUint32 extraDataCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const deUint32 vertexShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3355 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
3356 SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
3357 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
3358 const deUint32 vertexShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3359 {
3360 const DeviceInterface& vk = context.getDeviceInterface();
3361 const VkDevice device = context.getDevice();
3362 const VkQueue queue = context.getUniversalQueue();
3363 const deUint32 maxWidth = getMaxWidth();
3364 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3365 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
3366 DescriptorSetLayoutBuilder layoutBuilder;
3367 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3368 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3369 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
3370
3371 const VkVertexInputBindingDescription vertexInputBinding =
3372 {
3373 0u, // binding;
3374 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
3375 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
3376 };
3377
3378 const VkVertexInputAttributeDescription vertexInputAttribute =
3379 {
3380 0u,
3381 0u,
3382 VK_FORMAT_R32G32B32A32_SFLOAT,
3383 0u
3384 };
3385
3386 for (deUint32 i = 0u; i < extraDataCount; i++)
3387 {
3388 if (extraData[i].isImage)
3389 {
3390 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3391 }
3392 else
3393 {
3394 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3395 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3396 }
3397 const Allocation& alloc = inputBuffers[i]->getAllocation();
3398 initializeMemory(context, alloc, extraData[i]);
3399 }
3400
3401 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3402 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3403
3404 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
3405
3406 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
3407
3408 const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3409 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
3410 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3411 *vertexShaderModule, *fragmentShaderModule,
3412 DE_NULL, DE_NULL, DE_NULL,
3413 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3414 &vertexInputBinding, &vertexInputAttribute, true, format,
3415 vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3416 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3417 DescriptorPoolBuilder poolBuilder;
3418 DescriptorSetUpdateBuilder updateBuilder;
3419
3420
3421 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3422 poolBuilder.addType(inputBuffers[ndx]->getType());
3423
3424 Move <VkDescriptorPool> descriptorPool;
3425 Move <VkDescriptorSet> descriptorSet;
3426
3427 if (extraDataCount > 0)
3428 {
3429 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3430 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3431 }
3432
3433 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3434 {
3435 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3436 initializeMemory(context, alloc, extraData[ndx]);
3437 }
3438
3439 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3440 {
3441 if (inputBuffers[buffersNdx]->isImage())
3442 {
3443 VkDescriptorImageInfo info =
3444 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3445 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3446
3447 updateBuilder.writeSingle(*descriptorSet,
3448 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3449 inputBuffers[buffersNdx]->getType(), &info);
3450 }
3451 else
3452 {
3453 VkDescriptorBufferInfo info =
3454 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3455 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3456
3457 updateBuilder.writeSingle(*descriptorSet,
3458 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3459 inputBuffers[buffersNdx]->getType(), &info);
3460 }
3461 }
3462 updateBuilder.update(vk, device);
3463
3464 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3465
3466 const deUint32 subgroupSize = getSubgroupSize(context);
3467
3468 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3469
3470 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3471 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3472
3473 unsigned totalIterations = 0u;
3474 unsigned failedIterations = 0u;
3475
3476 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3477
3478 {
3479 const Allocation& alloc = vertexBuffer.getAllocation();
3480 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3481 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
3482 float leftHandPosition = -1.0f;
3483
3484 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3485 {
3486 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3487 leftHandPosition += pixelSize;
3488 }
3489
3490 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3491 flushAlloc(vk, device, alloc);
3492 }
3493
3494 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3495 const VkViewport viewport = makeViewport(maxWidth, 1u);
3496 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3497 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3498 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3499 const VkDeviceSize vertexBufferOffset = 0u;
3500
3501 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3502 {
3503 totalIterations++;
3504
3505 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3506 {
3507 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3508 initializeMemory(context, alloc, extraData[ndx]);
3509 }
3510
3511 beginCommandBuffer(vk, *cmdBuffer);
3512 {
3513 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3514
3515 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3516
3517 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3518
3519 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3520
3521 if (extraDataCount > 0)
3522 {
3523 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3524 &descriptorSet.get(), 0u, DE_NULL);
3525 }
3526
3527 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3528
3529 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3530
3531 endRenderPass(vk, *cmdBuffer);
3532
3533 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3534
3535 endCommandBuffer(vk, *cmdBuffer);
3536
3537 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3538 }
3539
3540 {
3541 const Allocation& allocResult = imageBufferResult.getAllocation();
3542 invalidateAlloc(vk, device, allocResult);
3543
3544 std::vector<const void*> datas;
3545 datas.push_back(allocResult.getHostPtr());
3546 if (!checkResult(internalData, datas, width, subgroupSize))
3547 failedIterations++;
3548 }
3549 }
3550
3551 if (0 < failedIterations)
3552 {
3553 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3554
3555 context.getTestContext().getLog()
3556 << TestLog::Message << valuesPassed << " / "
3557 << totalIterations << " values passed" << TestLog::EndMessage;
3558
3559 return tcu::TestStatus::fail("Failed!");
3560 }
3561
3562 return tcu::TestStatus::pass("OK");
3563 }
3564
makeFragmentFrameBufferTest(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 height,deUint32 subgroupSize))3565 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(
3566 Context& context, VkFormat format, SSBOData* extraDatas,
3567 deUint32 extraDatasCount, const void* internalData,
3568 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3569 deUint32 height, deUint32 subgroupSize))
3570 {
3571 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult,
3572 0u, 0u);
3573 }
3574
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,deUint32 width,deUint32 height,deUint32 subgroupSize),const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3575 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3576 Context& context, VkFormat format, SSBOData* extraDatas,
3577 deUint32 extraDatasCount, const void* internalData,
3578 bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
3579 deUint32 height, deUint32 subgroupSize),
3580 const deUint32 fragmentShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
3581 {
3582 const DeviceInterface& vk = context.getDeviceInterface();
3583 const VkDevice device = context.getDevice();
3584 const VkQueue queue = context.getUniversalQueue();
3585 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3586 const Unique<VkShaderModule> vertexShaderModule (createShaderModule
3587 (vk, device, context.getBinaryCollection().get("vert"), 0u));
3588 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
3589 (vk, device, context.getBinaryCollection().get("fragment"), 0u));
3590
3591 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
3592
3593 for (deUint32 i = 0; i < extraDatasCount; i++)
3594 {
3595 if (extraDatas[i].isImage)
3596 {
3597 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3598 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3599 }
3600 else
3601 {
3602 vk::VkDeviceSize size =
3603 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3604 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3605 }
3606
3607 const Allocation& alloc = inputBuffers[i]->getAllocation();
3608 initializeMemory(context, alloc, extraDatas[i]);
3609 }
3610
3611 DescriptorSetLayoutBuilder layoutBuilder;
3612
3613 for (deUint32 i = 0; i < extraDatasCount; i++)
3614 {
3615 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
3616 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3617 }
3618
3619 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3620 layoutBuilder.build(vk, device));
3621
3622 const Unique<VkPipelineLayout> pipelineLayout(
3623 makePipelineLayout(vk, device, *descriptorSetLayout));
3624
3625 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3626
3627 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3628 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3629 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3630 *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3631 DE_NULL, DE_NULL, true, VK_FORMAT_R32G32B32A32_SFLOAT,
3632 0u, 0u, 0u, 0u, fragmentShaderStageCreateFlags, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3633
3634 DescriptorPoolBuilder poolBuilder;
3635
3636 // To stop validation complaining, always add at least one type to pool.
3637 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3638 for (deUint32 i = 0; i < extraDatasCount; i++)
3639 {
3640 poolBuilder.addType(inputBuffers[i]->getType());
3641 }
3642
3643 Move<VkDescriptorPool> descriptorPool;
3644 // Create descriptor set
3645 Move<VkDescriptorSet> descriptorSet;
3646
3647 if (extraDatasCount > 0)
3648 {
3649 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3650
3651 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3652 }
3653
3654 DescriptorSetUpdateBuilder updateBuilder;
3655
3656 for (deUint32 i = 0; i < extraDatasCount; i++)
3657 {
3658 if (inputBuffers[i]->isImage())
3659 {
3660 VkDescriptorImageInfo info =
3661 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3662 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3663
3664 updateBuilder.writeSingle(*descriptorSet,
3665 DescriptorSetUpdateBuilder::Location::binding(i),
3666 inputBuffers[i]->getType(), &info);
3667 }
3668 else
3669 {
3670 VkDescriptorBufferInfo info =
3671 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
3672 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3673
3674 updateBuilder.writeSingle(*descriptorSet,
3675 DescriptorSetUpdateBuilder::Location::binding(i),
3676 inputBuffers[i]->getType(), &info);
3677 }
3678 }
3679
3680 if (extraDatasCount > 0)
3681 updateBuilder.update(vk, device);
3682
3683 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3684
3685 const deUint32 subgroupSize = getSubgroupSize(context);
3686
3687 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3688
3689 unsigned totalIterations = 0;
3690 unsigned failedIterations = 0;
3691
3692 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3693 {
3694 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3695 {
3696 totalIterations++;
3697
3698 // re-init the data
3699 for (deUint32 i = 0; i < extraDatasCount; i++)
3700 {
3701 const Allocation& alloc = inputBuffers[i]->getAllocation();
3702 initializeMemory(context, alloc, extraDatas[i]);
3703 }
3704
3705 VkDeviceSize formatSize = getFormatSizeInBytes(format);
3706 const VkDeviceSize resultImageSizeInBytes =
3707 width * height * formatSize;
3708
3709 Image resultImage(context, width, height, format,
3710 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
3711 VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3712
3713 Buffer resultBuffer(context, resultImageSizeInBytes,
3714 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3715
3716 const Unique<VkFramebuffer> framebuffer(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3717
3718 beginCommandBuffer(vk, *cmdBuffer);
3719
3720 VkViewport viewport = makeViewport(width, height);
3721
3722 vk.cmdSetViewport(
3723 *cmdBuffer, 0, 1, &viewport);
3724
3725 VkRect2D scissor = {{0, 0}, {width, height}};
3726
3727 vk.cmdSetScissor(
3728 *cmdBuffer, 0, 1, &scissor);
3729
3730 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3731
3732 vk.cmdBindPipeline(
3733 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3734
3735 if (extraDatasCount > 0)
3736 {
3737 vk.cmdBindDescriptorSets(*cmdBuffer,
3738 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3739 &descriptorSet.get(), 0u, DE_NULL);
3740 }
3741
3742 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3743
3744 endRenderPass(vk, *cmdBuffer);
3745
3746 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3747
3748 endCommandBuffer(vk, *cmdBuffer);
3749
3750 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3751
3752 std::vector<const void*> datas;
3753 {
3754 const Allocation& resultAlloc = resultBuffer.getAllocation();
3755 invalidateAlloc(vk, device, resultAlloc);
3756
3757 // we always have our result data first
3758 datas.push_back(resultAlloc.getHostPtr());
3759 }
3760
3761 if (!checkResult(internalData, datas, width, height, subgroupSize))
3762 {
3763 failedIterations++;
3764 }
3765
3766 vk.resetCommandBuffer(*cmdBuffer, 0);
3767 }
3768 }
3769
3770 if (0 < failedIterations)
3771 {
3772 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3773
3774 context.getTestContext().getLog()
3775 << TestLog::Message << valuesPassed << " / "
3776 << totalIterations << " values passed" << TestLog::EndMessage;
3777
3778 return tcu::TestStatus::fail("Failed!");
3779 }
3780
3781 return tcu::TestStatus::pass("OK");
3782 }
3783
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3784 Move<VkPipeline> makeComputePipeline(Context& context,
3785 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
3786 const deUint32 pipelineShaderStageFlags, const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
3787 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ, deUint32 requiredSubgroupSize)
3788 {
3789 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3790
3791 const vk::VkSpecializationMapEntry entries[3] =
3792 {
3793 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3794 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3795 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3796 };
3797
3798 const vk::VkSpecializationInfo info =
3799 {
3800 /* mapEntryCount = */ 3,
3801 /* pMapEntries = */ entries,
3802 /* dataSize = */ sizeof(localSize),
3803 /* pData = */ localSize
3804 };
3805
3806 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3807 {
3808 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3809 DE_NULL, // void* pNext;
3810 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3811 };
3812
3813 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3814 {
3815 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3816 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
3817 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3818 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
3819 shaderModule, // VkShaderModule module;
3820 "main", // const char* pName;
3821 &info, // const VkSpecializationInfo* pSpecializationInfo;
3822 };
3823
3824 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3825 {
3826 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3827 DE_NULL, // const void* pNext;
3828 pipelineCreateFlags, // VkPipelineCreateFlags flags;
3829 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
3830 pipelineLayout, // VkPipelineLayout layout;
3831 basePipelineHandle, // VkPipeline basePipelineHandle;
3832 -1, // deInt32 basePipelineIndex;
3833 };
3834
3835 return createComputePipeline(context.getDeviceInterface(),
3836 context.getDevice(), DE_NULL, &pipelineCreateInfo);
3837 }
3838
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,SSBOData * inputs,deUint32 inputsCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 subgroupSize),const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)3839 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
3840 Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
3841 bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
3842 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
3843 deUint32 subgroupSize),
3844 const deUint32 pipelineShaderStageCreateFlags, const deUint32 numWorkgroups[3],
3845 const deBool isRequiredSubgroupSize, const deUint32 subgroupSize, const deUint32 localSizesToTest[][3], const deUint32 localSizesToTestCount)
3846 {
3847 const DeviceInterface& vk = context.getDeviceInterface();
3848 const VkDevice device = context.getDevice();
3849 const VkQueue queue = context.getUniversalQueue();
3850 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3851 VkDeviceSize elementSize = getFormatSizeInBytes(format);
3852
3853 VkDeviceSize maxSubgroupSize = maxSupportedSubgroupSize();
3854
3855 if (isRequiredSubgroupSize)
3856 {
3857 VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
3858 subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
3859 subgroupSizeControlProperties.pNext = DE_NULL;
3860
3861 VkPhysicalDeviceProperties2 properties2;
3862 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3863 properties2.pNext = &subgroupSizeControlProperties;
3864 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties2);
3865 maxSubgroupSize = deMax32(subgroupSizeControlProperties.maxSubgroupSize, static_cast<deUint32>(maxSubgroupSize));
3866 }
3867
3868 const VkDeviceSize resultBufferSize = maxSubgroupSize *
3869 maxSubgroupSize *
3870 maxSubgroupSize;
3871
3872 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3873
3874 Buffer resultBuffer(
3875 context, resultBufferSizeInBytes);
3876
3877 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
3878
3879 for (deUint32 i = 0; i < inputsCount; i++)
3880 {
3881 if (inputs[i].isImage)
3882 {
3883 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
3884 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3885 }
3886 else
3887 {
3888 vk::VkDeviceSize size =
3889 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3890 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3891 }
3892
3893 const Allocation& alloc = inputBuffers[i]->getAllocation();
3894 initializeMemory(context, alloc, inputs[i]);
3895 }
3896
3897 DescriptorSetLayoutBuilder layoutBuilder;
3898 layoutBuilder.addBinding(
3899 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3900
3901 for (deUint32 i = 0; i < inputsCount; i++)
3902 {
3903 layoutBuilder.addBinding(
3904 inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3905 }
3906
3907 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3908 layoutBuilder.build(vk, device));
3909
3910 const Unique<VkShaderModule> shaderModule(
3911 createShaderModule(vk, device,
3912 context.getBinaryCollection().get("comp"), 0u));
3913 const Unique<VkPipelineLayout> pipelineLayout(
3914 makePipelineLayout(vk, device, *descriptorSetLayout));
3915
3916 DescriptorPoolBuilder poolBuilder;
3917
3918 poolBuilder.addType(resultBuffer.getType());
3919
3920 for (deUint32 i = 0; i < inputsCount; i++)
3921 {
3922 poolBuilder.addType(inputBuffers[i]->getType());
3923 }
3924
3925 const Unique<VkDescriptorPool> descriptorPool(
3926 poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3927
3928 // Create descriptor set
3929 const Unique<VkDescriptorSet> descriptorSet(
3930 makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3931
3932 DescriptorSetUpdateBuilder updateBuilder;
3933
3934 const VkDescriptorBufferInfo resultDescriptorInfo =
3935 makeDescriptorBufferInfo(
3936 resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3937
3938 updateBuilder.writeSingle(*descriptorSet,
3939 DescriptorSetUpdateBuilder::Location::binding(0u),
3940 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
3941
3942 for (deUint32 i = 0; i < inputsCount; i++)
3943 {
3944 if (inputBuffers[i]->isImage())
3945 {
3946 VkDescriptorImageInfo info =
3947 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3948 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3949
3950 updateBuilder.writeSingle(*descriptorSet,
3951 DescriptorSetUpdateBuilder::Location::binding(i + 1),
3952 inputBuffers[i]->getType(), &info);
3953 }
3954 else
3955 {
3956 vk::VkDeviceSize size =
3957 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3958 VkDescriptorBufferInfo info =
3959 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3960
3961 updateBuilder.writeSingle(*descriptorSet,
3962 DescriptorSetUpdateBuilder::Location::binding(i + 1),
3963 inputBuffers[i]->getType(), &info);
3964 }
3965 }
3966
3967 updateBuilder.update(vk, device);
3968
3969 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3970
3971 unsigned totalIterations = 0;
3972 unsigned failedIterations = 0;
3973
3974 const Unique<VkCommandBuffer> cmdBuffer(
3975 makeCommandBuffer(context, *cmdPool));
3976
3977 std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines(localSizesToTestCount);
3978
3979 context.getTestContext().touchWatchdog();
3980 pipelines[0] =
3981 de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
3982 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3983 pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
3984 localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2],
3985 isRequiredSubgroupSize ? subgroupSize : 0u)));
3986 context.getTestContext().touchWatchdog();
3987
3988 for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3989 {
3990 const deUint32 nextX = localSizesToTest[index][0];
3991 const deUint32 nextY = localSizesToTest[index][1];
3992 const deUint32 nextZ = localSizesToTest[index][2];
3993
3994 context.getTestContext().touchWatchdog();
3995 pipelines[index] =
3996 de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
3997 makeComputePipeline(context, *pipelineLayout, *shaderModule,
3998 pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_DERIVATIVE_BIT, **pipelines[0],
3999 nextX, nextY, nextZ,
4000 isRequiredSubgroupSize ? subgroupSize : 0u)));
4001 context.getTestContext().touchWatchdog();
4002 }
4003
4004 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4005 {
4006
4007 // we are running one test
4008 totalIterations++;
4009
4010 beginCommandBuffer(vk, *cmdBuffer);
4011
4012 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, **pipelines[index]);
4013
4014 vk.cmdBindDescriptorSets(*cmdBuffer,
4015 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
4016 &descriptorSet.get(), 0u, DE_NULL);
4017
4018 vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4019
4020 endCommandBuffer(vk, *cmdBuffer);
4021
4022 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4023
4024 std::vector<const void*> datas;
4025
4026 {
4027 const Allocation& resultAlloc = resultBuffer.getAllocation();
4028 invalidateAlloc(vk, device, resultAlloc);
4029
4030 // we always have our result data first
4031 datas.push_back(resultAlloc.getHostPtr());
4032 }
4033
4034 for (deUint32 i = 0; i < inputsCount; i++)
4035 {
4036 if (!inputBuffers[i]->isImage())
4037 {
4038 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4039 invalidateAlloc(vk, device, resultAlloc);
4040
4041 // we always have our result data first
4042 datas.push_back(resultAlloc.getHostPtr());
4043 }
4044 }
4045
4046 if (!checkResult(internalData, datas, numWorkgroups, localSizesToTest[index], subgroupSize))
4047 {
4048 failedIterations++;
4049 }
4050
4051 vk.resetCommandBuffer(*cmdBuffer, 0);
4052 }
4053
4054 if (0 < failedIterations)
4055 {
4056 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4057
4058 context.getTestContext().getLog()
4059 << TestLog::Message << valuesPassed << " / "
4060 << totalIterations << " values passed" << TestLog::EndMessage;
4061
4062 return tcu::TestStatus::fail("Failed!");
4063 }
4064
4065 return tcu::TestStatus::pass("OK");
4066 }
4067
makeComputeTest(Context & context,VkFormat format,SSBOData * inputs,deUint32 inputsCount,const void * internalData,bool (* checkResult)(const void * internalData,std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 subgroupSize),deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4068 tcu::TestStatus vkt::subgroups::makeComputeTest(
4069 Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
4070 bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
4071 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
4072 deUint32 subgroupSize),
4073 deUint32 requiredSubgroupSize, const deUint32 pipelineShaderStageCreateFlags)
4074 {
4075 const deUint32 numWorkgroups[3] = {4, 2, 2};
4076 deUint32 subgroupSize = requiredSubgroupSize;
4077
4078 if(requiredSubgroupSize == 0)
4079 subgroupSize = vkt::subgroups::getSubgroupSize(context);
4080
4081 const deUint32 localSizesToTestCount = 8;
4082 deUint32 localSizesToTest[localSizesToTestCount][3] =
4083 {
4084 {1, 1, 1},
4085 {subgroupSize, 1, 1},
4086 {1, subgroupSize, 1},
4087 {1, 1, subgroupSize},
4088 {32, 4, 1},
4089 {1, 4, 32},
4090 {3, 5, 7},
4091 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4092 };
4093
4094 return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4095 numWorkgroups, requiredSubgroupSize != 0u, subgroupSize, localSizesToTest, localSizesToTestCount);
4096 }
4097