/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::UniquePtr;
using de::MovePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
    BUFFER = 0, // Normal buffer.
    SHARED,     // Shared global struct in a compute workgroup.
    REFERENCE,  // Buffer passed as a reference.
};

// Helper class holding the shader type and the memory type the atomic operations will use.
class AtomicShaderType
{
public:
    AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
        : m_type             (type)
        , m_atomicMemoryType (memoryType)
    {
        // Shared memory can only be used with compute shaders.
        DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
    }

    glu::ShaderType  getType       (void) const { return m_type; }
    AtomicMemoryType getMemoryType (void) const { return m_atomicMemoryType; }

private:
    glu::ShaderType  m_type;
    AtomicMemoryType m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
    Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

    VkBuffer getBuffer   (void) const { return *m_buffer; }
    void*    getHostPtr  (void) const { return m_allocation->getHostPtr(); }
    void     flush       (void);
    void     invalidate  (void);

private:
    const DeviceInterface&      m_vkd;
    const VkDevice              m_device;
    const VkQueue               m_queue;
    const deUint32              m_queueIndex;
    const Unique<VkBuffer>      m_buffer;
    const UniquePtr<Allocation> m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
    const VkBufferCreateInfo createInfo =
    {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        DE_NULL,
        (VkBufferCreateFlags)0,
        size,
        usageFlags,
        VK_SHARING_MODE_EXCLUSIVE,
        0u,
        DE_NULL
    };
    return createBuffer(vkd, device, &createInfo);
}
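// Note: when the buffer will be accessed through its device address (useRef), the backing memory
// must also be allocated for device-address usage; MemoryRequirement::DeviceAddress is expected to
// add VK_MEMORY_ALLOCATE_FLAG_DEVICE_ADDRESS_BIT to the allocation.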
MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
    const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
    MovePtr<Allocation> alloc (allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

    VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

    return alloc;
}

Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
    : m_vkd        (context.getDeviceInterface())
    , m_device     (context.getDevice())
    , m_queue      (context.getUniversalQueue())
    , m_queueIndex (context.getUniversalQueueFamilyIndex())
    , m_buffer     (createBuffer(context.getDeviceInterface(),
                                 context.getDevice(),
                                 (VkDeviceSize)size,
                                 usage))
    , m_allocation (allocateAndBindMemory(context.getDeviceInterface(),
                                          context.getDevice(),
                                          context.getDefaultAllocator(),
                                          *m_buffer,
                                          useRef))
{
}

void Buffer::flush (void)
{
    flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}
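// invalidate() first submits a barrier that makes all previous device writes available and visible
// to host reads, and only then invalidates the mapped memory range, so the host is guaranteed to
// observe the results of the shader's atomic operations.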
void Buffer::invalidate (void)
{
    const auto cmdPool       = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
    const auto cmdBufferPtr  = vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto cmdBuffer     = cmdBufferPtr.get();
    const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

    beginCommandBuffer(m_vkd, cmdBuffer);
    m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
    endCommandBuffer(m_vkd, cmdBuffer);
    submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

    invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
    ATOMIC_OP_EXCHANGE = 0,
    ATOMIC_OP_COMP_SWAP,
    ATOMIC_OP_ADD,
    ATOMIC_OP_MIN,
    ATOMIC_OP_MAX,
    ATOMIC_OP_AND,
    ATOMIC_OP_OR,
    ATOMIC_OP_XOR,

    ATOMIC_OP_LAST
};

std::string atomicOp2Str (AtomicOperation op)
{
    static const char* const s_names[] =
    {
        "atomicExchange",
        "atomicCompSwap",
        "atomicAdd",
        "atomicMin",
        "atomicMax",
        "atomicAnd",
        "atomicOr",
        "atomicXor"
    };
    return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
    NUM_ELEMENTS = 32
};

enum DataType
{
    DATA_TYPE_INT32 = 0,
    DATA_TYPE_UINT32,
    DATA_TYPE_FLOAT32,
    DATA_TYPE_INT64,
    DATA_TYPE_UINT64,
    DATA_TYPE_FLOAT64,

    DATA_TYPE_LAST
};

std::string dataType2Str (DataType type)
{
    static const char* const s_names[] =
    {
        "int",
        "uint",
        "float",
        "int64_t",
        "uint64_t",
        "double",
    };
    return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
    virtual void setBuffer (void* ptr) = 0;

    virtual size_t bufferSize () = 0;

    virtual void fillWithTestData (de::Random& rnd) = 0;

    virtual void checkResults (tcu::ResultCollector& resultCollector) = 0;

    virtual ~BufferInterface () {}
};

template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

    TestBuffer (AtomicOperation atomicOp)
        : m_atomicOp(atomicOp)
    {}

    template<typename T>
    struct BufferData
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
        T       inout[NUM_ELEMENTS / 2];
        T       input[NUM_ELEMENTS];
        T       compare[NUM_ELEMENTS];
        T       output[NUM_ELEMENTS];
        T       invocationHitCount[NUM_ELEMENTS];
        deInt32 index;
    };
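    // For example, with NUM_ELEMENTS == 32 the invocations that read input[3] and input[19]
    // both operate on inout[3], and their return values are stored in output[3] and output[19].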

    virtual void setBuffer (void* ptr)
    {
        m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
    }

    virtual size_t bufferSize ()
    {
        return sizeof(BufferData<dataTypeT>);
    }

    virtual void fillWithTestData (de::Random& rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
            // The first half of compare elements match with every even index.
            // The second half matches with odd indices. This causes the
            // overlapping operations to only select one.
            m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
            m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
        }
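        // E.g. for i == 0: compare[0] == inout[0] (matches) while compare[16] == inout[0] + 1
        // (does not); for i == 1 it is the other way around, so at most one of the two
        // overlapping compSwap operations on each inout element can initially succeed.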
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
            m_ptr->output[i] = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }
        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults (tcu::ResultCollector& resultCollector)
    {
        checkOperation(m_original, *m_ptr, resultCollector);
    }

    template<typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected (T inout, T output0, T output1)
            : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare (T inout, T output0, T output1)
        {
            return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
                    && deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
                    && deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
        }
    };

    void checkOperation (const BufferData<dataTypeT>& original,
                         const BufferData<dataTypeT>& result,
                         tcu::ResultCollector& resultCollector);

    const AtomicOperation m_atomicOp;

    BufferData<dataTypeT>* m_ptr;
    BufferData<dataTypeT>  m_original;

};

template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:

    TestBufferFloatingPoint (AtomicOperation atomicOp)
        : m_atomicOp(atomicOp)
    {}

    template<typename T>
    struct BufferDataFloatingPoint
    {
        // Use half the number of elements for inout to cause overlap between atomic operations.
        // Each inout element at index i will have two atomic operations using input from
        // indices i and i + NUM_ELEMENTS / 2.
        T       inout[NUM_ELEMENTS / 2];
        T       input[NUM_ELEMENTS];
        T       compare[NUM_ELEMENTS];
        T       output[NUM_ELEMENTS];
        T       invocationHitCount[NUM_ELEMENTS];
        deInt32 index;
    };

    virtual void setBuffer (void* ptr)
    {
        m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
    }

    virtual size_t bufferSize ()
    {
        return sizeof(BufferDataFloatingPoint<dataTypeT>);
    }

    virtual void fillWithTestData (de::Random& rnd)
    {
        dataTypeT pattern;
        deMemset(&pattern, 0xcd, sizeof(dataTypeT));

        for (int i = 0; i < NUM_ELEMENTS / 2; i++)
        {
            m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getFloat());
            // The first half of compare elements match with every even index.
            // The second half matches with odd indices. This causes the
            // overlapping operations to only select one.
            m_ptr->compare[i] = m_ptr->inout[i] + (dataTypeT)(i % 2);
            m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + (dataTypeT)(1 - (i % 2));
        }
        for (int i = 0; i < NUM_ELEMENTS; i++)
        {
            m_ptr->input[i] = static_cast<dataTypeT>(rnd.getFloat());
            m_ptr->output[i] = pattern;
            m_ptr->invocationHitCount[i] = 0;
        }
        m_ptr->index = 0;

        // Take a copy to be used when calculating expected values.
        m_original = *m_ptr;
    }

    virtual void checkResults (tcu::ResultCollector& resultCollector)
    {
        checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
    }

    template<typename T>
    struct Expected
    {
        T m_inout;
        T m_output[2];

        Expected (T inout, T output0, T output1)
            : m_inout(inout)
        {
            m_output[0] = output0;
            m_output[1] = output1;
        }

        bool compare (T inout, T output0, T output1)
        {
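            // Floating-point atomics may round differently depending on the order in which the
            // two overlapping operations execute, so compare with an absolute tolerance instead
            // of bit-exact equality. The 1e-5 epsilon appears to assume the small input
            // magnitudes produced by rnd.getFloat().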
            T diff1 = static_cast<T>(fabs(m_inout - inout));
            T diff2 = static_cast<T>(fabs(m_output[0] - output0));
            T diff3 = static_cast<T>(fabs(m_output[1] - output1));
            const T epsilon = static_cast<T>(0.00001);
            return (diff1 < epsilon) && (diff2 < epsilon) && (diff3 < epsilon);
        }
    };

    void checkOperationFloatingPoint (const BufferDataFloatingPoint<dataTypeT>& original,
                                      const BufferDataFloatingPoint<dataTypeT>& result,
                                      tcu::ResultCollector& resultCollector);

    const AtomicOperation m_atomicOp;

    BufferDataFloatingPoint<dataTypeT>* m_ptr;
    BufferDataFloatingPoint<dataTypeT>  m_original;

};

static BufferInterface* createTestBuffer (DataType type, AtomicOperation atomicOp)
{
    switch (type)
    {
    case DATA_TYPE_INT32:
        return new TestBuffer<deInt32>(atomicOp);
    case DATA_TYPE_UINT32:
        return new TestBuffer<deUint32>(atomicOp);
    case DATA_TYPE_FLOAT32:
        return new TestBufferFloatingPoint<float>(atomicOp);
    case DATA_TYPE_INT64:
        return new TestBuffer<deInt64>(atomicOp);
    case DATA_TYPE_UINT64:
        return new TestBuffer<deUint64>(atomicOp);
    case DATA_TYPE_FLOAT64:
        return new TestBufferFloatingPoint<double>(atomicOp);
    default:
        DE_ASSERT(false);
        return DE_NULL;
    }
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>& original,
                                    const BufferData<T>& result,
                                    tcu::ResultCollector& resultCollector)
{
    // originalInout = original inout
    // input0        = input at index i
    // input1        = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before the
    // operation, and this value is stored as output. Two operations
    // are executed for each InOut value (using input0 and input1).
    //
    // Since two operations overlap on each InOut element, the resulting
    // InOut value and the outputs of the operations have two result
    // candidates depending on the execution order. Verification passes
    // if the results match one of these options.
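    //
    // For example, for atomicAdd with originalInout = 1, input0 = 2 and input1 = 3, the two
    // acceptable outcomes are (InOut = 6, Output0 = 1, Output1 = 3) when the first operation
    // executes first, and (InOut = 6, Output0 = 4, Output1 = 1) when the second one does.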

    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as signed values.
        const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
        const T input0        = *reinterpret_cast<const T*>(&original.input[elementNdx]);
        const T input1        = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T> > exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
            {
                exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
                exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
            }
            break;

        case ATOMIC_OP_AND:
            {
                exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
                exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
            }
            break;

        case ATOMIC_OP_OR:
            {
                exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
                exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
            }
            break;

        case ATOMIC_OP_XOR:
            {
                exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
                exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
            }
            break;

        case ATOMIC_OP_MIN:
            {
                exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
                exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
            }
            break;

        case ATOMIC_OP_MAX:
            {
                exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
                exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
            }
            break;

        case ATOMIC_OP_EXCHANGE:
            {
                exp.push_back(Expected<T>(input1, originalInout, input0));
                exp.push_back(Expected<T>(input0, input1, originalInout));
            }
            break;

        case ATOMIC_OP_COMP_SWAP:
            {
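                // Only one of the two overlapping compSwaps has a matching compare value
                // (see fillWithTestData): the first one for even elements and the second one
                // for odd elements, so exactly one swap can succeed.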
                if (elementNdx % 2 == 0)
                {
                    exp.push_back(Expected<T>(input0, originalInout, input0));
                    exp.push_back(Expected<T>(input0, originalInout, originalInout));
                }
                else
                {
                    exp.push_back(Expected<T>(input1, input1, originalInout));
                    exp.push_back(Expected<T>(input1, originalInout, originalInout));
                }
            }
            break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo      = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
                         << tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
                         << ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
                         << tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
                         << ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
                         << tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
                         << " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint (const BufferDataFloatingPoint<T>& original,
                                                              const BufferDataFloatingPoint<T>& result,
                                                              tcu::ResultCollector& resultCollector)
{
    // originalInout = original inout
    // input0        = input at index i
    // input1        = input at index i + NUM_ELEMENTS / 2
    //
    // The atomic operation returns the memory contents before the
    // operation, and this value is stored as output. Two operations
    // are executed for each InOut value (using input0 and input1).
    //
    // Since two operations overlap on each InOut element, the resulting
    // InOut value and the outputs of the operations have two result
    // candidates depending on the execution order. Verification passes
    // if the results match one of these options.

    for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
    {
        // Needed when reinterpreting the data as floating point values.
        const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
        const T input0        = *reinterpret_cast<const T*>(&original.input[elementNdx]);
        const T input1        = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

        // Expected results are collected to this vector.
        vector<Expected<T> > exp;

        switch (m_atomicOp)
        {
        case ATOMIC_OP_ADD:
            {
                exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
                exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
            }
            break;

        case ATOMIC_OP_EXCHANGE:
            {
                exp.push_back(Expected<T>(input1, originalInout, input0));
                exp.push_back(Expected<T>(input0, input1, originalInout));
            }
            break;

        default:
            DE_FATAL("Unexpected atomic operation.");
            break;
        }

        const T resIo      = result.inout[elementNdx];
        const T resOutput0 = result.output[elementNdx];
        const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];

        if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
        {
            std::ostringstream errorMessage;
            errorMessage << "ERROR: Result value check failed at index " << elementNdx
                         << ". Expected one of the two outcomes: InOut = " << exp[0].m_inout
                         << ", Output0 = " << exp[0].m_output[0] << ", Output1 = "
                         << exp[0].m_output[1] << ", or InOut = " << exp[1].m_inout
                         << ", Output0 = " << exp[1].m_output[0] << ", Output1 = "
                         << exp[1].m_output[1] << ". Got: InOut = " << resIo
                         << ", Output0 = " << resOutput0 << ", Output1 = "
                         << resOutput1 << ". Using Input0 = " << original.input[elementNdx]
                         << " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

            resultCollector.fail(errorMessage.str());
        }
    }
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
    AtomicOperationCaseInstance (Context& context,
                                 const ShaderSpec& shaderSpec,
                                 AtomicShaderType shaderType,
                                 DataType dataType,
                                 AtomicOperation atomicOp);

    virtual tcu::TestStatus iterate (void);

private:
    const ShaderSpec& m_shaderSpec;
    AtomicShaderType  m_shaderType;
    const DataType    m_dataType;
    AtomicOperation   m_atomicOp;

};

AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context& context,
                                                          const ShaderSpec& shaderSpec,
                                                          AtomicShaderType shaderType,
                                                          DataType dataType,
                                                          AtomicOperation atomicOp)
    : TestInstance (context)
    , m_shaderSpec (shaderSpec)
    , m_shaderType (shaderType)
    , m_dataType   (dataType)
    , m_atomicOp   (atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate (void)
{
    de::UniquePtr<BufferInterface> testBuffer (createTestBuffer(m_dataType, m_atomicOp));
    tcu::TestLog&            log        = m_context.getTestContext().getLog();
    const DeviceInterface&   vkd        = m_context.getDeviceInterface();
    const VkDevice           device     = m_context.getDevice();
    de::Random               rnd        (0x62a15e34);
    const bool               useRef     = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
    const VkDescriptorType   descType   = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    const VkBufferUsageFlags usageFlags = (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

    // The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
    // a uniform buffer. If not, it will be passed directly as a descriptor.
    Buffer                  buffer (m_context, usageFlags, testBuffer->bufferSize(), useRef);
    std::unique_ptr<Buffer> auxBuffer;

    if (useRef)
    {
        // Pass the main buffer address inside a uniform buffer.
        const VkBufferDeviceAddressInfo addressInfo =
        {
            VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
            nullptr,                                      // const void*     pNext;
            buffer.getBuffer(),                           // VkBuffer        buffer;
        };
        const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

        auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
        deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
        auxBuffer->flush();
    }

    testBuffer->setBuffer(buffer.getHostPtr());
    testBuffer->fillWithTestData(rnd);

    buffer.flush();

    Move<VkDescriptorSetLayout> extraResourcesLayout;
    Move<VkDescriptorPool>      extraResourcesSetPool;
    Move<VkDescriptorSet>       extraResourcesSet;

    const VkDescriptorSetLayoutBinding bindings[] =
    {
        { 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
    };

    const VkDescriptorSetLayoutCreateInfo layoutInfo =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        DE_NULL,
        (VkDescriptorSetLayoutCreateFlags)0u,
        DE_LENGTH_OF_ARRAY(bindings),
        bindings
    };

    extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

    const VkDescriptorPoolSize poolSizes[] =
    {
        { descType, 1u }
    };

    const VkDescriptorPoolCreateInfo poolInfo =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        DE_NULL,
        (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        1u, // maxSets
        DE_LENGTH_OF_ARRAY(poolSizes),
        poolSizes
    };

    extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

    const VkDescriptorSetAllocateInfo allocInfo =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        DE_NULL,
        *extraResourcesSetPool,
        1u,
        &extraResourcesLayout.get()
    };

    extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

    VkDescriptorBufferInfo bufferInfo;
    bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
    bufferInfo.offset = 0u;
    bufferInfo.range  = VK_WHOLE_SIZE;

    const VkWriteDescriptorSet descriptorWrite =
    {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        DE_NULL,
        *extraResourcesSet,
        0u, // dstBinding
        0u, // dstArrayElement
        1u,
        descType,
        (const VkDescriptorImageInfo*)DE_NULL,
        &bufferInfo,
        (const VkBufferView*)DE_NULL
    };

    vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

    // Storage for output varying data.
    std::vector<deUint32> outputs   (NUM_ELEMENTS);
    std::vector<void*>    outputPtr (NUM_ELEMENTS);

    for (size_t i = 0; i < NUM_ELEMENTS; i++)
    {
        outputs[i] = 0xcdcdcdcd;
        outputPtr[i] = &outputs[i];
    }

    const int                 numWorkGroups = ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
    UniquePtr<ShaderExecutor> executor      (createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

    executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
    buffer.invalidate();

    tcu::ResultCollector resultCollector(log);

    // Check the results of the atomic operation.
    testBuffer->checkResults(resultCollector);

    return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
    AtomicOperationCase (tcu::TestContext& testCtx,
                         const char* name,
                         const char* description,
                         AtomicShaderType type,
                         DataType dataType,
                         AtomicOperation atomicOp);
    virtual ~AtomicOperationCase (void);

    virtual TestInstance* createInstance (Context& ctx) const;
    virtual void checkSupport (Context& ctx) const;
    virtual void initPrograms (vk::SourceCollections& programCollection) const
    {
        generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
    }

private:

    void createShaderSpec ();
    ShaderSpec             m_shaderSpec;
    const AtomicShaderType m_shaderType;
    const DataType         m_dataType;
    const AtomicOperation  m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase (tcu::TestContext& testCtx,
                                          const char* name,
                                          const char* description,
                                          AtomicShaderType shaderType,
                                          DataType dataType,
                                          AtomicOperation atomicOp)
    : TestCase     (testCtx, name, description)
    , m_shaderType (shaderType)
    , m_dataType   (dataType)
    , m_atomicOp   (atomicOp)
{
    createShaderSpec();
    init();
}

AtomicOperationCase::~AtomicOperationCase (void)
{
}

TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
    return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

void AtomicOperationCase::checkSupport (Context& ctx) const
{
    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

        const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
        const bool isSharedMemory      = (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);

        if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
        }
        if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
        {
            TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT32)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_dataType == DATA_TYPE_FLOAT64)
    {
        ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
        if (m_atomicOp == ATOMIC_OP_ADD)
        {
            if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
                }
            }
        }
        if (m_atomicOp == ATOMIC_OP_EXCHANGE)
        {
            if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
                }
            }
            else
            {
                if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
                {
                    TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
                }
            }
        }
    }

    if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
    {
        ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
    }

    // Check stores and atomic operation support.
    switch (m_shaderType.getType())
    {
    case glu::SHADERTYPE_VERTEX:
    case glu::SHADERTYPE_TESSELLATION_CONTROL:
    case glu::SHADERTYPE_TESSELLATION_EVALUATION:
    case glu::SHADERTYPE_GEOMETRY:
        if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
            TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in vertex, tessellation, and geometry shaders.");
        break;
    case glu::SHADERTYPE_FRAGMENT:
        if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
            TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shaders.");
        break;
    case glu::SHADERTYPE_COMPUTE:
        break;
    default:
        DE_FATAL("Unsupported shader type");
    }
}

void AtomicOperationCase::createShaderSpec (void)
{
    const AtomicMemoryType memoryType = m_shaderType.getMemoryType();

    // Global declarations.
    std::ostringstream shaderTemplateGlobalStream;

    // Structure in use for atomic operations.
    shaderTemplateGlobalStream
        << "${EXTENSIONS}\n"
        << "\n"
        << "struct AtomicStruct\n"
        << "{\n"
        << "    ${DATATYPE} inoutValues[${N}/2];\n"
        << "    ${DATATYPE} inputValues[${N}];\n"
        << "    ${DATATYPE} compareValues[${N}];\n"
        << "    ${DATATYPE} outputValues[${N}];\n"
        << "    int invocationHitCount[${N}];\n"
        << "    int index;\n"
        << "};\n"
        << "\n"
        ;

    // The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
    // as "buf.data", which is the name used in the atomic operation statements.
    //
    // * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
    // * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
    // * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
    //
    if (memoryType != AtomicMemoryType::REFERENCE)
    {
        shaderTemplateGlobalStream
            << "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
            << "    AtomicStruct data;\n"
            << "} ${RESULT_BUFFER_NAME};\n"
            << "\n"
            ;

        // When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
        // descriptor set as the sources and results of each tested operation.
        if (memoryType == AtomicMemoryType::SHARED)
        {
            shaderTemplateGlobalStream
                << "shared struct { AtomicStruct data; } buf;\n"
                << "\n"
                ;
        }
    }
    else
    {
        shaderTemplateGlobalStream
            << "layout (buffer_reference) buffer AtomicBuffer {\n"
            << "    AtomicStruct data;\n"
            << "};\n"
            << "\n"
            << "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
            << "    AtomicBuffer buf;\n"
            << "};\n"
            << "\n"
            ;
    }

    const auto                shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
    const tcu::StringTemplate shaderTemplateGlobal       (shaderTemplateGlobalString);

    // Shader body for the non-vertex case.
    std::ostringstream nonVertexShaderTemplateStream;

    if (memoryType == AtomicMemoryType::SHARED)
    {
        // Invocation zero will initialize the shared structure from the descriptor set.
        nonVertexShaderTemplateStream
            << "if (gl_LocalInvocationIndex == 0u)\n"
            << "{\n"
            << "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
            << "}\n"
            << "barrier();\n"
            ;
    }

    if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
    {
        nonVertexShaderTemplateStream
            << "if (!gl_HelperInvocation) {\n"
            << "    int idx = atomicAdd(buf.data.index, 1);\n"
            << "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
            << "}\n"
            ;
    }
    else
    {
        nonVertexShaderTemplateStream
            << "int idx = atomicAdd(buf.data.index, 1);\n"
            << "buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
            ;
    }

    if (memoryType == AtomicMemoryType::SHARED)
    {
        // Invocation zero will copy results back to the descriptor set.
        nonVertexShaderTemplateStream
            << "barrier();\n"
            << "if (gl_LocalInvocationIndex == 0u)\n"
            << "{\n"
            << "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
            << "}\n"
            ;
    }

    const auto                nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
    const tcu::StringTemplate nonVertexShaderTemplateSrc       (nonVertexShaderTemplateStreamStr);

    // Shader body for the vertex case.
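    // A vertex shader may be invoked more than once for the same vertex index (the number of
    // invocations per vertex is not strictly guaranteed), which presumably is why
    // invocationHitCount is used to make sure only the first invocation for each index performs
    // the atomic operation, keeping the results verifiable.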
1119 const tcu::StringTemplate vertexShaderTemplateSrc(
1120 "int idx = gl_VertexIndex;\n"
1121 "if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
1122 "{\n"
1123 " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
1124 "}\n");
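    // For reference, with ${ATOMICOP} = atomicAdd, ${DATATYPE} = uint, ${N} = 32 and plain buffer
    // memory, the non-vertex body above specializes to roughly:
    //
    //     int idx = atomicAdd(buf.data.index, 1);
    //     buf.data.outputValues[idx] = atomicAdd(buf.data.inoutValues[idx % (32/2)], buf.data.inputValues[idx]);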

    // Extensions.
    std::ostringstream extensions;

    if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
    {
        extensions
            << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
            << "#extension GL_EXT_shader_atomic_int64 : enable\n"
            ;
    }
    else if ((m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
    {
        extensions
            << "#extension GL_EXT_shader_atomic_float : enable\n"
            << "#extension GL_KHR_memory_scope_semantics : enable\n"
            ;
    }

    if (memoryType == AtomicMemoryType::REFERENCE)
    {
        extensions << "#extension GL_EXT_buffer_reference : require\n";
    }

    // Specializations.
    std::map<std::string, std::string> specializations;

    specializations["EXTENSIONS"]         = extensions.str();
    specializations["DATATYPE"]           = dataType2Str(m_dataType);
    specializations["ATOMICOP"]           = atomicOp2Str(m_atomicOp);
    specializations["SETIDX"]             = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
    specializations["N"]                  = de::toString((int)NUM_ELEMENTS);
    specializations["COMPARE_ARG"]        = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
    specializations["RESULT_BUFFER_NAME"] = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");

    // Shader spec.
    m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
    m_shaderSpec.glslVersion        = glu::GLSL_VERSION_450;
    m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
    m_shaderSpec.source             = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
                                        ? vertexShaderTemplateSrc.specialize(specializations)
                                        : nonVertexShaderTemplateSrc.specialize(specializations));

    if (memoryType == AtomicMemoryType::SHARED)
    {
        // When using global shared memory, use a single workgroup and an appropriate number of local invocations.
        m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
    }
}

void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
    tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();

    static const struct
    {
        glu::ShaderType type;
        const char*     name;
    } shaderTypes[] =
    {
        { glu::SHADERTYPE_VERTEX,                  "vertex"    },
        { glu::SHADERTYPE_FRAGMENT,                "fragment"  },
        { glu::SHADERTYPE_GEOMETRY,                "geometry"  },
        { glu::SHADERTYPE_TESSELLATION_CONTROL,    "tess_ctrl" },
        { glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval" },
        { glu::SHADERTYPE_COMPUTE,                 "compute"   },
    };

    static const struct
    {
        AtomicMemoryType type;
        const char*      suffix;
    } kMemoryTypes[] =
    {
        { AtomicMemoryType::BUFFER,    ""           },
        { AtomicMemoryType::SHARED,    "_shared"    },
        { AtomicMemoryType::REFERENCE, "_reference" },
    };

    static const struct
    {
        DataType    dataType;
        const char* name;
        const char* description;
    } dataSign[] =
    {
        { DATA_TYPE_INT32,   "signed",        "Tests using signed data (int)"           },
        { DATA_TYPE_UINT32,  "unsigned",      "Tests using unsigned data (uint)"        },
        { DATA_TYPE_FLOAT32, "float32",       "Tests using 32-bit float data"           },
        { DATA_TYPE_INT64,   "signed64bit",   "Tests using 64-bit signed data (int64)"  },
        { DATA_TYPE_UINT64,  "unsigned64bit", "Tests using 64-bit unsigned data (uint64)" },
        { DATA_TYPE_FLOAT64, "float64",       "Tests using 64-bit float data"           }
    };

    static const struct
    {
        AtomicOperation value;
        const char*     name;
    } atomicOp[] =
    {
        { ATOMIC_OP_EXCHANGE,  "exchange"  },
        { ATOMIC_OP_COMP_SWAP, "comp_swap" },
        { ATOMIC_OP_ADD,       "add"       },
        { ATOMIC_OP_MIN,       "min"       },
        { ATOMIC_OP_MAX,       "max"       },
        { ATOMIC_OP_AND,       "and"       },
        { ATOMIC_OP_OR,        "or"        },
        { ATOMIC_OP_XOR,       "xor"       }
    };
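    // Test names follow the pattern <op>_<data>_<shader><memory suffix>, e.g.
    // "add_unsigned_compute_shared" or "exchange_float32_fragment_reference".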

    for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
    {
        for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
        {
            for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
            {
                // Only ADD and EXCHANGE are supported on floating-point types.
                if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
                {
                    if (atomicOp[opNdx].value != ATOMIC_OP_ADD && atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
                    {
                        continue;
                    }
                }

                for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
                {
                    // Shared memory is only available in compute shaders.
                    if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
                        continue;

                    const std::string description = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
                    const std::string name        = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

                    atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
                }
            }
        }
    }
}

} // anonymous namespace

tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
    return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}

} // namespace shaderexecutor
} // namespace vkt