/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Atomic operations (OpAtomic*) tests.
 *//*--------------------------------------------------------------------*/

#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"

#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"

#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"

#include <string>
#include <memory>
#include <cmath>

namespace vkt
{
namespace shaderexecutor
{

namespace
{

using de::UniquePtr;
using de::MovePtr;
using std::vector;

using namespace vk;

enum class AtomicMemoryType
{
	BUFFER = 0,	// Normal buffer.
	SHARED,		// Shared global struct in a compute workgroup.
	REFERENCE,	// Buffer passed as a reference.
};

// Helper class indicating the shader type and which memory type is used for the atomic operations.
class AtomicShaderType
{
public:
	AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
		: m_type				(type)
		, m_atomicMemoryType	(memoryType)
	{
		// Shared memory can only be used with compute shaders.
		DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
	}

	glu::ShaderType		getType					(void) const	{ return m_type; }
	AtomicMemoryType	getMemoryType			(void) const	{ return m_atomicMemoryType; }

private:
	glu::ShaderType		m_type;
	AtomicMemoryType	m_atomicMemoryType;
};

// Buffer helper
class Buffer
{
public:
						Buffer				(Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);

	VkBuffer			getBuffer			(void) const { return *m_buffer;					}
	void*				getHostPtr			(void) const { return m_allocation->getHostPtr();	}
	void				flush				(void);
	void				invalidate			(void);

private:
	const DeviceInterface&		m_vkd;
	const VkDevice				m_device;
	const VkQueue				m_queue;
	const deUint32				m_queueIndex;
	const Unique<VkBuffer>		m_buffer;
	const UniquePtr<Allocation>	m_allocation;
};

typedef de::SharedPtr<Buffer> BufferSp;

Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
	const VkBufferCreateInfo createInfo	=
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		DE_NULL,
		(VkBufferCreateFlags)0,
		size,
		usageFlags,
		VK_SHARING_MODE_EXCLUSIVE,
		0u,
		DE_NULL
	};
	return createBuffer(vkd, device, &createInfo);
}

MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
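	// Host-visible memory is required so the test can write input data and read back results through a
	// mapping. When the buffer is going to be accessed through its device address (useRef), the
	// allocation additionally needs the device-address memory requirement.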
	const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
	MovePtr<Allocation>	alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));

	VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));

	return alloc;
}

Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
	: m_vkd			(context.getDeviceInterface())
	, m_device		(context.getDevice())
	, m_queue		(context.getUniversalQueue())
	, m_queueIndex	(context.getUniversalQueueFamilyIndex())
	, m_buffer		(createBuffer			(context.getDeviceInterface(),
											 context.getDevice(),
											 (VkDeviceSize)size,
											 usage))
	, m_allocation	(allocateAndBindMemory	(context.getDeviceInterface(),
											 context.getDevice(),
											 context.getDefaultAllocator(),
											 *m_buffer,
											 useRef))
{
}

void Buffer::flush (void)
{
	flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

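// Makes device writes to the buffer visible to the host: a memory barrier to the host stage is
// submitted and waited on before invalidating the mapped memory range.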
void Buffer::invalidate (void)
{
	const auto	cmdPool			= vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
	const auto	cmdBufferPtr	= vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto	cmdBuffer		= cmdBufferPtr.get();
	const auto	bufferBarrier	= vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);

	beginCommandBuffer(m_vkd, cmdBuffer);
	m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
	endCommandBuffer(m_vkd, cmdBuffer);
	submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);

	invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}

// Tests

enum AtomicOperation
{
	ATOMIC_OP_EXCHANGE = 0,
	ATOMIC_OP_COMP_SWAP,
	ATOMIC_OP_ADD,
	ATOMIC_OP_MIN,
	ATOMIC_OP_MAX,
	ATOMIC_OP_AND,
	ATOMIC_OP_OR,
	ATOMIC_OP_XOR,

	ATOMIC_OP_LAST
};

std::string atomicOp2Str (AtomicOperation op)
{
	static const char* const s_names[] =
	{
		"atomicExchange",
		"atomicCompSwap",
		"atomicAdd",
		"atomicMin",
		"atomicMax",
		"atomicAnd",
		"atomicOr",
		"atomicXor"
	};
	return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}

enum
{
	NUM_ELEMENTS = 32
};

enum DataType
{
	DATA_TYPE_INT32 = 0,
	DATA_TYPE_UINT32,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_INT64,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT64,

	DATA_TYPE_LAST
};

std::string dataType2Str(DataType type)
{
	static const char* const s_names[] =
	{
		"int",
		"uint",
		"float",
		"int64_t",
		"uint64_t",
		"double",
	};
	return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}

class BufferInterface
{
public:
	virtual void setBuffer(void* ptr) = 0;

	virtual size_t bufferSize() = 0;

	virtual void fillWithTestData(de::Random &rnd) = 0;

	virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;

	virtual ~BufferInterface() {};
};

template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:

	TestBuffer(AtomicOperation	atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferData
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];
		T			output[NUM_ELEMENTS];
		T			invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};
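	// Note: this layout is expected to mirror the AtomicStruct declared in the generated GLSL
	// (see AtomicOperationCase::createShaderSpec), so results can be read directly from the mapping.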

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferData<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random &rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
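			// For example, with i == 0 and NUM_ELEMENTS == 32: compare[0] == inout[0] (a match)
			// while compare[16] == inout[0] + 1 (no match), so only the operation using input[0]
			// can perform the swap for element 0.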
			m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector&	resultCollector)
	{
		checkOperation(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected (T inout, T output0, T output1)
		: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

		bool compare (T inout, T output0, T output1)
		{
			return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
					&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
					&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
		}
	};

	void checkOperation	(const BufferData<dataTypeT>&	original,
						 const BufferData<dataTypeT>&	result,
						 tcu::ResultCollector&			resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferData<dataTypeT>* m_ptr;
	BufferData<dataTypeT>  m_original;

};

template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:

	TestBufferFloatingPoint(AtomicOperation	atomicOp)
		: m_atomicOp(atomicOp)
	{}

	template<typename T>
	struct BufferDataFloatingPoint
	{
		// Use half the number of elements for inout to cause overlap between atomic operations.
		// Each inout element at index i will have two atomic operations using input from
		// indices i and i + NUM_ELEMENTS / 2.
		T			inout[NUM_ELEMENTS / 2];
		T			input[NUM_ELEMENTS];
		T			compare[NUM_ELEMENTS];
		T			output[NUM_ELEMENTS];
		T			invocationHitCount[NUM_ELEMENTS];
		deInt32		index;
	};

	virtual void setBuffer(void* ptr)
	{
		m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
	}

	virtual size_t bufferSize()
	{
		return sizeof(BufferDataFloatingPoint<dataTypeT>);
	}

	virtual void fillWithTestData(de::Random& rnd)
	{
		dataTypeT pattern;
		deMemset(&pattern, 0xcd, sizeof(dataTypeT));

		for (int i = 0; i < NUM_ELEMENTS / 2; i++)
		{
			m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getFloat());
			// The first half of compare elements match with every even index.
			// The second half matches with odd indices. This causes the
			// overlapping operations to only select one.
			m_ptr->compare[i] = m_ptr->inout[i] + (dataTypeT)(i % 2);
			m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + (dataTypeT)(1 - (i % 2));
		}
		for (int i = 0; i < NUM_ELEMENTS; i++)
		{
			m_ptr->input[i] = static_cast<dataTypeT>(rnd.getFloat());
			m_ptr->output[i] = pattern;
			m_ptr->invocationHitCount[i] = 0;
		}
		m_ptr->index = 0;

		// Take a copy to be used when calculating expected values.
		m_original = *m_ptr;
	}

	virtual void checkResults(tcu::ResultCollector& resultCollector)
	{
		checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
	}

	template<typename T>
	struct Expected
	{
		T m_inout;
		T m_output[2];

		Expected(T inout, T output0, T output1)
			: m_inout(inout)
		{
			m_output[0] = output0;
			m_output[1] = output1;
		}

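		// Results are compared with a small tolerance rather than bit-exactly: floating-point adds
		// are not associative under rounding, so the final value can differ slightly depending on
		// the order in which the two overlapping operations executed.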
		bool compare(T inout, T output0, T output1)
		{
			T diff1 = static_cast<T>(fabs(m_inout - inout));
			T diff2 = static_cast<T>(fabs(m_output[0] - output0));
			T diff3 = static_cast<T>(fabs(m_output[1] - output1));
			const T epsilon = static_cast<T>(0.00001);
			return (diff1 < epsilon) && (diff2 < epsilon) && (diff3 < epsilon);
		}
	};

	void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
		const BufferDataFloatingPoint<dataTypeT>& result,
		tcu::ResultCollector& resultCollector);

	const AtomicOperation	m_atomicOp;

	BufferDataFloatingPoint<dataTypeT>* m_ptr;
	BufferDataFloatingPoint<dataTypeT>  m_original;

};

static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
{
	switch (type)
	{
	case DATA_TYPE_INT32:
		return new TestBuffer<deInt32>(atomicOp);
	case DATA_TYPE_UINT32:
		return new TestBuffer<deUint32>(atomicOp);
	case DATA_TYPE_FLOAT32:
		return new TestBufferFloatingPoint<float>(atomicOp);
	case DATA_TYPE_INT64:
		return new TestBuffer<deInt64>(atomicOp);
	case DATA_TYPE_UINT64:
		return new TestBuffer<deUint64>(atomicOp);
	case DATA_TYPE_FLOAT64:
		return new TestBufferFloatingPoint<double>(atomicOp);
	default:
		DE_ASSERT(false);
		return DE_NULL;
	}
}

// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>&	original,
									const BufferData<T>&	result,
									tcu::ResultCollector&	resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before
	// the operation, and this value is stored as output. Two operations
	// are executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the
	// resulting InOut value and the outputs of the operations have
	// two result candidates depending on the execution order.
	// Verification passes if the results match one of these options.
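	//
	// As a concrete example, for atomicAdd with originalInout = 5, input0 = 3 and input1 = 7 the
	// two accepted outcomes are (InOut = 15, Output0 = 5, Output1 = 8) if the input0 operation ran
	// first, and (InOut = 15, Output0 = 12, Output1 = 5) if the input1 operation ran first.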

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the data as signed values.
		const T originalInout	= *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0			= *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1			= *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
			case ATOMIC_OP_ADD:
			{
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
				exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
			}
			break;

			case ATOMIC_OP_AND:
			{
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
				exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
			}
			break;

			case ATOMIC_OP_OR:
			{
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
				exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
			}
			break;

			case ATOMIC_OP_XOR:
			{
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
				exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
			}
			break;

			case ATOMIC_OP_MIN:
			{
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
				exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_MAX:
			{
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
				exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
			}
			break;

			case ATOMIC_OP_EXCHANGE:
			{
				exp.push_back(Expected<T>(input1, originalInout, input0));
				exp.push_back(Expected<T>(input0, input1, originalInout));
			}
			break;

			case ATOMIC_OP_COMP_SWAP:
			{
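				// The compare values were chosen so that for even elements only the operation using
				// input0 sees a matching compare value, and for odd elements only the one using
				// input1 does; the other operation fails to swap regardless of ordering, which gives
				// the two expected outcomes below.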
				if (elementNdx % 2 == 0)
				{
					exp.push_back(Expected<T>(input0, originalInout, input0));
					exp.push_back(Expected<T>(input0, originalInout, originalInout));
				}
				else
				{
					exp.push_back(Expected<T>(input1, input1, originalInout));
					exp.push_back(Expected<T>(input1, originalInout, originalInout));
				}
			}
			break;


			default:
				DE_FATAL("Unexpected atomic operation.");
				break;
		};

		const T resIo		= result.inout[elementNdx];
		const T resOutput0	= result.output[elementNdx];
		const T resOutput1	= result.output[elementNdx + NUM_ELEMENTS / 2];


		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage	<< "ERROR: Result value check failed at index " << elementNdx
							<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
							<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
							<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
							<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
							<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
							<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

// Use template to handle both float and double cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
	const BufferDataFloatingPoint<T>& result,
	tcu::ResultCollector& resultCollector)
{
	// originalInout = original inout
	// input0 = input at index i
	// input1 = input at index i + NUM_ELEMENTS / 2
	//
	// The atomic operation returns the memory contents before
	// the operation, and this value is stored as output. Two operations
	// are executed for each InOut value (using input0 and input1).
	//
	// Since two operations overlap on each InOut element, the
	// resulting InOut value and the outputs of the operations have
	// two result candidates depending on the execution order.
	// Verification passes if the results match one of these options.
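	//
	// Only atomicAdd and atomicExchange are generated for floating-point data (see
	// addAtomicOperationTests), so only those two operations are handled here.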

	for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
	{
		// Needed when reinterpreting the data as signed values.
		const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
		const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
		const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);

		// Expected results are collected to this vector.
		vector<Expected<T> > exp;

		switch (m_atomicOp)
		{
		case ATOMIC_OP_ADD:
		{
			exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
			exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
		}
		break;

		case ATOMIC_OP_EXCHANGE:
		{
			exp.push_back(Expected<T>(input1, originalInout, input0));
			exp.push_back(Expected<T>(input0, input1, originalInout));
		}
		break;

		default:
			DE_FATAL("Unexpected atomic operation.");
			break;
		};

		const T resIo = result.inout[elementNdx];
		const T resOutput0 = result.output[elementNdx];
		const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];


		if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
		{
			std::ostringstream errorMessage;
			errorMessage << "ERROR: Result value check failed at index " << elementNdx
				<< ". Expected one of the two outcomes: InOut = " << exp[0].m_inout
				<< ", Output0 = " << exp[0].m_output[0] << ", Output1 = "
				<< exp[0].m_output[1] << ", or InOut = " << exp[1].m_inout
				<< ", Output0 = " << exp[1].m_output[0] << ", Output1 = "
				<< exp[1].m_output[1] << ". Got: InOut = " << resIo
				<< ", Output0 = " << resOutput0 << ", Output1 = "
				<< resOutput1 << ". Using Input0 = " << original.input[elementNdx]
				<< " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";

			resultCollector.fail(errorMessage.str());
		}
	}
}

class AtomicOperationCaseInstance : public TestInstance
{
public:
									AtomicOperationCaseInstance		(Context&			context,
																	 const ShaderSpec&	shaderSpec,
																	 AtomicShaderType	shaderType,
																	 DataType			dataType,
																	 AtomicOperation	atomicOp);

	virtual tcu::TestStatus			iterate							(void);

private:
	const ShaderSpec&				m_shaderSpec;
	AtomicShaderType				m_shaderType;
	const DataType					m_dataType;
	AtomicOperation					m_atomicOp;

};

AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context&				context,
														  const ShaderSpec&		shaderSpec,
														  AtomicShaderType		shaderType,
														  DataType				dataType,
														  AtomicOperation		atomicOp)
	: TestInstance	(context)
	, m_shaderSpec	(shaderSpec)
	, m_shaderType	(shaderType)
	, m_dataType	(dataType)
	, m_atomicOp	(atomicOp)
{
}

tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
{
	de::UniquePtr<BufferInterface>	testBuffer	(createTestBuffer(m_dataType, m_atomicOp));
	tcu::TestLog&					log			= m_context.getTestContext().getLog();
	const DeviceInterface&			vkd			= m_context.getDeviceInterface();
	const VkDevice					device		= m_context.getDevice();
	de::Random						rnd			(0x62a15e34);
	const bool						useRef		= (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
	const VkDescriptorType			descType	= (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	const VkBufferUsageFlags		usageFlags	= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));

	// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
	// a uniform buffer. If not, it will be passed directly as a descriptor.
	Buffer							buffer		(m_context, usageFlags, testBuffer->bufferSize(), useRef);
	std::unique_ptr<Buffer>			auxBuffer;

	if (useRef)
	{
		// Pass the main buffer address inside a uniform buffer.
		const VkBufferDeviceAddressInfo addressInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	//	VkStructureType	sType;
			nullptr,										//	const void*		pNext;
			buffer.getBuffer(),								//	VkBuffer		buffer;
		};
		const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);

		auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
		deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
		auxBuffer->flush();
	}

	testBuffer->setBuffer(buffer.getHostPtr());
	testBuffer->fillWithTestData(rnd);

	buffer.flush();

	Move<VkDescriptorSetLayout>	extraResourcesLayout;
	Move<VkDescriptorPool>		extraResourcesSetPool;
	Move<VkDescriptorSet>		extraResourcesSet;

	const VkDescriptorSetLayoutBinding bindings[] =
	{
		{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
	};

	const VkDescriptorSetLayoutCreateInfo	layoutInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkDescriptorSetLayoutCreateFlags)0u,
		DE_LENGTH_OF_ARRAY(bindings),
		bindings
	};

	extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);

	const VkDescriptorPoolSize poolSizes[] =
	{
		{ descType, 1u }
	};

	const VkDescriptorPoolCreateInfo poolInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		DE_NULL,
		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
		1u,		// maxSets
		DE_LENGTH_OF_ARRAY(poolSizes),
		poolSizes
	};

	extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*extraResourcesSetPool,
		1u,
		&extraResourcesLayout.get()
	};

	extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);

	VkDescriptorBufferInfo bufferInfo;
	bufferInfo.buffer	= (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
	bufferInfo.offset	= 0u;
	bufferInfo.range	= VK_WHOLE_SIZE;

	const VkWriteDescriptorSet descriptorWrite =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		DE_NULL,
		*extraResourcesSet,
		0u,		// dstBinding
		0u,		// dstArrayElement
		1u,
		descType,
		(const VkDescriptorImageInfo*)DE_NULL,
		&bufferInfo,
		(const VkBufferView*)DE_NULL
	};

	vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);

	// Storage for output varying data.
	std::vector<deUint32>	outputs		(NUM_ELEMENTS);
	std::vector<void*>		outputPtr	(NUM_ELEMENTS);

	for (size_t i = 0; i < NUM_ELEMENTS; i++)
	{
		outputs[i] = 0xcdcdcdcd;
		outputPtr[i] = &outputs[i];
	}

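	// With shared memory all NUM_ELEMENTS invocations run in a single workgroup (localSizeX is set
	// accordingly in createShaderSpec); otherwise NUM_ELEMENTS separate invocations are requested.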
	const int					numWorkGroups	= ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
	UniquePtr<ShaderExecutor>	executor		(createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));

	executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
	buffer.invalidate();

	tcu::ResultCollector resultCollector(log);

	// Check the results of the atomic operation
	testBuffer->checkResults(resultCollector);

	return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}

class AtomicOperationCase : public TestCase
{
public:
							AtomicOperationCase		(tcu::TestContext&		testCtx,
													 const char*			name,
													 const char*			description,
													 AtomicShaderType		type,
													 DataType				dataType,
													 AtomicOperation		atomicOp);
	virtual					~AtomicOperationCase	(void);

	virtual TestInstance*	createInstance			(Context& ctx) const;
	virtual void			checkSupport			(Context& ctx) const;
	virtual void			initPrograms			(vk::SourceCollections& programCollection) const
	{
		generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
	}

private:

	void					createShaderSpec();
	ShaderSpec				m_shaderSpec;
	const AtomicShaderType	m_shaderType;
	const DataType			m_dataType;
	const AtomicOperation	m_atomicOp;
};

AtomicOperationCase::AtomicOperationCase (tcu::TestContext&	testCtx,
										  const char*		name,
										  const char*		description,
										  AtomicShaderType	shaderType,
										  DataType			dataType,
										  AtomicOperation	atomicOp)
	: TestCase			(testCtx, name, description)
	, m_shaderType		(shaderType)
	, m_dataType		(dataType)
	, m_atomicOp		(atomicOp)
{
	createShaderSpec();
	init();
}

AtomicOperationCase::~AtomicOperationCase (void)
{
}

TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
	return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}

void AtomicOperationCase::checkSupport (Context& ctx) const
{
	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");

		const auto atomicInt64Features	= ctx.getShaderAtomicInt64Features();
		const bool isSharedMemory		= (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);

		if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
		}
		if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT32)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_dataType == DATA_TYPE_FLOAT64)
	{
		ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
		if (m_atomicOp == ATOMIC_OP_ADD)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
				}
			}
		}
		if (m_atomicOp == ATOMIC_OP_EXCHANGE)
		{
			if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
				}
			}
			else
			{
				if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
				{
					TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
				}
			}
		}
	}

	if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
	{
		ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
	}

	// Check stores and atomic operation support.
	switch (m_shaderType.getType())
	{
	case glu::SHADERTYPE_VERTEX:
	case glu::SHADERTYPE_TESSELLATION_CONTROL:
	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
	case glu::SHADERTYPE_GEOMETRY:
		if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in vertex, tessellation, and geometry shaders.");
		break;
	case glu::SHADERTYPE_FRAGMENT:
		if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
			TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in the fragment shader.");
		break;
	case glu::SHADERTYPE_COMPUTE:
		break;
	default:
		DE_FATAL("Unsupported shader type");
	}
}

void AtomicOperationCase::createShaderSpec (void)
{
	const AtomicMemoryType memoryType = m_shaderType.getMemoryType();

	// Global declarations.
	std::ostringstream shaderTemplateGlobalStream;

	// Structure in use for atomic operations.
	shaderTemplateGlobalStream
		<< "${EXTENSIONS}\n"
		<< "\n"
		<< "struct AtomicStruct\n"
		<< "{\n"
		<< "    ${DATATYPE} inoutValues[${N}/2];\n"
		<< "    ${DATATYPE} inputValues[${N}];\n"
		<< "    ${DATATYPE} compareValues[${N}];\n"
		<< "    ${DATATYPE} outputValues[${N}];\n"
		<< "    int invocationHitCount[${N}];\n"
		<< "    int index;\n"
		<< "};\n"
		<< "\n"
		;

	// The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
	// as "buf.data", which is the name used in the atomic operation statements.
	//
	// * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
	// * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
	// * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
	//
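	// For illustration, in the plain storage buffer case with 32-bit unsigned data the specialized
	// global declarations end up looking roughly like this (with ${SETIDX} and ${N} filled in):
	//
	//     struct AtomicStruct
	//     {
	//         uint inoutValues[32/2];
	//         uint inputValues[32];
	//         uint compareValues[32];
	//         uint outputValues[32];
	//         int invocationHitCount[32];
	//         int index;
	//     };
	//
	//     layout (set = <SETIDX>, binding = 0) buffer AtomicBuffer {
	//         AtomicStruct data;
	//     } buf;
	//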
	if (memoryType != AtomicMemoryType::REFERENCE)
	{
		shaderTemplateGlobalStream
			<< "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "} ${RESULT_BUFFER_NAME};\n"
			<< "\n"
			;

		// When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
		// descriptor set as the sources and results of each tested operation.
		if (memoryType == AtomicMemoryType::SHARED)
		{
			shaderTemplateGlobalStream
				<< "shared struct { AtomicStruct data; } buf;\n"
				<< "\n"
				;
		}
	}
	else
	{
		shaderTemplateGlobalStream
			<< "layout (buffer_reference) buffer AtomicBuffer {\n"
			<< "    AtomicStruct data;\n"
			<< "};\n"
			<< "\n"
			<< "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
			<< "    AtomicBuffer buf;\n"
			<< "};\n"
			<< "\n"
			;
	}

	const auto					shaderTemplateGlobalString	= shaderTemplateGlobalStream.str();
	const tcu::StringTemplate	shaderTemplateGlobal		(shaderTemplateGlobalString);

	// Shader body for the non-vertex case.
	std::ostringstream nonVertexShaderTemplateStream;

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will initialize the shared structure from the descriptor set.
		nonVertexShaderTemplateStream
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    buf.data = ${RESULT_BUFFER_NAME}.data;\n"
			<< "}\n"
			<< "barrier();\n"
			;
	}

	if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
	{
		nonVertexShaderTemplateStream
			<< "if (!gl_HelperInvocation) {\n"
			<< "    int idx = atomicAdd(buf.data.index, 1);\n"
			<< "    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			<< "}\n"
			;
	}
	else
	{
		nonVertexShaderTemplateStream
			<< "int idx = atomicAdd(buf.data.index, 1);\n"
			<< "buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
			;
	}

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// Invocation zero will copy results back to the descriptor set.
		nonVertexShaderTemplateStream
			<< "barrier();\n"
			<< "if (gl_LocalInvocationIndex == 0u)\n"
			<< "{\n"
			<< "    ${RESULT_BUFFER_NAME}.data = buf.data;\n"
			<< "}\n"
			;
	}

	const auto					nonVertexShaderTemplateStreamStr	= nonVertexShaderTemplateStream.str();
	const tcu::StringTemplate	nonVertexShaderTemplateSrc			(nonVertexShaderTemplateStreamStr);

	// Shader body for the vertex case.
	const tcu::StringTemplate vertexShaderTemplateSrc(
		"int idx = gl_VertexIndex;\n"
		"if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
		"{\n"
		"    buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
		"}\n");
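	// In the vertex case gl_VertexIndex selects the element, and invocationHitCount guards against
	// the vertex shader being invoked more than once for the same index; only the first invocation
	// per index performs the atomic operation.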

	// Extensions.
	std::ostringstream extensions;

	if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
	{
		extensions
			<< "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
			<< "#extension GL_EXT_shader_atomic_int64 : enable\n"
			;
	}
	else if ((m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
	{
		extensions
			<< "#extension GL_EXT_shader_atomic_float : enable\n"
			<< "#extension GL_KHR_memory_scope_semantics : enable\n"
			;
	}

	if (memoryType == AtomicMemoryType::REFERENCE)
	{
		extensions << "#extension GL_EXT_buffer_reference : require\n";
	}

	// Specializations.
	std::map<std::string, std::string> specializations;

	specializations["EXTENSIONS"]			= extensions.str();
	specializations["DATATYPE"]				= dataType2Str(m_dataType);
	specializations["ATOMICOP"]				= atomicOp2Str(m_atomicOp);
	specializations["SETIDX"]				= de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
	specializations["N"]					= de::toString((int)NUM_ELEMENTS);
	specializations["COMPARE_ARG"]			= ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
	specializations["RESULT_BUFFER_NAME"]	= ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
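	// With these substitutions the atomic statement becomes, for example,
	//     buf.data.outputValues[idx] = atomicAdd(buf.data.inoutValues[idx % (32/2)], buf.data.inputValues[idx]);
	// and for comp_swap the COMPARE_ARG specialization inserts the extra compare operand:
	//     buf.data.outputValues[idx] = atomicCompSwap(buf.data.inoutValues[idx % (32/2)], buf.data.compareValues[idx], buf.data.inputValues[idx]);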

	// Shader spec.
	m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
	m_shaderSpec.glslVersion		= glu::GLSL_VERSION_450;
	m_shaderSpec.globalDeclarations	= shaderTemplateGlobal.specialize(specializations);
	m_shaderSpec.source				= ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
										? vertexShaderTemplateSrc.specialize(specializations)
										: nonVertexShaderTemplateSrc.specialize(specializations));

	if (memoryType == AtomicMemoryType::SHARED)
	{
		// When using global shared memory, use a single workgroup and an appropriate number of local invocations.
		m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
	}
}

void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
	tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();

	static const struct
	{
		glu::ShaderType		type;
		const char*			name;
	} shaderTypes[] =
	{
		{ glu::SHADERTYPE_VERTEX,							"vertex"			},
		{ glu::SHADERTYPE_FRAGMENT,							"fragment"			},
		{ glu::SHADERTYPE_GEOMETRY,							"geometry"			},
		{ glu::SHADERTYPE_TESSELLATION_CONTROL,				"tess_ctrl"			},
		{ glu::SHADERTYPE_TESSELLATION_EVALUATION,			"tess_eval"			},
		{ glu::SHADERTYPE_COMPUTE,							"compute"			},
	};

	static const struct
	{
		AtomicMemoryType	type;
		const char*			suffix;
	} kMemoryTypes[] =
	{
		{ AtomicMemoryType::BUFFER,		""				},
		{ AtomicMemoryType::SHARED,		"_shared"		},
		{ AtomicMemoryType::REFERENCE,	"_reference"	},
	};

	static const struct
	{
		DataType		dataType;
		const char*		name;
		const char*		description;
	} dataSign[] =
	{
		{ DATA_TYPE_INT32,	"signed",			"Tests using signed data (int)"				},
		{ DATA_TYPE_UINT32,	"unsigned",			"Tests using unsigned data (uint)"			},
		{ DATA_TYPE_FLOAT32,"float32",			"Tests using 32-bit float data"				},
		{ DATA_TYPE_INT64,	"signed64bit",		"Tests using 64 bit signed data (int64)"	},
		{ DATA_TYPE_UINT64,	"unsigned64bit",	"Tests using 64 bit unsigned data (uint64)"	},
		{ DATA_TYPE_FLOAT64,"float64",			"Tests using 64-bit float data"				}
	};

	static const struct
	{
		AtomicOperation		value;
		const char*			name;
	} atomicOp[] =
	{
		{ ATOMIC_OP_EXCHANGE,	"exchange"	},
		{ ATOMIC_OP_COMP_SWAP,	"comp_swap"	},
		{ ATOMIC_OP_ADD,		"add"		},
		{ ATOMIC_OP_MIN,		"min"		},
		{ ATOMIC_OP_MAX,		"max"		},
		{ ATOMIC_OP_AND,		"and"		},
		{ ATOMIC_OP_OR,			"or"		},
		{ ATOMIC_OP_XOR,		"xor"		}
	};
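	// Generated test names combine the operation, data type, shader stage and memory type suffix,
	// e.g. "add_unsigned_compute_shared" or "exchange_float32_fragment_reference".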

	for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
	{
		for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
		{
			for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
			{
				// Only ADD and EXCHANGE are supported on floating-point
				if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
				{
					if (atomicOp[opNdx].value != ATOMIC_OP_ADD && atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
					{
						continue;
					}
				}

				for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
				{
					// Shared memory only available in compute shaders.
					if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
						continue;

					const std::string description	= std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
					const std::string name			= std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;

					atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
				}
			}
		}
	}
}

} // anonymous

tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
	return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}

} // shaderexecutor
} // vkt