1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 Valve Corporation.
6  * Copyright (c) 2019 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief OpFConvert tests.
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktShaderFConvertTests.hpp"
26 #include "vktTestCase.hpp"
27 
28 #include "vkBufferWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkPrograms.hpp"
33 
34 #include "deDefs.hpp"
35 #include "deRandom.hpp"
36 
37 #include "tcuFloat.hpp"
38 #include "tcuTestLog.hpp"
39 #include "tcuFormatUtil.hpp"
40 
41 #include <vector>
42 #include <iterator>
43 #include <algorithm>
44 #include <memory>
45 #include <sstream>
46 #include <iomanip>
47 #include <string>
48 #include <limits>
49 
50 namespace vkt
51 {
52 namespace shaderexecutor
53 {
54 
55 namespace
56 {
57 
58 constexpr deUint32	kRandomSeed								= 0xdeadbeef;
59 constexpr size_t	kRandomSourcesPerType					= 240;
60 constexpr size_t	kMinVectorLength						= 1;
61 constexpr size_t	kMaxVectorLength						= 4;
62 constexpr size_t	kArrayAlignment							= 16;					// Bytes.
63 constexpr size_t	kEffectiveLength[kMaxVectorLength + 1]	= { 0, 1, 2, 4, 4 };	// Effective length of a vector of size i.
64 constexpr size_t	kGCFNumFloats							= 12;					// Greatest Common Factor of the number of floats in a test.
65 
66 // Get a random normal number.
67 // Works for implementations of tcu::Float as T.
68 template <class T>
getRandomNormal(de::Random & rnd)69 T getRandomNormal (de::Random& rnd)
70 {
71 	static constexpr typename T::StorageType	kLeadingMantissaBit	= (static_cast<typename T::StorageType>(1) << T::MANTISSA_BITS);
72 	static constexpr int						kSignValues[]		= { -1, 1 };
73 
74 	int						signBit		= rnd.getInt(0, 1);
75 	int						exponent	= rnd.getInt(1 - T::EXPONENT_BIAS, T::EXPONENT_BIAS + 1);
76 	typename T::StorageType	mantissa	= static_cast<typename T::StorageType>(rnd.getUint64() & static_cast<deUint64>(kLeadingMantissaBit - 1));
77 
78 	// Construct number.
79 	return T::construct(kSignValues[signBit], exponent, (kLeadingMantissaBit | mantissa));
80 }
81 
// Hand-picked interesting samples for tcu::Float class T: both signed zeros, then both signed largest normals, then both
// signed smallest normals. The list is built once per type and shared by all callers.
// Infinities and NaN are intentionally left out of the list.
template <class T>
const std::vector<T>& interestingSamples ()
{
	static const std::vector<T> samples = []
	{
		static constexpr int kSigns[] = { -1, 1 };

		std::vector<T> list;
		list.reserve(6u);

		for (const int sign : kSigns)
			list.push_back(T::zero(sign));

		// Not included: T::inf(-1), T::inf(1) and T::nan().

		for (const int sign : kSigns)
			list.push_back(T::largestNormal(sign));

		for (const int sign : kSigns)
			list.push_back(T::smallestNormal(sign));

		return list;
	}();

	return samples;
}
101 
102 // Get some random interesting numbers.
103 // Works for implementations of tcu::Float as T.
104 template <class T>
getRandomInteresting(de::Random & rnd,size_t numSamples)105 std::vector<T> getRandomInteresting (de::Random& rnd, size_t numSamples)
106 {
107 	auto&			samples = interestingSamples<T>();
108 	std::vector<T>	result;
109 
110 	result.reserve(numSamples);
111 	std::generate_n(std::back_inserter(result), numSamples, [&rnd, &samples]() { return rnd.choose<T>(begin(samples), end(samples)); });
112 
113 	return result;
114 }
115 
116 // Helper class to build each vector only once in a thread-safe way.
117 template <class T>
118 struct StaticVectorHelper
119 {
120 	std::vector<T> v;
121 
StaticVectorHelpervkt::shaderexecutor::__anon3b3c11db0111::StaticVectorHelper122 	StaticVectorHelper (de::Random& rnd)
123 	{
124 		v.reserve(kRandomSourcesPerType);
125 		for (size_t i = 0; i < kRandomSourcesPerType; ++i)
126 			v.push_back(getRandomNormal<T>(rnd));
127 	}
128 };
129 
130 // Get a list of random normal input values for type T.
131 template <class T>
getRandomNormals(de::Random & rnd)132 const std::vector<T>& getRandomNormals (de::Random& rnd)
133 {
134 	static StaticVectorHelper<T> helper(rnd);
135 	return helper.v;
136 }
137 
// Convert every element of a vector of tcu::Float type T1 to type T2 using T2::convert(), preserving the order.
template <class T1, class T2>
std::vector<T2> convertVector (const std::vector<T1>& orig)
{
	std::vector<T2> converted;
	converted.reserve(orig.size());

	for (const T1& source : orig)
		converted.push_back(T2::convert(source));

	return converted;
}
150 
151 // Get converted normal values for other tcu::Float types smaller than T, which should be exact conversions when converting back to
152 // those types.
153 template <class T>
154 std::vector<T> getOtherNormals (de::Random& rnd);
155 
156 template<>
getOtherNormals(de::Random &)157 std::vector<tcu::Float16> getOtherNormals<tcu::Float16> (de::Random&)
158 {
159 	// Nothing below tcu::Float16.
160 	return std::vector<tcu::Float16>();
161 }
162 
163 template<>
getOtherNormals(de::Random & rnd)164 std::vector<tcu::Float32> getOtherNormals<tcu::Float32> (de::Random& rnd)
165 {
166 	// The ones from tcu::Float16.
167 	return convertVector<tcu::Float16, tcu::Float32>(getRandomNormals<tcu::Float16>(rnd));
168 }
169 
170 template<>
getOtherNormals(de::Random & rnd)171 std::vector<tcu::Float64> getOtherNormals<tcu::Float64> (de::Random& rnd)
172 {
173 	// The ones from both tcu::Float16 and tcu::Float64.
174 	auto v1 = convertVector<tcu::Float16, tcu::Float64>(getRandomNormals<tcu::Float16>(rnd));
175 	auto v2 = convertVector<tcu::Float32, tcu::Float64>(getRandomNormals<tcu::Float32>(rnd));
176 
177 	v1.reserve(v1.size() + v2.size());
178 	std::copy(begin(v2), end(v2), std::back_inserter(v1));
179 	return v1;
180 }
181 
182 // Get the full list of input values for type T.
183 template <class T>
getInputValues(de::Random & rnd)184 std::vector<T> getInputValues (de::Random& rnd)
185 {
186 	auto&	interesting		= interestingSamples<T>();
187 	auto&	normals			= getRandomNormals<T>(rnd);
188 	auto	otherNormals	= getOtherNormals<T>(rnd);
189 
190 	const size_t numValues		= interesting.size() + normals.size() + otherNormals.size();
191 	const size_t extraValues	= numValues % kGCFNumFloats;
192 	const size_t needed			= ((extraValues == 0) ? 0 : (kGCFNumFloats - extraValues));
193 
194 	auto extra = getRandomInteresting<T> (rnd, needed);
195 
196 	std::vector<T> values;
197 	values.reserve(interesting.size() + normals.size() + otherNormals.size() + extra.size());
198 
199 	std::copy(begin(interesting),	end(interesting),	std::back_inserter(values));
200 	std::copy(begin(normals),		end(normals),		std::back_inserter(values));
201 	std::copy(begin(otherNormals),	end(otherNormals),	std::back_inserter(values));
202 	std::copy(begin(extra),			end(extra),			std::back_inserter(values));
203 
204 	// Shuffle samples around a bit to make it more interesting.
205 	rnd.shuffle(begin(values), end(values));
206 
207 	return values;
208 }
209 
210 // This singleton makes sure generated samples are stable no matter the test order.
211 class InputGenerator
212 {
213 public:
getInstance()214 	static const InputGenerator& getInstance ()
215 	{
216 		static InputGenerator instance;
217 		return instance;
218 	}
219 
getInputValues16() const220 	const std::vector<tcu::Float16>& getInputValues16 () const
221 	{
222 		return m_values16;
223 	}
224 
getInputValues32() const225 	const std::vector<tcu::Float32>& getInputValues32 () const
226 	{
227 		return m_values32;
228 	}
229 
getInputValues64() const230 	const std::vector<tcu::Float64>& getInputValues64 () const
231 	{
232 		return m_values64;
233 	}
234 
235 private:
InputGenerator()236 	InputGenerator ()
237 		: m_rnd(kRandomSeed)
238 		, m_values16(getInputValues<tcu::Float16>(m_rnd))
239 		, m_values32(getInputValues<tcu::Float32>(m_rnd))
240 		, m_values64(getInputValues<tcu::Float64>(m_rnd))
241 	{
242 	}
243 
244 	// Cannot copy or assign.
245 	InputGenerator(const InputGenerator&)				= delete;
246 	InputGenerator& operator=(const InputGenerator&)	= delete;
247 
248 	de::Random					m_rnd;
249 	std::vector<tcu::Float16>	m_values16;
250 	std::vector<tcu::Float32>	m_values32;
251 	std::vector<tcu::Float64>	m_values64;
252 };
253 
254 // Check single result is as expected.
255 // Works for implementations of tcu::Float as T1 and T2.
256 template <class T1, class T2>
validConversion(const T1 & orig,const T2 & result)257 bool validConversion (const T1& orig, const T2& result)
258 {
259 	const T2	acceptedResults[]	= { T2::convert(orig, tcu::ROUND_DOWNWARD), T2::convert(orig, tcu::ROUND_UPWARD) };
260 	bool		valid				= false;
261 
262 	for (const auto& validResult : acceptedResults)
263 	{
264 		if (validResult.isNaN() && result.isNaN())
265 			valid = true;
266 		else if (validResult.isInf() && result.isInf())
267 			valid = true;
268 		else if (validResult.isZero() && result.isZero())
269 			valid = true;
270 		else if (validResult.isDenorm() && (result.isDenorm() || result.isZero()))
271 			valid = true;
272 		else if (validResult.bits() == result.bits()) // Exact conversion, up or down.
273 			valid = true;
274 	}
275 
276 	return valid;
277 }
278 
279 // Check results vector is as expected.
280 template <class T1, class T2>
validConversion(const std::vector<T1> & orig,const std::vector<T2> & converted,tcu::TestLog & log)281 bool validConversion (const std::vector<T1>& orig, const std::vector<T2>& converted, tcu::TestLog& log)
282 {
283 	DE_ASSERT(orig.size() == converted.size());
284 
285 	bool allValid = true;
286 
287 	for (size_t i = 0; i < orig.size(); ++i)
288 	{
289 		const bool valid = validConversion(orig[i], converted[i]);
290 
291 		{
292 			const double origD = orig[i].asDouble();
293 			const double convD = converted[i].asDouble();
294 
295 			std::ostringstream msg;
296 			msg << "[" << i << "] "
297 				<< std::setprecision(std::numeric_limits<double>::digits10 + 2) << std::scientific
298 				<< origD << " converted to " << convD << ": " << (valid ? "OK" : "FAILURE");
299 
300 			log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
301 		}
302 
303 		if (!valid)
304 			allValid = false;
305 	}
306 
307 	return allValid;
308 }
309 
310 // Helps calculate buffer sizes and other parameters for the given number of values and vector length using a given floating point
311 // type. This is mostly used in packFloats() below, but we also need this information in the iterate() method for the test instance,
312 // so it has been separated.
313 struct BufferSizeInfo
314 {
315 	template <class T>
calculatevkt::shaderexecutor::__anon3b3c11db0111::BufferSizeInfo316 	static BufferSizeInfo calculate (size_t numValues_, size_t vectorLength_)
317 	{
318 		// The vector length must be a known number.
319 		DE_ASSERT(vectorLength_ >= kMinVectorLength && vectorLength_ <= kMaxVectorLength);
320 		// The number of values must be appropriate for the vector length.
321 		DE_ASSERT(numValues_ % vectorLength_ == 0);
322 
323 		BufferSizeInfo info;
324 
325 		info.numValues		= numValues_;
326 		info.vectorLength	= vectorLength_;
327 		info.totalVectors	= numValues_ / vectorLength_;
328 
329 		const size_t elementSize		= sizeof(typename T::StorageType);
330 		const size_t effectiveLength	= kEffectiveLength[vectorLength_];
331 		const size_t vectorSize			= elementSize * effectiveLength;
332 		const size_t extraBytes			= vectorSize % kArrayAlignment;
333 
334 		info.vectorStrideBytes	= vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
335 		info.memorySizeBytes	= info.vectorStrideBytes * info.totalVectors;
336 
337 		return info;
338 	}
339 
340 	size_t numValues;
341 	size_t vectorLength;
342 	size_t totalVectors;
343 	size_t vectorStrideBytes;
344 	size_t memorySizeBytes;
345 };
346 
347 // Pack an array of tcu::Float values into a buffer to be read from a shader, as if it was an array of vectors with each vector
348 // having size vectorLength (e.g. 3 for a vec3). Note: assumes std140.
349 template <class T>
packFloats(const std::vector<T> & values,size_t vectorLength)350 std::vector<deUint8> packFloats (const std::vector<T>& values, size_t vectorLength)
351 {
352 	BufferSizeInfo sizeInfo = BufferSizeInfo::calculate<T>(values.size(), vectorLength);
353 
354 	std::vector<deUint8> memory(sizeInfo.memorySizeBytes);
355 	for (size_t i = 0; i < sizeInfo.totalVectors; ++i)
356 	{
357 		T* vectorPtr = reinterpret_cast<T*>(memory.data() + sizeInfo.vectorStrideBytes * i);
358 		for (size_t j = 0; j < vectorLength; ++j)
359 			vectorPtr[j] = values[i*vectorLength + j];
360 	}
361 
362 	return memory;
363 }
364 
365 // Unpack an array of vectors into an array of values, undoing what packFloats would do.
366 // expectedNumValues is used for verification.
367 template <class T>
unpackFloats(const std::vector<deUint8> & memory,size_t vectorLength,size_t expectedNumValues)368 std::vector<T> unpackFloats (const std::vector<deUint8>& memory, size_t vectorLength, size_t expectedNumValues)
369 {
370 	DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
371 
372 	const size_t effectiveLength	= kEffectiveLength[vectorLength];
373 	const size_t elementSize		= sizeof(typename T::StorageType);
374 	const size_t vectorSize			= elementSize * effectiveLength;
375 	const size_t extraBytes			= vectorSize % kArrayAlignment;
376 	const size_t vectorBlockSize	= vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
377 
378 	DE_ASSERT(memory.size() % vectorBlockSize == 0);
379 	const size_t numStoredVectors	= memory.size() / vectorBlockSize;
380 	const size_t numStoredValues	= numStoredVectors * vectorLength;
381 
382 	DE_UNREF(expectedNumValues); // For release builds.
383 	DE_ASSERT(numStoredValues == expectedNumValues);
384 	std::vector<T> values;
385 	values.reserve(numStoredValues);
386 
387 	for (size_t i = 0; i < numStoredVectors; ++i)
388 	{
389 		const T* vectorPtr = reinterpret_cast<const T*>(memory.data() + vectorBlockSize * i);
390 		for (size_t j = 0; j < vectorLength; ++j)
391 			values.push_back(vectorPtr[j]);
392 	}
393 
394 	return values;
395 }
396 
397 enum FloatType
398 {
399 	FLOAT_TYPE_16_BITS = 0,
400 	FLOAT_TYPE_32_BITS,
401 	FLOAT_TYPE_64_BITS,
402 	FLOAT_TYPE_MAX_ENUM,
403 };
404 
405 static const char* const kFloatNames[FLOAT_TYPE_MAX_ENUM] =
406 {
407 	"f16",
408 	"f32",
409 	"f64",
410 };
411 
412 static const char* const kGLSLTypes[][kMaxVectorLength + 1] =
413 {
414 	{ nullptr, "float16_t",	"f16vec2",	"f16vec3",	"f16vec4"	},
415 	{ nullptr, "float",		"vec2",		"vec3",		"vec4"		},
416 	{ nullptr, "double",	"dvec2",	"dvec3",	"dvec4"		},
417 };
418 
419 struct TestParams
420 {
421 	FloatType	from;
422 	FloatType	to;
423 	size_t		vectorLength;
424 
getInputTypeStrvkt::shaderexecutor::__anon3b3c11db0111::TestParams425 	std::string	getInputTypeStr		() const
426 	{
427 		DE_ASSERT(from >= 0 && from < FLOAT_TYPE_MAX_ENUM);
428 		DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
429 		return kGLSLTypes[from][vectorLength];
430 	}
431 
getOutputTypeStrvkt::shaderexecutor::__anon3b3c11db0111::TestParams432 	std::string getOutputTypeStr	() const
433 	{
434 		DE_ASSERT(to >= 0 && to < FLOAT_TYPE_MAX_ENUM);
435 		DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
436 		return kGLSLTypes[to][vectorLength];
437 	}
438 };
439 
440 class FConvertTestInstance : public TestInstance
441 {
442 public:
FConvertTestInstance(Context & context,const TestParams & params)443 							FConvertTestInstance	(Context& context, const TestParams& params)
444 								: TestInstance(context)
445 								, m_params(params)
446 								{}
447 
448 	virtual tcu::TestStatus	iterate					(void);
449 
450 private:
451 	TestParams	m_params;
452 };
453 
454 class FConvertTestCase : public TestCase
455 {
456 public:
FConvertTestCase(tcu::TestContext & context,const std::string & name,const std::string & desc,const TestParams & params)457 								FConvertTestCase	(tcu::TestContext& context, const std::string& name, const std::string& desc, const TestParams& params)
458 									: TestCase	(context, name, desc)
459 									, m_params	(params)
460 									{}
461 
~FConvertTestCase(void)462 								~FConvertTestCase	(void) {}
createInstance(Context & context) const463 	virtual TestInstance*		createInstance		(Context& context) const { return new FConvertTestInstance(context, m_params); }
464 	virtual	void				initPrograms		(vk::SourceCollections& programCollection) const;
465 	virtual void				checkSupport		(Context& context) const;
466 
467 private:
468 	TestParams	m_params;
469 };
470 
initPrograms(vk::SourceCollections & programCollection) const471 void FConvertTestCase::initPrograms (vk::SourceCollections& programCollection) const
472 {
473 	const std::string		inputType		= m_params.getInputTypeStr();
474 	const std::string		outputType		= m_params.getOutputTypeStr();
475 	const InputGenerator&	inputGenerator	= InputGenerator::getInstance();
476 
477 	size_t numValues = 0;
478 	switch (m_params.from)
479 	{
480 	case FLOAT_TYPE_16_BITS:
481 		numValues = inputGenerator.getInputValues16().size();
482 		break;
483 	case FLOAT_TYPE_32_BITS:
484 		numValues = inputGenerator.getInputValues32().size();
485 		break;
486 	case FLOAT_TYPE_64_BITS:
487 		numValues = inputGenerator.getInputValues64().size();
488 		break;
489 	default:
490 		DE_ASSERT(false);
491 		break;
492 	}
493 
494 	const size_t arraySize = numValues / m_params.vectorLength;
495 
496 	std::ostringstream shader;
497 
498 	shader
499 		<< "#version 450 core\n"
500 		<< ((m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS) ?
501 			"#extension GL_EXT_shader_16bit_storage: require\n"					// This is needed to use 16-bit float types in buffers.
502 			"#extension GL_EXT_shader_explicit_arithmetic_types: require\n"		// This is needed for some conversions.
503 			: "")
504 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
505 		<< "layout(set = 0, binding = 0, std140) buffer issbodef { " << inputType << " val[" << arraySize << "]; } issbo;\n"
506 		<< "layout(set = 0, binding = 1, std140) buffer ossbodef { " << outputType << " val[" << arraySize << "]; } ossbo;\n"
507 		<< "void main()\n"
508 		<< "{\n"
509 		<< "	ossbo.val[gl_WorkGroupID.x] = " << outputType << "(issbo.val[gl_WorkGroupID.x]);\n"
510 		<< "}\n";
511 
512 	programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
513 }
514 
checkSupport(Context & context) const515 void FConvertTestCase::checkSupport (Context& context) const
516 {
517 	if (m_params.from == FLOAT_TYPE_64_BITS || m_params.to == FLOAT_TYPE_64_BITS)
518 	{
519 		// Check for 64-bit float support.
520 		auto features = context.getDeviceFeatures();
521 		if (!features.shaderFloat64)
522 			TCU_THROW(NotSupportedError, "64-bit floats not supported in shader code");
523 	}
524 
525 	if (m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS)
526 	{
527 		// Check for 16-bit float support.
528 		auto& features16 = context.getShaderFloat16Int8Features();
529 		if (!features16.shaderFloat16)
530 			TCU_THROW(NotSupportedError, "16-bit floats not supported in shader code");
531 
532 		auto& storage16 = context.get16BitStorageFeatures();
533 		if (!storage16.storageBuffer16BitAccess)
534 			TCU_THROW(NotSupportedError, "16-bit floats not supported for storage buffers");
535 	}
536 }
537 
iterate(void)538 tcu::TestStatus FConvertTestInstance::iterate (void)
539 {
540 	BufferSizeInfo			inputBufferSizeInfo;
541 	BufferSizeInfo			outputBufferSizeInfo;
542 	std::vector<deUint8>	inputMemory;
543 
544 	// Calculate buffer sizes and convert input values to a packed input memory format, depending on the input and output types.
545 	switch (m_params.from)
546 	{
547 	case FLOAT_TYPE_16_BITS:
548 		{
549 			auto& inputValues = InputGenerator::getInstance().getInputValues16();
550 			inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
551 			switch (m_params.to)
552 			{
553 			case FLOAT_TYPE_32_BITS:
554 				outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
555 				break;
556 			case FLOAT_TYPE_64_BITS:
557 				outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
558 				break;
559 			default:
560 				DE_ASSERT(false);
561 				break;
562 			}
563 			inputMemory = packFloats(inputValues, m_params.vectorLength);
564 		}
565 		break;
566 
567 	case FLOAT_TYPE_32_BITS:
568 		{
569 			auto& inputValues = InputGenerator::getInstance().getInputValues32();
570 			inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
571 			switch (m_params.to)
572 			{
573 			case FLOAT_TYPE_16_BITS:
574 				outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
575 				break;
576 			case FLOAT_TYPE_64_BITS:
577 				outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
578 				break;
579 			default:
580 				DE_ASSERT(false);
581 				break;
582 			}
583 			inputMemory = packFloats(inputValues, m_params.vectorLength);
584 		}
585 		break;
586 
587 	case FLOAT_TYPE_64_BITS:
588 		{
589 			auto& inputValues = InputGenerator::getInstance().getInputValues64();
590 			inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
591 			switch (m_params.to)
592 			{
593 			case FLOAT_TYPE_16_BITS:
594 				outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
595 				break;
596 			case FLOAT_TYPE_32_BITS:
597 				outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
598 				break;
599 			default:
600 				DE_ASSERT(false);
601 				break;
602 			}
603 			inputMemory = packFloats(inputValues, m_params.vectorLength);
604 		}
605 		break;
606 
607 	default:
608 		DE_ASSERT(false);
609 		break;
610 	}
611 
612 	// Prepare input and output buffers.
613 	auto&	vkd			= m_context.getDeviceInterface();
614 	auto	device		= m_context.getDevice();
615 	auto&	allocator	= m_context.getDefaultAllocator();
616 
617 	de::MovePtr<vk::BufferWithMemory> inputBuffer(
618 		new vk::BufferWithMemory(vkd, device, allocator,
619 								 vk::makeBufferCreateInfo(inputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
620 								 vk::MemoryRequirement::HostVisible)
621 	);
622 
623 	de::MovePtr<vk::BufferWithMemory> outputBuffer(
624 		new vk::BufferWithMemory(vkd, device, allocator,
625 								 vk::makeBufferCreateInfo(outputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
626 								 vk::MemoryRequirement::HostVisible)
627 	);
628 
629 	// Copy values to input buffer.
630 	{
631 		auto& alloc = inputBuffer->getAllocation();
632 		deMemcpy(reinterpret_cast<deUint8*>(alloc.getHostPtr()) + alloc.getOffset(), inputMemory.data(), inputMemory.size());
633 		vk::flushAlloc(vkd, device, alloc);
634 	}
635 
636 	// Create an array with the input and output buffers to make it easier to iterate below.
637 	const vk::VkBuffer buffers[] = { inputBuffer->get(), outputBuffer->get() };
638 
639 	// Create descriptor set layout.
640 	std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
641 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
642 	{
643 		const vk::VkDescriptorSetLayoutBinding binding =
644 		{
645 			static_cast<deUint32>(i),								// uint32_t              binding;
646 			vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,					// VkDescriptorType      descriptorType;
647 			1u,														// uint32_t              descriptorCount;
648 			vk::VK_SHADER_STAGE_COMPUTE_BIT,						// VkShaderStageFlags    stageFlags;
649 			DE_NULL,													// const VkSampler*      pImmutableSamplers;
650 		};
651 		bindings.push_back(binding);
652 	}
653 
654 	const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo =
655 	{
656 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,	// VkStructureType                        sType;
657 		DE_NULL,													// const void*                            pNext;
658 		0,															// VkDescriptorSetLayoutCreateFlags       flags;
659 		static_cast<deUint32>(bindings.size()),						// uint32_t                               bindingCount;
660 		bindings.data()												// const VkDescriptorSetLayoutBinding*    pBindings;
661 	};
662 	auto descriptorSetLayout = vk::createDescriptorSetLayout(vkd, device, &layoutCreateInfo);
663 
664 	// Create descriptor set.
665 	vk::DescriptorPoolBuilder poolBuilder;
666 	for (const auto& b : bindings)
667 		poolBuilder.addType(b.descriptorType, 1u);
668 	auto descriptorPool = poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
669 
670 	const vk::VkDescriptorSetAllocateInfo allocateInfo =
671 	{
672 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// VkStructureType                 sType;
673 		DE_NULL,											// const void*                     pNext;
674 		*descriptorPool,									// VkDescriptorPool                descriptorPool;
675 		1u,													// uint32_t                        descriptorSetCount;
676 		&descriptorSetLayout.get()							// const VkDescriptorSetLayout*    pSetLayouts;
677 	};
678 	auto descriptorSet = vk::allocateDescriptorSet(vkd, device, &allocateInfo);
679 
680 	// Update descriptor set.
681 	std::vector<vk::VkDescriptorBufferInfo>	descriptorBufferInfos;
682 	std::vector<vk::VkWriteDescriptorSet>	descriptorWrites;
683 
684 	for (const auto& buffer : buffers)
685 	{
686 		const vk::VkDescriptorBufferInfo bufferInfo =
687 		{
688 			buffer,			// VkBuffer        buffer;
689 			0u,				// VkDeviceSize    offset;
690 			VK_WHOLE_SIZE,	// VkDeviceSize    range;
691 		};
692 		descriptorBufferInfos.push_back(bufferInfo);
693 	}
694 
695 	for (size_t i = 0; i < bindings.size(); ++i)
696 	{
697 		const vk::VkWriteDescriptorSet write =
698 		{
699 			vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,	// VkStructureType                  sType;
700 			DE_NULL,									// const void*                      pNext;
701 			*descriptorSet,								// VkDescriptorSet                  dstSet;
702 			static_cast<deUint32>(i),					// uint32_t                         dstBinding;
703 			0u,											// uint32_t                         dstArrayElement;
704 			1u,											// uint32_t                         descriptorCount;
705 			bindings[i].descriptorType,					// VkDescriptorType                 descriptorType;
706 			DE_NULL,									// const VkDescriptorImageInfo*     pImageInfo;
707 			&descriptorBufferInfos[i],					// const VkDescriptorBufferInfo*    pBufferInfo;
708 			DE_NULL,									// const VkBufferView*              pTexelBufferView;
709 		};
710 		descriptorWrites.push_back(write);
711 	}
712 	vkd.updateDescriptorSets(device, static_cast<deUint32>(descriptorWrites.size()), descriptorWrites.data(), 0u, DE_NULL);
713 
714 	// Prepare barriers in advance so data is visible to the shaders and the host.
715 	std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
716 	std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
717 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
718 	{
719 		const vk::VkBufferMemoryBarrier hostToDev =
720 		{
721 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType	sType;
722 			DE_NULL,															// const void*		pNext;
723 			vk::VK_ACCESS_HOST_WRITE_BIT,										// VkAccessFlags	srcAccessMask;
724 			(vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT),	// VkAccessFlags	dstAccessMask;
725 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			srcQueueFamilyIndex;
726 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			dstQueueFamilyIndex;
727 			buffers[i],															// VkBuffer			buffer;
728 			0u,																	// VkDeviceSize		offset;
729 			VK_WHOLE_SIZE,														// VkDeviceSize		size;
730 		};
731 		hostToDevBarriers.push_back(hostToDev);
732 
733 		const vk::VkBufferMemoryBarrier devToHost =
734 		{
735 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType	sType;
736 			DE_NULL,															// const void*		pNext;
737 			vk::VK_ACCESS_SHADER_WRITE_BIT,										// VkAccessFlags	srcAccessMask;
738 			vk::VK_ACCESS_HOST_READ_BIT,										// VkAccessFlags	dstAccessMask;
739 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			srcQueueFamilyIndex;
740 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			dstQueueFamilyIndex;
741 			buffers[i],															// VkBuffer			buffer;
742 			0u,																	// VkDeviceSize		offset;
743 			VK_WHOLE_SIZE,														// VkDeviceSize		size;
744 		};
745 		devToHostBarriers.push_back(devToHost);
746 	}
747 
748 	// Create command pool and command buffer.
749 	auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
750 
751 	const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo =
752 	{
753 		vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,	// VkStructureType				sType;
754 		DE_NULL,										// const void*					pNext;
755 		vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,		// VkCommandPoolCreateFlags		flags;
756 		queueFamilyIndex,								// deUint32						queueFamilyIndex;
757 	};
758 	auto cmdPool = vk::createCommandPool(vkd, device, &cmdPoolCreateInfo);
759 
760 	const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo =
761 	{
762 		vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType			sType;
763 		DE_NULL,											// const void*				pNext;
764 		*cmdPool,											// VkCommandPool			commandPool;
765 		vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel		level;
766 		1u,													// deUint32					commandBufferCount;
767 	};
768 	auto cmdBuffer = vk::allocateCommandBuffer(vkd, device, &cmdBufferAllocateInfo);
769 
770 	// Create pipeline layout.
771 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
772 	{
773 		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType					sType;
774 		DE_NULL,											// const void*						pNext;
775 		0,													// VkPipelineLayoutCreateFlags		flags;
776 		1u,													// deUint32							setLayoutCount;
777 		&descriptorSetLayout.get(),							// const VkDescriptorSetLayout*		pSetLayouts;
778 		0u,													// deUint32							pushConstantRangeCount;
779 		DE_NULL,											// const VkPushConstantRange*		pPushConstantRanges;
780 	};
781 	auto pipelineLayout = vk::createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
782 
783 	// Create compute pipeline.
784 	const vk::Unique<vk::VkShaderModule> shader(vk::createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0));
785 
786 	const vk::VkComputePipelineCreateInfo computeCreateInfo =
787 	{
788 		vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType                    sType;
789 		DE_NULL,											// const void*                        pNext;
790 		0,													// VkPipelineCreateFlags              flags;
791 		{													// VkPipelineShaderStageCreateInfo    stage;
792 			vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType;
793 			DE_NULL,													// const void*                         pNext;
794 			0,															// VkPipelineShaderStageCreateFlags    flags;
795 			vk::VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits               stage;
796 			*shader,													// VkShaderModule                      module;
797 			"main",														// const char*                         pName;
798 			DE_NULL,													// const VkSpecializationInfo*         pSpecializationInfo;
799 		},
800 		*pipelineLayout,									// VkPipelineLayout                   layout;
801 		DE_NULL,											// VkPipeline                         basePipelineHandle;
802 		0,													// int32_t                            basePipelineIndex;
803 	};
804 	auto computePipeline = vk::createComputePipeline(vkd, device, DE_NULL, &computeCreateInfo);
805 
806 	// Run the shader.
807 	vk::beginCommandBuffer(vkd, *cmdBuffer);
808 		vkd.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
809 		vkd.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
810 		vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
811 		vkd.cmdDispatch(*cmdBuffer, static_cast<deUint32>(inputBufferSizeInfo.totalVectors), 1u, 1u);
812 		vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
813 	vk::endCommandBuffer(vkd, *cmdBuffer);
814 	vk::submitCommandsAndWait(vkd, device, m_context.getUniversalQueue(), *cmdBuffer);
815 
816 	// Invalidate output allocation.
817 	vk::invalidateAlloc(vkd, device, outputBuffer->getAllocation());
818 
819 	// Copy output buffer data.
820 	std::vector<deUint8> outputMemory(outputBufferSizeInfo.memorySizeBytes);
821 	{
822 		auto& alloc = outputBuffer->getAllocation();
823 		deMemcpy(outputMemory.data(), reinterpret_cast<deUint8*>(alloc.getHostPtr()) + alloc.getOffset(), outputBufferSizeInfo.memorySizeBytes);
824 	}
825 
826 	// Unpack and verify output data.
827 	auto& testLog = m_context.getTestContext().getLog();
828 	bool conversionOk = false;
829 	switch (m_params.to)
830 	{
831 	case FLOAT_TYPE_16_BITS:
832 		{
833 			auto outputValues = unpackFloats<tcu::Float16>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
834 			switch (m_params.from)
835 			{
836 			case FLOAT_TYPE_32_BITS:
837 				{
838 					auto& inputValues = InputGenerator::getInstance().getInputValues32();
839 					conversionOk = validConversion(inputValues, outputValues, testLog);
840 				}
841 				break;
842 
843 			case FLOAT_TYPE_64_BITS:
844 				{
845 					auto& inputValues = InputGenerator::getInstance().getInputValues64();
846 					conversionOk = validConversion(inputValues, outputValues, testLog);
847 				}
848 				break;
849 
850 			default:
851 				DE_ASSERT(false);
852 				break;
853 			}
854 		}
855 		break;
856 
857 	case FLOAT_TYPE_32_BITS:
858 		{
859 			auto outputValues = unpackFloats<tcu::Float32>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
860 			switch (m_params.from)
861 			{
862 			case FLOAT_TYPE_16_BITS:
863 				{
864 					auto& inputValues = InputGenerator::getInstance().getInputValues16();
865 					conversionOk = validConversion(inputValues, outputValues, testLog);
866 				}
867 				break;
868 
869 			case FLOAT_TYPE_64_BITS:
870 				{
871 					auto& inputValues = InputGenerator::getInstance().getInputValues64();
872 					conversionOk = validConversion(inputValues, outputValues, testLog);
873 				}
874 				break;
875 
876 			default:
877 				DE_ASSERT(false);
878 				break;
879 			}
880 		}
881 		break;
882 
883 	case FLOAT_TYPE_64_BITS:
884 		{
885 			auto outputValues = unpackFloats<tcu::Float64>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
886 			switch (m_params.from)
887 			{
888 			case FLOAT_TYPE_16_BITS:
889 				{
890 					auto& inputValues = InputGenerator::getInstance().getInputValues16();
891 					conversionOk = validConversion(inputValues, outputValues, testLog);
892 				}
893 				break;
894 
895 			case FLOAT_TYPE_32_BITS:
896 				{
897 					auto& inputValues = InputGenerator::getInstance().getInputValues32();
898 					conversionOk = validConversion(inputValues, outputValues, testLog);
899 				}
900 				break;
901 
902 			default:
903 				DE_ASSERT(false);
904 				break;
905 			}
906 		}
907 		break;
908 
909 	default:
910 		DE_ASSERT(false);
911 		break;
912 	}
913 
914 	return (conversionOk ? tcu::TestStatus::pass("Pass") : tcu::TestStatus::fail("Fail"));
915 }
916 
917 } // anonymous
918 
createPrecisionFconvertGroup(tcu::TestContext & testCtx)919 tcu::TestCaseGroup*	createPrecisionFconvertGroup (tcu::TestContext& testCtx)
920 {
921 	tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, "precision_fconvert", "OpFConvert precision tests");
922 
923 	for (int i = 0; i < FLOAT_TYPE_MAX_ENUM; ++i)
924 	for (int j = 0; j < FLOAT_TYPE_MAX_ENUM; ++j)
925 	for (size_t k = kMinVectorLength; k <= kMaxVectorLength; ++k)
926 	{
927 		// No actual conversion if the types are the same.
928 		if (i == j)
929 			continue;
930 
931 		TestParams params = {
932 			static_cast<FloatType>(i),
933 			static_cast<FloatType>(j),
934 			k,
935 		};
936 
937 		std::string testName = std::string() + kFloatNames[i] + "_to_" + kFloatNames[j] + "_size_" + std::to_string(k);
938 		std::string testDescription = std::string("Conversion from ") + kFloatNames[i] + " to " + kFloatNames[j] + " with vectors of size " + std::to_string(k);
939 
940 		newGroup->addChild(new FConvertTestCase(testCtx, testName, testDescription, params));
941 	}
942 
943 	return newGroup;
944 }
945 
946 } // shaderexecutor
947 } // vkt
948