1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Valve Corporation.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief OpFConvert tests.
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktShaderFConvertTests.hpp"
26 #include "vktTestCase.hpp"
27
28 #include "vkBufferWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkPrograms.hpp"
33
34 #include "deDefs.hpp"
35 #include "deRandom.hpp"
36
37 #include "tcuFloat.hpp"
38 #include "tcuTestLog.hpp"
39 #include "tcuFormatUtil.hpp"
40
41 #include <vector>
42 #include <iterator>
43 #include <algorithm>
44 #include <memory>
45 #include <sstream>
46 #include <iomanip>
47 #include <string>
48 #include <limits>
49
50 namespace vkt
51 {
52 namespace shaderexecutor
53 {
54
55 namespace
56 {
57
58 constexpr deUint32 kRandomSeed = 0xdeadbeef;
59 constexpr size_t kRandomSourcesPerType = 240;
60 constexpr size_t kMinVectorLength = 1;
61 constexpr size_t kMaxVectorLength = 4;
62 constexpr size_t kArrayAlignment = 16; // Bytes.
63 constexpr size_t kEffectiveLength[kMaxVectorLength + 1] = { 0, 1, 2, 4, 4 }; // Effective length of a vector of size i.
64 constexpr size_t kGCFNumFloats = 12; // Greatest Common Factor of the number of floats in a test.
65
66 // Get a random normal number.
67 // Works for implementations of tcu::Float as T.
68 template <class T>
getRandomNormal(de::Random & rnd)69 T getRandomNormal (de::Random& rnd)
70 {
71 static constexpr typename T::StorageType kLeadingMantissaBit = (static_cast<typename T::StorageType>(1) << T::MANTISSA_BITS);
72 static constexpr int kSignValues[] = { -1, 1 };
73
74 int signBit = rnd.getInt(0, 1);
75 int exponent = rnd.getInt(1 - T::EXPONENT_BIAS, T::EXPONENT_BIAS + 1);
76 typename T::StorageType mantissa = static_cast<typename T::StorageType>(rnd.getUint64() & static_cast<deUint64>(kLeadingMantissaBit - 1));
77
78 // Construct number.
79 return T::construct(kSignValues[signBit], exponent, (kLeadingMantissaBit | mantissa));
80 }
81
// Hand-picked boundary values for tcu::Float class T: both signed zeros, followed by the largest and the smallest
// normal numbers of each sign. The list is built on first use and the same object is returned on every call.
template <class T>
const std::vector<T>& interestingSamples ()
{
    // Infinities and NaN are disabled and not part of the list:
    //   T::inf(-1), T::inf(1), T::nan()
    static const std::vector<T> kSamples
    {
        T::zero(-1),
        T::zero(1),
        T::largestNormal(-1),
        T::largestNormal(1),
        T::smallestNormal(-1),
        T::smallestNormal(1),
    };

    return kSamples;
}
101
102 // Get some random interesting numbers.
103 // Works for implementations of tcu::Float as T.
104 template <class T>
getRandomInteresting(de::Random & rnd,size_t numSamples)105 std::vector<T> getRandomInteresting (de::Random& rnd, size_t numSamples)
106 {
107 auto& samples = interestingSamples<T>();
108 std::vector<T> result;
109
110 result.reserve(numSamples);
111 std::generate_n(std::back_inserter(result), numSamples, [&rnd, &samples]() { return rnd.choose<T>(begin(samples), end(samples)); });
112
113 return result;
114 }
115
116 // Helper class to build each vector only once in a thread-safe way.
117 template <class T>
118 struct StaticVectorHelper
119 {
120 std::vector<T> v;
121
StaticVectorHelpervkt::shaderexecutor::__anon3b3c11db0111::StaticVectorHelper122 StaticVectorHelper (de::Random& rnd)
123 {
124 v.reserve(kRandomSourcesPerType);
125 for (size_t i = 0; i < kRandomSourcesPerType; ++i)
126 v.push_back(getRandomNormal<T>(rnd));
127 }
128 };
129
130 // Get a list of random normal input values for type T.
131 template <class T>
getRandomNormals(de::Random & rnd)132 const std::vector<T>& getRandomNormals (de::Random& rnd)
133 {
134 static StaticVectorHelper<T> helper(rnd);
135 return helper.v;
136 }
137
// Convert every tcu::Float element of type T1 in orig to type T2 using T2::convert(), keeping the element order.
template <class T1, class T2>
std::vector<T2> convertVector (const std::vector<T1>& orig)
{
    std::vector<T2> converted;
    converted.reserve(orig.size());

    for (const T1& source : orig)
        converted.push_back(T2::convert(source));

    return converted;
}
150
151 // Get converted normal values for other tcu::Float types smaller than T, which should be exact conversions when converting back to
152 // those types.
153 template <class T>
154 std::vector<T> getOtherNormals (de::Random& rnd);
155
156 template<>
getOtherNormals(de::Random &)157 std::vector<tcu::Float16> getOtherNormals<tcu::Float16> (de::Random&)
158 {
159 // Nothing below tcu::Float16.
160 return std::vector<tcu::Float16>();
161 }
162
163 template<>
getOtherNormals(de::Random & rnd)164 std::vector<tcu::Float32> getOtherNormals<tcu::Float32> (de::Random& rnd)
165 {
166 // The ones from tcu::Float16.
167 return convertVector<tcu::Float16, tcu::Float32>(getRandomNormals<tcu::Float16>(rnd));
168 }
169
170 template<>
getOtherNormals(de::Random & rnd)171 std::vector<tcu::Float64> getOtherNormals<tcu::Float64> (de::Random& rnd)
172 {
173 // The ones from both tcu::Float16 and tcu::Float64.
174 auto v1 = convertVector<tcu::Float16, tcu::Float64>(getRandomNormals<tcu::Float16>(rnd));
175 auto v2 = convertVector<tcu::Float32, tcu::Float64>(getRandomNormals<tcu::Float32>(rnd));
176
177 v1.reserve(v1.size() + v2.size());
178 std::copy(begin(v2), end(v2), std::back_inserter(v1));
179 return v1;
180 }
181
182 // Get the full list of input values for type T.
183 template <class T>
getInputValues(de::Random & rnd)184 std::vector<T> getInputValues (de::Random& rnd)
185 {
186 auto& interesting = interestingSamples<T>();
187 auto& normals = getRandomNormals<T>(rnd);
188 auto otherNormals = getOtherNormals<T>(rnd);
189
190 const size_t numValues = interesting.size() + normals.size() + otherNormals.size();
191 const size_t extraValues = numValues % kGCFNumFloats;
192 const size_t needed = ((extraValues == 0) ? 0 : (kGCFNumFloats - extraValues));
193
194 auto extra = getRandomInteresting<T> (rnd, needed);
195
196 std::vector<T> values;
197 values.reserve(interesting.size() + normals.size() + otherNormals.size() + extra.size());
198
199 std::copy(begin(interesting), end(interesting), std::back_inserter(values));
200 std::copy(begin(normals), end(normals), std::back_inserter(values));
201 std::copy(begin(otherNormals), end(otherNormals), std::back_inserter(values));
202 std::copy(begin(extra), end(extra), std::back_inserter(values));
203
204 // Shuffle samples around a bit to make it more interesting.
205 rnd.shuffle(begin(values), end(values));
206
207 return values;
208 }
209
210 // This singleton makes sure generated samples are stable no matter the test order.
211 class InputGenerator
212 {
213 public:
getInstance()214 static const InputGenerator& getInstance ()
215 {
216 static InputGenerator instance;
217 return instance;
218 }
219
getInputValues16() const220 const std::vector<tcu::Float16>& getInputValues16 () const
221 {
222 return m_values16;
223 }
224
getInputValues32() const225 const std::vector<tcu::Float32>& getInputValues32 () const
226 {
227 return m_values32;
228 }
229
getInputValues64() const230 const std::vector<tcu::Float64>& getInputValues64 () const
231 {
232 return m_values64;
233 }
234
235 private:
InputGenerator()236 InputGenerator ()
237 : m_rnd(kRandomSeed)
238 , m_values16(getInputValues<tcu::Float16>(m_rnd))
239 , m_values32(getInputValues<tcu::Float32>(m_rnd))
240 , m_values64(getInputValues<tcu::Float64>(m_rnd))
241 {
242 }
243
244 // Cannot copy or assign.
245 InputGenerator(const InputGenerator&) = delete;
246 InputGenerator& operator=(const InputGenerator&) = delete;
247
248 de::Random m_rnd;
249 std::vector<tcu::Float16> m_values16;
250 std::vector<tcu::Float32> m_values32;
251 std::vector<tcu::Float64> m_values64;
252 };
253
254 // Check single result is as expected.
255 // Works for implementations of tcu::Float as T1 and T2.
256 template <class T1, class T2>
validConversion(const T1 & orig,const T2 & result)257 bool validConversion (const T1& orig, const T2& result)
258 {
259 const T2 acceptedResults[] = { T2::convert(orig, tcu::ROUND_DOWNWARD), T2::convert(orig, tcu::ROUND_UPWARD) };
260 bool valid = false;
261
262 for (const auto& validResult : acceptedResults)
263 {
264 if (validResult.isNaN() && result.isNaN())
265 valid = true;
266 else if (validResult.isInf() && result.isInf())
267 valid = true;
268 else if (validResult.isZero() && result.isZero())
269 valid = true;
270 else if (validResult.isDenorm() && (result.isDenorm() || result.isZero()))
271 valid = true;
272 else if (validResult.bits() == result.bits()) // Exact conversion, up or down.
273 valid = true;
274 }
275
276 return valid;
277 }
278
279 // Check results vector is as expected.
280 template <class T1, class T2>
validConversion(const std::vector<T1> & orig,const std::vector<T2> & converted,tcu::TestLog & log)281 bool validConversion (const std::vector<T1>& orig, const std::vector<T2>& converted, tcu::TestLog& log)
282 {
283 DE_ASSERT(orig.size() == converted.size());
284
285 bool allValid = true;
286
287 for (size_t i = 0; i < orig.size(); ++i)
288 {
289 const bool valid = validConversion(orig[i], converted[i]);
290
291 {
292 const double origD = orig[i].asDouble();
293 const double convD = converted[i].asDouble();
294
295 std::ostringstream msg;
296 msg << "[" << i << "] "
297 << std::setprecision(std::numeric_limits<double>::digits10 + 2) << std::scientific
298 << origD << " converted to " << convD << ": " << (valid ? "OK" : "FAILURE");
299
300 log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
301 }
302
303 if (!valid)
304 allValid = false;
305 }
306
307 return allValid;
308 }
309
310 // Helps calculate buffer sizes and other parameters for the given number of values and vector length using a given floating point
311 // type. This is mostly used in packFloats() below, but we also need this information in the iterate() method for the test instance,
312 // so it has been separated.
313 struct BufferSizeInfo
314 {
315 template <class T>
calculatevkt::shaderexecutor::__anon3b3c11db0111::BufferSizeInfo316 static BufferSizeInfo calculate (size_t numValues_, size_t vectorLength_)
317 {
318 // The vector length must be a known number.
319 DE_ASSERT(vectorLength_ >= kMinVectorLength && vectorLength_ <= kMaxVectorLength);
320 // The number of values must be appropriate for the vector length.
321 DE_ASSERT(numValues_ % vectorLength_ == 0);
322
323 BufferSizeInfo info;
324
325 info.numValues = numValues_;
326 info.vectorLength = vectorLength_;
327 info.totalVectors = numValues_ / vectorLength_;
328
329 const size_t elementSize = sizeof(typename T::StorageType);
330 const size_t effectiveLength = kEffectiveLength[vectorLength_];
331 const size_t vectorSize = elementSize * effectiveLength;
332 const size_t extraBytes = vectorSize % kArrayAlignment;
333
334 info.vectorStrideBytes = vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
335 info.memorySizeBytes = info.vectorStrideBytes * info.totalVectors;
336
337 return info;
338 }
339
340 size_t numValues;
341 size_t vectorLength;
342 size_t totalVectors;
343 size_t vectorStrideBytes;
344 size_t memorySizeBytes;
345 };
346
347 // Pack an array of tcu::Float values into a buffer to be read from a shader, as if it was an array of vectors with each vector
348 // having size vectorLength (e.g. 3 for a vec3). Note: assumes std140.
349 template <class T>
packFloats(const std::vector<T> & values,size_t vectorLength)350 std::vector<deUint8> packFloats (const std::vector<T>& values, size_t vectorLength)
351 {
352 BufferSizeInfo sizeInfo = BufferSizeInfo::calculate<T>(values.size(), vectorLength);
353
354 std::vector<deUint8> memory(sizeInfo.memorySizeBytes);
355 for (size_t i = 0; i < sizeInfo.totalVectors; ++i)
356 {
357 T* vectorPtr = reinterpret_cast<T*>(memory.data() + sizeInfo.vectorStrideBytes * i);
358 for (size_t j = 0; j < vectorLength; ++j)
359 vectorPtr[j] = values[i*vectorLength + j];
360 }
361
362 return memory;
363 }
364
365 // Unpack an array of vectors into an array of values, undoing what packFloats would do.
366 // expectedNumValues is used for verification.
367 template <class T>
unpackFloats(const std::vector<deUint8> & memory,size_t vectorLength,size_t expectedNumValues)368 std::vector<T> unpackFloats (const std::vector<deUint8>& memory, size_t vectorLength, size_t expectedNumValues)
369 {
370 DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
371
372 const size_t effectiveLength = kEffectiveLength[vectorLength];
373 const size_t elementSize = sizeof(typename T::StorageType);
374 const size_t vectorSize = elementSize * effectiveLength;
375 const size_t extraBytes = vectorSize % kArrayAlignment;
376 const size_t vectorBlockSize = vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
377
378 DE_ASSERT(memory.size() % vectorBlockSize == 0);
379 const size_t numStoredVectors = memory.size() / vectorBlockSize;
380 const size_t numStoredValues = numStoredVectors * vectorLength;
381
382 DE_UNREF(expectedNumValues); // For release builds.
383 DE_ASSERT(numStoredValues == expectedNumValues);
384 std::vector<T> values;
385 values.reserve(numStoredValues);
386
387 for (size_t i = 0; i < numStoredVectors; ++i)
388 {
389 const T* vectorPtr = reinterpret_cast<const T*>(memory.data() + vectorBlockSize * i);
390 for (size_t j = 0; j < vectorLength; ++j)
391 values.push_back(vectorPtr[j]);
392 }
393
394 return values;
395 }
396
397 enum FloatType
398 {
399 FLOAT_TYPE_16_BITS = 0,
400 FLOAT_TYPE_32_BITS,
401 FLOAT_TYPE_64_BITS,
402 FLOAT_TYPE_MAX_ENUM,
403 };
404
405 static const char* const kFloatNames[FLOAT_TYPE_MAX_ENUM] =
406 {
407 "f16",
408 "f32",
409 "f64",
410 };
411
412 static const char* const kGLSLTypes[][kMaxVectorLength + 1] =
413 {
414 { nullptr, "float16_t", "f16vec2", "f16vec3", "f16vec4" },
415 { nullptr, "float", "vec2", "vec3", "vec4" },
416 { nullptr, "double", "dvec2", "dvec3", "dvec4" },
417 };
418
419 struct TestParams
420 {
421 FloatType from;
422 FloatType to;
423 size_t vectorLength;
424
getInputTypeStrvkt::shaderexecutor::__anon3b3c11db0111::TestParams425 std::string getInputTypeStr () const
426 {
427 DE_ASSERT(from >= 0 && from < FLOAT_TYPE_MAX_ENUM);
428 DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
429 return kGLSLTypes[from][vectorLength];
430 }
431
getOutputTypeStrvkt::shaderexecutor::__anon3b3c11db0111::TestParams432 std::string getOutputTypeStr () const
433 {
434 DE_ASSERT(to >= 0 && to < FLOAT_TYPE_MAX_ENUM);
435 DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
436 return kGLSLTypes[to][vectorLength];
437 }
438 };
439
440 class FConvertTestInstance : public TestInstance
441 {
442 public:
FConvertTestInstance(Context & context,const TestParams & params)443 FConvertTestInstance (Context& context, const TestParams& params)
444 : TestInstance(context)
445 , m_params(params)
446 {}
447
448 virtual tcu::TestStatus iterate (void);
449
450 private:
451 TestParams m_params;
452 };
453
454 class FConvertTestCase : public TestCase
455 {
456 public:
FConvertTestCase(tcu::TestContext & context,const std::string & name,const std::string & desc,const TestParams & params)457 FConvertTestCase (tcu::TestContext& context, const std::string& name, const std::string& desc, const TestParams& params)
458 : TestCase (context, name, desc)
459 , m_params (params)
460 {}
461
~FConvertTestCase(void)462 ~FConvertTestCase (void) {}
createInstance(Context & context) const463 virtual TestInstance* createInstance (Context& context) const { return new FConvertTestInstance(context, m_params); }
464 virtual void initPrograms (vk::SourceCollections& programCollection) const;
465 virtual void checkSupport (Context& context) const;
466
467 private:
468 TestParams m_params;
469 };
470
initPrograms(vk::SourceCollections & programCollection) const471 void FConvertTestCase::initPrograms (vk::SourceCollections& programCollection) const
472 {
473 const std::string inputType = m_params.getInputTypeStr();
474 const std::string outputType = m_params.getOutputTypeStr();
475 const InputGenerator& inputGenerator = InputGenerator::getInstance();
476
477 size_t numValues = 0;
478 switch (m_params.from)
479 {
480 case FLOAT_TYPE_16_BITS:
481 numValues = inputGenerator.getInputValues16().size();
482 break;
483 case FLOAT_TYPE_32_BITS:
484 numValues = inputGenerator.getInputValues32().size();
485 break;
486 case FLOAT_TYPE_64_BITS:
487 numValues = inputGenerator.getInputValues64().size();
488 break;
489 default:
490 DE_ASSERT(false);
491 break;
492 }
493
494 const size_t arraySize = numValues / m_params.vectorLength;
495
496 std::ostringstream shader;
497
498 shader
499 << "#version 450 core\n"
500 << ((m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS) ?
501 "#extension GL_EXT_shader_16bit_storage: require\n" // This is needed to use 16-bit float types in buffers.
502 "#extension GL_EXT_shader_explicit_arithmetic_types: require\n" // This is needed for some conversions.
503 : "")
504 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
505 << "layout(set = 0, binding = 0, std140) buffer issbodef { " << inputType << " val[" << arraySize << "]; } issbo;\n"
506 << "layout(set = 0, binding = 1, std140) buffer ossbodef { " << outputType << " val[" << arraySize << "]; } ossbo;\n"
507 << "void main()\n"
508 << "{\n"
509 << " ossbo.val[gl_WorkGroupID.x] = " << outputType << "(issbo.val[gl_WorkGroupID.x]);\n"
510 << "}\n";
511
512 programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
513 }
514
checkSupport(Context & context) const515 void FConvertTestCase::checkSupport (Context& context) const
516 {
517 if (m_params.from == FLOAT_TYPE_64_BITS || m_params.to == FLOAT_TYPE_64_BITS)
518 {
519 // Check for 64-bit float support.
520 auto features = context.getDeviceFeatures();
521 if (!features.shaderFloat64)
522 TCU_THROW(NotSupportedError, "64-bit floats not supported in shader code");
523 }
524
525 if (m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS)
526 {
527 // Check for 16-bit float support.
528 auto& features16 = context.getShaderFloat16Int8Features();
529 if (!features16.shaderFloat16)
530 TCU_THROW(NotSupportedError, "16-bit floats not supported in shader code");
531
532 auto& storage16 = context.get16BitStorageFeatures();
533 if (!storage16.storageBuffer16BitAccess)
534 TCU_THROW(NotSupportedError, "16-bit floats not supported for storage buffers");
535 }
536 }
537
iterate(void)538 tcu::TestStatus FConvertTestInstance::iterate (void)
539 {
540 BufferSizeInfo inputBufferSizeInfo;
541 BufferSizeInfo outputBufferSizeInfo;
542 std::vector<deUint8> inputMemory;
543
544 // Calculate buffer sizes and convert input values to a packed input memory format, depending on the input and output types.
545 switch (m_params.from)
546 {
547 case FLOAT_TYPE_16_BITS:
548 {
549 auto& inputValues = InputGenerator::getInstance().getInputValues16();
550 inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
551 switch (m_params.to)
552 {
553 case FLOAT_TYPE_32_BITS:
554 outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
555 break;
556 case FLOAT_TYPE_64_BITS:
557 outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
558 break;
559 default:
560 DE_ASSERT(false);
561 break;
562 }
563 inputMemory = packFloats(inputValues, m_params.vectorLength);
564 }
565 break;
566
567 case FLOAT_TYPE_32_BITS:
568 {
569 auto& inputValues = InputGenerator::getInstance().getInputValues32();
570 inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
571 switch (m_params.to)
572 {
573 case FLOAT_TYPE_16_BITS:
574 outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
575 break;
576 case FLOAT_TYPE_64_BITS:
577 outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
578 break;
579 default:
580 DE_ASSERT(false);
581 break;
582 }
583 inputMemory = packFloats(inputValues, m_params.vectorLength);
584 }
585 break;
586
587 case FLOAT_TYPE_64_BITS:
588 {
589 auto& inputValues = InputGenerator::getInstance().getInputValues64();
590 inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
591 switch (m_params.to)
592 {
593 case FLOAT_TYPE_16_BITS:
594 outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
595 break;
596 case FLOAT_TYPE_32_BITS:
597 outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
598 break;
599 default:
600 DE_ASSERT(false);
601 break;
602 }
603 inputMemory = packFloats(inputValues, m_params.vectorLength);
604 }
605 break;
606
607 default:
608 DE_ASSERT(false);
609 break;
610 }
611
612 // Prepare input and output buffers.
613 auto& vkd = m_context.getDeviceInterface();
614 auto device = m_context.getDevice();
615 auto& allocator = m_context.getDefaultAllocator();
616
617 de::MovePtr<vk::BufferWithMemory> inputBuffer(
618 new vk::BufferWithMemory(vkd, device, allocator,
619 vk::makeBufferCreateInfo(inputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
620 vk::MemoryRequirement::HostVisible)
621 );
622
623 de::MovePtr<vk::BufferWithMemory> outputBuffer(
624 new vk::BufferWithMemory(vkd, device, allocator,
625 vk::makeBufferCreateInfo(outputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
626 vk::MemoryRequirement::HostVisible)
627 );
628
629 // Copy values to input buffer.
630 {
631 auto& alloc = inputBuffer->getAllocation();
632 deMemcpy(reinterpret_cast<deUint8*>(alloc.getHostPtr()) + alloc.getOffset(), inputMemory.data(), inputMemory.size());
633 vk::flushAlloc(vkd, device, alloc);
634 }
635
636 // Create an array with the input and output buffers to make it easier to iterate below.
637 const vk::VkBuffer buffers[] = { inputBuffer->get(), outputBuffer->get() };
638
639 // Create descriptor set layout.
640 std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
641 for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
642 {
643 const vk::VkDescriptorSetLayoutBinding binding =
644 {
645 static_cast<deUint32>(i), // uint32_t binding;
646 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // VkDescriptorType descriptorType;
647 1u, // uint32_t descriptorCount;
648 vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlags stageFlags;
649 DE_NULL, // const VkSampler* pImmutableSamplers;
650 };
651 bindings.push_back(binding);
652 }
653
654 const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo =
655 {
656 vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // VkStructureType sType;
657 DE_NULL, // const void* pNext;
658 0, // VkDescriptorSetLayoutCreateFlags flags;
659 static_cast<deUint32>(bindings.size()), // uint32_t bindingCount;
660 bindings.data() // const VkDescriptorSetLayoutBinding* pBindings;
661 };
662 auto descriptorSetLayout = vk::createDescriptorSetLayout(vkd, device, &layoutCreateInfo);
663
664 // Create descriptor set.
665 vk::DescriptorPoolBuilder poolBuilder;
666 for (const auto& b : bindings)
667 poolBuilder.addType(b.descriptorType, 1u);
668 auto descriptorPool = poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
669
670 const vk::VkDescriptorSetAllocateInfo allocateInfo =
671 {
672 vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType sType;
673 DE_NULL, // const void* pNext;
674 *descriptorPool, // VkDescriptorPool descriptorPool;
675 1u, // uint32_t descriptorSetCount;
676 &descriptorSetLayout.get() // const VkDescriptorSetLayout* pSetLayouts;
677 };
678 auto descriptorSet = vk::allocateDescriptorSet(vkd, device, &allocateInfo);
679
680 // Update descriptor set.
681 std::vector<vk::VkDescriptorBufferInfo> descriptorBufferInfos;
682 std::vector<vk::VkWriteDescriptorSet> descriptorWrites;
683
684 for (const auto& buffer : buffers)
685 {
686 const vk::VkDescriptorBufferInfo bufferInfo =
687 {
688 buffer, // VkBuffer buffer;
689 0u, // VkDeviceSize offset;
690 VK_WHOLE_SIZE, // VkDeviceSize range;
691 };
692 descriptorBufferInfos.push_back(bufferInfo);
693 }
694
695 for (size_t i = 0; i < bindings.size(); ++i)
696 {
697 const vk::VkWriteDescriptorSet write =
698 {
699 vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // VkStructureType sType;
700 DE_NULL, // const void* pNext;
701 *descriptorSet, // VkDescriptorSet dstSet;
702 static_cast<deUint32>(i), // uint32_t dstBinding;
703 0u, // uint32_t dstArrayElement;
704 1u, // uint32_t descriptorCount;
705 bindings[i].descriptorType, // VkDescriptorType descriptorType;
706 DE_NULL, // const VkDescriptorImageInfo* pImageInfo;
707 &descriptorBufferInfos[i], // const VkDescriptorBufferInfo* pBufferInfo;
708 DE_NULL, // const VkBufferView* pTexelBufferView;
709 };
710 descriptorWrites.push_back(write);
711 }
712 vkd.updateDescriptorSets(device, static_cast<deUint32>(descriptorWrites.size()), descriptorWrites.data(), 0u, DE_NULL);
713
714 // Prepare barriers in advance so data is visible to the shaders and the host.
715 std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
716 std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
717 for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
718 {
719 const vk::VkBufferMemoryBarrier hostToDev =
720 {
721 vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
722 DE_NULL, // const void* pNext;
723 vk::VK_ACCESS_HOST_WRITE_BIT, // VkAccessFlags srcAccessMask;
724 (vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT), // VkAccessFlags dstAccessMask;
725 VK_QUEUE_FAMILY_IGNORED, // deUint32 srcQueueFamilyIndex;
726 VK_QUEUE_FAMILY_IGNORED, // deUint32 dstQueueFamilyIndex;
727 buffers[i], // VkBuffer buffer;
728 0u, // VkDeviceSize offset;
729 VK_WHOLE_SIZE, // VkDeviceSize size;
730 };
731 hostToDevBarriers.push_back(hostToDev);
732
733 const vk::VkBufferMemoryBarrier devToHost =
734 {
735 vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
736 DE_NULL, // const void* pNext;
737 vk::VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
738 vk::VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
739 VK_QUEUE_FAMILY_IGNORED, // deUint32 srcQueueFamilyIndex;
740 VK_QUEUE_FAMILY_IGNORED, // deUint32 dstQueueFamilyIndex;
741 buffers[i], // VkBuffer buffer;
742 0u, // VkDeviceSize offset;
743 VK_WHOLE_SIZE, // VkDeviceSize size;
744 };
745 devToHostBarriers.push_back(devToHost);
746 }
747
748 // Create command pool and command buffer.
749 auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
750
751 const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo =
752 {
753 vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
754 DE_NULL, // const void* pNext;
755 vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, // VkCommandPoolCreateFlags flags;
756 queueFamilyIndex, // deUint32 queueFamilyIndex;
757 };
758 auto cmdPool = vk::createCommandPool(vkd, device, &cmdPoolCreateInfo);
759
760 const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo =
761 {
762 vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
763 DE_NULL, // const void* pNext;
764 *cmdPool, // VkCommandPool commandPool;
765 vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
766 1u, // deUint32 commandBufferCount;
767 };
768 auto cmdBuffer = vk::allocateCommandBuffer(vkd, device, &cmdBufferAllocateInfo);
769
770 // Create pipeline layout.
771 const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
772 {
773 vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
774 DE_NULL, // const void* pNext;
775 0, // VkPipelineLayoutCreateFlags flags;
776 1u, // deUint32 setLayoutCount;
777 &descriptorSetLayout.get(), // const VkDescriptorSetLayout* pSetLayouts;
778 0u, // deUint32 pushConstantRangeCount;
779 DE_NULL, // const VkPushConstantRange* pPushConstantRanges;
780 };
781 auto pipelineLayout = vk::createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
782
783 // Create compute pipeline.
784 const vk::Unique<vk::VkShaderModule> shader(vk::createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0));
785
786 const vk::VkComputePipelineCreateInfo computeCreateInfo =
787 {
788 vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
789 DE_NULL, // const void* pNext;
790 0, // VkPipelineCreateFlags flags;
791 { // VkPipelineShaderStageCreateInfo stage;
792 vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
793 DE_NULL, // const void* pNext;
794 0, // VkPipelineShaderStageCreateFlags flags;
795 vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
796 *shader, // VkShaderModule module;
797 "main", // const char* pName;
798 DE_NULL, // const VkSpecializationInfo* pSpecializationInfo;
799 },
800 *pipelineLayout, // VkPipelineLayout layout;
801 DE_NULL, // VkPipeline basePipelineHandle;
802 0, // int32_t basePipelineIndex;
803 };
804 auto computePipeline = vk::createComputePipeline(vkd, device, DE_NULL, &computeCreateInfo);
805
806 // Run the shader.
807 vk::beginCommandBuffer(vkd, *cmdBuffer);
808 vkd.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
809 vkd.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
810 vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
811 vkd.cmdDispatch(*cmdBuffer, static_cast<deUint32>(inputBufferSizeInfo.totalVectors), 1u, 1u);
812 vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
813 vk::endCommandBuffer(vkd, *cmdBuffer);
814 vk::submitCommandsAndWait(vkd, device, m_context.getUniversalQueue(), *cmdBuffer);
815
816 // Invalidate output allocation.
817 vk::invalidateAlloc(vkd, device, outputBuffer->getAllocation());
818
819 // Copy output buffer data.
820 std::vector<deUint8> outputMemory(outputBufferSizeInfo.memorySizeBytes);
821 {
822 auto& alloc = outputBuffer->getAllocation();
823 deMemcpy(outputMemory.data(), reinterpret_cast<deUint8*>(alloc.getHostPtr()) + alloc.getOffset(), outputBufferSizeInfo.memorySizeBytes);
824 }
825
826 // Unpack and verify output data.
827 auto& testLog = m_context.getTestContext().getLog();
828 bool conversionOk = false;
829 switch (m_params.to)
830 {
831 case FLOAT_TYPE_16_BITS:
832 {
833 auto outputValues = unpackFloats<tcu::Float16>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
834 switch (m_params.from)
835 {
836 case FLOAT_TYPE_32_BITS:
837 {
838 auto& inputValues = InputGenerator::getInstance().getInputValues32();
839 conversionOk = validConversion(inputValues, outputValues, testLog);
840 }
841 break;
842
843 case FLOAT_TYPE_64_BITS:
844 {
845 auto& inputValues = InputGenerator::getInstance().getInputValues64();
846 conversionOk = validConversion(inputValues, outputValues, testLog);
847 }
848 break;
849
850 default:
851 DE_ASSERT(false);
852 break;
853 }
854 }
855 break;
856
857 case FLOAT_TYPE_32_BITS:
858 {
859 auto outputValues = unpackFloats<tcu::Float32>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
860 switch (m_params.from)
861 {
862 case FLOAT_TYPE_16_BITS:
863 {
864 auto& inputValues = InputGenerator::getInstance().getInputValues16();
865 conversionOk = validConversion(inputValues, outputValues, testLog);
866 }
867 break;
868
869 case FLOAT_TYPE_64_BITS:
870 {
871 auto& inputValues = InputGenerator::getInstance().getInputValues64();
872 conversionOk = validConversion(inputValues, outputValues, testLog);
873 }
874 break;
875
876 default:
877 DE_ASSERT(false);
878 break;
879 }
880 }
881 break;
882
883 case FLOAT_TYPE_64_BITS:
884 {
885 auto outputValues = unpackFloats<tcu::Float64>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
886 switch (m_params.from)
887 {
888 case FLOAT_TYPE_16_BITS:
889 {
890 auto& inputValues = InputGenerator::getInstance().getInputValues16();
891 conversionOk = validConversion(inputValues, outputValues, testLog);
892 }
893 break;
894
895 case FLOAT_TYPE_32_BITS:
896 {
897 auto& inputValues = InputGenerator::getInstance().getInputValues32();
898 conversionOk = validConversion(inputValues, outputValues, testLog);
899 }
900 break;
901
902 default:
903 DE_ASSERT(false);
904 break;
905 }
906 }
907 break;
908
909 default:
910 DE_ASSERT(false);
911 break;
912 }
913
914 return (conversionOk ? tcu::TestStatus::pass("Pass") : tcu::TestStatus::fail("Fail"));
915 }
916
917 } // anonymous
918
createPrecisionFconvertGroup(tcu::TestContext & testCtx)919 tcu::TestCaseGroup* createPrecisionFconvertGroup (tcu::TestContext& testCtx)
920 {
921 tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, "precision_fconvert", "OpFConvert precision tests");
922
923 for (int i = 0; i < FLOAT_TYPE_MAX_ENUM; ++i)
924 for (int j = 0; j < FLOAT_TYPE_MAX_ENUM; ++j)
925 for (size_t k = kMinVectorLength; k <= kMaxVectorLength; ++k)
926 {
927 // No actual conversion if the types are the same.
928 if (i == j)
929 continue;
930
931 TestParams params = {
932 static_cast<FloatType>(i),
933 static_cast<FloatType>(j),
934 k,
935 };
936
937 std::string testName = std::string() + kFloatNames[i] + "_to_" + kFloatNames[j] + "_size_" + std::to_string(k);
938 std::string testDescription = std::string("Conversion from ") + kFloatNames[i] + " to " + kFloatNames[j] + " with vectors of size " + std::to_string(k);
939
940 newGroup->addChild(new FConvertTestCase(testCtx, testName, testDescription, params));
941 }
942
943 return newGroup;
944 }
945
946 } // shaderexecutor
947 } // vkt
948