1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Normal include guard for non-SIMD portion of this header.
16 #ifndef HWY_TESTS_TEST_UTIL_H_
17 #define HWY_TESTS_TEST_UTIL_H_
18 
19 // Helper functions for use by *_test.cc.
20 
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>  // std::max, std::min
#include <cmath>      // std::isnan
#include <cstddef>
#include <string>
#include <tuple>        // std::tuple, std::get
#include <type_traits>  // std::is_integral, std::is_signed
#include <utility>      // std::forward

#include "hwy/aligned_allocator.h"
#include "hwy/base.h"
#include "hwy/highway.h"

#include "gtest/gtest.h"
35 
36 namespace hwy {
37 
// The maximum vector size used in tests when defining test data. DEPRECATED.
// NOTE(review): the unit is presumably bytes — confirm before relying on it.
constexpr size_t kTestMaxVectorSize = 64;
40 
// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
// used INSTANTIATE_TEST_CASE_P which is now deprecated.
// HWY_GTEST_INSTANTIATE_TEST_SUITE_P expands to whichever of the two the
// googletest in use provides, so callers only need a single spelling.
#ifdef INSTANTIATE_TEST_SUITE_P
#define HWY_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
#else
#define HWY_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
#endif
48 
49 // Helper class to run parametric tests using the hwy target as parameter. To
50 // use this define the following in your test:
51 //   class MyTestSuite : public TestWithParamTarget {
52 //    ...
53 //   };
54 //   HWY_TARGET_INSTANTIATE_TEST_SUITE_P(MyTestSuite);
55 //   TEST_P(MyTestSuite, MyTest) { ... }
56 class TestWithParamTarget : public testing::TestWithParam<uint32_t> {
57  protected:
SetUp()58   void SetUp() override { SetSupportedTargetsForTest(GetParam()); }
59 
TearDown()60   void TearDown() override {
61     // Check that the parametric test calls SupportedTargets() when the source
62     // was compiled with more than one target. In the single-target case only
63     // static dispatch will be used anyway.
64 #if (HWY_TARGETS & (HWY_TARGETS - 1)) != 0
65     EXPECT_TRUE(SupportedTargetsCalledForTest())
66         << "This hwy target parametric test doesn't use dynamic-dispatch and "
67            "doesn't need to be parametric.";
68 #endif
69     SetSupportedTargetsForTest(0);
70   }
71 };
72 
// Function to convert the test parameter of a TestWithParamTarget for
// displaying it in the gtest test name.
// Returns TargetName() of the target stored in `info.param` as a std::string.
// Used as the name generator by HWY_TARGET_INSTANTIATE_TEST_SUITE_P.
static inline std::string TestParamTargetName(
    const testing::TestParamInfo<uint32_t>& info) {
  return TargetName(info.param);
}
79 
// Instantiates the parametric test suite `suite` once for every target
// returned by ::hwy::SupportedAndGeneratedTargets(). The instantiation prefix
// is `suite` followed by "Group", and ::hwy::TestParamTargetName generates the
// per-parameter test name suffix.
#define HWY_TARGET_INSTANTIATE_TEST_SUITE_P(suite)              \
  HWY_GTEST_INSTANTIATE_TEST_SUITE_P(                           \
      suite##Group, suite,                                      \
      testing::ValuesIn(::hwy::SupportedAndGeneratedTargets()), \
      ::hwy::TestParamTargetName)
85 
// Helper class similar to TestWithParamTarget to run parametric tests that
// depend on the target and another parameter. If you need to use multiple
// extra parameters use a std::tuple<> of them and ::testing::Combine(...) as
// the generator. To use this class define the following in your test:
//   class MyTestSuite : public TestWithParamTargetAndT<int> {
//    ...
//   };
//   HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(MyTestSuite, ::testing::Range(0, 9));
//   TEST_P(MyTestSuite, MyTest) { ... GetParam() .... }
95 template <typename T>
96 class TestWithParamTargetAndT
97     : public ::testing::TestWithParam<std::tuple<uint32_t, T>> {
98  public:
99   // Expose the parametric type here so it can be used by the
100   // HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T macro.
101   using HwyParamType = T;
102 
103  protected:
SetUp()104   void SetUp() override {
105     SetSupportedTargetsForTest(std::get<0>(
106         ::testing::TestWithParam<std::tuple<uint32_t, T>>::GetParam()));
107   }
108 
TearDown()109   void TearDown() override {
110     // Check that the parametric test calls SupportedTargets() when the source
111     // was compiled with more than one target. In the single-target case only
112     // static dispatch will be used anyway.
113 #if (HWY_TARGETS & (HWY_TARGETS - 1)) != 0
114     EXPECT_TRUE(SupportedTargetsCalledForTest())
115         << "This hwy target parametric test doesn't use dynamic-dispatch and "
116            "doesn't need to be parametric.";
117 #endif
118     SetSupportedTargetsForTest(0);
119   }
120 
GetParam()121   T GetParam() {
122     return std::get<1>(
123         ::testing::TestWithParam<std::tuple<uint32_t, T>>::GetParam());
124   }
125 };
126 
127 template <typename T>
TestParamTargetNameAndT(const testing::TestParamInfo<std::tuple<uint32_t,T>> & info)128 std::string TestParamTargetNameAndT(
129     const testing::TestParamInfo<std::tuple<uint32_t, T>>& info) {
130   return std::string(TargetName(std::get<0>(info.param))) + "_" +
131          ::testing::PrintToString(std::get<1>(info.param));
132 }
133 
// Instantiates the parametric test suite `suite` for every combination of a
// target from ::hwy::SupportedAndGeneratedTargets() and a value produced by
// `generator`. `suite` must provide a nested HwyParamType (the type of the
// generated values), which selects the TestParamTargetNameAndT instantiation
// used to name the tests.
#define HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(suite, generator)     \
  HWY_GTEST_INSTANTIATE_TEST_SUITE_P(                               \
      suite##Group, suite,                                          \
      ::testing::Combine(                                           \
          testing::ValuesIn(::hwy::SupportedAndGeneratedTargets()), \
          generator),                                               \
      ::hwy::TestParamTargetNameAndT<suite::HwyParamType>)
141 
// Helper macro to export a function and define a test that tests it. This is
// equivalent to do a HWY_EXPORT of a void(void) function and run it in a test:
//   class MyTestSuite : public TestWithParamTarget {
//    ...
//   };
//   HWY_TARGET_INSTANTIATE_TEST_SUITE_P(MyTestSuite);
//   HWY_EXPORT_AND_TEST_P(MyTestSuite, MyTest);
// The generated TEST_P body calls `func_name` through HWY_DYNAMIC_DISPATCH
// without arguments. The trailing static_assert exists only so that the macro
// invocation must be followed by a semicolon.
#define HWY_EXPORT_AND_TEST_P(suite, func_name)                   \
  HWY_EXPORT(func_name);                                          \
  TEST_P(suite, func_name) { HWY_DYNAMIC_DISPATCH(func_name)(); } \
  static_assert(true, "For requiring trailing semicolon")
153 
// Exports `func_name` and defines a TEST_P in `suite` that calls it through
// HWY_DYNAMIC_DISPATCH, passing the fixture's GetParam() result as the only
// argument. The trailing static_assert exists only so that the macro
// invocation must be followed by a semicolon.
// NOTE(review): presumably meant for suites derived from
// TestWithParamTargetAndT, whose GetParam() returns the extra parameter —
// confirm against callers.
#define HWY_EXPORT_AND_TEST_P_T(suite, func_name)                           \
  HWY_EXPORT(func_name);                                                    \
  TEST_P(suite, func_name) { HWY_DYNAMIC_DISPATCH(func_name)(GetParam()); } \
  static_assert(true, "For requiring trailing semicolon")
158 
// Declares `suite` as an empty fixture class derived from
// hwy::TestWithParamTarget and instantiates it for all targets via
// HWY_TARGET_INSTANTIATE_TEST_SUITE_P. The trailing static_assert exists only
// so that the macro invocation must be followed by a semicolon.
#define HWY_BEFORE_TEST(suite)                      \
  class suite : public hwy::TestWithParamTarget {}; \
  HWY_TARGET_INSTANTIATE_TEST_SUITE_P(suite);       \
  static_assert(true, "For requiring trailing semicolon")
163 
164 // 64-bit random generator (Xorshift128+). Much smaller state than std::mt19937,
165 // which triggers a compiler bug.
166 class RandomState {
167  public:
168   explicit RandomState(const uint64_t seed = 0x123456789ull) {
169     s0_ = SplitMix64(seed + 0x9E3779B97F4A7C15ull);
170     s1_ = SplitMix64(s0_);
171   }
172 
operator()173   HWY_INLINE uint64_t operator()() {
174     uint64_t s1 = s0_;
175     const uint64_t s0 = s1_;
176     const uint64_t bits = s1 + s0;
177     s0_ = s0;
178     s1 ^= s1 << 23;
179     s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
180     s1_ = s1;
181     return bits;
182   }
183 
184  private:
SplitMix64(uint64_t z)185   static uint64_t SplitMix64(uint64_t z) {
186     z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
187     z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
188     return z ^ (z >> 31);
189   }
190 
191   uint64_t s0_;
192   uint64_t s1_;
193 };
194 
// Draws the next 64-bit value from `rng` (advancing its state) and returns it
// truncated to 32 bits. `rng` is dereferenced unconditionally, so it must not
// be null.
static HWY_INLINE uint32_t Random32(RandomState* rng) {
  return static_cast<uint32_t>((*rng)());
}
198 
// Stops the compiler from discarding the computations that produced `output`
// by declaring, via an empty asm statement, that `output` is both read and
// modified. The "+r" constraint avoids unnecessary writes to memory, but only
// works for built-in types. When HWY_COMPILER_MSVC is nonzero, no asm is
// emitted and `output` is merely referenced.
template <class T>
inline void PreventElision(T&& output) {
#if !HWY_COMPILER_MSVC
  asm volatile("" : "+r"(output) : : "memory");
#else   // HWY_COMPILER_MSVC
  (void)output;
#endif  // !HWY_COMPILER_MSVC
}
211 
212 // Returns a name for the vector/part/scalar. The type prefix is u/i/f for
213 // unsigned/signed/floating point, followed by the number of bits per lane;
214 // then 'x' followed by the number of lanes. Example: u8x16. This is useful for
215 // understanding which instantiation of a generic test failed.
216 template <typename T>
TypeName(T,size_t N)217 static inline std::string TypeName(T /*unused*/, size_t N) {
218   const char prefix = IsFloat<T>() ? 'f' : (IsSigned<T>() ? 'i' : 'u');
219   char name[64];
220   // Omit the xN suffix for scalars.
221   if (N == 1) {
222     snprintf(name, sizeof(name), "%c%zu", prefix, sizeof(T) * 8);
223   } else {
224     snprintf(name, sizeof(name), "%c%zux%zu", prefix, sizeof(T) * 8, N);
225   }
226   return name;
227 }
228 
229 // String comparison
230 
// Compares the first `size` bytes at `p1` and `p2`. Returns true if all are
// equal. Otherwise prints a description of the first mismatch to stderr,
// stores its byte offset in `*pos` (if `pos` is non-null) and returns false.
template <typename T1, typename T2>
inline bool BytesEqual(const T1* p1, const T2* p2, const size_t size,
                       size_t* pos = nullptr) {
  const uint8_t* const lhs = reinterpret_cast<const uint8_t*>(p1);
  const uint8_t* const rhs = reinterpret_cast<const uint8_t*>(p2);

  // Advance to the first differing byte, or to `size` if there is none.
  size_t idx = 0;
  while (idx < size && lhs[idx] == rhs[idx]) {
    ++idx;
  }
  if (idx == size) return true;

  fprintf(stderr, "Mismatch at byte %zu of %zu: %d != %d (%s, %s)\n", idx,
          size, lhs[idx], rhs[idx], TypeName(T1(), 1).c_str(),
          TypeName(T2(), 1).c_str());
  if (pos != nullptr) {
    *pos = idx;
  }
  return false;
}
249 
// Returns true if the null-terminated strings `s1` and `s2` have identical
// contents. Both pointers must be non-null.
inline bool StringsEqual(const char* s1, const char* s2) {
  // Walk both strings in lockstep for as long as the characters agree;
  // reaching the terminator while they still agree means they are equal.
  for (; *s1 == *s2; ++s1, ++s2) {
    if (*s1 == '\0') return true;
  }
  return false;
}
256 
257 }  // namespace hwy
258 
259 #endif  // HWY_TESTS_TEST_UTIL_H_
260 
261 // Per-target include guard
262 #if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == defined(HWY_TARGET_TOGGLE)
263 #ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
264 #undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
265 #else
266 #define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
267 #endif
268 
269 HWY_BEFORE_NAMESPACE();
270 namespace hwy {
271 namespace HWY_NAMESPACE {
272 
273 // Prints lanes around `lane`, in memory order.
274 template <class D>
275 HWY_NOINLINE void Print(const D d, const char* caption, const Vec<D> v,
276                         intptr_t lane = 0) {
277   using T = TFromD<D>;
278   const size_t N = Lanes(d);
279   auto lanes = AllocateAligned<T>(N);
280   Store(v, d, lanes.get());
281   const size_t begin = static_cast<size_t>(std::max<intptr_t>(0, lane - 2));
282   const size_t end = std::min(begin + 7, N);
283   fprintf(stderr, "%s %s [%zu+ ->]:\n  ", TypeName(T(), N).c_str(), caption,
284           begin);
285   for (size_t i = begin; i < end; ++i) {
286     fprintf(stderr, "%g,", double(lanes[i]));
287   }
288   if (begin >= end) fprintf(stderr, "(out of bounds)");
289   fprintf(stderr, "\n");
290 }
291 
// Reports a lane mismatch by calling hwy::Abort with `filename`, `line` and a
// message naming the current target (HWY_TARGET), `type_name`, the `lane`
// index and the `expected`/`actual` values, both already formatted as strings.
// Declared HWY_NORETURN: control does not return to the caller.
static HWY_NORETURN HWY_NOINLINE void NotifyFailure(
    const char* filename, const int line, const char* type_name,
    const size_t lane, const char* expected, const char* actual) {
  hwy::Abort(filename, line,
             "%s, %s lane %zu mismatch: expected '%s', got '%s'.\n",
             hwy::TargetName(HWY_TARGET), type_name, lane, expected, actual);
}
299 
300 template <class Out, class In>
BitCast(const In & in)301 inline Out BitCast(const In& in) {
302   static_assert(sizeof(Out) == sizeof(In), "");
303   Out out;
304   CopyBytes<sizeof(out)>(&in, &out);
305   return out;
306 }
307 
308 // Computes the difference in units of last place between x and y.
309 template <typename TF>
ComputeUlpDelta(TF x,TF y)310 MakeUnsigned<TF> ComputeUlpDelta(TF x, TF y) {
311   static_assert(IsFloat<TF>(), "Only makes sense for floating-point");
312   using TU = MakeUnsigned<TF>;
313 
314   // Handle -0 == 0 and infinities.
315   if (x == y) return 0;
316 
317   // Consider "equal" if both are NaN, so we can verify an expected NaN.
318   // Needs a special case because there are many possible NaN representations.
319   if (std::isnan(x) && std::isnan(y)) return 0;
320 
321   // NOTE: no need to check for differing signs; they will result in large
322   // differences, which is fine, and we avoid overflow.
323 
324   const TU ux = BitCast<TU>(x);
325   const TU uy = BitCast<TU>(y);
326   // Avoid unsigned->signed cast: 2's complement is only guaranteed by C++20.
327   return std::max(ux, uy) - std::min(ux, uy);
328 }
329 
// Returns whether `expected` and `actual` compare equal with operator==.
// This overload is constrained by HWY_IF_NOT_FLOAT(T) (presumably: enabled
// only for non-floating-point T).
template <typename T, HWY_IF_NOT_FLOAT(T)>
HWY_NOINLINE bool IsEqual(const T expected, const T actual) {
  return expected == actual;
}

// Overload constrained by HWY_IF_FLOAT(T): the values are considered equal if
// ComputeUlpDelta reports a difference of at most 1.
template <typename T, HWY_IF_FLOAT(T)>
HWY_NOINLINE bool IsEqual(const T expected, const T actual) {
  return ComputeUlpDelta(expected, actual) <= 1;
}
339 
340 // Compare non-vector, non-string T.
341 template <typename T>
342 HWY_NOINLINE void AssertEqual(const T expected, const T actual,
343                               const std::string& type_name,
344                               const char* filename = "", const int line = -1,
345                               const size_t lane = 0) {
346   if (!IsEqual(expected, actual)) {
347     char expected_str[100];
348     snprintf(expected_str, sizeof(expected_str), "%g", double(expected));
349     char actual_str[100];
350     snprintf(actual_str, sizeof(actual_str), "%g", double(actual));
351     NotifyFailure(filename, line, type_name.c_str(), lane, expected_str,
352                   actual_str);
353   }
354 }
355 
356 static HWY_NOINLINE HWY_MAYBE_UNUSED void AssertStringEqual(
357     const char* expected, const char* actual, const char* filename = "",
358     const int line = -1, const size_t lane = 0) {
359   if (!hwy::StringsEqual(expected, actual)) {
360     NotifyFailure(filename, line, "string", lane, expected, actual);
361   }
362 }
363 
364 // Compare expected vector to vector.
365 template <class D, class V>
AssertVecEqual(D d,const V expected,const V actual,const char * filename,const int line)366 HWY_NOINLINE void AssertVecEqual(D d, const V expected, const V actual,
367                                  const char* filename, const int line) {
368   using T = TFromD<D>;
369   const size_t N = Lanes(d);
370   auto expected_lanes = AllocateAligned<T>(N);
371   auto actual_lanes = AllocateAligned<T>(N);
372   Store(expected, d, expected_lanes.get());
373   Store(actual, d, actual_lanes.get());
374   for (size_t i = 0; i < N; ++i) {
375     if (!IsEqual(expected_lanes[i], actual_lanes[i])) {
376       fprintf(stderr, "\n\n");
377       Print(d, "expect", expected, i);
378       Print(d, "actual", actual, i);
379 
380       char expected_str[100];
381       snprintf(expected_str, sizeof(expected_str), "%g",
382                double(expected_lanes[i]));
383       char actual_str[100];
384       snprintf(actual_str, sizeof(actual_str), "%g", double(actual_lanes[i]));
385 
386       NotifyFailure(filename, line, hwy::TypeName(T(), N).c_str(), i,
387                     expected_str, actual_str);
388     }
389   }
390 }
391 
// Compare expected lanes to vector.
// Loads a vector from `expected` with LoadU (so the pointer needs no vector
// alignment) and forwards to the vector/vector AssertVecEqual overload.
// NOTE(review): `expected` presumably must point to at least Lanes(d)
// elements — confirm against LoadU's contract.
template <class D>
HWY_NOINLINE void AssertVecEqual(D d, const TFromD<D>* expected, Vec<D> actual,
                                 const char* filename, int line) {
  AssertVecEqual(d, LoadU(d, expected), actual, filename, line);
}
398 
399 template <class D>
AssertMaskEqual(D d,Mask<D> a,Mask<D> b,const char * filename,int line)400 HWY_NOINLINE void AssertMaskEqual(D d, Mask<D> a, Mask<D> b,
401                                   const char* filename, int line) {
402   AssertVecEqual(d, VecFromMask(d, a), VecFromMask(d, b), filename, line);
403 
404   const std::string type_name = TypeName(TFromD<D>(), Lanes(d));
405   AssertEqual(CountTrue(a), CountTrue(b), type_name, filename, line, 0);
406   AssertEqual(AllTrue(a), AllTrue(b), type_name, filename, line, 0);
407   AssertEqual(AllFalse(a), AllFalse(b), type_name, filename, line, 0);
408 
409   // TODO(janwas): StoreMaskBits
410 }
411 
412 template <class D>
MaskTrue(const D d)413 HWY_NOINLINE Mask<D> MaskTrue(const D d) {
414   const auto v0 = Zero(d);
415   return Eq(v0, v0);
416 }
417 
418 template <class D>
MaskFalse(const D d)419 HWY_NOINLINE Mask<D> MaskFalse(const D d) {
420   // Lt is only for signed types and we cannot yet cast mask types.
421   return Eq(Zero(d), Set(d, 1));
422 }
423 
// Assertion macros; each forwards to the matching Assert* function and adds
// the call site's __FILE__ and __LINE__. Only defined if HWY_ASSERT_EQ is not
// already defined.
#ifndef HWY_ASSERT_EQ

// Scalar comparison via AssertEqual. Note that `expected` appears twice in the
// expansion and is therefore evaluated twice; avoid arguments with side
// effects.
#define HWY_ASSERT_EQ(expected, actual) \
  AssertEqual(expected, actual, hwy::TypeName(expected, 1), __FILE__, __LINE__)

// Null-terminated string comparison via AssertStringEqual.
#define HWY_ASSERT_STRING_EQ(expected, actual) \
  AssertStringEqual(expected, actual, __FILE__, __LINE__)

// Vector comparison via AssertVecEqual; `expected` may be a vector or a
// pointer to lanes (see the two AssertVecEqual overloads).
#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)

// Mask comparison via AssertMaskEqual.
#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)

#endif  // HWY_ASSERT_EQ
439 
440 // Helpers for instantiating tests with combinations of lane types / counts.
441 
442 // For all powers of two in [kMinLanes, N * kMinLanes] (so that recursion stops
443 // at N == 0)
444 template <typename T, size_t N, size_t kMinLanes, class Test>
445 struct ForeachSizeR {
DoForeachSizeR446   static void Do() {
447     static_assert(N != 0, "End of recursion");
448     Test()(T(), Simd<T, N * kMinLanes>());
449     ForeachSizeR<T, N / 2, kMinLanes, Test>::Do();
450   }
451 };
452 
453 // Base case to stop the recursion.
454 template <typename T, size_t kMinLanes, class Test>
455 struct ForeachSizeR<T, 0, kMinLanes, Test> {
456   static void Do() {}
457 };
458 
459 // These adapters may be called directly, or via For*Types:
460 
461 // Calls Test for all powers of two in [kMinLanes, HWY_LANES(T) / kDivLanes].
462 template <class Test, size_t kDivLanes = 1, size_t kMinLanes = 1>
463 struct ForPartialVectors {
464   template <typename T>
465   void operator()(T /*unused*/) const {
466 #if HWY_TARGET == HWY_RVV
467     // Only m1..8 for now, can ignore kMaxLanes because HWY_*_LANES are full.
468     ForeachSizeR<T, 8 / kDivLanes, HWY_LANES(T), Test>::Do();
469 #else
470     ForeachSizeR<T, HWY_LANES(T) / kDivLanes / kMinLanes, kMinLanes,
471                  Test>::Do();
472 #endif
473   }
474 };
475 
476 // Calls Test for all vectors that can be demoted log2(kFactor) times.
477 template <class Test, size_t kFactor>
478 struct ForDemoteVectors {
479   template <typename T>
480   void operator()(T /*unused*/) const {
481 #if HWY_TARGET == HWY_RVV
482     // Only m1..8 for now.
483     ForeachSizeR<T, 8 / kFactor, kFactor * HWY_LANES(T), Test>::Do();
484 #else
485     ForeachSizeR<T, HWY_LANES(T), 1, Test>::Do();
486 #endif
487   }
488 };
489 
490 // Calls Test for all powers of two in [128 bits, max bits].
491 template <class Test>
492 struct ForGE128Vectors {
493   template <typename T>
494   void operator()(T /*unused*/) const {
495 #if HWY_TARGET == HWY_RVV
496     ForeachSizeR<T, 8, HWY_LANES(T), Test>::Do();
497 #else
498     ForeachSizeR<T, HWY_LANES(T) / (16 / sizeof(T)), (16 / sizeof(T)),
499                  Test>::Do();
500 
501 #endif
502   }
503 };
504 
505 // Calls Test for all vectors that can be expanded by kFactor.
506 template <class Test, size_t kFactor = 2>
507 struct ForExtendableVectors {
508   template <typename T>
509   void operator()(T /*unused*/) const {
510 #if HWY_TARGET == HWY_RVV
511     ForeachSizeR<T, 8 / kFactor, HWY_LANES(T), Test>::Do();
512 #else
513     ForeachSizeR<T, HWY_LANES(T) / kFactor / (16 / sizeof(T)), (16 / sizeof(T)),
514                  Test>::Do();
515 #endif
516   }
517 };
518 
519 // Type lists to shorten call sites:
520 
// Calls `func` once per signed integer lane type, passing a zero value of
// that type: int8_t, int16_t, int32_t and - only if HWY_CAP_INTEGER64 is
// nonzero - int64_t.
template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t{});
  func(int16_t{});
  func(int32_t{});
#if HWY_CAP_INTEGER64
  func(int64_t{});
#endif
}
530 
// Calls `func` once per unsigned integer lane type, passing a zero value of
// that type: uint8_t, uint16_t, uint32_t and - only if HWY_CAP_INTEGER64 is
// nonzero - uint64_t.
template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
  func(uint32_t{});
#if HWY_CAP_INTEGER64
  func(uint64_t{});
#endif
}
540 
// Calls `func` for all signed integer lane types (see ForSignedTypes) and
// then for all unsigned ones (see ForUnsignedTypes).
template <class Func>
void ForIntegerTypes(const Func& func) {
  ForSignedTypes(func);
  ForUnsignedTypes(func);
}
546 
// Calls `func` once per floating-point lane type, passing a zero value of
// that type: float and - only if HWY_CAP_FLOAT64 is nonzero - double.
template <class Func>
void ForFloatTypes(const Func& func) {
  func(float{});
#if HWY_CAP_FLOAT64
  func(double{});
#endif
}
554 
// Calls `func` for all integer lane types (see ForIntegerTypes) and then for
// all floating-point ones (see ForFloatTypes).
template <class Func>
void ForAllTypes(const Func& func) {
  ForIntegerTypes(func);
  ForFloatTypes(func);
}
560 
561 // NOLINTNEXTLINE(google-readability-namespace-comments)
562 }  // namespace HWY_NAMESPACE
563 }  // namespace hwy
564 HWY_AFTER_NAMESPACE();
565 
566 #endif  // per-target include guard
567