1 /*  Copyright (C) 2012  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_TEST_UTILS_TEST_HELPERS_H
9 #define LIBSIMDPP_TEST_UTILS_TEST_HELPERS_H
10 
11 #include <simdpp/simd.h>
12 #include <simdpp/detail/align_v128.h>
13 #include <simdpp/detail/mem_block.h>
14 #include <iostream>
15 #include "test_results_set.h"
16 #include "test_reporter.h"
17 #include <cfenv>
18 #include <float.h>
19 
20 
/*  Switches the floating-point rounding mode to round-toward-zero.

    On MSVC the rounding-control bits of the floating-point control word are
    changed through _controlfp(); on other compilers std::fesetround() is
    used. If SIMDPP_USE_SSE2 is enabled, the SSE rounding mode is set through
    _MM_SET_ROUNDING_MODE() as well.
*/
inline void set_round_to_zero()
{
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable:4996) // disable security warning
    // _controlfp(new_value, mask): the value comes first, the mask selecting
    // the bits to change comes second.
    _controlfp(_RC_CHOP, _MCW_RC);
#pragma warning(pop)
#else
    std::fesetround(FE_TOWARDZERO);
#endif
#if SIMDPP_USE_SSE2
    _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
#endif
}
35 
/*  Switches the floating-point rounding mode to round-to-nearest.

    On MSVC the rounding-control bits of the floating-point control word are
    changed through _controlfp(); on other compilers std::fesetround() is
    used. If SIMDPP_USE_SSE2 is enabled, the SSE rounding mode is set through
    _MM_SET_ROUNDING_MODE() as well.
*/
inline void set_round_to_nearest()
{
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable:4996) // disable security warning
    // _controlfp(new_value, mask): the value comes first, the mask selecting
    // the bits to change comes second. Passing _RC_NEAR (zero) as the mask
    // would leave the control word untouched.
    _controlfp(_RC_NEAR, _MCW_RC);
#pragma warning(pop)
#else
    std::fesetround(FE_TONEAREST);
#endif
#if SIMDPP_USE_SSE2
    _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
#endif
}
50 
// Sink used by prevent_optimization(): writes the pointer value to std::cout
// so that the pointer visibly escapes.
inline void prevent_optimization_impl(const void* ptr)
{
    std::ostream& sink = std::cout;
    sink << ptr;
}
55 
// Some compilers are very good at finding ways to see through to the data
// being accessed, which lets them optimize the tests away. Writing the
// pointer to a volatile location and reading it back seems to be enough to
// work around this.
template<class T>
T* prevent_optimization(T* ptr)
{
    // The flag is volatile, so the branch below cannot be proven dead even
    // though it is never taken at run time.
    volatile bool escape_pointer = false;
    if (escape_pointer) {
        prevent_optimization_impl(ptr);
    }

    // Return the pointer as read back through a volatile indirection.
    T* volatile* volatile slot;
    slot = &ptr;
    return *slot;
}
70 
71 namespace SIMDPP_ARCH_NAMESPACE {
72 
73 
74 /*  Certain compilers deduce that we perform tests on constant data and
75     precompute the results. We want actual instructions to execute though. This
76     class solves the issue by accessing the test data through a volatile
77     pointer.
78 */
79 template<class T>
80 class TestData {
81 public:
82 
TestData()83     TestData() {}
84 
TestData(const TestData & other)85     TestData(const TestData& other)
86     {
87         data_ = other.data_;
88     }
89 
90     TestData& operator=(const TestData& other)
91     {
92         data_ = other.data_;
93     }
94 
95     template<class U>
add(const U & u)96     void add(const U& u)
97     {
98         T t = (T) u;
99         data_.push_back(t);
100     }
101 
add(const TestData & other)102     void add(const TestData& other)
103     {
104         data_.insert(data_.end(), other.data_.begin(), other.data_.end());
105     }
106 
size()107     size_t size() const { return data_.size(); }
108 
109     const T& operator[](unsigned i) const
110     {
111         return *(prevent_optimization(&data_.front()) + i);
112     }
113 
114 private:
115     std::vector<T, simdpp::aligned_allocator<T, sizeof(T)>> data_;
116 };
117 
118 
119 /*  A bunch of overloads that wrap the TestSuite::push() method. The push()
120     method accepts a type enum plus a pointer; the wrapper overloads determine
121     the type enum from the type of the supplied argument.
122 */
test_push_internal(TestResultsSet & t,std::int8_t data,const char * file,unsigned line)123 inline void test_push_internal(TestResultsSet& t, std::int8_t data,
124                                const char* file, unsigned line)
125 {
126     t.push(TYPE_INT8, 1, file, line).set(0, &data);
127 }
128 
test_push_internal(TestResultsSet & t,std::uint8_t data,const char * file,unsigned line)129 inline void test_push_internal(TestResultsSet& t, std::uint8_t data,
130                                const char* file, unsigned line)
131 {
132     t.push(TYPE_UINT8, 1, file, line).set(0, &data);
133 }
134 
test_push_internal(TestResultsSet & t,std::int16_t data,const char * file,unsigned line)135 inline void test_push_internal(TestResultsSet& t, std::int16_t data,
136                                const char* file, unsigned line)
137 {
138     t.push(TYPE_INT16, 1, file, line).set(0, &data);
139 }
140 
test_push_internal(TestResultsSet & t,std::uint16_t data,const char * file,unsigned line)141 inline void test_push_internal(TestResultsSet& t, std::uint16_t data,
142                                const char* file, unsigned line)
143 {
144     t.push(TYPE_UINT16, 1, file, line).set(0, &data);
145 }
146 
test_push_internal(TestResultsSet & t,std::int32_t data,const char * file,unsigned line)147 inline void test_push_internal(TestResultsSet& t, std::int32_t data,
148                                const char* file, unsigned line)
149 {
150     t.push(TYPE_INT32, 1, file, line).set(0, &data);
151 }
152 
test_push_internal(TestResultsSet & t,std::uint32_t data,const char * file,unsigned line)153 inline void test_push_internal(TestResultsSet& t, std::uint32_t data,
154                                const char* file, unsigned line)
155 {
156     t.push(TYPE_UINT32, 1, file, line).set(0, &data);
157 }
158 
test_push_internal(TestResultsSet & t,std::int64_t data,const char * file,unsigned line)159 inline void test_push_internal(TestResultsSet& t, std::int64_t data,
160                                const char* file, unsigned line)
161 {
162     t.push(TYPE_INT64, 1, file, line).set(0, &data);
163 }
164 
test_push_internal(TestResultsSet & t,std::uint64_t data,const char * file,unsigned line)165 inline void test_push_internal(TestResultsSet& t, std::uint64_t data,
166                                const char* file, unsigned line)
167 {
168     t.push(TYPE_UINT64, 1, file, line).set(0, &data);
169 }
170 
test_push_internal(TestResultsSet & t,float data,const char * file,unsigned line)171 inline void test_push_internal(TestResultsSet& t, float data,
172                                const char* file, unsigned line)
173 {
174     t.push(TYPE_FLOAT32, 1, file, line).set(0, &data);
175 }
176 
test_push_internal(TestResultsSet & t,double data,const char * file,unsigned line)177 inline void test_push_internal(TestResultsSet& t, double data,
178                                const char* file, unsigned line)
179 {
180     t.push(TYPE_FLOAT64, 1, file, line).set(0, &data);
181 }
182 
183 template<class V>
test_push_internal_vec(TestResultsSet::Result & res,const V & data)184 void test_push_internal_vec(TestResultsSet::Result& res, const V& data)
185 {
186     static_assert(sizeof(data) == V::length_bytes,
187                   "Vector uses unsupported data layout");
188     for (unsigned i = 0; i < data.vec_length; ++i) {
189         using Base = typename V::base_vector_type;
190         std::memcpy(res.data.data() + i * Base::length_bytes,
191                     &data.vec(i), Base::length_bytes);
192     }
193 }
194 
195 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::int8<N> & data,const char * file,unsigned line)196 void test_push_internal(TestResultsSet& t, const simdpp::int8<N>& data,
197                         const char* file, unsigned line)
198 {
199     test_push_internal_vec(t.push(TYPE_INT8, N, file, line), data);
200 }
201 
202 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::uint8<N> & data,const char * file,unsigned line)203 void test_push_internal(TestResultsSet& t, const simdpp::uint8<N>& data,
204                         const char* file, unsigned line)
205 {
206     test_push_internal_vec(t.push(TYPE_UINT8, N, file, line), data);
207 }
208 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::int16<N> & data,const char * file,unsigned line)209 void test_push_internal(TestResultsSet& t, const simdpp::int16<N>& data,
210                         const char* file, unsigned line)
211 {
212     test_push_internal_vec(t.push(TYPE_INT16, N, file, line), data);
213 }
214 
215 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::uint16<N> & data,const char * file,unsigned line)216 void test_push_internal(TestResultsSet& t, const simdpp::uint16<N>& data,
217                         const char* file, unsigned line)
218 {
219     test_push_internal_vec(t.push(TYPE_UINT16, N, file, line), data);
220 }
221 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::int32<N> & data,const char * file,unsigned line)222 void test_push_internal(TestResultsSet& t, const simdpp::int32<N>& data,
223                         const char* file, unsigned line)
224 {
225     test_push_internal_vec(t.push(TYPE_INT32, N, file, line), data);
226 }
227 
228 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::uint32<N> & data,const char * file,unsigned line)229 void test_push_internal(TestResultsSet& t, const simdpp::uint32<N>& data,
230                         const char* file, unsigned line)
231 {
232     test_push_internal_vec(t.push(TYPE_UINT32, N, file, line), data);
233 }
234 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::int64<N> & data,const char * file,unsigned line)235 void test_push_internal(TestResultsSet& t, const simdpp::int64<N>& data,
236                         const char* file, unsigned line)
237 {
238     test_push_internal_vec(t.push(TYPE_INT64, N, file, line), data);
239 }
240 
241 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::uint64<N> & data,const char * file,unsigned line)242 void test_push_internal(TestResultsSet& t, const simdpp::uint64<N>& data,
243                         const char* file, unsigned line)
244 {
245     test_push_internal_vec(t.push(TYPE_UINT64, N, file, line), data);
246 }
247 
248 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::float32<N> & data,const char * file,unsigned line)249 void test_push_internal(TestResultsSet& t, const simdpp::float32<N>& data,
250                         const char* file, unsigned line)
251 {
252     test_push_internal_vec(t.push(TYPE_FLOAT32, N, file, line), data);
253 }
254 
255 template<unsigned N>
test_push_internal(TestResultsSet & t,const simdpp::float64<N> & data,const char * file,unsigned line)256 void test_push_internal(TestResultsSet& t, const simdpp::float64<N>& data,
257                         const char* file, unsigned line)
258 {
259     test_push_internal_vec(t.push(TYPE_FLOAT64, N, file, line), data);
260 }
261 
262 template<class V>
test_push_stored(TestResultsSet & t,const typename V::element_type * data,unsigned count,const char * file,unsigned line)263 void test_push_stored(TestResultsSet& t, const typename V::element_type* data,
264                       unsigned count, const char* file, unsigned line)
265 {
266     ElementType type = GetElementType<V>::value;
267     TestResultsSet::Result& res = t.push(type, count, file, line);
268     std::memcpy(res.data.data(), data, count * sizeof(data[0]));
269 }
270 
271 template<class V>
print_vector_hex(std::ostream & out,const V & v)272 void print_vector_hex(std::ostream& out, const V& v)
273 {
274     simdpp::detail::mem_block<V> block(v);
275     print_vector_hex(out, GetElementType<V>::value, v.length, block.data());
276 }
277 
278 template<class V>
print_vector_numeric(std::ostream & out,const V & v)279 void print_vector_numeric(std::ostream& out, const V& v)
280 {
281     simdpp::detail::mem_block<V> block(v);
282     print_vector_numeric(out, GetElementType<V>::value, v.length, block.data());
283 }
284 
285 } // namespace SIMDPP_ARCH_NAMESPACE
286 
// The macros below are meant to be invoked from within a test function that
// resides in the SIMDPP_ARCH_NAMESPACE namespace.

/*
    T - type
    D - an object of type T to push
    A - array of objects to push
    O1, O2 - arguments to apply OP to
    OP - operation to apply to the array or arguments
    R - type to cast the object to be pushed to
*/
// Casts D to T and pushes the result to the result set TC together with the
// file and line of the macro invocation.
#define TEST_PUSH(TC, T, D) \
    { test_push_internal((TC), (T)(D), __FILE__, __LINE__); }
300 
// Same as TEST_PUSH, except that the data has already been stored to memory:
// D points to COUNT elements of type T::element_type, where T is the vector
// type. The vector type is taken from the T argument rather than from
// whatever `V` happens to be in scope at the point of use.
#define TEST_PUSH_STORED(TC, T, D, COUNT) \
    { test_push_stored<T>((TC), (D), (COUNT), __FILE__, __LINE__); }
305 
// Calls (TC).reset_seq(), then pushes every element of A cast to T. A must be
// a built-in array: its element count is computed with sizeof.
#define TEST_PUSH_ARRAY(TC, T, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < sizeof(A) / sizeof((A)[0]); ++i) { \
        T l = (T) (A)[i]; \
        TEST_PUSH(TC, T, l); \
    } \
}
314 
// Calls (TC).reset_seq(), then pushes OP(l) cast to T for every element l of
// A cast to T. A must provide size() and operator[].
#define TEST_PUSH_ARRAY_OP1(TC, T, OP, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        T l = (T) (A)[i]; \
        TEST_PUSH(TC, T, OP(l)); \
    } \
}
323 
// Like TEST_PUSH_ARRAY_OP1, but the result of OP is cast to R instead of T
// before being pushed. The elements of A are still cast to T.
#define TEST_PUSH_ARRAY_OP1_T(TC, R, T, OP, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        T l = (T) (A)[i]; \
        TEST_PUSH(TC, R, OP(l)); \
    } \
}
332 
// Calls (TC).reset_seq(), then pushes OP(l, r) cast to T for every index i,
// where l is A[i] and r is B[i], both cast to T. The loop runs over
// (A).size() elements; B is indexed with the same indices.
#define TEST_PUSH_ARRAY_OP2(TC, T, OP, A, B) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        T l = (T) (A)[i]; \
        T r = (T) (B)[i]; \
        TEST_PUSH(TC, T, OP(l, r)); \
    } \
}
342 
// Tests OP on every element of A. Each element l is pushed 128 / T::num_bits
// times; after every push l is replaced by
// simdpp::detail::align_v128<1>(l, l) (presumably a rotation by one element
// within each 128-bit block).
#define TEST_PUSH_ALL_COMB_OP1(TC, T, OP, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        T l = (T) (A)[i]; \
        for (unsigned rot = 0; rot < 128 / T::num_bits; ++rot) { \
            TEST_PUSH(TC, T, OP(l)); \
            l = simdpp::detail::align_v128<1>(l, l); \
        } \
    } \
}
355 
// Like TEST_PUSH_ALL_COMB_OP1, but the result of OP is cast to R instead of T
// before being pushed.
#define TEST_PUSH_ALL_COMB_OP1_T(TC, R, T, OP, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        T l = (T) (A)[i]; \
        for (unsigned rot = 0; rot < 128 / T::num_bits; ++rot) { \
            TEST_PUSH(TC, R, OP(l)); \
            l = simdpp::detail::align_v128<1>(l, l); \
        } \
    } \
}
367 
// Tests OP on all ordered pairs (l, r) of elements of A. For each pair,
// OP(l, r) is pushed 128 / T::num_bits times; after every push the left
// operand is replaced by simdpp::detail::align_v128<1>(l, l). The right
// operand is left unchanged.
#define TEST_PUSH_ALL_COMB_OP2(TC, T, OP, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        for (unsigned j = 0; j < (A).size(); ++j) { \
            T l = (T) (A)[i]; \
            T r = (T) (A)[j]; \
            for (unsigned rot = 0; rot < 128 / T::num_bits; ++rot) { \
                TEST_PUSH(TC, T, OP(l, r)); \
                l = simdpp::detail::align_v128<1>(l, l); \
            } \
        } \
    } \
}
382 
// Like TEST_PUSH_ALL_COMB_OP2, but the result of OP is cast to R instead of T
// before being pushed.
#define TEST_PUSH_ALL_COMB_OP2_T(TC, R, T, OP, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        for (unsigned j = 0; j < (A).size(); ++j) { \
            T l = (T) (A)[i]; \
            T r = (T) (A)[j]; \
            for (unsigned rot = 0; rot < 128 / T::num_bits; ++rot) { \
                TEST_PUSH(TC, R, OP(l, r)); \
                l = simdpp::detail::align_v128<1>(l, l); \
            } \
        } \
    } \
}
397 
// Tests OP on all pairs (l, r) where l is an element of A1 cast to T1 and r
// is an element of A2 cast to T2. For each pair, OP(l, r) cast to R is pushed
// 128 / T1::num_bits times; after every push the left operand is replaced by
// simdpp::detail::align_v128<1>(l, l).
#define TEST_PUSH_ALL_COMB_OP2_SEPARATE_T(TC, R, T1, T2, OP, A1, A2) \
{ \
    (TC).reset_seq(); \
    for (unsigned i = 0; i < (A1).size(); ++i) { \
        for (unsigned j = 0; j < (A2).size(); ++j) { \
            T1 l = (T1) (A1)[i]; \
            T2 r = (T2) (A2)[j]; \
            for (unsigned rot = 0; rot < 128 / T1::num_bits; ++rot) { \
                TEST_PUSH(TC, R, OP(l, r)); \
                l = simdpp::detail::align_v128<1>(l, l); \
            } \
        } \
    } \
}
412 
// For all ordered pairs of elements of A (cast to T), checks with TEST_EQUAL
// that the expressions OP1 and OP2 compare equal. OP1 and OP2 are expressions
// that may refer to the two operands by the names ARG1 and ARG2. Each pair is
// checked 128 / T::num_bits times; after every check ARG1 is replaced by
// simdpp::detail::align_v128<1>(ARG1, ARG1).
#define TEST_EQUAL_ALL_COMB_OP2_EXPLICIT(TR, T, OP1, OP2, A) \
{ \
    for (unsigned i = 0; i < (A).size(); ++i) { \
        for (unsigned j = 0; j < (A).size(); ++j) { \
            T ARG1 = (T) (A)[i]; \
            T ARG2 = (T) (A)[j]; \
            for (unsigned rot = 0; rot < 128 / T::num_bits; ++rot) { \
                TEST_EQUAL(TR, (OP1), (OP2)); \
                ARG1 = simdpp::detail::align_v128<1>(ARG1, ARG1); \
            } \
        } \
    } \
}
426 
// Tests OP on all ordered triples (v0, v1, v2) of elements of A cast to T.
// For each triple, OP(v0, v1, v2) is pushed (128 / T::num_bits) squared
// times: the inner loop replaces v0 by align_v128<1>(v0, v0) after every
// push, the outer loop replaces v1 by align_v128<1>(v1, v1) after every inner
// pass. v2 is left unchanged.
#define TEST_PUSH_ALL_COMB_OP3(TC, T, OP, A) \
{ \
    (TC).reset_seq(); \
    for (unsigned i0 = 0; i0 < (A).size(); ++i0) { \
    for (unsigned i1 = 0; i1 < (A).size(); ++i1) { \
    for (unsigned i2 = 0; i2 < (A).size(); ++i2) { \
        T v0 = (T) (A)[i0]; \
        T v1 = (T) (A)[i1]; \
        T v2 = (T) (A)[i2]; \
        for (unsigned rot0 = 0; rot0 < 128 / T::num_bits; ++rot0) { \
            for (unsigned rot1 = 0; rot1 < 128 / T::num_bits; ++rot1) { \
                TEST_PUSH(TC, T, OP(v0, v1, v2)); \
                v0 = simdpp::detail::align_v128<1>(v0, v0); \
            } \
            v1 = simdpp::detail::align_v128<1>(v1, v1); \
        } \
    }}} \
}
445 
446 
447 // Implements TemplateTestHelper functionality. Template recursion is optimized
448 // by potentially putting a large number of test template instantiations into a
449 // single instantiation of TemplateTestHelperImpl.
450 enum {
451     TemplateTestHelperImpl_InstBatchSize = 30
452 };
453 
454 template<template<class, unsigned> class F,
455          class V, bool large, unsigned i, unsigned limit>
456 struct TemplateTestHelperImpl;
457 
458 template<template<class, unsigned> class F,
459          class V, unsigned i, unsigned limit>
460 struct TemplateTestHelperImpl<F, V, false, i, limit> {
461 
462     static void run(TestResultsSet& tc, const V& a)
463     {
464         F<V, i>::test(tc, a);
465         const unsigned batch_size = TemplateTestHelperImpl_InstBatchSize;
466         const bool is_large = i + batch_size < limit;
467         TemplateTestHelperImpl<F, V, is_large, i+1, limit>::run(tc, a);
468     }
469 
470     static void run(TestResultsSet& tc, const V& a, const V& b)
471     {
472         F<V, i>::test(tc, a, b);
473         const unsigned batch_size = TemplateTestHelperImpl_InstBatchSize;
474         const bool is_large = i + batch_size < limit;
475         TemplateTestHelperImpl<F, V, is_large, i+1, limit>::run(tc, a, b);
476     }
477 };
478 
479 template<template<class, unsigned> class F, class V, unsigned i, unsigned limit>
480 struct TemplateTestHelperImpl<F, V, true, i, limit> {
481 
482     static void run(TestResultsSet& tc, const V& a)
483     {
484         F<V, i>::test(tc, a);
485         F<V, i+1>::test(tc, a);
486         F<V, i+2>::test(tc, a);
487         F<V, i+3>::test(tc, a);
488         F<V, i+4>::test(tc, a);
489         F<V, i+5>::test(tc, a);
490         F<V, i+6>::test(tc, a);
491         F<V, i+7>::test(tc, a);
492         F<V, i+8>::test(tc, a);
493         F<V, i+9>::test(tc, a);
494         F<V, i+10>::test(tc, a);
495         F<V, i+11>::test(tc, a);
496         F<V, i+12>::test(tc, a);
497         F<V, i+13>::test(tc, a);
498         F<V, i+14>::test(tc, a);
499         F<V, i+15>::test(tc, a);
500         F<V, i+16>::test(tc, a);
501         F<V, i+17>::test(tc, a);
502         F<V, i+18>::test(tc, a);
503         F<V, i+19>::test(tc, a);
504         F<V, i+20>::test(tc, a);
505         F<V, i+20>::test(tc, a);
506         F<V, i+21>::test(tc, a);
507         F<V, i+22>::test(tc, a);
508         F<V, i+23>::test(tc, a);
509         F<V, i+24>::test(tc, a);
510         F<V, i+25>::test(tc, a);
511         F<V, i+26>::test(tc, a);
512         F<V, i+27>::test(tc, a);
513         F<V, i+28>::test(tc, a);
514         F<V, i+29>::test(tc, a);
515         const unsigned batch_size = TemplateTestHelperImpl_InstBatchSize;
516         const bool is_large = i + batch_size*2 < limit;
517         TemplateTestHelperImpl<F, V, is_large, i+batch_size, limit>::run(tc, a);
518     }
519 
520     static void run(TestResultsSet& tc, const V& a, const V& b)
521     {
522         F<V, i>::test(tc, a, b);
523         F<V, i+1>::test(tc, a, b);
524         F<V, i+2>::test(tc, a, b);
525         F<V, i+3>::test(tc, a, b);
526         F<V, i+4>::test(tc, a, b);
527         F<V, i+5>::test(tc, a, b);
528         F<V, i+6>::test(tc, a, b);
529         F<V, i+7>::test(tc, a, b);
530         F<V, i+8>::test(tc, a, b);
531         F<V, i+9>::test(tc, a, b);
532         F<V, i+10>::test(tc, a, b);
533         F<V, i+11>::test(tc, a, b);
534         F<V, i+12>::test(tc, a, b);
535         F<V, i+13>::test(tc, a, b);
536         F<V, i+14>::test(tc, a, b);
537         F<V, i+15>::test(tc, a, b);
538         F<V, i+16>::test(tc, a, b);
539         F<V, i+17>::test(tc, a, b);
540         F<V, i+18>::test(tc, a, b);
541         F<V, i+19>::test(tc, a, b);
542         F<V, i+20>::test(tc, a, b);
543         F<V, i+20>::test(tc, a, b);
544         F<V, i+21>::test(tc, a, b);
545         F<V, i+22>::test(tc, a, b);
546         F<V, i+23>::test(tc, a, b);
547         F<V, i+24>::test(tc, a, b);
548         F<V, i+25>::test(tc, a, b);
549         F<V, i+26>::test(tc, a, b);
550         F<V, i+27>::test(tc, a, b);
551         F<V, i+28>::test(tc, a, b);
552         F<V, i+29>::test(tc, a, b);
553         const unsigned batch_size = TemplateTestHelperImpl_InstBatchSize;
554         const bool is_large = i + batch_size*2 < limit;
555         TemplateTestHelperImpl<F, V, is_large, i+batch_size, limit>::run(tc, a, b);
556     }
557 };
558 
559 template<template<class, unsigned> class F, class V, unsigned i>
560 struct TemplateTestHelperImpl<F, V, true, i, i> {
561     static void run(TestResultsSet&, const V&) {}
562     static void run(TestResultsSet&, const V&, const V&) {}
563 };
564 
565 template<template<class, unsigned> class F, class V, unsigned i>
566 struct TemplateTestHelperImpl<F, V, false, i, i> {
567     static void run(TestResultsSet&, const V&) {}
568     static void run(TestResultsSet&, const V&, const V&) {}
569 };
570 
571 /**
572     Used to test functions that depend on a template parameter. Essentially
573     implements a for loop realised in templates.
574 
575     The template calls F<V, i>::test(tc, a) for each i from 0 to F<V, 0>::limit.
576 */
577 template<template<class, unsigned> class F, class V>
578 struct TemplateTestHelper {
579     static void run(TestResultsSet& tc, const V& a)
580     {
581         const unsigned limit = F<V,0>::limit;
582 
583         tc.reset_seq();
584         TemplateTestHelperImpl<F, V, false, 0, limit>::run(tc, a);
585     }
586 
587     static void run(TestResultsSet& tc, const V& a, const V& b)
588     {
589         const unsigned limit = F<V,0>::limit;
590 
591         tc.reset_seq();
592         TemplateTestHelperImpl<F, V, false, 0, limit>::run(tc, a, b);
593     }
594 };
595 
596 template<template<class, unsigned> class F, class V>
597 struct TemplateTestArrayHelper {
598     static void run(TestResultsSet& tc, V* a, unsigned n)
599     {
600         const unsigned limit = F<V,0>::limit;
601 
602         tc.reset_seq();
603         for (unsigned i = 0; i < n; i++) {
604             TemplateTestHelperImpl<F, V, false, 0, limit>::run(tc, *(a+i));
605         }
606     }
607 
608     static void run(TestResultsSet& tc, V* a, V* b, unsigned n)
609     {
610         const unsigned limit = F<V,0>::limit;
611 
612         tc.reset_seq();
613         for (unsigned i = 0; i < n; i++) {
614             TemplateTestHelperImpl<F, V, false, 0, limit>::run(tc, *(a+i), *(b+i));
615         }
616     }
617 };
618 
619 template<class E>
620 void test_cmp_memory(TestReporter& tr, const E* e1, const E* e2, unsigned count,
621                      bool expected_equal, unsigned line, const char* file)
622 {
623     if (count == 0) {
624         tr.add_result(true);
625         return;
626     }
627 
628     int memcmp_result = std::memcmp(e1, e2, sizeof(E) * count);
629     bool success = expected_equal ? memcmp_result == 0 : memcmp_result != 0;
630     tr.add_result(success);
631 
632     if (!success) {
633         print_separator(tr.out());
634         print_file_info(tr.out(), file, line);
635         tr.out() << (expected_equal ? "Memory not equal:\n"
636                                     : "Memory equal:\n");
637         print_data_diff(tr.out(), GetElementType<E>::value, count, e1, e2);
638     }
639 }
640 
641 template<class V1, class V2>
642 void test_cmp_equal_impl(std::true_type /*is_V1_vector*/, TestReporter& tr,
643                          const V1& q1, const V2& q2,
644                          bool expected_equal, unsigned line, const char* file)
645 {
646     using V = typename simdpp::detail::get_expr_nomask<V1>::type;
647     V v1, v2;
648     v1 = q1.eval(); v2 = q2.eval();
649 
650     int memcmp_result = std::memcmp(&v1, &v2, V::length_bytes);
651     bool success = expected_equal ? memcmp_result == 0 : memcmp_result != 0;
652     tr.add_result(success);
653 
654     if (!success) {
655         print_separator(tr.out());
656         print_file_info(tr.out(), file, line);
657         tr.out() << (expected_equal ? "Vectors not equal:\n" : "Vectors equal:\n");
658         print_data_diff(tr.out(), GetElementType<V>::value, V::length, &v1, &v2);
659     }
660 }
661 
662 template<class T1, class T2>
663 void test_cmp_equal_impl(std::false_type /*is_T1_vector*/, TestReporter& tr,
664                          const T1& a1, const T2& a2,
665                          bool expected_equal, unsigned line, const char* file)
666 {
667     static_assert(sizeof(T1) == sizeof(T2), "Data type sizes must be equal");
668     bool success = expected_equal ? std::memcmp(&a1, &a2, sizeof(a1)) == 0 :
669                                     std::memcmp(&a1, &a2, sizeof(a1)) != 0;
670     tr.add_result(success);
671 
672     if (!success) {
673         print_separator(tr.out());
674         print_file_info(tr.out(), file, line);
675         tr.out() << (expected_equal ? "Data not equal:\n" : "Data equal:\n");
676         print_data_diff(tr.out(), GetElementType<T1>::value, 1, &a1, &a2);
677     }
678 }
679 
680 template<class T1, class T2>
681 void test_cmp_equal(TestReporter& tr, const T1& a1, const T2& a2,
682                     bool expected_equal, unsigned line, const char* file)
683 {
684     static_assert(simdpp::is_vector<T1>::value == simdpp::is_vector<T2>::value,
685                   "Invalid types for comparison");
686     test_cmp_equal_impl(simdpp::is_vector<T1>(), tr, a1, a2, expected_equal,
687                         line, file);
688 }
689 
// Records in the test reporter TR whether V1 and V2 compare equal
// (TEST_EQUAL) or differ (TEST_NOT_EQUAL). The file and line of the macro
// invocation are passed along.
#define TEST_EQUAL(TR, V1, V2) \
    do { test_cmp_equal((TR), (V1), (V2), true, __LINE__, __FILE__); } while(0)

#define TEST_NOT_EQUAL(TR, V1, V2) \
    do { test_cmp_equal((TR), (V1), (V2), false, __LINE__, __FILE__); } while(0)
695 
// Records in the test reporter TR whether the COUNT elements at E1 and E2
// compare equal bytewise (TEST_EQUAL_MEMORY) or differ
// (TEST_NOT_EQUAL_MEMORY). The file and line of the macro invocation are
// passed along.
#define TEST_EQUAL_MEMORY(TR, E1, E2, COUNT) \
    do { \
        test_cmp_memory((TR), (E1), (E2), (COUNT), true, __LINE__, __FILE__); \
    } while(0)

#define TEST_NOT_EQUAL_MEMORY(TR, E1, E2, COUNT) \
    do { \
        test_cmp_memory((TR), (E1), (E2), (COUNT), false, __LINE__, __FILE__); \
    } while(0)
701 
702 #endif
703