1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4
5 // clang-format off
6
7 #include <sstream>
8 #include <type_traits>
9
10 #include <OpenImageIO/argparse.h>
11 #include <OpenImageIO/benchmark.h>
12 #include <OpenImageIO/fmath.h>
13 #include <OpenImageIO/imageio.h>
14 #include <OpenImageIO/simd.h>
15 #include <OpenImageIO/strutil.h>
16 #include <OpenImageIO/timer.h>
17 #include <OpenImageIO/typedesc.h>
18 #include <OpenImageIO/unittest.h>
19 #include <OpenImageIO/ustring.h>
20
21
22
23 using namespace OIIO;
24
25 using namespace OIIO::simd;
26
27
// Benchmark iteration count; getargs() lets --iterations override it.
static int iterations = 1000000;
// Number of timing trials handed to time_trial(); overridable via --trials.
static int ntrials = 5;
// Terminal helper used by the heading functions to emit ANSI styling.
static Sysutil::Term term(std::cout);
// Aligned file-level scratch buffers.  The load/store helper functions in
// this file read from and write to dummy_float.
OIIO_SIMD16_ALIGN float dummy_float[16];
OIIO_SIMD16_ALIGN float dummy_float2[16];
// NOTE(review): despite its name, this buffer has element type float --
// confirm whether int was intended before relying on it.
OIIO_SIMD16_ALIGN float dummy_int[16];
34
35
36
37 static void
getargs(int argc,char * argv[])38 getargs(int argc, char* argv[])
39 {
40 ArgParse ap;
41 ap.intro("simd_test -- unit test and benchmarks for OpenImageIO/simd.h\n"
42 OIIO_INTRO_STRING)
43 .usage("simd_test [options]");
44
45 ap.arg("--iterations %d", &iterations)
46 .help(Strutil::sprintf("Number of iterations (default: %d)", iterations));
47 ap.arg("--trials %d", &ntrials)
48 .help("Number of trials");
49
50 ap.parse_args(argc, (const char**)argv);
51 }
52
53
54
55 static void
category_heading(string_view name)56 category_heading(string_view name)
57 {
58 std::cout << "\n" << term.ansi("bold,underscore,yellow", name) << "\n\n";
59 }
60
61
62
63 static void
test_heading(string_view name,string_view name2="")64 test_heading(string_view name, string_view name2 = "")
65 {
66 std::cout << term.ansi("bold") << name << ' ' << name2
67 << term.ansi("normal") << "\n";
68 }
69
70
71
72 // What I really want to do is merge benchmark() and benchmark2() into
73 // one template using variadic arguments, like this:
74 // template <typename FUNC, typename ...ARGS>
75 // void benchmark (size_t work, string_view funcname, FUNC func, ARGS... args)
76 // But it seems that although this works for Clang, it does not for gcc 4.8
77 // (but does for 4.9). Some day I'll get back to this simplification, but
78 // for now, gcc 4.8 seems like an important barrier.
79
80
81 template<typename FUNC, typename T>
82 void
benchmark(string_view funcname,FUNC func,T x,size_t work=0)83 benchmark(string_view funcname, FUNC func, T x, size_t work = 0)
84 {
85 if (!work)
86 work = SimdElements<decltype(func(x))>::size;
87 auto repeat_func = [&](){
88 // Unroll the loop 8 times
89 auto r = func(x); DoNotOptimize (r); clobber_all_memory();
90 r = func(x); DoNotOptimize (r); clobber_all_memory();
91 r = func(x); DoNotOptimize (r); clobber_all_memory();
92 r = func(x); DoNotOptimize (r); clobber_all_memory();
93 r = func(x); DoNotOptimize (r); clobber_all_memory();
94 r = func(x); DoNotOptimize (r); clobber_all_memory();
95 r = func(x); DoNotOptimize (r); clobber_all_memory();
96 r = func(x); DoNotOptimize (r); clobber_all_memory();
97 };
98 float time = time_trial(repeat_func, ntrials, iterations / 8);
99 Strutil::printf(" %s: %7.1f Mvals/sec, (%.1f Mcalls/sec)\n",
100 funcname, ((iterations * work) / 1.0e6) / time,
101 (iterations / 1.0e6) / time);
102 }
103
104
105 template<typename FUNC, typename T, typename U>
106 void
benchmark2(string_view funcname,FUNC func,T x,U y,size_t work=0)107 benchmark2(string_view funcname, FUNC func, T x, U y, size_t work = 0)
108 {
109 if (!work)
110 work = SimdElements<decltype(func(x, y))>::size;
111 auto repeat_func = [&]() {
112 // Unroll the loop 8 times
113 auto r = func(x, y); DoNotOptimize (r); clobber_all_memory();
114 r = func(x, y); DoNotOptimize (r); clobber_all_memory();
115 r = func(x, y); DoNotOptimize (r); clobber_all_memory();
116 r = func(x, y); DoNotOptimize (r); clobber_all_memory();
117 r = func(x, y); DoNotOptimize (r); clobber_all_memory();
118 r = func(x, y); DoNotOptimize (r); clobber_all_memory();
119 r = func(x, y); DoNotOptimize (r); clobber_all_memory();
120 r = func(x, y); DoNotOptimize (r); clobber_all_memory();
121 };
122 float time = time_trial(repeat_func, ntrials, iterations / 8);
123 Strutil::printf(" %s: %7.1f Mvals/sec, (%.1f Mcalls/sec)\n",
124 funcname, ((iterations * work) / 1.0e6) / time,
125 (iterations / 1.0e6) / time);
126 }
127
128
129
130 template<typename VEC>
131 inline VEC
mkvec(typename VEC::value_t a,typename VEC::value_t b,typename VEC::value_t c,typename VEC::value_t d=0)132 mkvec(typename VEC::value_t a, typename VEC::value_t b, typename VEC::value_t c,
133 typename VEC::value_t d = 0)
134 {
135 return VEC(a, b, c, d);
136 }
137
138 template<>
139 inline vfloat3
mkvec(float a,float b,float c,float)140 mkvec<vfloat3>(float a, float b, float c, float /*d*/)
141 {
142 return vfloat3(a, b, c);
143 }
144
145 template<>
146 inline vfloat8
mkvec(float a,float b,float c,float d)147 mkvec<vfloat8>(float a, float b, float c, float d)
148 {
149 return vfloat8(a, b, c, d, a, b, c, d);
150 }
151
152 template<>
153 inline vfloat16
mkvec(float a,float b,float c,float d)154 mkvec<vfloat16>(float a, float b, float c, float d)
155 {
156 return vfloat16(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d);
157 }
158
159 template<>
160 inline vint8
mkvec(int a,int b,int c,int d)161 mkvec<vint8>(int a, int b, int c, int d)
162 {
163 return vint8(a, b, c, d, a, b, c, d);
164 }
165
166 template<>
167 inline vint16
mkvec(int a,int b,int c,int d)168 mkvec<vint16>(int a, int b, int c, int d)
169 {
170 return vint16(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d);
171 }
172
173 template<>
174 inline vbool8
mkvec(bool a,bool b,bool c,bool d)175 mkvec<vbool8>(bool a, bool b, bool c, bool d)
176 {
177 return vbool8(a, b, c, d, a, b, c, d);
178 }
179
180 template<>
181 inline vbool16
mkvec(bool a,bool b,bool c,bool d)182 mkvec<vbool16>(bool a, bool b, bool c, bool d)
183 {
184 return vbool16(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d);
185 }
186
187
188
189 template<typename VEC>
190 inline VEC
mkvec(typename VEC::value_t a,typename VEC::value_t b,typename VEC::value_t c,typename VEC::value_t d,typename VEC::value_t e,typename VEC::value_t f,typename VEC::value_t g,typename VEC::value_t h)191 mkvec(typename VEC::value_t a, typename VEC::value_t b, typename VEC::value_t c,
192 typename VEC::value_t d, typename VEC::value_t e, typename VEC::value_t f,
193 typename VEC::value_t g, typename VEC::value_t h)
194 {
195 return VEC(a, b, c, d, e, f, g, h);
196 }
197
198
199 template<>
200 inline vbool4
mkvec(bool a,bool b,bool c,bool d,bool,bool,bool,bool)201 mkvec<vbool4>(bool a, bool b, bool c, bool d, bool, bool, bool, bool)
202 {
203 return vbool4(a, b, c, d);
204 }
205
206 template<>
207 inline vint4
mkvec(int a,int b,int c,int d,int,int,int,int)208 mkvec<vint4>(int a, int b, int c, int d, int, int, int, int)
209 {
210 return vint4(a, b, c, d);
211 }
212
213 template<>
214 inline vint16
mkvec(int a,int b,int c,int d,int e,int f,int g,int h)215 mkvec<vint16>(int a, int b, int c, int d, int e, int f, int g, int h)
216 {
217 return vint16(a, b, c, d, e, f, g, h, h + 1, h + 2, h + 3, h + 4, h + 5,
218 h + 6, h + 7, h + 8);
219 }
220
221 template<>
222 inline vfloat4
mkvec(float a,float b,float c,float d,float,float,float,float)223 mkvec<vfloat4>(float a, float b, float c, float d, float, float, float, float)
224 {
225 return vfloat4(a, b, c, d);
226 }
227
228 template<>
229 inline vfloat3
mkvec(float a,float b,float c,float,float,float,float,float)230 mkvec<vfloat3>(float a, float b, float c, float, float, float, float, float)
231 {
232 return vfloat3(a, b, c);
233 }
234
235 template<>
236 inline vfloat16
mkvec(float a,float b,float c,float d,float e,float f,float g,float h)237 mkvec<vfloat16>(float a, float b, float c, float d, float e, float f, float g,
238 float h)
239 {
240 return vfloat16(a, b, c, d, e, f, g, h, h + 1, h + 2, h + 3, h + 4, h + 5,
241 h + 6, h + 7, h + 8);
242 }
243
244
245
246 template<typename VEC>
247 inline int
loadstore_vec(int)248 loadstore_vec(int /*dummy*/)
249 {
250 typedef typename VEC::value_t ELEM;
251 ELEM B[VEC::elements];
252 VEC v;
253 v.load((ELEM*)dummy_float);
254 DoNotOptimize(v);
255 clobber_all_memory();
256 v.store((ELEM*)B);
257 DoNotOptimize(B[0]);
258 return 0;
259 }
260
261 template<typename VEC>
262 inline VEC
load_vec(int)263 load_vec(int /*dummy*/)
264 {
265 typedef typename VEC::value_t ELEM;
266 VEC v;
267 v.load((ELEM*)dummy_float);
268 return v;
269 }
270
271 template<typename VEC>
272 inline int
store_vec(const VEC & v)273 store_vec(const VEC& v)
274 {
275 typedef typename VEC::value_t ELEM;
276 v.store((ELEM*)dummy_float);
277 return 0;
278 }
279
280 template<typename VEC>
281 inline VEC
load_scalar(int)282 load_scalar(int /*dummy*/)
283 {
284 typedef typename VEC::value_t ELEM;
285 VEC v;
286 OIIO_PRAGMA_WARNING_PUSH
287 OIIO_GCC_ONLY_PRAGMA(GCC diagnostic ignored "-Wstrict-aliasing")
288 v.load(*(ELEM*)dummy_float);
289 OIIO_PRAGMA_WARNING_POP
290 return v;
291 }
292
293 template<typename VEC, int N>
294 inline VEC
load_vec_N(typename VEC::value_t *)295 load_vec_N(typename VEC::value_t* /*B*/)
296 {
297 typedef typename VEC::value_t ELEM;
298 VEC v;
299 v.load((ELEM*)dummy_float, N);
300 return v;
301 }
302
303 template<typename VEC, int N>
304 inline int
store_vec_N(const VEC & v)305 store_vec_N(const VEC& v)
306 {
307 typedef typename VEC::value_t ELEM;
308 v.store((ELEM*)dummy_float, N);
309 DoNotOptimize(dummy_float[0]);
310 return 0;
311 }
312
313
314
315 inline float
dot_imath(const Imath::V3f & v)316 dot_imath(const Imath::V3f& v)
317 {
318 return v.dot(v);
319 }
320 inline float
dot_imath_simd(const Imath::V3f & v_)321 dot_imath_simd(const Imath::V3f& v_)
322 {
323 vfloat3 v(v_);
324 return simd::dot(v, v);
325 }
326 inline float
dot_simd(const simd::vfloat3 & v)327 dot_simd(const simd::vfloat3& v)
328 {
329 return dot(v, v);
330 }
331
332 inline Imath::V3f
norm_imath(const Imath::V3f & a)333 norm_imath(const Imath::V3f& a)
334 {
335 return a.normalized();
336 }
337
338 inline Imath::V3f
norm_imath_simd(const vfloat3 & a)339 norm_imath_simd(const vfloat3& a)
340 {
341 return a.normalized().V3f();
342 }
343
344 inline Imath::V3f
norm_imath_simd_fast(const vfloat3 & a)345 norm_imath_simd_fast(const vfloat3& a)
346 {
347 return a.normalized_fast().V3f();
348 }
349
350 inline vfloat3
norm_simd_fast(const vfloat3 & a)351 norm_simd_fast(const vfloat3& a)
352 {
353 return a.normalized_fast();
354 }
355
356 inline vfloat3
norm_simd(const vfloat3 & a)357 norm_simd(const vfloat3& a)
358 {
359 return a.normalized();
360 }
361
362
363 inline Imath::M44f
inverse_imath(const Imath::M44f & M)364 inverse_imath(const Imath::M44f& M)
365 {
366 return M.inverse();
367 }
368
369
370 inline matrix44
inverse_simd(const matrix44 & M)371 inverse_simd(const matrix44& M)
372 {
373 return M.inverse();
374 }
375
376
377
378 template<typename VEC>
379 void
test_loadstore()380 test_loadstore()
381 {
382 typedef typename VEC::value_t ELEM;
383 test_heading("load/store ", VEC::type_name());
384 OIIO_SIMD16_ALIGN ELEM oneval[]
385 = { 101, 101, 101, 101, 101, 101, 101, 101,
386 101, 101, 101, 101, 101, 101, 101, 101 };
387 OIIO_CHECK_SIMD_EQUAL(VEC(oneval), VEC(oneval[0]));
388 { VEC a = oneval[0]; OIIO_CHECK_SIMD_EQUAL(VEC(oneval), a); }
389 OIIO_SIMD16_ALIGN VEC C1234 = VEC::Iota(1);
390 OIIO_SIMD16_ALIGN ELEM partial[]
391 = { 101, 102, 103, 104, 105, 106, 107, 108,
392 109, 110, 111, 112, 113, 114, 115, 116 };
393 OIIO_CHECK_SIMD_EQUAL(VEC(partial), VEC::Iota(101));
394 for (int i = 1; i <= VEC::elements; ++i) {
395 VEC a(ELEM(0));
396 a.load(partial, i);
397 for (int j = 0; j < VEC::elements; ++j)
398 OIIO_CHECK_EQUAL(a[j], j < i ? partial[j] : ELEM(0));
399 std::cout << " partial load " << i << " : " << a << "\n";
400 ELEM stored[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
401 C1234.store(stored, i);
402 for (int j = 0; j < VEC::elements; ++j)
403 OIIO_CHECK_EQUAL(stored[j], j < i ? ELEM(j + 1) : ELEM(0));
404 std::cout << " partial store " << i << " :";
405 for (int c = 0; c < VEC::elements; ++c)
406 std::cout << ' ' << stored[c];
407 std::cout << std::endl;
408 }
409
410 benchmark("load scalar", load_scalar<VEC>, 0, VEC::elements);
411 benchmark("load vec", load_vec<VEC>, 0, VEC::elements);
412 benchmark("store vec", store_vec<VEC>, 0, VEC::elements);
413 OIIO_SIMD16_ALIGN ELEM tmp[VEC::elements];
414 if (VEC::elements == 16) {
415 benchmark("load 16 comps", load_vec_N<VEC, 16>, tmp, 16);
416 benchmark("load 13 comps", load_vec_N<VEC, 13>, tmp, 13);
417 benchmark("load 9 comps", load_vec_N<VEC, 9>, tmp, 9);
418 }
419 if (VEC::elements > 4) {
420 benchmark("load 8 comps", load_vec_N<VEC, 8>, tmp, 8);
421 benchmark("load 7 comps", load_vec_N<VEC, 7>, tmp, 7);
422 benchmark("load 6 comps", load_vec_N<VEC, 6>, tmp, 6);
423 benchmark("load 5 comps", load_vec_N<VEC, 5>, tmp, 5);
424 }
425 if (VEC::elements >= 4) {
426 benchmark("load 4 comps", load_vec_N<VEC, 4>, tmp, 4);
427 }
428 benchmark("load 3 comps", load_vec_N<VEC, 3>, tmp, 3);
429 benchmark("load 2 comps", load_vec_N<VEC, 2>, tmp, 2);
430 benchmark("load 1 comps", load_vec_N<VEC, 1>, tmp, 1);
431
432 if (VEC::elements == 16) {
433 benchmark("store 16 comps", store_vec_N<VEC, 16>, C1234, 16);
434 benchmark("store 13 comps", store_vec_N<VEC, 13>, C1234, 13);
435 benchmark("store 9 comps", store_vec_N<VEC, 9>, C1234, 9);
436 }
437 if (VEC::elements > 4) {
438 benchmark("store 8 comps", store_vec_N<VEC, 8>, C1234, 8);
439 benchmark("store 7 comps", store_vec_N<VEC, 7>, C1234, 7);
440 benchmark("store 6 comps", store_vec_N<VEC, 6>, C1234, 6);
441 benchmark("store 5 comps", store_vec_N<VEC, 5>, C1234, 5);
442 }
443 if (VEC::elements >= 4) {
444 benchmark("store 4 comps", store_vec_N<VEC, 4>, C1234, 4);
445 }
446 benchmark("store 3 comps", store_vec_N<VEC, 3>, C1234, 3);
447 benchmark("store 2 comps", store_vec_N<VEC, 2>, C1234, 2);
448 benchmark("store 1 comps", store_vec_N<VEC, 1>, C1234, 1);
449 }
450
451
452
453 template<typename VEC>
454 void
test_conversion_loadstore_float()455 test_conversion_loadstore_float()
456 {
457 typedef typename VEC::value_t ELEM;
458 test_heading("load/store with conversion", VEC::type_name());
459 VEC C1234 = VEC::Iota(1);
460 ELEM partial[] = { 101, 102, 103, 104, 105, 106, 107, 108,
461 109, 110, 111, 112, 113, 114, 115, 116 };
462 OIIO_CHECK_SIMD_EQUAL(VEC(partial), VEC::Iota(101));
463
464 // Check load from integers
465 unsigned short us1234[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
466 short s1234[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
467 unsigned char uc1234[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
468 char c1234[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
469 half h1234[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
470 OIIO_CHECK_SIMD_EQUAL (VEC(us1234), C1234);
471 OIIO_CHECK_SIMD_EQUAL (VEC( s1234), C1234);
472 OIIO_CHECK_SIMD_EQUAL (VEC(uc1234), C1234);
473 OIIO_CHECK_SIMD_EQUAL (VEC( c1234), C1234);
474
475 benchmark ("load from unsigned short[]", [](const unsigned short *d){ return VEC(d); }, us1234);
476 benchmark ("load from short[]", [](const short *d){ return VEC(d); }, s1234);
477 benchmark ("load from unsigned char[]", [](const unsigned char *d){ return VEC(d); }, uc1234);
478 benchmark ("load from char[]", [](const char *d){ return VEC(d); }, c1234);
479 benchmark ("load from half[]", [](const half *d){ return VEC(d); }, h1234);
480
481 benchmark ("store to half[]", [=](half *d){ C1234.store(d); return 0; }, h1234, VEC::elements);
482 }
483
484
485
// Verify that an int VEC can be constructed from arrays of int and narrower
// integer types, then benchmark those loads and the narrowing stores.
template<typename VEC>
void
test_conversion_loadstore_int()
{
    using ELEM = typename VEC::value_t;
    test_heading("load/store with conversion", VEC::type_name());

    VEC C1234 = VEC::Iota(1);
    ELEM partial[] = { 101, 102, 103, 104, 105, 106, 107, 108,
                       109, 110, 111, 112, 113, 114, 115, 116 };
    OIIO_CHECK_SIMD_EQUAL(VEC(partial), VEC::Iota(101));

    // Source arrays holding 1..16 in each convertible integer type.
    int i1234[]             = { 1, 2,  3,  4,  5,  6,  7,  8,
                                9, 10, 11, 12, 13, 14, 15, 16 };
    unsigned short us1234[] = { 1, 2,  3,  4,  5,  6,  7,  8,
                                9, 10, 11, 12, 13, 14, 15, 16 };
    short s1234[]           = { 1, 2,  3,  4,  5,  6,  7,  8,
                                9, 10, 11, 12, 13, 14, 15, 16 };
    unsigned char uc1234[]  = { 1, 2,  3,  4,  5,  6,  7,  8,
                                9, 10, 11, 12, 13, 14, 15, 16 };
    char c1234[]            = { 1, 2,  3,  4,  5,  6,  7,  8,
                                9, 10, 11, 12, 13, 14, 15, 16 };

    // Every source type must reproduce Iota(1).
    OIIO_CHECK_SIMD_EQUAL(VEC(i1234), C1234);
    OIIO_CHECK_SIMD_EQUAL(VEC(us1234), C1234);
    OIIO_CHECK_SIMD_EQUAL(VEC(s1234), C1234);
    OIIO_CHECK_SIMD_EQUAL(VEC(uc1234), C1234);
    OIIO_CHECK_SIMD_EQUAL(VEC(c1234), C1234);

    benchmark("load from int[]",
              [](const int* d) { return VEC(d); }, i1234);
    benchmark("load from unsigned short[]",
              [](const unsigned short* d) { return VEC(d); }, us1234);
    benchmark("load from short[]",
              [](const short* d) { return VEC(d); }, s1234);
    benchmark("load from unsigned char[]",
              [](const unsigned char* d) { return VEC(d); }, uc1234);
    benchmark("load from char[]",
              [](const char* d) { return VEC(d); }, c1234);

    // The store lambdas keep their own copy of C1234.
    benchmark("store to unsigned short[]",
              [C1234](unsigned short* d) {
                  C1234.store(d);
                  return 0;
              },
              us1234, VEC::elements);
    benchmark("store to unsigned char[]",
              [C1234](unsigned char* d) {
                  C1234.store(d);
                  return 0;
              },
              uc1234, VEC::elements);
}
517
518
519
520 template<typename VEC>
test_vint_to_uint16s()521 void test_vint_to_uint16s ()
522 {
523 test_heading (Strutil::sprintf("test converting %s to uint16", VEC::type_name()));
524 VEC ival = VEC::Iota (0xffff0000);
525 unsigned short buf[VEC::elements];
526 ival.store (buf);
527 for (int i = 0; i < VEC::elements; ++i)
528 OIIO_CHECK_EQUAL (int(buf[i]), i);
529
530 benchmark2 ("load from uint16", [](VEC& a, unsigned short *s){ a.load(s); return 1; }, ival, buf, VEC::elements);
531 benchmark2 ("convert to uint16", [](const VEC& a, unsigned short *s){ a.store(s); return 1; }, ival, buf, VEC::elements);
532 }
533
534
535
536 template<typename VEC>
test_vint_to_uint8s()537 void test_vint_to_uint8s ()
538 {
539 test_heading (Strutil::sprintf("test converting %s to uint8", VEC::type_name()));
540 VEC ival = VEC::Iota (0xffffff00);
541 unsigned char buf[VEC::elements];
542 ival.store (buf);
543 for (int i = 0; i < VEC::elements; ++i)
544 OIIO_CHECK_EQUAL (int(buf[i]), i);
545
546 benchmark2 ("load from uint8", [](VEC& a, unsigned char *s){ a.load(s); return 1; }, ival, buf, VEC::elements);
547 benchmark2 ("convert to uint16", [](const VEC& a, unsigned char *s){ a.store(s); return 1; }, ival, buf, VEC::elements);
548 }
549
550
551
552 template<typename VEC>
test_masked_loadstore()553 void test_masked_loadstore ()
554 {
555 typedef typename VEC::value_t ELEM;
556 typedef typename VEC::vbool_t BOOL;
557 test_heading ("masked loadstore ", VEC::type_name());
558 ELEM iota[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
559 BOOL mask1 = mkvec<BOOL> (true, false, true, false);
560 BOOL mask2 = mkvec<BOOL> (true, true, false,false);
561
562 VEC v;
563 v = -1;
564 v.load_mask (mask1, iota);
565 ELEM r1[] = { 1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0 };
566 OIIO_CHECK_SIMD_EQUAL (v, VEC(r1));
567 ELEM buf[] = { -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2 };
568 v.store_mask (mask2, buf);
569 ELEM r2[] = { 1, 0, -2, -2, 5, 0, -2, -2, 9, 0, -2, -2, 13, 0, -2, -2 };
570 OIIO_CHECK_SIMD_EQUAL (VEC(buf), VEC(r2));
571
572 benchmark ("masked load with int mask", [](const ELEM *d){ VEC v; v.load_mask (0xffff, d); return v; }, iota);
573 benchmark ("masked load with bool mask", [](const ELEM *d){ VEC v; v.load_mask (BOOL::True(), d); return v; }, iota);
574 benchmark ("masked store with int mask", [&](ELEM *d){ v.store_mask (0xffff, d); return 0; }, r2);
575 benchmark ("masked store with bool mask", [&](ELEM *d){ v.store_mask (BOOL::True(), d); return 0; }, r2);
576 }
577
578
579
// Verify gather/scatter and their masked variants on a buffer in which only
// every third element (offset 1) carries data, then benchmark all four.
template<typename VEC>
void
test_gatherscatter()
{
    using ELEM = typename VEC::value_t;
    using BOOL = typename VEC::vbool_t;
    test_heading("scatter & gather ", VEC::type_name());

    const int spacing = 3;
    const int bufsize = VEC::elements * spacing + 1;
    // gather_source will contain: -1 0 -1 -1 1 -1 -1 2 -1 -1 3 -1 ...
    std::vector<ELEM> gather_source(bufsize);
    for (int i = 0; i < bufsize; ++i) {
        if ((i % spacing) == 1)
            gather_source[i] = i / spacing;
        else
            gather_source[i] = -1;
    }

    // Lane k reads/writes element 1 + spacing*k.
    auto indices = VEC::vint_t::Iota(1, spacing);
    VEC g, gm;
    g.gather(gather_source.data(), indices);
    OIIO_CHECK_SIMD_EQUAL(g, VEC::Iota());

    BOOL mask = BOOL::from_bitmask(0x55555555);  // every other one
    ELEM every_other_iota[]
        = { 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0 };
    gm = 0;
    gm.gather_mask(mask, gather_source.data(), indices);
    OIIO_CHECK_SIMD_EQUAL(gm, VEC(every_other_iota));

    // An unmasked scatter of the gathered values must rebuild the source.
    std::vector<ELEM> scatter_out(bufsize, (ELEM)-1);
    g.scatter(scatter_out.data(), indices);
    OIIO_CHECK_ASSERT(scatter_out == gather_source);

    // Masked scatter: only even lanes write.  Lane k targets 1 + spacing*k,
    // which is odd exactly when k is even, hence the (i & 1) test.
    std::fill(scatter_out.begin(), scatter_out.end(), -1);
    VEC::Iota().scatter_mask(mask, scatter_out.data(), indices);
    for (int i = 0; i < (int)scatter_out.size(); ++i)
        OIIO_CHECK_EQUAL(scatter_out[i],
                         (((i % spacing) == 1 && (i & 1)) ? i / spacing : -1));

    benchmark("gather",
              [&](const ELEM* d) {
                  VEC v;
                  v.gather(d, indices);
                  return v;
              },
              gather_source.data());
    benchmark("gather_mask",
              [&](const ELEM* d) {
                  VEC v;
                  v.gather_mask(mask, d, indices);
                  return v;
              },
              gather_source.data());
    benchmark("scatter",
              [&](ELEM* d) {
                  g.scatter(d, indices);
                  return g;
              },
              scatter_out.data());
    benchmark("scatter_mask",
              [&](ELEM* d) {
                  g.scatter_mask(mask, d, indices);
                  return g;
              },
              scatter_out.data());
}
620
621
622
623 template<typename T>
test_extract3()624 void test_extract3 ()
625 {
626 const T vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
627 using VEC = typename VecType<T,3>::type;
628 VEC b (vals);
629 for (int i = 0; i < VEC::elements; ++i)
630 OIIO_CHECK_EQUAL (b[i], vals[i]);
631 OIIO_CHECK_EQUAL (extract<0>(b), 0);
632 OIIO_CHECK_EQUAL (extract<1>(b), 1);
633 OIIO_CHECK_EQUAL (extract<2>(b), 2);
634 }
635
636 template<typename T>
637 void
test_extract4()638 test_extract4()
639 {
640 const T vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
641 using VEC = typename VecType<T, 4>::type;
642 VEC b(vals);
643 for (int i = 0; i < VEC::elements; ++i)
644 OIIO_CHECK_EQUAL(b[i], vals[i]);
645 OIIO_CHECK_EQUAL(extract<0>(b), 0);
646 OIIO_CHECK_EQUAL(extract<1>(b), 1);
647 OIIO_CHECK_EQUAL(extract<2>(b), 2);
648 OIIO_CHECK_EQUAL(extract<3>(b), 3);
649 }
650
651 template<typename T>
652 void
test_extract8()653 test_extract8()
654 {
655 test_extract4<T>();
656
657 const T vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
658 using VEC = typename VecType<T, 8>::type;
659 VEC b(vals);
660 for (int i = 0; i < VEC::elements; ++i)
661 OIIO_CHECK_EQUAL(b[i], vals[i]);
662 OIIO_CHECK_EQUAL(extract<4>(b), 4);
663 OIIO_CHECK_EQUAL(extract<5>(b), 5);
664 OIIO_CHECK_EQUAL(extract<6>(b), 6);
665 OIIO_CHECK_EQUAL(extract<7>(b), 7);
666 }
667
668 template<typename T>
669 void
test_extract16()670 test_extract16()
671 {
672 test_extract8<T>();
673
674 const T vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
675 using VEC = typename VecType<T, 16>::type;
676 VEC b(vals);
677 for (int i = 0; i < VEC::elements; ++i)
678 OIIO_CHECK_EQUAL(b[i], vals[i]);
679 OIIO_CHECK_EQUAL(extract<8>(b), 8);
680 OIIO_CHECK_EQUAL(extract<9>(b), 9);
681 OIIO_CHECK_EQUAL(extract<10>(b), 10);
682 OIIO_CHECK_EQUAL(extract<11>(b), 11);
683 OIIO_CHECK_EQUAL(extract<12>(b), 12);
684 OIIO_CHECK_EQUAL(extract<13>(b), 13);
685 OIIO_CHECK_EQUAL(extract<14>(b), 14);
686 OIIO_CHECK_EQUAL(extract<15>(b), 15);
687 }
688
689
690
691 template<typename T, int SIZE> void test_extract ();
test_extract()692 template<> void test_extract<float,16> () { test_extract16<float>(); }
test_extract()693 template<> void test_extract<int,16> () { test_extract16<int>(); }
test_extract()694 template<> void test_extract<float,8> () { test_extract8<float>(); }
test_extract()695 template<> void test_extract<int,8> () { test_extract8<int>(); }
test_extract()696 template<> void test_extract<float,4> () { test_extract4<float>(); }
test_extract()697 template<> void test_extract<int,4> () { test_extract4<int>(); }
test_extract()698 template<> void test_extract<float,3> () { test_extract3<float>(); }
699
700
701
702 template<typename VEC>
703 void
test_component_access()704 test_component_access()
705 {
706 typedef typename VEC::value_t ELEM;
707 test_heading("component_access ", VEC::type_name());
708
709 const ELEM vals[]
710 = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
711 VEC a = VEC::Iota();
712 for (int i = 0; i < VEC::elements; ++i)
713 OIIO_CHECK_EQUAL(a[i], vals[i]);
714
715 if (VEC::elements <= 4) {
716 OIIO_CHECK_EQUAL(a.x(), 0);
717 OIIO_CHECK_EQUAL(a.y(), 1);
718 OIIO_CHECK_EQUAL(a.z(), 2);
719 if (SimdElements<VEC>::size > 3)
720 OIIO_CHECK_EQUAL(a.w(), 3);
721 VEC t;
722 t = a;
723 t.set_x(42);
724 OIIO_CHECK_SIMD_EQUAL(t, mkvec<VEC>(42, 1, 2, 3, 4, 5, 6, 7));
725 t = a;
726 t.set_y(42);
727 OIIO_CHECK_SIMD_EQUAL(t, mkvec<VEC>(0, 42, 2, 3, 4, 5, 6, 7));
728 t = a;
729 t.set_z(42);
730 OIIO_CHECK_SIMD_EQUAL(t, mkvec<VEC>(0, 1, 42, 3, 4, 5, 6, 7));
731 if (SimdElements<VEC>::size > 3) {
732 t = a;
733 t.set_w(42);
734 OIIO_CHECK_SIMD_EQUAL(t, mkvec<VEC>(0, 1, 2, 42, 4, 5, 6, 7));
735 }
736 }
737
738 OIIO_CHECK_EQUAL(extract<0>(a), 0);
739 OIIO_CHECK_EQUAL(extract<1>(a), 1);
740 OIIO_CHECK_EQUAL(extract<2>(a), 2);
741 if (SimdElements<VEC>::size > 3)
742 OIIO_CHECK_EQUAL (extract<3>(a), 3);
743 OIIO_CHECK_SIMD_EQUAL (insert<0>(a, ELEM(42)), mkvec<VEC>(42,1,2,3,4,5,6,7));
744 OIIO_CHECK_SIMD_EQUAL (insert<1>(a, ELEM(42)), mkvec<VEC>(0,42,2,3,4,5,6,7));
745 OIIO_CHECK_SIMD_EQUAL (insert<2>(a, ELEM(42)), mkvec<VEC>(0,1,42,3,4,5,6,7));
746 if (SimdElements<VEC>::size > 3)
747 OIIO_CHECK_SIMD_EQUAL (insert<3>(a, ELEM(42)), mkvec<VEC>(0,1,2,42,4,5,6,7));
748
749 VEC b(vals);
750 #if 1
751 test_extract<ELEM, VEC::elements>();
752 #else
753 for (int i = 0; i < VEC::elements; ++i)
754 OIIO_CHECK_EQUAL(b[i], vals[i]);
755 OIIO_CHECK_EQUAL(extract<0>(b), 0);
756 OIIO_CHECK_EQUAL(extract<1>(b), 1);
757 OIIO_CHECK_EQUAL(extract<2>(b), 2);
758 if (SimdElements<VEC>::size > 3)
759 OIIO_CHECK_EQUAL(extract<3>(b), 3);
760 if (SimdElements<VEC>::size > 4) {
761 OIIO_CHECK_EQUAL(extract<4>(b), 4);
762 OIIO_CHECK_EQUAL(extract<5>(b), 5);
763 OIIO_CHECK_EQUAL(extract<6>(b), 6);
764 OIIO_CHECK_EQUAL(extract<7>(b), 7);
765 }
766 if (SimdElements<VEC>::size > 8) {
767 OIIO_CHECK_EQUAL(extract<8>(b), 8);
768 OIIO_CHECK_EQUAL(extract<9>(b), 9);
769 OIIO_CHECK_EQUAL(extract<10>(b), 10);
770 OIIO_CHECK_EQUAL(extract<11>(b), 11);
771 OIIO_CHECK_EQUAL(extract<12>(b), 12);
772 OIIO_CHECK_EQUAL(extract<13>(b), 13);
773 OIIO_CHECK_EQUAL(extract<14>(b), 14);
774 OIIO_CHECK_EQUAL(extract<15>(b), 15);
775 }
776 #endif
777
778 benchmark2 ("operator[i]", [&](const VEC& v, int i){ return v[i]; }, b, 2, 1 /*work*/);
779 benchmark2 ("operator[2]", [&](const VEC& v, int /*i*/){ return v[2]; }, b, 2, 1 /*work*/);
780 benchmark2 ("operator[0]", [&](const VEC& v, int /*i*/){ return v[0]; }, b, 0, 1 /*work*/);
781 benchmark2 ("extract<2> ", [&](const VEC& v, int /*i*/){ return extract<2>(v); }, b, 2, 1 /*work*/);
782 benchmark2 ("extract<0> ", [&](const VEC& v, int /*i*/){ return extract<0>(v); }, b, 0, 1 /*work*/);
783 benchmark2 ("insert<2> ", [&](const VEC& v, ELEM i){ return insert<2>(v, i); }, b, ELEM(1), 1 /*work*/);
784 }
785
786
787
788 template<>
789 void
test_component_access()790 test_component_access<vbool4>()
791 {
792 typedef vbool4 VEC;
793 typedef VEC::value_t ELEM;
794 test_heading("component_access ", VEC::type_name());
795
796 for (int bit = 0; bit < VEC::elements; ++bit) {
797 VEC ctr(bit == 0, bit == 1, bit == 2, bit == 3);
798 VEC a;
799 a.clear();
800 for (int b = 0; b < VEC::elements; ++b)
801 a.setcomp(b, b == bit);
802 OIIO_CHECK_SIMD_EQUAL(ctr, a);
803 for (int b = 0; b < VEC::elements; ++b)
804 OIIO_CHECK_EQUAL(bool(a[b]), b == bit);
805 OIIO_CHECK_EQUAL(extract<0>(a), bit == 0);
806 OIIO_CHECK_EQUAL(extract<1>(a), bit == 1);
807 OIIO_CHECK_EQUAL(extract<2>(a), bit == 2);
808 OIIO_CHECK_EQUAL(extract<3>(a), bit == 3);
809 }
810
811 VEC a;
812 a.load(0, 0, 0, 0);
813 OIIO_CHECK_SIMD_EQUAL(insert<0>(a, 1), VEC(1, 0, 0, 0));
814 OIIO_CHECK_SIMD_EQUAL(insert<1>(a, 1), VEC(0, 1, 0, 0));
815 OIIO_CHECK_SIMD_EQUAL(insert<2>(a, 1), VEC(0, 0, 1, 0));
816 OIIO_CHECK_SIMD_EQUAL(insert<3>(a, 1), VEC(0, 0, 0, 1));
817 a.load(1, 1, 1, 1);
818 OIIO_CHECK_SIMD_EQUAL(insert<0>(a, 0), VEC(0, 1, 1, 1));
819 OIIO_CHECK_SIMD_EQUAL(insert<1>(a, 0), VEC(1, 0, 1, 1));
820 OIIO_CHECK_SIMD_EQUAL(insert<2>(a, 0), VEC(1, 1, 0, 1));
821 OIIO_CHECK_SIMD_EQUAL(insert<3>(a, 0), VEC(1, 1, 1, 0));
822 }
823
824
825
826 template<>
827 void
test_component_access()828 test_component_access<vbool8>()
829 {
830 typedef vbool8 VEC;
831 typedef VEC::value_t ELEM;
832 test_heading("component_access ", VEC::type_name());
833
834 for (int bit = 0; bit < VEC::elements; ++bit) {
835 VEC ctr(bit == 0, bit == 1, bit == 2, bit == 3, bit == 4, bit == 5,
836 bit == 6, bit == 7);
837 VEC a;
838 a.clear();
839 for (int b = 0; b < VEC::elements; ++b)
840 a.setcomp(b, b == bit);
841 OIIO_CHECK_SIMD_EQUAL(ctr, a);
842 for (int b = 0; b < VEC::elements; ++b)
843 OIIO_CHECK_EQUAL(bool(a[b]), b == bit);
844 OIIO_CHECK_EQUAL(extract<0>(a), bit == 0);
845 OIIO_CHECK_EQUAL(extract<1>(a), bit == 1);
846 OIIO_CHECK_EQUAL(extract<2>(a), bit == 2);
847 OIIO_CHECK_EQUAL(extract<3>(a), bit == 3);
848 OIIO_CHECK_EQUAL(extract<4>(a), bit == 4);
849 OIIO_CHECK_EQUAL(extract<5>(a), bit == 5);
850 OIIO_CHECK_EQUAL(extract<6>(a), bit == 6);
851 OIIO_CHECK_EQUAL(extract<7>(a), bit == 7);
852 }
853
854 VEC a;
855 a.load(0, 0, 0, 0, 0, 0, 0, 0);
856 OIIO_CHECK_SIMD_EQUAL(insert<0>(a, 1), VEC(1, 0, 0, 0, 0, 0, 0, 0));
857 OIIO_CHECK_SIMD_EQUAL(insert<1>(a, 1), VEC(0, 1, 0, 0, 0, 0, 0, 0));
858 OIIO_CHECK_SIMD_EQUAL(insert<2>(a, 1), VEC(0, 0, 1, 0, 0, 0, 0, 0));
859 OIIO_CHECK_SIMD_EQUAL(insert<3>(a, 1), VEC(0, 0, 0, 1, 0, 0, 0, 0));
860 OIIO_CHECK_SIMD_EQUAL(insert<4>(a, 1), VEC(0, 0, 0, 0, 1, 0, 0, 0));
861 OIIO_CHECK_SIMD_EQUAL(insert<5>(a, 1), VEC(0, 0, 0, 0, 0, 1, 0, 0));
862 OIIO_CHECK_SIMD_EQUAL(insert<6>(a, 1), VEC(0, 0, 0, 0, 0, 0, 1, 0));
863 OIIO_CHECK_SIMD_EQUAL(insert<7>(a, 1), VEC(0, 0, 0, 0, 0, 0, 0, 1));
864 a.load(1, 1, 1, 1, 1, 1, 1, 1);
865 OIIO_CHECK_SIMD_EQUAL(insert<0>(a, 0), VEC(0, 1, 1, 1, 1, 1, 1, 1));
866 OIIO_CHECK_SIMD_EQUAL(insert<1>(a, 0), VEC(1, 0, 1, 1, 1, 1, 1, 1));
867 OIIO_CHECK_SIMD_EQUAL(insert<2>(a, 0), VEC(1, 1, 0, 1, 1, 1, 1, 1));
868 OIIO_CHECK_SIMD_EQUAL(insert<3>(a, 0), VEC(1, 1, 1, 0, 1, 1, 1, 1));
869 OIIO_CHECK_SIMD_EQUAL(insert<4>(a, 0), VEC(1, 1, 1, 1, 0, 1, 1, 1));
870 OIIO_CHECK_SIMD_EQUAL(insert<5>(a, 0), VEC(1, 1, 1, 1, 1, 0, 1, 1));
871 OIIO_CHECK_SIMD_EQUAL(insert<6>(a, 0), VEC(1, 1, 1, 1, 1, 1, 0, 1));
872 OIIO_CHECK_SIMD_EQUAL(insert<7>(a, 0), VEC(1, 1, 1, 1, 1, 1, 1, 0));
873 }
874
875
876
877 template<>
878 void
test_component_access()879 test_component_access<vbool16>()
880 {
881 typedef vbool16 VEC;
882 typedef VEC::value_t ELEM;
883 test_heading("component_access ", VEC::type_name());
884
885 for (int bit = 0; bit < VEC::elements; ++bit) {
886 VEC ctr(bit == 0, bit == 1, bit == 2, bit == 3, bit == 4, bit == 5,
887 bit == 6, bit == 7, bit == 8, bit == 9, bit == 10, bit == 11,
888 bit == 12, bit == 13, bit == 14, bit == 15);
889 VEC a;
890 a.clear();
891 for (int b = 0; b < VEC::elements; ++b)
892 a.setcomp(b, b == bit);
893 OIIO_CHECK_SIMD_EQUAL(ctr, a);
894 for (int b = 0; b < VEC::elements; ++b)
895 OIIO_CHECK_EQUAL(bool(a[b]), b == bit);
896 OIIO_CHECK_EQUAL(extract<0>(a), bit == 0);
897 OIIO_CHECK_EQUAL(extract<1>(a), bit == 1);
898 OIIO_CHECK_EQUAL(extract<2>(a), bit == 2);
899 OIIO_CHECK_EQUAL(extract<3>(a), bit == 3);
900 OIIO_CHECK_EQUAL(extract<4>(a), bit == 4);
901 OIIO_CHECK_EQUAL(extract<5>(a), bit == 5);
902 OIIO_CHECK_EQUAL(extract<6>(a), bit == 6);
903 OIIO_CHECK_EQUAL(extract<7>(a), bit == 7);
904 OIIO_CHECK_EQUAL(extract<8>(a), bit == 8);
905 OIIO_CHECK_EQUAL(extract<9>(a), bit == 9);
906 OIIO_CHECK_EQUAL(extract<10>(a), bit == 10);
907 OIIO_CHECK_EQUAL(extract<11>(a), bit == 11);
908 OIIO_CHECK_EQUAL(extract<12>(a), bit == 12);
909 OIIO_CHECK_EQUAL(extract<13>(a), bit == 13);
910 OIIO_CHECK_EQUAL(extract<14>(a), bit == 14);
911 OIIO_CHECK_EQUAL(extract<15>(a), bit == 15);
912 }
913
914 VEC a;
915 a.load (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
916 OIIO_CHECK_SIMD_EQUAL (insert<0> (a, 1), VEC(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
917 OIIO_CHECK_SIMD_EQUAL (insert<1> (a, 1), VEC(0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
918 OIIO_CHECK_SIMD_EQUAL (insert<2> (a, 1), VEC(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0));
919 OIIO_CHECK_SIMD_EQUAL (insert<3> (a, 1), VEC(0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0));
920 OIIO_CHECK_SIMD_EQUAL (insert<4> (a, 1), VEC(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0));
921 OIIO_CHECK_SIMD_EQUAL (insert<5> (a, 1), VEC(0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0));
922 OIIO_CHECK_SIMD_EQUAL (insert<6> (a, 1), VEC(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0));
923 OIIO_CHECK_SIMD_EQUAL (insert<7> (a, 1), VEC(0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0));
924 OIIO_CHECK_SIMD_EQUAL (insert<8> (a, 1), VEC(0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0));
925 OIIO_CHECK_SIMD_EQUAL (insert<9> (a, 1), VEC(0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0));
926 OIIO_CHECK_SIMD_EQUAL (insert<10>(a, 1), VEC(0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0));
927 OIIO_CHECK_SIMD_EQUAL (insert<11>(a, 1), VEC(0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0));
928 OIIO_CHECK_SIMD_EQUAL (insert<12>(a, 1), VEC(0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0));
929 OIIO_CHECK_SIMD_EQUAL (insert<13>(a, 1), VEC(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0));
930 OIIO_CHECK_SIMD_EQUAL (insert<14>(a, 1), VEC(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0));
931 OIIO_CHECK_SIMD_EQUAL (insert<15>(a, 1), VEC(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1));
932 a.load (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
933 OIIO_CHECK_SIMD_EQUAL (insert<0> (a, 0), VEC(0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1));
934 OIIO_CHECK_SIMD_EQUAL (insert<1> (a, 0), VEC(1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1));
935 OIIO_CHECK_SIMD_EQUAL (insert<2> (a, 0), VEC(1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1));
936 OIIO_CHECK_SIMD_EQUAL (insert<3> (a, 0), VEC(1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1));
937 OIIO_CHECK_SIMD_EQUAL (insert<4> (a, 0), VEC(1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1));
938 OIIO_CHECK_SIMD_EQUAL (insert<5> (a, 0), VEC(1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1));
939 OIIO_CHECK_SIMD_EQUAL (insert<6> (a, 0), VEC(1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1));
940 OIIO_CHECK_SIMD_EQUAL (insert<7> (a, 0), VEC(1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1));
941 OIIO_CHECK_SIMD_EQUAL (insert<8> (a, 0), VEC(1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1));
942 OIIO_CHECK_SIMD_EQUAL (insert<9> (a, 0), VEC(1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1));
943 OIIO_CHECK_SIMD_EQUAL (insert<10>(a, 0), VEC(1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1));
944 OIIO_CHECK_SIMD_EQUAL (insert<11>(a, 0), VEC(1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1));
945 OIIO_CHECK_SIMD_EQUAL (insert<12>(a, 0), VEC(1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1));
946 OIIO_CHECK_SIMD_EQUAL (insert<13>(a, 0), VEC(1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1));
947 OIIO_CHECK_SIMD_EQUAL (insert<14>(a, 0), VEC(1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1));
948 OIIO_CHECK_SIMD_EQUAL (insert<15>(a, 0), VEC(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0));
949 }
950
951
952
// Thin named wrappers around the arithmetic operators, so that each
// operation can be handed to benchmark()/benchmark2() as a function.

// Unary minus.
template<typename T>
inline T
do_neg(const T& a)
{
    return -a;
}

// Binary addition.
template<typename T>
inline T
do_add(const T& a, const T& b)
{
    return a + b;
}

// Binary subtraction.
template<typename T>
inline T
do_sub(const T& a, const T& b)
{
    return a - b;
}

// Multiplication. The two operand types may differ (the second defaults to
// the first), so the return type is whatever a*b yields.
template<typename T, typename U = T>
inline auto
do_mul(const T& a, const U& b) -> decltype(a * b)
{
    return a * b;
}

// Division with operator/.
template<typename T>
inline T
do_div(const T& a, const T& b)
{
    return a / b;
}

// Division through safe_div(), with the result converted back to T.
template<typename T>
inline T
do_safe_div(const T& a, const T& b)
{
    return T(safe_div(a, b));
}
// Add two Imath::V3f values by converting both to vfloat3, adding them as
// vfloat3, and converting the sum back with V3f().
inline Imath::V3f
add_vec_simd(const Imath::V3f& a, const Imath::V3f& b)
{
    vfloat3 sum = vfloat3(a) + vfloat3(b);
    return sum.V3f();
}
// Named wrapper around abs(), usable as a benchmark() callback.
template<typename T>
inline T
do_abs(const T& a)
{
    return abs(a);
}
963
964
965 template<typename VEC>
test_arithmetic()966 void test_arithmetic ()
967 {
968 typedef typename VEC::value_t ELEM;
969 test_heading ("arithmetic ", VEC::type_name());
970
971 VEC a = VEC::Iota (1, 3);
972 VEC b = VEC::Iota (1, 1);
973 VEC add(ELEM(0)), sub(ELEM(0)), mul(ELEM(0)), div(ELEM(0));
974 ELEM bsum(ELEM(0));
975 for (int i = 0; i < VEC::elements; ++i) {
976 add[i] = a[i] + b[i];
977 sub[i] = a[i] - b[i];
978 mul[i] = a[i] * b[i];
979 div[i] = a[i] / b[i];
980 bsum += b[i];
981 }
982 OIIO_CHECK_SIMD_EQUAL (a+b, add);
983 OIIO_CHECK_SIMD_EQUAL (a-b, sub);
984 OIIO_CHECK_SIMD_EQUAL (a*b, mul);
985 OIIO_CHECK_SIMD_EQUAL (a/b, div);
986 OIIO_CHECK_SIMD_EQUAL (a*ELEM(2), a*VEC(ELEM(2)));
987 OIIO_CHECK_SIMD_EQUAL (ELEM(2)*a, a*VEC(ELEM(2)));
988 { VEC r = a; r += b; OIIO_CHECK_SIMD_EQUAL (r, add); }
989 { VEC r = a; r -= b; OIIO_CHECK_SIMD_EQUAL (r, sub); }
990 { VEC r = a; r *= b; OIIO_CHECK_SIMD_EQUAL (r, mul); }
991 { VEC r = a; r /= b; OIIO_CHECK_SIMD_EQUAL (r, div); }
992 { VEC r = a; r *= ELEM(2); OIIO_CHECK_SIMD_EQUAL (r, a*ELEM(2)); }
993 // Test to make sure * works for negative 32 bit ints on all SIMD levels,
994 // because it's a different code path for sse2.
995 VEC negA = mkvec<VEC>(-1, 1, -2, 2);
996 VEC negB = mkvec<VEC>(2, 2, -2, -2);
997 OIIO_CHECK_SIMD_EQUAL(negA * negB, mkvec<VEC>(-2, 2, 4, -4));
998
999 OIIO_CHECK_EQUAL (reduce_add(b), bsum);
1000 OIIO_CHECK_SIMD_EQUAL (vreduce_add(b), VEC(bsum));
1001 OIIO_CHECK_EQUAL (reduce_add(VEC(1.0f)), SimdElements<VEC>::size);
1002
1003 benchmark2 ("operator+", do_add<VEC>, a, b);
1004 benchmark2 ("operator-", do_sub<VEC>, a, b);
1005 benchmark ("operator- (neg)", do_neg<VEC>, a);
1006 benchmark2 ("operator*", do_mul<VEC>, a, b);
1007 benchmark2 ("operator* (scalar)", do_mul<VEC,ELEM>, a, ELEM(2));
1008 benchmark2 ("operator/", do_div<VEC>, a, b);
1009 benchmark ("abs", do_abs<VEC>, a);
1010 benchmark ("reduce_add", [](const VEC& a){ return vreduce_add(a); }, a);
1011 if (is_same<VEC,vfloat3>::value) { // For vfloat3, compare to Imath
1012 Imath::V3f a(2.51f,1.0f,1.0f), b(3.1f,1.0f,1.0f);
1013 benchmark2 ("add Imath::V3f", do_add<Imath::V3f>, a, b, 3 /*work*/);
1014 benchmark2 ("add Imath::V3f with simd", add_vec_simd, a, b, 3 /*work*/);
1015 benchmark2 ("sub Imath::V3f", do_sub<Imath::V3f>, a, b, 3 /*work*/);
1016 benchmark2 ("mul Imath::V3f", do_mul<Imath::V3f>, a, b, 3 /*work*/);
1017 benchmark2 ("div Imath::V3f", do_div<Imath::V3f>, a, b, 3 /*work*/);
1018 }
1019 benchmark2 ("reference: add scalar", do_add<ELEM>, a[2], b[1]);
1020 benchmark2 ("reference: mul scalar", do_mul<ELEM>, a[2], b[1]);
1021 benchmark2 ("reference: div scalar", do_div<ELEM>, a[2], b[1]);
1022 }
1023
1024
1025
// Check madd/msub/nmadd/nmsub against the same expressions written with
// separate multiply and add/subtract operators, then benchmark each fused
// call next to its unfused counterpart.
template<typename VEC>
void
test_fused()
{
    test_heading("fused ", VEC::type_name());

    VEC a = VEC::Iota(10);
    VEC b = VEC::Iota(1);
    VEC c = VEC::Iota(0.5f);
    OIIO_CHECK_SIMD_EQUAL(madd(a, b, c), a * b + c);
    OIIO_CHECK_SIMD_EQUAL(msub(a, b, c), a * b - c);
    OIIO_CHECK_SIMD_EQUAL(nmadd(a, b, c), -(a * b) + c);
    OIIO_CHECK_SIMD_EQUAL(nmsub(a, b, c), -(a * b) - c);

    // The lambdas take the two multiplicands as arguments and pick up the
    // addend 'c' by reference capture.
    benchmark2("madd old *+",
               [&](const VEC& x, const VEC& y) { return x * y + c; }, a, b);
    benchmark2("madd fused",
               [&](const VEC& x, const VEC& y) { return madd(x, y, c); }, a, b);
    benchmark2("msub old *-",
               [&](const VEC& x, const VEC& y) { return x * y - c; }, a, b);
    benchmark2("msub fused",
               [&](const VEC& x, const VEC& y) { return msub(x, y, c); }, a, b);
    benchmark2("nmadd old (-*)+",
               [&](const VEC& x, const VEC& y) { return c - (x * y); }, a, b);
    benchmark2("nmadd fused",
               [&](const VEC& x, const VEC& y) { return nmadd(x, y, c); }, a, b);
    benchmark2("nmsub old -(*+)",
               [&](const VEC& x, const VEC& y) { return -(x * y) - c; }, a, b);
    benchmark2("nmsub fused",
               [&](const VEC& x, const VEC& y) { return nmsub(x, y, c); }, a, b);
}
1048
1049
1050
// Named wrappers around the bitwise operators, for use as benchmark
// callbacks.

// Bitwise AND.
template<typename T>
T
do_and(const T& a, const T& b)
{
    return a & b;
}

// Bitwise OR.
template<typename T>
T
do_or(const T& a, const T& b)
{
    return a | b;
}

// Bitwise exclusive OR.
template<typename T>
T
do_xor(const T& a, const T& b)
{
    return a ^ b;
}

// Bitwise complement (operator~).
template<typename T>
T
do_compl(const T& a)
{
    return ~a;
}

// Forwards to andnot(a, b).
template<typename T>
T
do_andnot(const T& a, const T& b)
{
    return andnot(a, b);
}
1056
1057
1058
1059 template<typename VEC>
1060 void
test_bitwise_int()1061 test_bitwise_int()
1062 {
1063 test_heading("bitwise ", VEC::type_name());
1064
1065 VEC a(0x12341234);
1066 VEC b(0x11111111);
1067 OIIO_CHECK_SIMD_EQUAL(a & b, VEC(0x10101010));
1068 OIIO_CHECK_SIMD_EQUAL(a | b, VEC(0x13351335));
1069 OIIO_CHECK_SIMD_EQUAL(a ^ b, VEC(0x03250325));
1070 OIIO_CHECK_SIMD_EQUAL(~(a), VEC(0xedcbedcb));
1071 OIIO_CHECK_SIMD_EQUAL(andnot(b, a), (~(b)) & a);
1072 OIIO_CHECK_SIMD_EQUAL(andnot(b, a), VEC(0x02240224));
1073
1074 VEC atest(15);
1075 atest[1] = 7;
1076 OIIO_CHECK_EQUAL(reduce_and(atest), 7);
1077
1078 VEC otest(0);
1079 otest[1] = 3;
1080 otest[2] = 4;
1081 OIIO_CHECK_EQUAL(reduce_or(otest), 7);
1082
1083 benchmark2("operator&", do_and<VEC>, a, b);
1084 benchmark2("operator|", do_or<VEC>, a, b);
1085 benchmark2("operator^", do_xor<VEC>, a, b);
1086 benchmark("operator!", do_compl<VEC>, a);
1087 benchmark2("andnot", do_andnot<VEC>, a, b);
1088 benchmark("reduce_and", [](const VEC& a) { return reduce_and(a); }, a);
1089 benchmark("reduce_or ", [](const VEC& a) { return reduce_or(a); }, a);
1090 }
1091
1092
1093
1094 template<typename VEC>
test_bitwise_bool()1095 void test_bitwise_bool ()
1096 {
1097 test_heading ("bitwise ", VEC::type_name());
1098
1099 bool A[] = { true, true, false, false, false, false, true, true,
1100 true, true, false, false, false, false, true, true };
1101 bool B[] = { true, false, true, false, true, false, true, false,
1102 true, false, true, false, true, false, true, false };
1103 bool AND[] = { true, false, false, false, false, false, true, false,
1104 true, false, false, false, false, false, true, false };
1105 bool OR[] = { true, true, true, false, true, false, true, true,
1106 true, true, true, false, true, false, true, true };
1107 bool XOR[] = { false, true, true, false, true, false, false, true,
1108 false, true, true, false, true, false, false, true };
1109 bool NOT[] = { false, false, true, true, true, true, false, false,
1110 false, false, true, true, true, true, false, false };
1111 VEC a(A), b(B), rand(AND), ror(OR), rxor(XOR), rnot(NOT);
1112 OIIO_CHECK_SIMD_EQUAL (a & b, rand);
1113 OIIO_CHECK_SIMD_EQUAL (a | b, ror);
1114 OIIO_CHECK_SIMD_EQUAL (a ^ b, rxor);
1115 OIIO_CHECK_SIMD_EQUAL (~a, rnot);
1116
1117 VEC onebit(false); onebit.setcomp(3,true);
1118 OIIO_CHECK_EQUAL (reduce_or(VEC::False()), false);
1119 OIIO_CHECK_EQUAL (reduce_or(onebit), true);
1120 OIIO_CHECK_EQUAL (reduce_and(VEC::True()), true);
1121 OIIO_CHECK_EQUAL (reduce_and(onebit), false);
1122 OIIO_CHECK_EQUAL (all(VEC::True()), true);
1123 OIIO_CHECK_EQUAL (any(VEC::True()), true);
1124 OIIO_CHECK_EQUAL (none(VEC::True()), false);
1125 OIIO_CHECK_EQUAL (all(VEC::False()), false);
1126 OIIO_CHECK_EQUAL (any(VEC::False()), false);
1127 OIIO_CHECK_EQUAL (none(VEC::False()), true);
1128
1129 benchmark2 ("operator&", do_and<VEC>, a, b);
1130 benchmark2 ("operator|", do_or<VEC>, a, b);
1131 benchmark2 ("operator^", do_xor<VEC>, a, b);
1132 benchmark ("operator!", do_compl<VEC>, a);
1133 benchmark ("reduce_and", [](const VEC& a){ return reduce_and(a); }, a);
1134 benchmark ("reduce_or ", [](const VEC& a){ return reduce_or(a); }, a);
1135 }
1136
1137
1138
// Named wrappers around the comparison operators, for use as benchmark
// callbacks. T is the operand type, B the type the comparison result is
// returned as.

// a < b
template<class T, class B>
B
do_lt(const T& a, const T& b)
{
    return a < b;
}

// a > b
template<class T, class B>
B
do_gt(const T& a, const T& b)
{
    return a > b;
}

// a <= b
template<class T, class B>
B
do_le(const T& a, const T& b)
{
    return a <= b;
}

// a >= b
template<class T, class B>
B
do_ge(const T& a, const T& b)
{
    return a >= b;
}

// a == b
template<class T, class B>
B
do_eq(const T& a, const T& b)
{
    return a == b;
}

// a != b
template<class T, class B>
B
do_ne(const T& a, const T& b)
{
    return a != b;
}
1145
1146
1147
1148 template<typename VEC>
1149 void
test_comparisons()1150 test_comparisons()
1151 {
1152 typedef typename VEC::value_t ELEM;
1153 typedef typename VEC::vbool_t bool_t;
1154 test_heading("comparisons ", VEC::type_name());
1155
1156 VEC a = VEC::Iota();
1157 bool lt2[] = { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1158 bool gt2[] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
1159 bool le2[] = { 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1160 bool ge2[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
1161 bool eq2[] = { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1162 bool ne2[] = { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
1163 OIIO_CHECK_SIMD_EQUAL((a < 2), bool_t(lt2));
1164 OIIO_CHECK_SIMD_EQUAL((a > 2), bool_t(gt2));
1165 OIIO_CHECK_SIMD_EQUAL((a <= 2), bool_t(le2));
1166 OIIO_CHECK_SIMD_EQUAL((a >= 2), bool_t(ge2));
1167 OIIO_CHECK_SIMD_EQUAL((a == 2), bool_t(eq2));
1168 OIIO_CHECK_SIMD_EQUAL((a != 2), bool_t(ne2));
1169 VEC b(ELEM(2));
1170 OIIO_CHECK_SIMD_EQUAL((a < b), bool_t(lt2));
1171 OIIO_CHECK_SIMD_EQUAL((a > b), bool_t(gt2));
1172 OIIO_CHECK_SIMD_EQUAL((a <= b), bool_t(le2));
1173 OIIO_CHECK_SIMD_EQUAL((a >= b), bool_t(ge2));
1174 OIIO_CHECK_SIMD_EQUAL((a == b), bool_t(eq2));
1175 OIIO_CHECK_SIMD_EQUAL((a != b), bool_t(ne2));
1176
1177 benchmark2("operator< ", do_lt<VEC, bool_t>, a, b);
1178 benchmark2("operator> ", do_gt<VEC, bool_t>, a, b);
1179 benchmark2("operator<=", do_le<VEC, bool_t>, a, b);
1180 benchmark2("operator>=", do_ge<VEC, bool_t>, a, b);
1181 benchmark2("operator==", do_eq<VEC, bool_t>, a, b);
1182 benchmark2("operator!=", do_ne<VEC, bool_t>, a, b);
1183 }
1184
1185
1186
1187 template<typename VEC>
1188 void
test_shuffle4()1189 test_shuffle4()
1190 {
1191 typedef typename VEC::value_t ELEM;
1192 test_heading("shuffle ", VEC::type_name());
1193
1194 VEC a(0, 1, 2, 3);
1195 OIIO_CHECK_SIMD_EQUAL((shuffle<3, 2, 1, 0>(a)), VEC(3, 2, 1, 0));
1196 OIIO_CHECK_SIMD_EQUAL((shuffle<0, 0, 2, 2>(a)), VEC(0, 0, 2, 2));
1197 OIIO_CHECK_SIMD_EQUAL((shuffle<1, 1, 3, 3>(a)), VEC(1, 1, 3, 3));
1198 OIIO_CHECK_SIMD_EQUAL((shuffle<0, 1, 0, 1>(a)), VEC(0, 1, 0, 1));
1199 OIIO_CHECK_SIMD_EQUAL((shuffle<2>(a)), VEC(ELEM(2)));
1200
1201 benchmark("shuffle<...> ",
1202 [&](const VEC& v) { return shuffle<3, 2, 1, 0>(v); }, a);
1203 benchmark("shuffle<0> ", [&](const VEC& v) { return shuffle<0>(v); }, a);
1204 benchmark("shuffle<1> ", [&](const VEC& v) { return shuffle<1>(v); }, a);
1205 benchmark("shuffle<2> ", [&](const VEC& v) { return shuffle<2>(v); }, a);
1206 benchmark("shuffle<3> ", [&](const VEC& v) { return shuffle<3>(v); }, a);
1207 }
1208
1209
1210
1211 template<typename VEC>
test_shuffle8()1212 void test_shuffle8 ()
1213 {
1214 typedef typename VEC::value_t ELEM;
1215 test_heading ("shuffle ", VEC::type_name());
1216 VEC a (0, 1, 2, 3, 4, 5, 6, 7);
1217 OIIO_CHECK_SIMD_EQUAL ((shuffle<3,2,1,0,3,2,1,0>(a)), VEC(3,2,1,0,3,2,1,0));
1218 OIIO_CHECK_SIMD_EQUAL ((shuffle<0,0,2,2,0,0,2,2>(a)), VEC(0,0,2,2,0,0,2,2));
1219 OIIO_CHECK_SIMD_EQUAL ((shuffle<1,1,3,3,1,1,3,3>(a)), VEC(1,1,3,3,1,1,3,3));
1220 OIIO_CHECK_SIMD_EQUAL ((shuffle<0,1,0,1,0,1,0,1>(a)), VEC(0,1,0,1,0,1,0,1));
1221 OIIO_CHECK_SIMD_EQUAL ((shuffle<2>(a)), VEC(ELEM(2)));
1222
1223 benchmark ("shuffle<...> ", [&](const VEC& v){ return shuffle<7,6,5,4,3,2,1,0>(v); }, a);
1224 benchmark ("shuffle<0> ", [&](const VEC& v){ return shuffle<0>(v); }, a);
1225 benchmark ("shuffle<1> ", [&](const VEC& v){ return shuffle<1>(v); }, a);
1226 benchmark ("shuffle<2> ", [&](const VEC& v){ return shuffle<2>(v); }, a);
1227 benchmark ("shuffle<3> ", [&](const VEC& v){ return shuffle<3>(v); }, a);
1228 benchmark ("shuffle<4> ", [&](const VEC& v){ return shuffle<4>(v); }, a);
1229 benchmark ("shuffle<5> ", [&](const VEC& v){ return shuffle<5>(v); }, a);
1230 benchmark ("shuffle<6> ", [&](const VEC& v){ return shuffle<6>(v); }, a);
1231 benchmark ("shuffle<7> ", [&](const VEC& v){ return shuffle<7>(v); }, a);
1232 }
1233
1234
1235
1236 template<typename VEC>
test_shuffle16()1237 void test_shuffle16 ()
1238 {
1239 test_heading ("shuffle ", VEC::type_name());
1240 VEC a (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1241
1242 // Shuffle groups of 4
1243 OIIO_CHECK_SIMD_EQUAL ((shuffle4<3,2,1,0>(a)),
1244 VEC(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3));
1245 OIIO_CHECK_SIMD_EQUAL ((shuffle4<3>(a)),
1246 VEC(12,13,14,15,12,13,14,15,12,13,14,15,12,13,14,15));
1247
1248 // Shuffle within groups of 4
1249 OIIO_CHECK_SIMD_EQUAL ((shuffle<3,2,1,0>(a)),
1250 VEC(3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12));
1251 OIIO_CHECK_SIMD_EQUAL ((shuffle<3>(a)),
1252 VEC(3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15));
1253
1254 benchmark ("shuffle4<> ", [&](const VEC& v){ return shuffle<3,2,1,0>(v); }, a);
1255 benchmark ("shuffle<> ", [&](const VEC& v){ return shuffle<3,2,1,0>(v); }, a);
1256 }
1257
1258
1259
// Check the swizzle helpers AxyBxy(), AxBxAyBy(), xyz0() and xyz1() on a
// 4-wide vector type.
template<typename VEC>
void
test_swizzle()
{
    test_heading("swizzle ", VEC::type_name());

    // Two inputs with easily distinguishable lanes: 'lo' counts up from 0
    // and 'hi' counts up from 10.
    VEC lo = VEC::Iota(0);
    VEC hi = VEC::Iota(10);

    // Combinations of lanes taken from both inputs.
    OIIO_CHECK_SIMD_EQUAL(AxyBxy(lo, hi), VEC(0, 1, 10, 11));
    OIIO_CHECK_SIMD_EQUAL(AxBxAyBy(lo, hi), VEC(0, 10, 1, 11));

    // First three lanes kept, last lane replaced by 0 or 1.
    OIIO_CHECK_SIMD_EQUAL(hi.xyz0(), VEC(10, 11, 12, 0));
    OIIO_CHECK_SIMD_EQUAL(hi.xyz1(), VEC(10, 11, 12, 1));
}
1273
1274
1275
1276 template<typename VEC>
test_blend()1277 void test_blend ()
1278 {
1279 test_heading ("blend ", VEC::type_name());
1280 typedef typename VEC::value_t ELEM;
1281 typedef typename VEC::vbool_t bool_t;
1282
1283 VEC a = VEC::Iota (1);
1284 VEC b = VEC::Iota (10);
1285 bool_t f(false), t(true);
1286 bool tf_values[] = { true, false, true, false, true, false, true, false,
1287 true, false, true, false, true, false, true, false };
1288 bool_t tf ((bool *)tf_values);
1289
1290 OIIO_CHECK_SIMD_EQUAL (blend (a, b, f), a);
1291 OIIO_CHECK_SIMD_EQUAL (blend (a, b, t), b);
1292
1293 ELEM r1[] = { 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16 };
1294 OIIO_CHECK_SIMD_EQUAL (blend (a, b, tf), VEC(r1));
1295
1296 OIIO_CHECK_SIMD_EQUAL (blend0 (a, f), VEC::Zero());
1297 OIIO_CHECK_SIMD_EQUAL (blend0 (a, t), a);
1298 ELEM r2[] = { 1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0 };
1299 OIIO_CHECK_SIMD_EQUAL (blend0 (a, tf), VEC(r2));
1300
1301 OIIO_CHECK_SIMD_EQUAL (blend0not (a, f), a);
1302 OIIO_CHECK_SIMD_EQUAL (blend0not (a, t), VEC::Zero());
1303 ELEM r3[] = { 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16 };
1304 OIIO_CHECK_SIMD_EQUAL (blend0not (a, tf), VEC(r3));
1305
1306 benchmark2 ("blend", [&](const VEC& a, const VEC& b){ return blend(a,b,tf); }, a, b);
1307 benchmark2 ("blend0", [](const VEC& a, const bool_t& b){ return blend0(a,b); }, a, tf);
1308 benchmark2 ("blend0not", [](const VEC& a, const bool_t& b){ return blend0not(a,b); }, a, tf);
1309 }
1310
1311
1312
// Check AxBxCxDx() and the in-place 4x4 transpose() on four 4-wide
// vectors, printing the vectors before and after the transpose.
template<typename VEC>
void
test_transpose4()
{
    test_heading("transpose ", VEC::type_name());

    // Four rows holding 0..15 in row-major order.
    VEC a(0, 1, 2, 3);
    VEC b(4, 5, 6, 7);
    VEC c(8, 9, 10, 11);
    VEC d(12, 13, 14, 15);

    // Gathering the first lane of each row gives the first column.
    OIIO_CHECK_SIMD_EQUAL(AxBxCxDx(a, b, c, d), VEC(0, 4, 8, 12));

    // Prints the current contents of the four rows, one per line.
    auto print_rows = [&]() {
        std::cout << "\t" << a << "\n";
        std::cout << "\t" << b << "\n";
        std::cout << "\t" << c << "\n";
        std::cout << "\t" << d << "\n";
    };

    std::cout << " before transpose:\n";
    print_rows();
    transpose(a, b, c, d);
    std::cout << " after transpose:\n";
    print_rows();

    // After the transpose each vector holds what used to be a column.
    OIIO_CHECK_SIMD_EQUAL(a, VEC(0, 4, 8, 12));
    OIIO_CHECK_SIMD_EQUAL(b, VEC(1, 5, 9, 13));
    OIIO_CHECK_SIMD_EQUAL(c, VEC(2, 6, 10, 14));
    OIIO_CHECK_SIMD_EQUAL(d, VEC(3, 7, 11, 15));
}
1342
1343
1344
// Named wrappers around the shift operations, for use as benchmark
// callbacks. In each, 'b' is the shift count.

// a << b
template<typename T>
inline T
do_shl(const T& a, int b)
{
    return a << b;
}

// a >> b
template<typename T>
inline T
do_shr(const T& a, int b)
{
    return a >> b;
}

// Forwards to srl(a, b).
template<typename T>
inline T
do_srl(const T& a, int b)
{
    return srl(a, b);
}

// Forwards to rotl(a, b).
template<typename T>
inline T
do_rotl(const T& a, int b)
{
    return rotl(a, b);
}
1349
1350
1351 template<typename VEC>
1352 void
test_shift()1353 test_shift()
1354 {
1355 test_heading("shift ", VEC::type_name());
1356
1357 // Basics of << and >>
1358 VEC i = VEC::Iota(10, 10); // 10, 20, 30 ...
1359 OIIO_CHECK_SIMD_EQUAL(i << 2, VEC::Iota(40, 40));
1360 OIIO_CHECK_SIMD_EQUAL(i >> 1, VEC::Iota(5, 5));
1361
1362 // Tricky cases with high bits, and the difference between >> and srl
1363 int vals[4] = { 1 << 31, -1, 0xffff, 3 };
1364 for (auto hard : vals) {
1365 VEC vhard(hard);
1366 OIIO_CHECK_SIMD_EQUAL (vhard >> 1, VEC(hard>>1));
1367 OIIO_CHECK_SIMD_EQUAL (srl(vhard,1), VEC(unsigned(hard)>>1));
1368 Strutil::printf(" [%x] >> 1 == [%x]\n", vhard, vhard>>1);
1369 Strutil::printf(" [%x] srl 1 == [%x]\n", vhard, srl(vhard,1));
1370 OIIO_CHECK_SIMD_EQUAL (srl(vhard,4), VEC(unsigned(hard)>>4));
1371 Strutil::printf(" [%x] >> 4 == [%x]\n", vhard, vhard>>4);
1372 Strutil::printf(" [%x] srl 4 == [%x]\n", vhard, srl(vhard,4));
1373 }
1374
1375 // Test <<= and >>=
1376 i = VEC::Iota (10, 10); i <<= 2;
1377 OIIO_CHECK_SIMD_EQUAL (i, VEC::Iota(40, 40));
1378 i = VEC::Iota (10, 10); i >>= 1;
1379 OIIO_CHECK_SIMD_EQUAL (i, VEC::Iota(5, 5));
1380
1381 // Test rotl
1382 {
1383 vint4 v (0x12345678, 0xabcdef01, 0x98765432, 0x31415926);
1384 vint4 r (0x23456781, 0xbcdef01a, 0x87654329, 0x14159263);
1385 OIIO_CHECK_SIMD_EQUAL (rotl(v,4), r);
1386 }
1387
1388 // Benchmark
1389 benchmark2 ("operator<<", do_shl<VEC>, i, 2);
1390 benchmark2 ("operator>>", do_shr<VEC>, i, 2);
1391 benchmark2 ("srl ", do_srl<VEC>, i, 2);
1392 benchmark2 ("rotl ", do_rotl<VEC>, i, 2);
1393 }
1394
1395
1396
1397 void
test_vectorops_vfloat4()1398 test_vectorops_vfloat4()
1399 {
1400 typedef vfloat4 VEC;
1401 typedef VEC::value_t ELEM;
1402 test_heading("vectorops ", VEC::type_name());
1403
1404 VEC a = mkvec<VEC> (10, 11, 12, 13);
1405 VEC b = mkvec<VEC> (1, 2, 3, 4);
1406 OIIO_CHECK_EQUAL (dot(a,b), ELEM(10+22+36+52));
1407 OIIO_CHECK_EQUAL (dot3(a,b), ELEM(10+22+36));
1408 OIIO_CHECK_SIMD_EQUAL (vdot(a,b), VEC(10+22+36+52));
1409 OIIO_CHECK_SIMD_EQUAL (vdot3(a,b), VEC(10+22+36));
1410 OIIO_CHECK_SIMD_EQUAL (hdiv(vfloat4(1.0f,2.0f,3.0f,2.0f)), vfloat3(0.5f,1.0f,1.5f));
1411
1412 benchmark2 ("vdot", [](const VEC& a, const VEC& b){ return vdot(a,b); }, a, b);
1413 benchmark2 ("dot", [](const VEC& a, const VEC& b){ return dot(a,b); }, a, b);
1414 benchmark2 ("vdot3", [](const VEC& a, const VEC& b){ return vdot3(a,b); }, a, b);
1415 benchmark2 ("dot3", [](const VEC& a, const VEC& b){ return dot3(a,b); }, a, b);
1416 }
1417
1418
1419
test_vectorops_vfloat3()1420 void test_vectorops_vfloat3 ()
1421 {
1422 typedef vfloat3 VEC;
1423 typedef VEC::value_t ELEM;
1424 test_heading ("vectorops ", VEC::type_name());
1425
1426 VEC a = mkvec<VEC> (10, 11, 12);
1427 VEC b = mkvec<VEC> (1, 2, 3);
1428 OIIO_CHECK_EQUAL (dot(a,b), ELEM(10+22+36));
1429 OIIO_CHECK_EQUAL (dot3(a,b), ELEM(10+22+36));
1430 OIIO_CHECK_SIMD_EQUAL (vdot(a,b), VEC(10+22+36));
1431 OIIO_CHECK_SIMD_EQUAL (vdot3(a,b), VEC(10+22+36));
1432 OIIO_CHECK_SIMD_EQUAL (vfloat3(1.0f,2.0f,3.0f).normalized(),
1433 vfloat3(norm_imath(Imath::V3f(1.0f,2.0f,3.0f))));
1434 OIIO_CHECK_SIMD_EQUAL_THRESH (vfloat3(1.0f,2.0f,3.0f).normalized_fast(),
1435 vfloat3(norm_imath(Imath::V3f(1.0f,2.0f,3.0f))), 0.0005);
1436
1437 benchmark2 ("vdot", [](const VEC& a, const VEC& b){ return vdot(a,b); }, a, b);
1438 benchmark2 ("dot", [](const VEC& a, const VEC& b){ return dot(a,b); }, a, b);
1439 benchmark ("dot vfloat3", dot_simd, vfloat3(2.0f,1.0f,0.0f), 1);
1440 // benchmark2 ("dot Imath::V3f", [](Imath::V3f& a, Imath::V3f& b){ return a.dot(b); }, a.V3f(), b.V3f());
1441 benchmark ("dot Imath::V3f", dot_imath, Imath::V3f(2.0f,1.0f,0.0f), 1);
1442 benchmark ("dot Imath::V3f with simd", dot_imath_simd, Imath::V3f(2.0f,1.0f,0.0f), 1);
1443 benchmark ("normalize Imath", norm_imath, Imath::V3f(1.0f,4.0f,9.0f));
1444 benchmark ("normalize Imath with simd", norm_imath_simd, Imath::V3f(1.0f,4.0f,9.0f));
1445 benchmark ("normalize Imath with simd fast", norm_imath_simd_fast, Imath::V3f(1.0f,4.0f,9.0f));
1446 benchmark ("normalize simd", norm_simd, vfloat3(1.0f,4.0f,9.0f));
1447 benchmark ("normalize simd fast", norm_simd_fast, vfloat3(1.0f,4.0f,9.0f));
1448 }
1449
1450
1451
test_constants()1452 void test_constants ()
1453 {
1454 test_heading ("constants");
1455
1456 OIIO_CHECK_SIMD_EQUAL (vbool4::False(), vbool4(false));
1457 OIIO_CHECK_SIMD_EQUAL (vbool4::True(), vbool4(true));
1458
1459 OIIO_CHECK_SIMD_EQUAL (vbool8::False(), vbool8(false));
1460 OIIO_CHECK_SIMD_EQUAL (vbool8::True(), vbool8(true));
1461
1462 OIIO_CHECK_SIMD_EQUAL (vbool16::False(), vbool16(false));
1463 OIIO_CHECK_SIMD_EQUAL (vbool16::True(), vbool16(true));
1464 OIIO_CHECK_SIMD_EQUAL (vbool16::False(), vbool16(false));
1465 OIIO_CHECK_SIMD_EQUAL (vbool16::True(), vbool16(true));
1466
1467 OIIO_CHECK_SIMD_EQUAL (vint4::Zero(), vint4(0));
1468 OIIO_CHECK_SIMD_EQUAL (vint4::One(), vint4(1));
1469 OIIO_CHECK_SIMD_EQUAL (vint4::NegOne(), vint4(-1));
1470 OIIO_CHECK_SIMD_EQUAL (vint4::Iota(), vint4(0,1,2,3));
1471 OIIO_CHECK_SIMD_EQUAL (vint4::Iota(3), vint4(3,4,5,6));
1472 OIIO_CHECK_SIMD_EQUAL (vint4::Iota(3,2), vint4(3,5,7,9));
1473 OIIO_CHECK_SIMD_EQUAL (vint4::Giota(), vint4(1,2,4,8));
1474
1475 OIIO_CHECK_SIMD_EQUAL (vint8::Zero(), vint8(0));
1476 OIIO_CHECK_SIMD_EQUAL (vint8::One(), vint8(1));
1477 OIIO_CHECK_SIMD_EQUAL (vint8::NegOne(), vint8(-1));
1478 OIIO_CHECK_SIMD_EQUAL (vint8::Iota(), vint8(0,1,2,3, 4,5,6,7));
1479 OIIO_CHECK_SIMD_EQUAL (vint8::Iota(3), vint8(3,4,5,6, 7,8,9,10));
1480 OIIO_CHECK_SIMD_EQUAL (vint8::Iota(3,2), vint8(3,5,7,9, 11,13,15,17));
1481 OIIO_CHECK_SIMD_EQUAL (vint8::Giota(), vint8(1,2,4,8, 16,32,64,128));
1482
1483 OIIO_CHECK_SIMD_EQUAL (vint16::Zero(), vint16(0));
1484 OIIO_CHECK_SIMD_EQUAL (vint16::One(), vint16(1));
1485 OIIO_CHECK_SIMD_EQUAL (vint16::NegOne(), vint16(-1));
1486 OIIO_CHECK_SIMD_EQUAL (vint16::Iota(), vint16(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15));
1487 OIIO_CHECK_SIMD_EQUAL (vint16::Iota(3), vint16(3,4,5,6, 7,8,9,10, 11,12,13,14, 15,16,17,18));
1488 OIIO_CHECK_SIMD_EQUAL (vint16::Iota(3,2), vint16(3,5,7,9, 11,13,15,17, 19,21,23,25, 27,29,31,33));
1489 OIIO_CHECK_SIMD_EQUAL (vint16::Giota(), vint16(1,2,4,8, 16,32,64,128, 256,512,1024,2048, 4096,8192,16384,32768));
1490
1491 OIIO_CHECK_SIMD_EQUAL (vfloat4::Zero(), vfloat4(0.0f));
1492 OIIO_CHECK_SIMD_EQUAL (vfloat4::One(), vfloat4(1.0f));
1493 OIIO_CHECK_SIMD_EQUAL (vfloat4::Iota(), vfloat4(0,1,2,3));
1494 OIIO_CHECK_SIMD_EQUAL (vfloat4::Iota(3.0f), vfloat4(3,4,5,6));
1495 OIIO_CHECK_SIMD_EQUAL (vfloat4::Iota(3.0f,2.0f), vfloat4(3,5,7,9));
1496
1497 OIIO_CHECK_SIMD_EQUAL (vfloat3::Zero(), vfloat3(0.0f));
1498 OIIO_CHECK_SIMD_EQUAL (vfloat3::One(), vfloat3(1.0f));
1499 OIIO_CHECK_SIMD_EQUAL (vfloat3::Iota(), vfloat3(0,1,2));
1500 OIIO_CHECK_SIMD_EQUAL (vfloat3::Iota(3.0f), vfloat3(3,4,5));
1501 OIIO_CHECK_SIMD_EQUAL (vfloat3::Iota(3.0f,2.0f), vfloat3(3,5,7));
1502
1503 OIIO_CHECK_SIMD_EQUAL (vfloat8::Zero(), vfloat8(0.0f));
1504 OIIO_CHECK_SIMD_EQUAL (vfloat8::One(), vfloat8(1.0f));
1505 OIIO_CHECK_SIMD_EQUAL (vfloat8::Iota(), vfloat8(0,1,2,3,4,5,6,7));
1506 OIIO_CHECK_SIMD_EQUAL (vfloat8::Iota(3.0f), vfloat8(3,4,5,6,7,8,9,10));
1507 OIIO_CHECK_SIMD_EQUAL (vfloat8::Iota(3.0f,2.0f), vfloat8(3,5,7,9,11,13,15,17));
1508
1509 OIIO_CHECK_SIMD_EQUAL (vfloat16::Zero(), vfloat16(0.0f));
1510 OIIO_CHECK_SIMD_EQUAL (vfloat16::One(), vfloat16(1.0f));
1511 OIIO_CHECK_SIMD_EQUAL (vfloat16::Iota(), vfloat16(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15));
1512 OIIO_CHECK_SIMD_EQUAL (vfloat16::Iota(3.0f), vfloat16(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18));
1513 OIIO_CHECK_SIMD_EQUAL (vfloat16::Iota(3.0f,2.0f), vfloat16(3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33));
1514
1515 benchmark ("vfloat4 = float(const)", [](float f){ return vfloat4(f); }, 1.0f);
1516 benchmark ("vfloat4 = Zero()", [](int){ return vfloat4::Zero(); }, 0);
1517 benchmark ("vfloat4 = One()", [](int){ return vfloat4::One(); }, 0);
1518 benchmark ("vfloat4 = Iota()", [](int){ return vfloat4::Iota(); }, 0);
1519
1520 benchmark ("vfloat8 = float(const)", [](float f){ return vfloat8(f); }, 1.0f);
1521 benchmark ("vfloat8 = Zero()", [](int){ return vfloat8::Zero(); }, 0);
1522 benchmark ("vfloat8 = One()", [](int){ return vfloat8::One(); }, 0);
1523 benchmark ("vfloat8 = Iota()", [](int){ return vfloat8::Iota(); }, 0);
1524
1525 benchmark ("vfloat16 = float(const)", [](float f){ return vfloat16(f); }, 1.0f);
1526 benchmark ("vfloat16 = Zero()", [](int){ return vfloat16::Zero(); }, 0);
1527 benchmark ("vfloat16 = One()", [](int){ return vfloat16::One(); }, 0);
1528 benchmark ("vfloat16 = Iota()", [](int){ return vfloat16::Iota(); }, 0);
1529 }
1530
1531
1532
1533 // Miscellaneous one-off stuff not caught by other tests
1534 void
test_special()1535 test_special()
1536 {
1537 test_heading("special");
1538 {
1539 // Make sure a vfloat4 constructed from saturated unsigned short,
1540 // short, unsigned char, or char values, then divided by the float
1541 // max, exactly equals 1.0.
1542 short s32767[] = {32767, 32767, 32767, 32767};
1543 unsigned short us65535[] = {65535, 65535, 65535, 65535};
1544 char c127[] = {127, 127, 127, 127};
1545 unsigned char uc255[] = {255, 255, 255, 255};
1546 OIIO_CHECK_SIMD_EQUAL (vfloat4(us65535)/vfloat4(65535.0), vfloat4(1.0f));
1547 OIIO_CHECK_SIMD_EQUAL (vfloat4(us65535)*vfloat4(1.0f/65535.0), vfloat4(1.0f));
1548 OIIO_CHECK_SIMD_EQUAL (vfloat4(s32767)/vfloat4(32767.0), vfloat4(1.0f));
1549 OIIO_CHECK_SIMD_EQUAL (vfloat4(s32767)*vfloat4(1.0f/32767.0), vfloat4(1.0f));
1550 OIIO_CHECK_SIMD_EQUAL (vfloat4(uc255)/vfloat4(255.0), vfloat4(1.0f));
1551 OIIO_CHECK_SIMD_EQUAL (vfloat4(uc255)*vfloat4(1.0f/255.0), vfloat4(1.0f));
1552 OIIO_CHECK_SIMD_EQUAL (vfloat4(c127)/vfloat4(127.0), vfloat4(1.0f));
1553 OIIO_CHECK_SIMD_EQUAL (vfloat4(c127)*vfloat4(1.0f/127.0), vfloat4(1.0f));
1554 }
1555 }
1556
1557
1558
1559 // Wrappers to resolve the return type ambiguity
fast_exp_float(float x)1560 inline float fast_exp_float (float x) { return fast_exp(x); }
fast_exp_vfloat4(const vfloat4 & x)1561 inline vfloat4 fast_exp_vfloat4 (const vfloat4& x) { return fast_exp(x); }
fast_log_float(float x)1562 inline float fast_log_float (float x) { return fast_log(x); }
1563 //inline vfloat4 fast_log_float (const vfloat4& x) { return fast_log(x); }
// Scalar reciprocal square root: 1 / sqrt(f).
inline float
rsqrtf(float f)
{
    const float root = sqrtf(f);
    return 1.0f / root;
}

// Scalar reciprocal: 1 / f.
inline float
rcp(float f)
{
    const float one = 1.0f;
    return one / f;
}
1566
1567
1568
1569 template<typename VEC>
test_mathfuncs()1570 void test_mathfuncs ()
1571 {
1572 typedef typename VEC::vint_t vint_t;
1573 test_heading ("mathfuncs", VEC::type_name());
1574
1575 VEC F = mkvec<VEC> (-1.5f, 0.0f, 1.9f, 4.1f);
1576 OIIO_CHECK_SIMD_EQUAL (abs(F), mkvec<VEC>(std::abs(F[0]), std::abs(F[1]), std::abs(F[2]), std::abs(F[3])));
1577 // OIIO_CHECK_SIMD_EQUAL (sign(F), mkvec<VEC>(std::sign(F[0]), std::sign(F[1]), std::sign(F[2]), std::sign(F[3])));
1578 OIIO_CHECK_SIMD_EQUAL (ceil(F), mkvec<VEC>(std::ceil(F[0]), std::ceil(F[1]), std::ceil(F[2]), std::ceil(F[3])));
1579 OIIO_CHECK_SIMD_EQUAL (floor(F), mkvec<VEC>(std::floor(F[0]), std::floor(F[1]), std::floor(F[2]), std::floor(F[3])));
1580 OIIO_CHECK_SIMD_EQUAL (round(F), mkvec<VEC>(std::round(F[0]), std::round(F[1]), std::round(F[2]), std::round(F[3])));
1581 benchmark ("simd abs", [](const VEC& v){ return abs(v); }, 1.1f);
1582 benchmark ("simd sign", [](const VEC& v){ return sign(v); }, 1.1f);
1583 benchmark ("simd ceil", [](const VEC& v){ return ceil(v); }, 1.1f);
1584 benchmark ("simd floor", [](const VEC& v){ return floor(v); }, 1.1f);
1585 benchmark ("simd round", [](const VEC& v){ return round(v); }, 1.1f);
1586
1587 VEC A = mkvec<VEC> (-1.0f, 0.0f, 1.0f, 4.5f);
1588 VEC expA = mkvec<VEC> (0.367879441171442f, 1.0f, 2.718281828459045f, 90.0171313005218f);
1589 OIIO_CHECK_SIMD_EQUAL (exp(A), expA);
1590 OIIO_CHECK_SIMD_EQUAL_THRESH (log(expA), A, 1e-6f);
1591 OIIO_CHECK_SIMD_EQUAL (fast_exp(A),
1592 mkvec<VEC>(fast_exp(A[0]), fast_exp(A[1]), fast_exp(A[2]), fast_exp(A[3])));
1593 OIIO_CHECK_SIMD_EQUAL (fast_log(expA),
1594 mkvec<VEC>(fast_log(expA[0]), fast_log(expA[1]), fast_log(expA[2]), fast_log(expA[3])));
1595 OIIO_CHECK_SIMD_EQUAL_THRESH (fast_pow_pos(VEC(2.0f), A),
1596 mkvec<VEC>(0.5f, 1.0f, 2.0f, 22.62741699796952f), 0.0001f);
1597
1598 OIIO_CHECK_SIMD_EQUAL (safe_div(mkvec<VEC>(1.0f,2.0f,3.0f,4.0f), mkvec<VEC>(2.0f,0.0f,2.0f,0.0f)),
1599 mkvec<VEC>(0.5f,0.0f,1.5f,0.0f));
1600 OIIO_CHECK_SIMD_EQUAL (sqrt(mkvec<VEC>(1.0f,4.0f,9.0f,16.0f)), mkvec<VEC>(1.0f,2.0f,3.0f,4.0f));
1601 OIIO_CHECK_SIMD_EQUAL (rsqrt(mkvec<VEC>(1.0f,4.0f,9.0f,16.0f)), VEC(1.0f)/mkvec<VEC>(1.0f,2.0f,3.0f,4.0f));
1602 OIIO_CHECK_SIMD_EQUAL_THRESH (rsqrt_fast(mkvec<VEC>(1.0f,4.0f,9.0f,16.0f)),
1603 VEC(1.0f)/mkvec<VEC>(1.0f,2.0f,3.0f,4.0f), 0.0005f);
1604 OIIO_CHECK_SIMD_EQUAL_THRESH (rcp_fast(VEC::Iota(1.0f)),
1605 VEC(1.0f)/VEC::Iota(1.0f), 0.0005f);
1606
1607 benchmark2 ("simd operator/", do_div<VEC>, A, A);
1608 benchmark2 ("simd safe_div", do_safe_div<VEC>, A, A);
1609 benchmark ("simd rcp_fast", [](const VEC& v){ return rcp_fast(v); }, mkvec<VEC>(1.0f,4.0f,9.0f,16.0f));
1610
1611 OIIO_CHECK_SIMD_EQUAL (ifloor(mkvec<VEC>(0.0f, 0.999f, 1.0f, 1.001f)),
1612 mkvec<vint_t>(0, 0, 1, 1));
1613 OIIO_CHECK_SIMD_EQUAL (ifloor(mkvec<VEC>(0.0f, -0.999f, -1.0f, -1.001f)),
1614 mkvec<vint_t>(0, -1, -1, -2));
1615 benchmark ("float ifloor", [](float&v){ return ifloor(v); }, 1.1f);
1616 benchmark ("simd ifloor", [](const VEC&v){ return simd::ifloor(v); }, VEC(1.1f));
1617
1618 int iscalar;
1619 vint_t ival;
1620 VEC fval = -1.1;
1621 OIIO_CHECK_EQUAL_APPROX (floorfrac(VEC(0.0f), &ival), 0.0f); OIIO_CHECK_SIMD_EQUAL (ival, 0);
1622 OIIO_CHECK_EQUAL_APPROX (floorfrac(VEC(-0.999f), &ival), 0.001f); OIIO_CHECK_SIMD_EQUAL (ival, -1);
1623 OIIO_CHECK_EQUAL_APPROX (floorfrac(VEC(-1.0f), &ival), 0.0f); OIIO_CHECK_SIMD_EQUAL (ival, -1);
1624 OIIO_CHECK_EQUAL_APPROX (floorfrac(VEC(-1.001f), &ival), 0.999f); OIIO_CHECK_SIMD_EQUAL (ival, -2);
1625 OIIO_CHECK_EQUAL_APPROX (floorfrac(VEC(0.999f), &ival), 0.999f); OIIO_CHECK_SIMD_EQUAL (ival, 0);
1626 OIIO_CHECK_EQUAL_APPROX (floorfrac(VEC(1.0f), &ival), 0.0f); OIIO_CHECK_SIMD_EQUAL (ival, 1);
1627 OIIO_CHECK_EQUAL_APPROX (floorfrac(VEC(1.001f), &ival), 0.001f); OIIO_CHECK_SIMD_EQUAL (ival, 1);
1628 benchmark ("float floorfrac", [&](float x){ return DoNotOptimize(floorfrac(x,&iscalar)); }, 1.1f);
1629 benchmark ("simd floorfrac", [&](const VEC& x){ return DoNotOptimize(floorfrac(x,&ival)); }, fval);
1630
1631 benchmark ("float expf", expf, 0.67f);
1632 benchmark ("float fast_exp", fast_exp_float, 0.67f);
1633 benchmark ("simd exp", [](const VEC& v){ return simd::exp(v); }, VEC(0.67f));
1634 benchmark ("simd fast_exp", [](const VEC& v){ return fast_exp(v); }, VEC(0.67f));
1635
1636 benchmark ("float logf", logf, 0.67f);
1637 benchmark ("fast_log", fast_log_float, 0.67f);
1638 benchmark ("simd log", [](const VEC& v){ return simd::log(v); }, VEC(0.67f));
1639 benchmark ("simd fast_log", fast_log<VEC>, VEC(0.67f));
1640 benchmark2 ("float powf", powf, 0.67f, 0.67f);
1641 benchmark2 ("simd fast_pow_pos", [](const VEC& x,const VEC& y){ return fast_pow_pos(x,y); }, VEC(0.67f), VEC(0.67f));
1642 benchmark ("float sqrt", sqrtf, 4.0f);
1643 benchmark ("simd sqrt", [](const VEC& v){ return sqrt(v); }, mkvec<VEC>(1.0f,4.0f,9.0f,16.0f));
1644 benchmark ("float rsqrt", rsqrtf, 4.0f);
1645 benchmark ("simd rsqrt", [](const VEC& v){ return rsqrt(v); }, mkvec<VEC>(1.0f,4.0f,9.0f,16.0f));
1646 benchmark ("simd rsqrt_fast", [](const VEC& v){ return rsqrt_fast(v); }, mkvec<VEC>(1.0f,4.0f,9.0f,16.0f));
1647 }
1648
1649
1650
test_metaprogramming()1651 void test_metaprogramming ()
1652 {
1653 test_heading ("metaprogramming");
1654 OIIO_CHECK_EQUAL (SimdSize<vfloat4>::size, 4);
1655 OIIO_CHECK_EQUAL (SimdSize<vfloat3>::size, 4);
1656 OIIO_CHECK_EQUAL (SimdSize<vint4>::size, 4);
1657 OIIO_CHECK_EQUAL (SimdSize<vbool4>::size, 4);
1658 OIIO_CHECK_EQUAL (SimdSize<vfloat8>::size, 8);
1659 OIIO_CHECK_EQUAL (SimdSize<vint8>::size, 8);
1660 OIIO_CHECK_EQUAL (SimdSize<vbool8>::size, 8);
1661 OIIO_CHECK_EQUAL (SimdSize<vfloat16>::size, 16);
1662 OIIO_CHECK_EQUAL (SimdSize<vint16>::size, 16);
1663 OIIO_CHECK_EQUAL (SimdSize<vbool16>::size, 16);
1664 OIIO_CHECK_EQUAL (SimdSize<float>::size, 1);
1665 OIIO_CHECK_EQUAL (SimdSize<int>::size, 1);
1666 OIIO_CHECK_EQUAL (SimdSize<bool>::size, 1);
1667
1668 OIIO_CHECK_EQUAL (SimdElements<vfloat4>::size, 4);
1669 OIIO_CHECK_EQUAL (SimdElements<vfloat3>::size, 3);
1670 OIIO_CHECK_EQUAL (SimdElements<vint4>::size, 4);
1671 OIIO_CHECK_EQUAL (SimdElements<vbool4>::size, 4);
1672 OIIO_CHECK_EQUAL (SimdElements<vfloat8>::size, 8);
1673 OIIO_CHECK_EQUAL (SimdElements<vint8>::size, 8);
1674 OIIO_CHECK_EQUAL (SimdElements<vbool8>::size, 8);
1675 OIIO_CHECK_EQUAL (SimdElements<vfloat16>::size, 16);
1676 OIIO_CHECK_EQUAL (SimdElements<vint16>::size, 16);
1677 OIIO_CHECK_EQUAL (SimdElements<vbool16>::size, 16);
1678 OIIO_CHECK_EQUAL (SimdElements<float>::size, 1);
1679 OIIO_CHECK_EQUAL (SimdElements<int>::size, 1);
1680 OIIO_CHECK_EQUAL (SimdElements<bool>::size, 1);
1681
1682 OIIO_CHECK_EQUAL (vfloat4::elements, 4);
1683 OIIO_CHECK_EQUAL (vfloat3::elements, 3);
1684 OIIO_CHECK_EQUAL (vint4::elements, 4);
1685 OIIO_CHECK_EQUAL (vbool4::elements, 4);
1686 // OIIO_CHECK_EQUAL (vfloat8::elements, 8);
1687 OIIO_CHECK_EQUAL (vint8::elements, 8);
1688 OIIO_CHECK_EQUAL (vbool8::elements, 8);
1689 OIIO_CHECK_EQUAL (vfloat16::elements, 16);
1690 OIIO_CHECK_EQUAL (vint16::elements, 16);
1691 OIIO_CHECK_EQUAL (vbool16::elements, 16);
1692
1693 // Make sure that VTYPE::value_t returns the right element type
1694 OIIO_CHECK_ASSERT((std::is_same<vfloat4::value_t, float>::value));
1695 OIIO_CHECK_ASSERT((std::is_same<vfloat3::value_t, float>::value));
1696 OIIO_CHECK_ASSERT((std::is_same<vfloat8::value_t, float>::value));
1697 OIIO_CHECK_ASSERT((std::is_same<vfloat16::value_t, float>::value));
1698 OIIO_CHECK_ASSERT((std::is_same<vint4::value_t, int>::value));
1699 OIIO_CHECK_ASSERT((std::is_same<vint8::value_t, int>::value));
1700 OIIO_CHECK_ASSERT((std::is_same<vint16::value_t, int>::value));
1701 OIIO_CHECK_ASSERT((std::is_same<vbool4::value_t, bool>::value));
1702 OIIO_CHECK_ASSERT((std::is_same<vbool8::value_t, bool>::value));
1703 OIIO_CHECK_ASSERT((std::is_same<vbool16::value_t, bool>::value));
1704
1705 // Make sure that VTYPE::vfloat_t returns the same-sized float type
1706 OIIO_CHECK_ASSERT((std::is_same<vfloat4::vfloat_t, vfloat4>::value));
1707 OIIO_CHECK_ASSERT((std::is_same<vfloat8::vfloat_t, vfloat8>::value));
1708 OIIO_CHECK_ASSERT((std::is_same<vfloat16::vfloat_t, vfloat16>::value));
1709 OIIO_CHECK_ASSERT((std::is_same<vint4::vfloat_t, vfloat4>::value));
1710 OIIO_CHECK_ASSERT((std::is_same<vint8::vfloat_t, vfloat8>::value));
1711 OIIO_CHECK_ASSERT((std::is_same<vint16::vfloat_t, vfloat16>::value));
1712
1713 // Make sure that VTYPE::vint_t returns the same-sized int type
1714 OIIO_CHECK_ASSERT((std::is_same<vfloat4::vint_t, vint4>::value));
1715 OIIO_CHECK_ASSERT((std::is_same<vfloat8::vint_t, vint8>::value));
1716 OIIO_CHECK_ASSERT((std::is_same<vfloat16::vint_t, vint16>::value));
1717 OIIO_CHECK_ASSERT((std::is_same<vint4::vint_t, vint4>::value));
1718 OIIO_CHECK_ASSERT((std::is_same<vint8::vint_t, vint8>::value));
1719 OIIO_CHECK_ASSERT((std::is_same<vint16::vint_t, vint16>::value));
1720
1721 // Make sure that VTYPE::vbool_t returns the same-sized bool type
1722 OIIO_CHECK_ASSERT((std::is_same<vfloat4::vbool_t, vbool4>::value));
1723 OIIO_CHECK_ASSERT((std::is_same<vfloat8::vbool_t, vbool8>::value));
1724 OIIO_CHECK_ASSERT((std::is_same<vfloat16::vbool_t, vbool16>::value));
1725 OIIO_CHECK_ASSERT((std::is_same<vint4::vbool_t, vbool4>::value));
1726 OIIO_CHECK_ASSERT((std::is_same<vint8::vbool_t, vbool8>::value));
1727 OIIO_CHECK_ASSERT((std::is_same<vint16::vbool_t, vbool16>::value));
1728 }
1729
1730
1731
1732 // Transform a point by a matrix using regular Imath
1733 inline Imath::V3f
transformp_imath(const Imath::V3f & v,const Imath::M44f & m)1734 transformp_imath(const Imath::V3f& v, const Imath::M44f& m)
1735 {
1736 Imath::V3f r;
1737 m.multVecMatrix(v, r);
1738 return r;
1739 }
1740
1741 // Transform a point by a matrix using simd ops on Imath types.
1742 inline Imath::V3f
transformp_imath_simd(const Imath::V3f & v,const Imath::M44f & m)1743 transformp_imath_simd(const Imath::V3f& v, const Imath::M44f& m)
1744 {
1745 return simd::transformp(m, v).V3f();
1746 }
1747
1748 // Transform a simd point by an Imath matrix using SIMD
1749 inline vfloat3
transformp_simd(const vfloat3 & v,const Imath::M44f & m)1750 transformp_simd(const vfloat3& v, const Imath::M44f& m)
1751 {
1752 return simd::transformp(m, v);
1753 }
1754
1755 // Transform a point by a matrix using regular Imath
1756 inline Imath::V3f
transformv_imath(const Imath::V3f & v,const Imath::M44f & m)1757 transformv_imath(const Imath::V3f& v, const Imath::M44f& m)
1758 {
1759 Imath::V3f r;
1760 m.multDirMatrix(v, r);
1761 return r;
1762 }
1763
1764 inline Imath::V4f
mul_vm_imath(const Imath::V4f & v,const Imath::M44f & m)1765 mul_vm_imath(const Imath::V4f& v, const Imath::M44f& m)
1766 {
1767 return v*m;
1768 }
1769
1770 // inline Imath::V4f
1771 // mul_mv_imath(const Imath::M44f& m, const Imath::V4f& v)
1772 // {
1773 // return m*v;
1774 // }
1775
1776 inline vfloat4
mul_vm_simd(const vfloat4 & v,const matrix44 & m)1777 mul_vm_simd(const vfloat4& v, const matrix44& m)
1778 {
1779 return v*m;
1780 }
1781
1782 inline vfloat4
mul_mv_simd(const matrix44 & m,const vfloat4 v)1783 mul_mv_simd(const matrix44& m, const vfloat4 v)
1784 {
1785 return m*v;
1786 }
1787
1788
1789
1790 inline bool
mx_equal_thresh(const matrix44 & a,const matrix44 & b,float thresh)1791 mx_equal_thresh(const matrix44& a, const matrix44& b, float thresh)
1792 {
1793 for (int j = 0; j < 4; ++j)
1794 for (int i = 0; i < 4; ++i)
1795 if (fabsf(a[j][i] - b[j][i]) > thresh)
1796 return false;
1797 return true;
1798 }
1799
1800
1801
1802 inline Imath::M44f
mat_transpose(const Imath::M44f & m)1803 mat_transpose(const Imath::M44f& m)
1804 {
1805 return m.transposed();
1806 }
1807
1808 inline Imath::M44f
mat_transpose_simd(const Imath::M44f & m)1809 mat_transpose_simd(const Imath::M44f& m)
1810 {
1811 return matrix44(m).transposed().M44f();
1812 }
1813
1814
1815
1816 void
test_matrix()1817 test_matrix()
1818 {
1819 Imath::V3f P(1.0f, 0.0f, 0.0f);
1820 Imath::M44f Mtrans(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 10, 11, 12, 1);
1821 Imath::M44f Mrot = Imath::M44f().rotate(Imath::V3f(0.0f, M_PI_2, 0.0f));
1822
1823 test_heading("Testing matrix ops:");
1824 std::cout << " P = " << P << "\n";
1825 std::cout << " Mtrans = " << Mtrans << "\n";
1826 std::cout << " Mrot = " << Mrot << "\n";
1827 OIIO_CHECK_EQUAL(simd::transformp(Mtrans, P).V3f(),
1828 transformp_imath(P, Mtrans));
1829 std::cout << " P translated = " << simd::transformp(Mtrans, P) << "\n";
1830 OIIO_CHECK_EQUAL(simd::transformv(Mtrans, P).V3f(), P);
1831 OIIO_CHECK_EQUAL(simd::transformp(Mrot, P).V3f(),
1832 transformp_imath(P, Mrot));
1833 std::cout << " P rotated = " << simd::transformp(Mrot, P) << "\n";
1834 OIIO_CHECK_EQUAL(simd::transformvT(Mrot, P).V3f(),
1835 transformv_imath(P, Mrot.transposed()));
1836 std::cout << " P rotated by the transpose = " << simd::transformv(Mrot, P)
1837 << "\n";
1838 OIIO_CHECK_EQUAL(matrix44(Mrot).transposed().M44f(), Mrot.transposed());
1839 std::cout << " Mrot transposed = " << matrix44(Mrot).transposed().M44f()
1840 << "\n";
1841
1842 // Test m44 * v4, v4 * m44
1843 {
1844 Imath::M44f M(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
1845 matrix44 m(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
1846 Imath::V4f V(1,2,3,4);
1847 vfloat4 v(1,2,3,4);
1848 vfloat4 vm = v*m;
1849 OIIO_CHECK_SIMD_EQUAL(vm, vfloat4(V*M));
1850 // vfloat4 mv = m*v;
1851 // OIIO_CHECK_SIMD_EQUAL(mv, M*V);
1852 benchmark2("V4 * M44 Imath", mul_vm_imath, V, M, 1);
1853 // benchmark2("M44 * V4 Imath", mul_mv_imath, mx, v4x, 1);
1854 benchmark2("M44 * V4 simd", mul_mv_simd, m, v, 1);
1855 benchmark2("V4 * M44 simd", mul_vm_simd, v, m, 1);
1856 }
1857
1858 // Test ==, !=
1859 {
1860 matrix44 mt(Mtrans), mr(Mrot);
1861 OIIO_CHECK_EQUAL(mt, mt);
1862 OIIO_CHECK_EQUAL(mt, Mtrans);
1863 OIIO_CHECK_EQUAL(Mtrans, mt);
1864 OIIO_CHECK_NE(mt, mr);
1865 OIIO_CHECK_NE(mr, Mtrans);
1866 OIIO_CHECK_NE(Mtrans, mr);
1867 }
1868 OIIO_CHECK_ASSERT(
1869 mx_equal_thresh(matrix44(Mtrans.inverse()), matrix44(Mtrans).inverse(),
1870 1.0e-6f));
1871 OIIO_CHECK_ASSERT(
1872 mx_equal_thresh(matrix44(Mrot.inverse()), matrix44(Mrot).inverse(),
1873 1.0e-6f));
1874 OIIO_CHECK_EQUAL(
1875 matrix44(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
1876 Imath::M44f(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
1877
1878 Imath::V3f vx(2.51f, 1.0f, 1.0f);
1879 Imath::M44f mx(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 10, 11, 12, 1);
1880 benchmark2("transformp Imath", transformp_imath, vx, mx, 1);
1881 benchmark2("transformp Imath with simd", transformp_imath_simd, vx, mx, 1);
1882 benchmark2("transformp simd", transformp_simd, vfloat3(vx), mx, 1);
1883
1884 benchmark("transpose m44", mat_transpose, mx, 1);
1885 benchmark("transpose m44 with simd", mat_transpose_simd, mx, 1);
1886 // Reduce the iterations of the ones below, if we can
1887 iterations /= 2;
1888 benchmark("m44 inverse Imath", inverse_imath, mx, 1);
1889 // std::cout << "inv " << matrix44(inverse_imath(mx)) << "\n";
1890 benchmark("m44 inverse_simd", inverse_simd, matrix44(mx), 1);
1891 // std::cout << "inv " << inverse_simd(mx) << "\n";
1892 benchmark("m44 inverse_simd native simd", inverse_simd, matrix44(mx), 1);
1893 // std::cout << "inv " << inverse_simd(mx) << "\n";
1894 iterations *= 2; // put things the way they were
1895 }
1896
1897
1898
1899 int
main(int argc,char * argv[])1900 main(int argc, char* argv[])
1901 {
1902 #if !defined(NDEBUG) || defined(OIIO_CI) || defined(OIIO_CODE_COVERAGE)
1903 // For the sake of test time, reduce the default iterations for DEBUG,
1904 // CI, and code coverage builds. Explicit use of --iters or --trials
1905 // will override this, since it comes before the getargs() call.
1906 iterations /= 10;
1907 ntrials = 1;
1908 #endif
1909 for (int i = 0; i < 16; ++i) {
1910 dummy_float[i] = 1.0f;
1911 dummy_int[i] = 1;
1912 }
1913
1914 getargs(argc, argv);
1915
1916 std::string oiiosimd = OIIO::get_string_attribute("oiio:simd");
1917 std::string hwsimd = OIIO::get_string_attribute("hw:simd");
1918 std::cout << "OIIO SIMD support is: " << (oiiosimd.size() ? oiiosimd : "")
1919 << "\n";
1920 std::cout << "Hardware SIMD support is: " << (hwsimd.size() ? hwsimd : "")
1921 << "\n";
1922 std::cout << "\n";
1923
1924 Timer timer;
1925
1926 vint4 dummy4(0);
1927 vint8 dummy8(0);
1928 benchmark("null benchmark 4", [](const vint4&) { return int(0); }, dummy4);
1929 benchmark("null benchmark 8", [](const vint8&) { return int(0); }, dummy8);
1930
1931 category_heading("vfloat4");
1932 test_loadstore<vfloat4>();
1933 test_conversion_loadstore_float<vfloat4>();
1934 test_masked_loadstore<vfloat4>();
1935 test_gatherscatter<vfloat4>();
1936 test_component_access<vfloat4>();
1937 test_arithmetic<vfloat4>();
1938 test_comparisons<vfloat4>();
1939 test_shuffle4<vfloat4>();
1940 test_swizzle<vfloat4>();
1941 test_blend<vfloat4>();
1942 test_transpose4<vfloat4>();
1943 test_vectorops_vfloat4();
1944 test_fused<vfloat4>();
1945 test_mathfuncs<vfloat4>();
1946
1947 category_heading("vfloat3");
1948 test_loadstore<vfloat3>();
1949 test_conversion_loadstore_float<vfloat3>();
1950 test_component_access<vfloat3>();
1951 test_arithmetic<vfloat3>();
1952 // Unnecessary to test these, they just use the vfloat4 ops.
1953 // test_comparisons<vfloat3> ();
1954 // test_shuffle4<vfloat3> ();
1955 // test_swizzle<vfloat3> ();
1956 // test_blend<vfloat3> ();
1957 // test_transpose4<vfloat3> ();
1958 test_vectorops_vfloat3();
1959 test_fused<vfloat3>();
1960 // test_mathfuncs<vfloat3>();
1961
1962 category_heading("vfloat8");
1963 test_loadstore<vfloat8>();
1964 test_conversion_loadstore_float<vfloat8>();
1965 test_masked_loadstore<vfloat8>();
1966 test_gatherscatter<vfloat8>();
1967 test_component_access<vfloat8>();
1968 test_arithmetic<vfloat8>();
1969 test_comparisons<vfloat8>();
1970 test_shuffle8<vfloat8>();
1971 test_blend<vfloat8>();
1972 test_fused<vfloat8>();
1973 test_mathfuncs<vfloat8>();
1974
1975 category_heading("vfloat16");
1976 test_loadstore<vfloat16>();
1977 test_conversion_loadstore_float<vfloat16>();
1978 test_masked_loadstore<vfloat16>();
1979 test_gatherscatter<vfloat16>();
1980 test_component_access<vfloat16>();
1981 test_arithmetic<vfloat16>();
1982 test_comparisons<vfloat16>();
1983 test_shuffle16<vfloat16>();
1984 test_blend<vfloat16>();
1985 test_fused<vfloat16>();
1986 test_mathfuncs<vfloat16>();
1987
1988 category_heading("vint4");
1989 test_loadstore<vint4>();
1990 test_conversion_loadstore_int<vint4>();
1991 test_masked_loadstore<vint4>();
1992 test_gatherscatter<vint4>();
1993 test_component_access<vint4>();
1994 test_arithmetic<vint4>();
1995 test_bitwise_int<vint4>();
1996 test_comparisons<vint4>();
1997 test_shuffle4<vint4>();
1998 test_blend<vint4>();
1999 test_vint_to_uint16s<vint4>();
2000 test_vint_to_uint8s<vint4>();
2001 test_shift<vint4>();
2002 test_transpose4<vint4>();
2003
2004 category_heading("vint8");
2005 test_loadstore<vint8>();
2006 test_conversion_loadstore_int<vint8>();
2007 test_masked_loadstore<vint8>();
2008 test_gatherscatter<vint8>();
2009 test_component_access<vint8>();
2010 test_arithmetic<vint8>();
2011 test_bitwise_int<vint8>();
2012 test_comparisons<vint8>();
2013 test_shuffle8<vint8>();
2014 test_blend<vint8>();
2015 test_vint_to_uint16s<vint8>();
2016 test_vint_to_uint8s<vint8>();
2017 test_shift<vint8>();
2018
2019 category_heading("vint16");
2020 test_loadstore<vint16>();
2021 test_conversion_loadstore_int<vint16>();
2022 test_masked_loadstore<vint16>();
2023 test_gatherscatter<vint16>();
2024 test_component_access<vint16>();
2025 test_arithmetic<vint16>();
2026 test_bitwise_int<vint16>();
2027 test_comparisons<vint16>();
2028 test_shuffle16<vint16>();
2029 test_blend<vint16>();
2030 test_vint_to_uint16s<vint16>();
2031 test_vint_to_uint16s<vint16>();
2032 test_shift<vint16>();
2033
2034 category_heading("vbool4");
2035 test_shuffle4<vbool4>();
2036 test_component_access<vbool4>();
2037 test_bitwise_bool<vbool4>();
2038
2039 category_heading("vbool8");
2040 test_shuffle8<vbool8>();
2041 test_component_access<vbool8>();
2042 test_bitwise_bool<vbool8>();
2043
2044 category_heading("vbool16");
2045 // test_shuffle16<vbool16> ();
2046 test_component_access<vbool16>();
2047 test_bitwise_bool<vbool16>();
2048
2049 category_heading("Odds and ends");
2050 test_constants();
2051 test_special();
2052 test_metaprogramming();
2053 test_matrix();
2054
2055 std::cout << "\nTotal time: " << Strutil::timeintervalformat(timer())
2056 << "\n";
2057
2058 return unit_test_failures;
2059 }
2060