1 /*
2 * Copyright 2020 The Emscripten Authors. All rights reserved.
3 * Emscripten is available under two separate licenses, the MIT license and the
4 * University of Illinois/NCSA Open Source License. Both these licenses can be
5 * found in the LICENSE file.
6 */
7 #pragma once
8
9 #include <stdio.h>
10 #include <math.h>
11 #include <time.h>
12 #include <inttypes.h>
13 #include <float.h>
14 #include <assert.h>
15 #include <string.h>
16
// The align1_* names are the element types used by the unaligned-pointer
// overloads of tostr() and by the store-test macros in this file.
#ifdef __EMSCRIPTEN__
// Emscripten build: map them onto the emscripten_align1_* types
// (presumably declared by <emscripten/emscripten.h> - confirm).
#include <emscripten/emscripten.h>
#define align1_int    emscripten_align1_int
#define align1_int64  emscripten_align1_int64
#define align1_float  emscripten_align1_float
#define align1_double emscripten_align1_double
#else
// Any other build: they are the plain scalar types.
#define align1_int    int
#define align1_int64  int64_t
#define align1_float  float
#define align1_double double
#endif
29
30 // Recasts floating point representation of f to an integer.
// Returns the 32-bit object representation of the float f as an integer.
// The bytes are copied with memcpy: reading a float object through a
// uint32_t pointer violates the strict aliasing rule.
uint32_t fcastu(float f)
{
  uint32_t bits;
  memcpy(&bits, &f, sizeof bits);
  return bits;
}
// Returns the 64-bit object representation of the double f as an integer.
// The bytes are copied with memcpy: reading a double object through a
// uint64_t pointer violates the strict aliasing rule.
uint64_t dcastu(double f)
{
  uint64_t bits;
  memcpy(&bits, &f, sizeof bits);
  return bits;
}
// Returns the float whose 32-bit object representation is t.
// The bytes are copied with memcpy: reading a uint32_t object through a
// float pointer violates the strict aliasing rule.
float ucastf(uint32_t t)
{
  float value;
  memcpy(&value, &t, sizeof value);
  return value;
}
// Returns the double whose 64-bit object representation is t.
// The bytes are copied with memcpy: reading a uint64_t object through a
// double pointer violates the strict aliasing rule.
double ucastd(uint64_t t)
{
  double value;
  memcpy(&value, &t, sizeof value);
  return value;
}
35
// Data used in the tests. It is stored in globals and accessed via getter functions so that the optimizer cannot "solve" the whole test suite at compile time;
// the operations are then actually performed at runtime. (Testing the compiler's ability to perform
// SIMD ops at compile time would be interesting as well, but that's for another test.)
39 float interesting_floats_[] = { -INFINITY, -FLT_MAX, -2.5f, -1.5f, -1.4f, -1.0f, -0.5f, -0.2f, -FLT_MIN, -0.f, 0.f,
40 1.401298464e-45f, FLT_MIN, 0.3f, 0.5f, 0.8f, 1.0f, 1.5f, 2.5f, 3.5f, 3.6f, FLT_MAX, INFINITY, NAN,
41 ucastf(0x01020304), ucastf(0x80000000), ucastf(0x7FFFFFFF), ucastf(0xFFFFFFFF)
42 };
43
44 double interesting_doubles_[] = { -INFINITY, -FLT_MAX, -2.5, -1.5, -1.4, -1.0, -0.5, -0.2, -FLT_MIN, -0.0, 0.0,
45 1.401298464e-45, FLT_MIN, 0.3, 0.5, 0.8, 1.0, 1.5, 2.5, 3.5, 3.6, FLT_MAX, INFINITY, NAN,
46 ucastd(0x0102030405060708ULL), ucastd(0x8000000000000000ULL),
47 ucastd(0x7FFFFFFFFFFFFFFFULL), ucastd(0xFFFFFFFFFFFFFFFFULL)
48 };
49
50 uint32_t interesting_ints_[] = { 0, 1, 2, 3, 0x01020304, 0x10203040, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x12345678, 0x9ABCDEF1, 0x80000000,
51 0x80808080, 0x7F7F7F7F, 0x01010101, 0x11111111, 0x20202020, 0x0F0F0F0F, 0xF0F0F0F0,
52 fcastu(-INFINITY), fcastu(-FLT_MAX), fcastu(-2.5f), fcastu(-1.5f), fcastu(-1.4f), fcastu(-1.0f), fcastu(-0.5f),
53 fcastu(-0.2f), fcastu(-FLT_MIN), 0xF9301AB9, 0x0039AB12, 0x19302BCD,
54 fcastu(1.401298464e-45f), fcastu(FLT_MIN), fcastu(0.3f), fcastu(0.5f), fcastu(0.8f), fcastu(1.0f), fcastu(1.5f),
55 fcastu(2.5f), fcastu(3.5f), fcastu(3.6f), fcastu(FLT_MAX), fcastu(INFINITY), fcastu(NAN) };
56
// Returns whether time(NULL) is non-zero. This is true in practice, but since
// it depends on a runtime library call the compiler should not be able to
// assume it.
bool always_true()
{
  const time_t now = time(NULL);
  return now != 0;
}
58
IsNan(float f)59 bool IsNan(float f) { return (fcastu(f) << 1) > 0xFF000000u; }
60
61 // Replaces all occurrences of 'src' in string 'str' with 'dst', operating in place. strlen(dst) <= strlen(src).
// Replaces every occurrence of 'src' in 'str' with 'dst', editing 'str' in
// place. Requires strlen(dst) <= strlen(src), so the string either keeps its
// length or shrinks. After each replacement the search resumes at the replaced
// position, so a match formed by 'dst' together with the characters that
// follow it is contracted again (e.g. "e+00" -> "e+" applied to "e+0005").
// An empty 'src', or a 'src' identical to 'dst', leaves 'str' untouched.
void contract_inplace(char *str, const char *src, const char *dst)
{
  const size_t dstLen = strlen(dst);
  const size_t srcLen = strlen(src);
  assert(dstLen <= srcLen);
  // With assertions compiled out, refuse to grow the string in place.
  if (dstLen > srcLen) return;

  // An empty pattern matches at every position and an identical replacement
  // changes nothing; either would make the loop below spin forever.
  if (srcLen == 0 || strcmp(src, dst) == 0) return;

  for (char *pos = strstr(str, src); pos; pos = strstr(pos, src))
  {
    char *tail = pos + srcLen;
    // Copy the replacement without its terminator, so that the tail is not
    // cut off when dst is exactly as long as src.
    memcpy(pos, dst, dstLen);
    // Source and destination of the tail overlap, hence memmove rather than
    // strcpy. The +1 carries the terminating NUL along.
    memmove(pos + dstLen, tail, strlen(tail) + 1);
  }
}
79
80 // sprintf standard does not allow controlling how many leading zeros to use
81 // for printing out the exponent, and different compilers give different
82 // values. Perform a canonicalization step that enforces the printouts are
83 // the same.
CanonicalizeStringComparisons(char * s)84 void CanonicalizeStringComparisons(char *s)
85 {
86 contract_inplace(s, "e+00", "e+");
87 contract_inplace(s, "e-00", "e-");
88 contract_inplace(s, "e+0", "e+");
89 contract_inplace(s, "e-0", "e-");
90 contract_inplace(s, "1.#INF", "inf");
91 }
92
93 // Global test state that is used per-test to determine whether to validate the state of exact NaN bits
94 // in specific functions.
95 extern bool testNaNBits;
96
97 char *SerializeFloat(float f, char *dstStr, bool approximate = false)
98 {
99 if (IsNan(f))
100 {
101 uint32_t u = fcastu(f);
102 int numChars = testNaNBits ? sprintf(dstStr, "NaN(0x%8X)", (unsigned int)u) : sprintf(dstStr, "NaN");
103 return dstStr + numChars;
104 }
105 else
106 {
107 if (approximate > 0)
108 {
109 if (fabs(f) < FLT_MIN) // Flush denormals to zero (for _mm_rcp_ps)
110 sprintf(dstStr, "%f", copysign(0.f, f));
111 else if (fabs(f) >= 2.6e22f) // Flush large numbers to infinity (for _mm_rsqrt_ps)
112 sprintf(dstStr, "%f", copysign(INFINITY, f));
113 else
114 sprintf(dstStr, "%.2g", f);
115 }
116 else
117 sprintf(dstStr, "%.9g", f);
118 CanonicalizeStringComparisons(dstStr);
119 return dstStr + strlen(dstStr);
120 }
121 }
122
SerializeDouble(double f,char * dstStr)123 char *SerializeDouble(double f, char *dstStr)
124 {
125 if (IsNan(f))
126 {
127 uint64_t u = dcastu(f);
128 int numChars = testNaNBits ? sprintf(dstStr, "NaN(0x%08X%08X)", (unsigned int)(u>>32), (unsigned int)u) : sprintf(dstStr, "NaN");
129 return dstStr + numChars;
130 }
131 else
132 {
133 sprintf(dstStr, "%.17g", f);
134 CanonicalizeStringComparisons(dstStr);
135 return dstStr + strlen(dstStr);
136 }
137 }
138
tostr(__m128 * m,char * outstr)139 void tostr(__m128 *m, char *outstr)
140 {
141 union { __m128 m; float val[4]; } u;
142 u.m = *m;
143 char s[4][32];
144 SerializeFloat(u.val[0], s[0]);
145 SerializeFloat(u.val[1], s[1]);
146 SerializeFloat(u.val[2], s[2]);
147 SerializeFloat(u.val[3], s[3]);
148 sprintf(outstr, "[%s,%s,%s,%s]", s[3], s[2], s[1], s[0]);
149 }
150
tostr_approx(__m128 * m,char * outstr,bool approximate)151 void tostr_approx(__m128 *m, char *outstr, bool approximate)
152 {
153 union { __m128 m; float val[4]; } u;
154 u.m = *m;
155 char s[4][32];
156 SerializeFloat(u.val[0], s[0], approximate);
157 SerializeFloat(u.val[1], s[1], approximate);
158 SerializeFloat(u.val[2], s[2], approximate);
159 SerializeFloat(u.val[3], s[3], approximate);
160 sprintf(outstr, "[%s,%s,%s,%s]", s[3], s[2], s[1], s[0]);
161 }
162
tostr(__m128i * m,char * outstr)163 void tostr(__m128i *m, char *outstr)
164 {
165 union { __m128i m; uint32_t val[4]; } u;
166 u.m = *m;
167 sprintf(outstr, "[0x%08X,0x%08X,0x%08X,0x%08X]", u.val[3], u.val[2], u.val[1], u.val[0]);
168 }
169
170 #ifdef __SSE2__
171
tostr(__m128d * m,char * outstr)172 void tostr(__m128d *m, char *outstr)
173 {
174 union { __m128d m; double val[2]; } u;
175 u.m = *m;
176 char s[2][64];
177 SerializeDouble(u.val[0], s[0]);
178 SerializeDouble(u.val[1], s[1]);
179 sprintf(outstr, "[%s,%s]", s[1], s[0]);
180 }
181
ExtractInRandomOrder(uint32_t * arr,int i,int n,int prime)182 __m128i ExtractInRandomOrder(uint32_t *arr, int i, int n, int prime)
183 {
184 return _mm_set_epi32(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]);
185 }
186
ExtractInRandomOrder(double * arr,int i,int n,int prime)187 __m128d ExtractInRandomOrder(double *arr, int i, int n, int prime)
188 {
189 return _mm_set_pd(arr[(i*prime)%n], arr[((i+1)*prime)%n]);
190 }
191 #endif
192
tostr(align1_int * m,char * outstr)193 void tostr(align1_int *m, char *outstr)
194 {
195 sprintf(outstr, "0x%08X", *m);
196 }
197
tostr(align1_int64 * m,char * outstr)198 void tostr(align1_int64 *m, char *outstr)
199 {
200 sprintf(outstr, "0x%08X%08X", (int)(*m >> 32), (int)*m);
201 }
202
tostr(align1_float * m,char * outstr)203 void tostr(align1_float *m, char *outstr)
204 {
205 SerializeFloat(*m, outstr);
206 }
207
tostr(align1_double * m,char * outstr)208 void tostr(align1_double *m, char *outstr)
209 {
210 SerializeDouble(*m, outstr);
211 }
212
tostr(align1_double * m,int numElems,char * outstr)213 void tostr(align1_double *m, int numElems, char *outstr)
214 {
215 char s[2][64];
216 for(int i = 0; i < numElems; ++i)
217 SerializeDouble(m[i], s[i]);
218 switch(numElems)
219 {
220 case 1: sprintf(outstr, "{%s}", s[0]); break;
221 case 2: sprintf(outstr, "{%s,%s}", s[0], s[1]); break;
222 }
223 }
224
tostr(align1_float * m,int numElems,char * outstr)225 void tostr(align1_float *m, int numElems, char *outstr)
226 {
227 char s[4][64];
228 for(int i = 0; i < numElems; ++i)
229 SerializeFloat(m[i], s[i]);
230 switch(numElems)
231 {
232 case 1: sprintf(outstr, "{%s}", s[0]); break;
233 case 2: sprintf(outstr, "{%s,%s}", s[0], s[1]); break;
234 case 3: sprintf(outstr, "{%s,%s,%s}", s[0], s[1], s[2]); break;
235 case 4: sprintf(outstr, "{%s,%s,%s,%s}", s[0], s[1], s[2], s[3]); break;
236 }
237 }
238
tostr(align1_int * s,int numElems,char * outstr)239 void tostr(align1_int *s, int numElems, char *outstr)
240 {
241 switch(numElems)
242 {
243 case 1: sprintf(outstr, "{0x%08X}", s[0]); break;
244 case 2: sprintf(outstr, "{0x%08X,0x%08X}", s[0], s[1]); break;
245 case 3: sprintf(outstr, "{0x%08X,0x%08X,0x%08X}", s[0], s[1], s[2]); break;
246 case 4: sprintf(outstr, "{0x%08X,0x%08X,0x%08X,0x%08X}", s[0], s[1], s[2], s[3]); break;
247 }
248 }
249
tostr(align1_int64 * m,int numElems,char * outstr)250 void tostr(align1_int64 *m, int numElems, char *outstr)
251 {
252 switch(numElems)
253 {
254 case 1: sprintf(outstr, "{0x%08X%08X}", (int)(*m >> 32), (int)*m); break;
255 case 2: sprintf(outstr, "{0x%08X%08X,0x%08X%08X}", (int)(*m >> 32), (int)*m, (int)(m[1] >> 32), (int)m[1]);
256 }
257 }
258
259 // Accessors to the test data in a way that the compiler can't optimize at compile-time.
get_interesting_floats()260 __attribute__((noinline)) float *get_interesting_floats()
261 {
262 return always_true() ? interesting_floats_ : 0;
263 }
264
get_interesting_ints()265 __attribute__((noinline)) uint32_t *get_interesting_ints()
266 {
267 return always_true() ? interesting_ints_ : 0;
268 }
269
get_interesting_doubles()270 __attribute__((noinline)) double *get_interesting_doubles()
271 {
272 return always_true() ? interesting_doubles_ : 0;
273 }
274
ExtractFloatInRandomOrder(float * arr,int i,int n,int prime)275 __m128 ExtractFloatInRandomOrder(float *arr, int i, int n, int prime)
276 {
277 return _mm_set_ps(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]);
278 }
279
280 #ifdef __SSE2__
ExtractDoubleInRandomOrder(double * arr,int i,int n,int prime)281 __m128d ExtractDoubleInRandomOrder(double *arr, int i, int n, int prime)
282 {
283 return _mm_set_pd(arr[(i*prime)%n], arr[((i+1)*prime)%n]);
284 }
285 #endif
286
ExtractIntInRandomOrder(unsigned int * arr,int i,int n,int prime)287 __m128 ExtractIntInRandomOrder(unsigned int *arr, int i, int n, int prime)
288 {
289 return _mm_set_ps(*(float*)&arr[(i*prime)%n], *(float*)&arr[((i+1)*prime)%n], *(float*)&arr[((i+2)*prime)%n], *(float*)&arr[((i+3)*prime)%n]);
290 }
291
// Shorthands for the Extract*InRandomOrder helpers. The E1 variants use a
// multiplier of 1, so they read consecutive elements starting at i (modulo n);
// the E2 variants use a multiplier of 1787, which scatters the indices.

// float arrays -> __m128
#define E1(arr, i, n)        ExtractFloatInRandomOrder(arr, i, n, 1)
#define E2(arr, i, n)        ExtractFloatInRandomOrder(arr, i, n, 1787)

// double arrays -> __m128d
#define E1_Double(arr, i, n) ExtractDoubleInRandomOrder(arr, i, n, 1)
#define E2_Double(arr, i, n) ExtractDoubleInRandomOrder(arr, i, n, 1787)

// unsigned int arrays -> __m128 carrying the integer bit patterns
#define E1_Int(arr, i, n)    ExtractIntInRandomOrder(arr, i, n, 1)
#define E2_Int(arr, i, n)    ExtractIntInRandomOrder(arr, i, n, 1787)
300
// Exercises func, taking two __m128i operands and returning __m128i, over
// vectors built from interesting_ints. Each pair is evaluated in both operand
// orders; both printed lines list the operands in the same (first, second)
// order.
#define M128i_M128i_M128i(func) \
  for(int row = 0; row < numInterestingInts / 4; ++row) \
    for(int shift = 0; shift < 4; ++shift) \
      for(int col = 0; col < numInterestingInts / 4; ++col) \
      { \
        char lhsStr[256]; \
        char rhsStr[256]; \
        char resStr[256]; \
        __m128i lhs = (__m128i)E1_Int(interesting_ints, row*4+shift, numInterestingInts); \
        __m128i rhs = (__m128i)E2_Int(interesting_ints, col*4, numInterestingInts); \
        /* a op b */ \
        __m128i res = func(lhs, rhs); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s) = %s\n", #func, lhsStr, rhsStr, resStr); \
        /* b op a */ \
        res = func(rhs, lhs); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s) = %s\n", #func, lhsStr, rhsStr, resStr); \
      }
321
// Exercises func(__m128, Tint) over vectors built from interesting_floats and
// prints the operand, the constant Tint and the result of type Ret_type.
#define Ret_M128_Tint_body(Ret_type, func, Tint) \
  for(int row = 0; row < numInterestingFloats / 4; ++row) \
    for(int shift = 0; shift < 4; ++shift) \
    { \
      char argStr[256]; \
      char resStr[256]; \
      __m128 arg = E1(interesting_floats, row*4+shift, numInterestingFloats); \
      Ret_type res = func(arg, Tint); \
      tostr(&arg, argStr); \
      tostr(&res, resStr); \
      printf("%s(%s, %d) = %s\n", #func, argStr, Tint, resStr); \
    }
332
// Exercises func(__m128d, Tint) over vectors built from interesting_doubles
// and prints the operand, the constant Tint and the result of type Ret_type.
#define Ret_M128d_Tint_body(Ret_type, func, Tint) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
    { \
      char argStr[256]; \
      char resStr[256]; \
      __m128d arg = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
      Ret_type res = func(arg, Tint); \
      tostr(&arg, argStr); \
      tostr(&res, resStr); \
      printf("%s(%s, %d) = %s\n", #func, argStr, Tint, resStr); \
    }
343
// Exercises func(__m128i, Tint) over vectors built from interesting_ints and
// prints the operand, the constant Tint and the result of type Ret_type.
#define Ret_M128i_Tint_body(Ret_type, func, Tint) \
  for(int row = 0; row < numInterestingInts / 4; ++row) \
    for(int shift = 0; shift < 4; ++shift) \
    { \
      char argStr[256]; \
      char resStr[256]; \
      __m128i arg = (__m128i)E1_Int(interesting_ints, row*4+shift, numInterestingInts); \
      Ret_type res = func(arg, Tint); \
      tostr(&arg, argStr); \
      tostr(&res, resStr); \
      printf("%s(%s, %d) = %s\n", #func, argStr, Tint, resStr); \
    }
354
// Exercises func(__m128i, int, Tint): every vector built from
// interesting_ints is combined with every scalar of interesting_ints. Prints
// the vector, the scalar in hexadecimal, the constant Tint and the result.
#define Ret_M128i_int_Tint_body(Ret_type, func, Tint) \
  for(int row = 0; row < numInterestingInts / 4; ++row) \
    for(int scalarIdx = 0; scalarIdx < numInterestingInts; ++scalarIdx) \
      for(int shift = 0; shift < 4; ++shift) \
      { \
        char argStr[256]; \
        char resStr[256]; \
        __m128i arg = (__m128i)E1_Int(interesting_ints, row*4+shift, numInterestingInts); \
        Ret_type res = func(arg, interesting_ints[scalarIdx], Tint); \
        tostr(&arg, argStr); \
        tostr(&res, resStr); \
        printf("%s(%s, 0x%08X, %d) = %s\n", #func, argStr, interesting_ints[scalarIdx], Tint, resStr); \
      }
366
// Exercises func(__m128d, __m128d, Tint) over pairs of vectors built from
// interesting_doubles. Each pair is evaluated in both operand orders; both
// printed lines list the operands in the same (first, second) order.
#define Ret_M128d_M128d_Tint_body(Ret_type, func, Tint) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
      for(int col = 0; col < numInterestingDoubles / 2; ++col) \
      { \
        char lhsStr[256]; \
        char rhsStr[256]; \
        char resStr[256]; \
        __m128d lhs = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
        __m128d rhs = E2_Double(interesting_doubles, col*2, numInterestingDoubles); \
        /* a op b */ \
        Ret_type res = func(lhs, rhs, Tint); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s, %d) = %s\n", #func, lhsStr, rhsStr, Tint, resStr); \
        /* b op a */ \
        res = func(rhs, lhs, Tint); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s, %d) = %s\n", #func, lhsStr, rhsStr, Tint, resStr); \
      }
387
// Exercises func(__m128i, __m128i, Tint) over pairs of vectors built from
// interesting_ints. Each pair is evaluated in both operand orders; both
// printed lines list the operands in the same (first, second) order.
#define Ret_M128i_M128i_Tint_body(Ret_type, func, Tint) \
  for(int row = 0; row < numInterestingInts / 4; ++row) \
    for(int shift = 0; shift < 4; ++shift) \
      for(int col = 0; col < numInterestingInts / 4; ++col) \
      { \
        char lhsStr[256]; \
        char rhsStr[256]; \
        char resStr[256]; \
        __m128i lhs = (__m128i)E1_Int(interesting_ints, row*4+shift, numInterestingInts); \
        __m128i rhs = (__m128i)E2_Int(interesting_ints, col*4, numInterestingInts); \
        /* a op b */ \
        Ret_type res = func(lhs, rhs, Tint); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s, %d) = %s\n", #func, lhsStr, rhsStr, Tint, resStr); \
        /* b op a */ \
        res = func(rhs, lhs, Tint); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s, %d) = %s\n", #func, lhsStr, rhsStr, Tint, resStr); \
      }
408
// Exercises func(__m128, __m128, Tint) over pairs of vectors built from
// interesting_floats. Each pair is evaluated in both operand orders; both
// printed lines list the operands in the same (first, second) order.
#define Ret_M128_M128_Tint_body(Ret_type, func, Tint) \
  for(int row = 0; row < numInterestingFloats / 4; ++row) \
    for(int shift = 0; shift < 4; ++shift) \
      for(int col = 0; col < numInterestingFloats / 4; ++col) \
      { \
        char lhsStr[256]; \
        char rhsStr[256]; \
        char resStr[256]; \
        __m128 lhs = E1(interesting_floats, row*4+shift, numInterestingFloats); \
        __m128 rhs = E2(interesting_floats, col*4, numInterestingFloats); \
        /* a op b */ \
        Ret_type res = func(lhs, rhs, Tint); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s, %d) = %s\n", #func, lhsStr, rhsStr, Tint, resStr); \
        /* b op a */ \
        res = func(rhs, lhs, Tint); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s, %d) = %s\n", #func, lhsStr, rhsStr, Tint, resStr); \
      }
429
// Expands F(Ret_type, func, N) once for each of 23 selected literal values N
// in the range 0..255, in ascending order. The literals are spelled out so
// that each expansion sees a compile-time constant.
#define const_int8_unroll(Ret_type, F, func) \
  F(Ret_type, func, 0);   F(Ret_type, func, 1);   F(Ret_type, func, 2);   F(Ret_type, func, 3); \
  F(Ret_type, func, 5);   F(Ret_type, func, 7);   F(Ret_type, func, 11);  F(Ret_type, func, 13); \
  F(Ret_type, func, 15);  F(Ret_type, func, 16);  F(Ret_type, func, 17);  F(Ret_type, func, 23); \
  F(Ret_type, func, 29);  F(Ret_type, func, 31);  F(Ret_type, func, 37);  F(Ret_type, func, 43); \
  F(Ret_type, func, 47);  F(Ret_type, func, 59);  F(Ret_type, func, 127); F(Ret_type, func, 128); \
  F(Ret_type, func, 191); F(Ret_type, func, 254); F(Ret_type, func, 255);
454
// Expands F(Ret_type, func, N) once for every literal N from 0 through 31, in
// ascending order. The literals are spelled out so that each expansion sees a
// compile-time constant.
#define const_int5_full_unroll(Ret_type, F, func) \
  F(Ret_type, func, 0);  F(Ret_type, func, 1);  F(Ret_type, func, 2);  F(Ret_type, func, 3); \
  F(Ret_type, func, 4);  F(Ret_type, func, 5);  F(Ret_type, func, 6);  F(Ret_type, func, 7); \
  F(Ret_type, func, 8);  F(Ret_type, func, 9);  F(Ret_type, func, 10); F(Ret_type, func, 11); \
  F(Ret_type, func, 12); F(Ret_type, func, 13); F(Ret_type, func, 14); F(Ret_type, func, 15); \
  F(Ret_type, func, 16); F(Ret_type, func, 17); F(Ret_type, func, 18); F(Ret_type, func, 19); \
  F(Ret_type, func, 20); F(Ret_type, func, 21); F(Ret_type, func, 22); F(Ret_type, func, 23); \
  F(Ret_type, func, 24); F(Ret_type, func, 25); F(Ret_type, func, 26); F(Ret_type, func, 27); \
  F(Ret_type, func, 28); F(Ret_type, func, 29); F(Ret_type, func, 30); F(Ret_type, func, 31);
488
// Entry points that pair a *_Tint_body macro with a list of constants.

// Driven by the const_int8_unroll list (selected values in 0..255).
#define Ret_M128_Tint(Ret_type, func)        const_int8_unroll(Ret_type, Ret_M128_Tint_body, func)
#define Ret_M128d_Tint(Ret_type, func)       const_int8_unroll(Ret_type, Ret_M128d_Tint_body, func)
#define Ret_M128i_Tint(Ret_type, func)       const_int8_unroll(Ret_type, Ret_M128i_Tint_body, func)
#define Ret_M128i_int_Tint(Ret_type, func)   const_int8_unroll(Ret_type, Ret_M128i_int_Tint_body, func)
#define Ret_M128i_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_M128i_Tint_body, func)
#define Ret_M128d_M128d_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128d_M128d_Tint_body, func)
#define Ret_M128_M128_Tint(Ret_type, func)   const_int8_unroll(Ret_type, Ret_M128_M128_Tint_body, func)

// Driven by the const_int5_full_unroll list (every value in 0..31).
#define Ret_M128d_M128d_Tint_5bits(Ret_type, func) const_int5_full_unroll(Ret_type, Ret_M128d_M128d_Tint_body, func)
#define Ret_M128_M128_Tint_5bits(Ret_type, func)   const_int5_full_unroll(Ret_type, Ret_M128_M128_Tint_body, func)
498
// Exercises func(__m128d, __m128d) over pairs of vectors built from
// interesting_doubles. Each pair is evaluated in both operand orders; both
// printed lines list the operands in the same (first, second) order.
#define Ret_M128d_M128d(Ret_type, func) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
      for(int col = 0; col < numInterestingDoubles / 2; ++col) \
      { \
        char lhsStr[256]; \
        char rhsStr[256]; \
        char resStr[256]; \
        __m128d lhs = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
        __m128d rhs = E2_Double(interesting_doubles, col*2, numInterestingDoubles); \
        /* a op b */ \
        Ret_type res = func(lhs, rhs); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s) = %s\n", #func, lhsStr, rhsStr, resStr); \
        /* b op a */ \
        res = func(rhs, lhs); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s) = %s\n", #func, lhsStr, rhsStr, resStr); \
      }
519
// Exercises func(__m128d, __m128d, __m128d) over triples of vectors built from
// interesting_doubles. Each triple is evaluated in three rotations of the
// operands; every printed line lists the operands in the same
// (first, second, third) order.
#define Ret_M128d_M128d_M128d(Ret_type, func) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
      for(int col = 0; col < numInterestingDoubles / 2; ++col) \
        for(int third = 0; third < numInterestingDoubles / 2; ++third) \
        { \
          char aStr[256]; \
          char bStr[256]; \
          char cStr[256]; \
          char resStr[256]; \
          __m128d a = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
          __m128d b = E2_Double(interesting_doubles, col*2, numInterestingDoubles); \
          __m128d c = E1_Double(interesting_doubles, third*2, numInterestingDoubles); \
          /* a, b, c */ \
          Ret_type res = func(a, b, c); \
          tostr(&a, aStr); \
          tostr(&b, bStr); \
          tostr(&c, cStr); \
          tostr(&res, resStr); \
          printf("%s(%s, %s, %s) = %s\n", #func, aStr, bStr, cStr, resStr); \
          /* b, c, a */ \
          res = func(b, c, a); \
          tostr(&a, aStr); \
          tostr(&b, bStr); \
          tostr(&c, cStr); \
          tostr(&res, resStr); \
          printf("%s(%s, %s, %s) = %s\n", #func, aStr, bStr, cStr, resStr); \
          /* c, a, b */ \
          res = func(c, a, b); \
          tostr(&a, aStr); \
          tostr(&b, bStr); \
          tostr(&c, cStr); \
          tostr(&res, resStr); \
          printf("%s(%s, %s, %s) = %s\n", #func, aStr, bStr, cStr, resStr); \
        }
551
// Exercises func(__m128d, __m128): the first operand comes from
// interesting_doubles, the second from interesting_floats.
// NOTE(review): the innermost loop variable is not used in the body, so every
// line is produced numInterestingDoubles / 2 times; the float operand is
// indexed with the outer variables (row*4+shift). Kept as-is.
#define Ret_M128d_M128(Ret_type, func) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
      for(int repeat = 0; repeat < numInterestingDoubles / 2; ++repeat) \
      { \
        char lhsStr[256]; \
        char rhsStr[256]; \
        char resStr[256]; \
        __m128d lhs = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
        __m128 rhs = E2(interesting_floats, row*4+shift, numInterestingFloats); \
        /* a op b */ \
        Ret_type res = func(lhs, rhs); \
        tostr(&lhs, lhsStr); \
        tostr(&rhs, rhsStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s) = %s\n", #func, lhsStr, rhsStr, resStr); \
      }
566
// Exercises func(__m128d, int): every vector built from interesting_doubles is
// combined with every element of interesting_ints (converted to int).
#define Ret_M128d_int(Ret_type, func) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
      for(int scalarIdx = 0; scalarIdx < numInterestingInts; ++scalarIdx) \
      { \
        char vecStr[256]; \
        char scalarStr[256]; \
        char resStr[256]; \
        __m128d vec = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
        int scalar = interesting_ints[scalarIdx]; \
        Ret_type res = func(vec, scalar); \
        tostr(&vec, vecStr); \
        tostr(&scalar, scalarStr); \
        tostr(&res, resStr); \
        printf("%s(%s, %s) = %s\n", #func, vecStr, scalarStr, resStr); \
      }
580
// Exercises func(__m128d, int64_t): every vector built from
// interesting_doubles is combined with every 64-bit value formed from two
// elements of interesting_ints (one as the upper half, one as the lower half).
#define Ret_M128d_int64(Ret_type, func) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
      for(int hiIdx = 0; hiIdx < numInterestingInts; ++hiIdx) \
        for(int loIdx = 0; loIdx < numInterestingInts; ++loIdx) \
        { \
          char vecStr[256]; \
          char scalarStr[256]; \
          char resStr[256]; \
          __m128d vec = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
          int64_t scalar = (int64_t)(((uint64_t)interesting_ints[hiIdx]) << 32 | (uint64_t)interesting_ints[loIdx]); \
          Ret_type res = func(vec, scalar); \
          tostr(&vec, vecStr); \
          tostr(&scalar, scalarStr); \
          tostr(&res, resStr); \
          printf("%s(%s, %s) = %s\n", #func, vecStr, scalarStr, resStr); \
        }
595
// Exercises func(__m128d) over vectors built from interesting_doubles and
// prints the operand and the result of type Ret_type.
#define Ret_M128d(Ret_type, func) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int shift = 0; shift < 2; ++shift) \
    { \
      char argStr[256]; \
      char resStr[256]; \
      __m128d arg = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
      Ret_type res = func(arg); \
      tostr(&arg, argStr); \
      tostr(&res, resStr); \
      printf("%s(%s) = %s\n", #func, argStr, resStr); \
    }
606
// Exercises func(double*) on windows of interesting_doubles: the window starts
// advance by inc and stop while numElemsAccessed elements still fit. Prints
// the elements of the window and the result of type Ret_type.
#define Ret_DoublePtr(Ret_type, func, numElemsAccessed, inc) \
  for(int start = 0; start+numElemsAccessed <= numInterestingDoubles; start += inc) \
  { \
    char inputStr[256]; \
    char resStr[256]; \
    double *input = interesting_doubles + start; \
    Ret_type res = func(input); \
    tostr(input, numElemsAccessed, inputStr); \
    tostr(&res, resStr); \
    printf("%s(%s) = %s\n", #func, inputStr, resStr); \
  }
616
// Exercises func(double*, __m128i): windows of interesting_doubles combined
// with vectors built from interesting_ints. Only the window elements and the
// result are printed; the __m128i operand does not appear in the output.
#define Ret_DoublePtr_M128i(Ret_type, func, numElemsAccessed, inc) \
  for(int start = 0; start+numElemsAccessed <= numInterestingDoubles; start += inc) \
    for(int col = 0; col < numInterestingInts / 4; ++col) \
    { \
      char inputStr[256]; \
      char resStr[256]; \
      double *input = interesting_doubles + start; \
      __m128i vec = (__m128i)E2_Int(interesting_ints, col*4, numInterestingInts); \
      Ret_type res = func(input, vec); \
      tostr(input, numElemsAccessed, inputStr); \
      tostr(&res, resStr); \
      printf("%s(%s) = %s\n", #func, inputStr, resStr); \
    }
628
// Scratch storage that the store-test macros write into.
float tempOutFloatStore[16];

// Zero-fills tempOutFloatStore and returns its start address rounded up to the
// next multiple of alignmentBytes. The mask arithmetic assumes alignmentBytes
// is a power of two.
float *getTempOutFloatStore(int alignmentBytes)
{
  memset(tempOutFloatStore, 0, sizeof(tempOutFloatStore));

  const uintptr_t start = (uintptr_t)tempOutFloatStore;
  const uintptr_t slack = (uintptr_t)(alignmentBytes - 1);
  const uintptr_t aligned = (start + slack) & ~slack;
  return (float*)aligned;
}
637
getTempOutIntStore(int alignmentBytes)638 int *getTempOutIntStore(int alignmentBytes) { return (int*)getTempOutFloatStore(alignmentBytes); }
getTempOutDoubleStore(int alignmentBytes)639 double *getTempOutDoubleStore(int alignmentBytes) { return (double*)getTempOutFloatStore(alignmentBytes); }
640
// Exercises a store func(Ptr_type, __m128). For every vector built from
// interesting_floats and every byte offset 0, alignmentBytes, ... below
// numBytesWritten, the store targets the freshly zeroed 16-byte-aligned
// scratch buffer plus that offset. Prints the offset, the stored vector and
// the numBytesWritten/sizeof(float) floats found at the target.
#define void_OutFloatPtr_M128(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int row = 0; row < numInterestingFloats / 4; ++row) \
    for(int byteOffset = 0; byteOffset < numBytesWritten; byteOffset += alignmentBytes) \
      for(int shift = 0; shift < 4; ++shift) \
      { \
        char vecStr[256]; \
        char outStr[256]; \
        align1_float *out = (align1_float*)((uintptr_t)getTempOutFloatStore(16) + byteOffset); \
        __m128 vec = E1(interesting_floats, row*4+shift, numInterestingFloats); \
        func((Ptr_type)out, vec); \
        tostr(&vec, vecStr); \
        tostr(out, numBytesWritten/sizeof(float), outStr); \
        printf("%s(p:align=%d, %s) = %s\n", #func, byteOffset, vecStr, outStr); \
      }
654
// Exercises a store func(Ptr_type, __m128i, __m128). The __m128i operand is
// built from interesting_ints and the __m128 operand from interesting_floats;
// the target is the freshly zeroed 16-byte-aligned scratch buffer plus a byte
// offset stepping by alignmentBytes below numBytesWritten. Prints the offset,
// both operands and the numBytesWritten/sizeof(float) floats at the target.
#define void_OutFloatPtr_M128i_M128(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int row = 0; row < numInterestingFloats / 4; ++row) \
    for(int col = 0; col < numInterestingInts / 4; ++col) \
      for(int byteOffset = 0; byteOffset < numBytesWritten; byteOffset += alignmentBytes) \
        for(int shift = 0; shift < 4; ++shift) \
        { \
          char intVecStr[256]; \
          char floatVecStr[256]; \
          char outStr[256]; \
          align1_float *out = (align1_float*)((uintptr_t)getTempOutFloatStore(16) + byteOffset); \
          __m128i intVec = (__m128i)E1_Int(interesting_ints, col*4, numInterestingInts); \
          __m128 floatVec = E1(interesting_floats, row*4+shift, numInterestingFloats); \
          func((Ptr_type)out, intVec, floatVec); \
          tostr(&intVec, intVecStr); \
          tostr(&floatVec, floatVecStr); \
          tostr(out, numBytesWritten/sizeof(float), outStr); \
          printf("%s(p:align=%d, %s, %s) = %s\n", #func, byteOffset, intVecStr, floatVecStr, outStr); \
        }
671
// Exercises a store func(Ptr_type, __m128d). For every vector built from
// interesting_doubles and every byte offset 0, alignmentBytes, ... below
// numBytesWritten, the store targets the freshly zeroed 16-byte-aligned
// scratch buffer plus that offset. Prints the offset, the stored vector and
// the numBytesWritten/sizeof(double) doubles found at the target.
#define void_OutDoublePtr_M128d(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int byteOffset = 0; byteOffset < numBytesWritten; byteOffset += alignmentBytes) \
      for(int shift = 0; shift < 2; ++shift) \
      { \
        char vecStr[256]; \
        char outStr[256]; \
        align1_double *out = (align1_double*)((uintptr_t)getTempOutDoubleStore(16) + byteOffset); \
        __m128d vec = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
        func((Ptr_type)out, vec); \
        tostr(&vec, vecStr); \
        tostr(out, numBytesWritten/sizeof(double), outStr); \
        printf("%s(p:align=%d, %s) = %s\n", #func, byteOffset, vecStr, outStr); \
      }
685
// Exercises a store func(Ptr_type, __m128i, __m128d). The __m128i operand is
// built from interesting_ints and the __m128d operand from
// interesting_doubles; the target is the freshly zeroed 16-byte-aligned
// scratch buffer plus a byte offset stepping by alignmentBytes below
// numBytesWritten. Prints the offset, both operands and the
// numBytesWritten/sizeof(double) doubles at the target.
#define void_OutDoublePtr_M128i_M128d(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int row = 0; row < numInterestingDoubles / 2; ++row) \
    for(int col = 0; col < numInterestingInts / 4; ++col) \
      for(int byteOffset = 0; byteOffset < numBytesWritten; byteOffset += alignmentBytes) \
        for(int shift = 0; shift < 2; ++shift) \
        { \
          char intVecStr[256]; \
          char doubleVecStr[256]; \
          char outStr[256]; \
          align1_double *out = (align1_double*)((uintptr_t)getTempOutDoubleStore(16) + byteOffset); \
          __m128i intVec = (__m128i)E1_Int(interesting_ints, col*4, numInterestingInts); \
          __m128d doubleVec = E1_Double(interesting_doubles, row*2+shift, numInterestingDoubles); \
          func((Ptr_type)out, intVec, doubleVec); \
          tostr(&intVec, intVecStr); \
          tostr(&doubleVec, doubleVecStr); \
          tostr(out, numBytesWritten/sizeof(double), outStr); \
          printf("%s(p:align=%d, %s, %s) = %s\n", #func, byteOffset, intVecStr, doubleVecStr, outStr); \
        }
702
// Exercises a store func(Ptr_type, __m128i). For every vector built from
// interesting_ints and every byte offset 0, alignmentBytes, ... below
// numBytesWritten, the store targets the freshly zeroed 16-byte-aligned
// scratch buffer plus that offset. The number of ints printed from the target
// is numBytesWritten rounded up to whole ints.
#define void_OutIntPtr_M128i(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int row = 0; row < numInterestingInts / 4; ++row) \
    for(int byteOffset = 0; byteOffset < numBytesWritten; byteOffset += alignmentBytes) \
      for(int shift = 0; shift < 4; ++shift) \
      { \
        char vecStr[256]; \
        char outStr[256]; \
        align1_int *out = (align1_int*)((uintptr_t)getTempOutIntStore(16) + byteOffset); \
        __m128i vec = (__m128i)E1_Int(interesting_ints, row*4+shift, numInterestingInts); \
        func((Ptr_type)out, vec); \
        tostr(&vec, vecStr); \
        tostr(out, (numBytesWritten+sizeof(int)-1)/sizeof(int), outStr); \
        printf("%s(p:align=%d, %s) = %s\n", #func, byteOffset, vecStr, outStr); \
      }
716
// Exercises a store func(Ptr_type, int) with every element of interesting_ints
// (converted to int), at byte offsets stepping by alignmentBytes below
// numBytesWritten into the freshly zeroed 16-byte-aligned scratch buffer.
// NOTE(review): the innermost loop variable is not used in the body, so every
// line is produced four times. Kept as-is.
#define void_OutIntPtr_int(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int valueIdx = 0; valueIdx < numInterestingInts; ++valueIdx) \
    for(int byteOffset = 0; byteOffset < numBytesWritten; byteOffset += alignmentBytes) \
      for(int repeat = 0; repeat < 4; ++repeat) \
      { \
        char valueStr[256]; \
        char outStr[256]; \
        align1_int *out = (align1_int*)((uintptr_t)getTempOutIntStore(16) + byteOffset); \
        int value = interesting_ints[valueIdx]; \
        func((Ptr_type)out, value); \
        tostr(&value, valueStr); \
        tostr(out, numBytesWritten/sizeof(int), outStr); \
        printf("%s(p:align=%d, %s) = %s\n", #func, byteOffset, valueStr, outStr); \
      }
730
// Exercises a store func(Ptr_type, int64_t) with every 64-bit value formed
// from two elements of interesting_ints (one as the upper half, one as the
// lower half), at byte offsets stepping by alignmentBytes below
// numBytesWritten into the freshly zeroed 16-byte-aligned scratch buffer.
#define void_OutIntPtr_int64(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int hiIdx = 0; hiIdx < numInterestingInts; ++hiIdx) \
    for(int loIdx = 0; loIdx < numInterestingInts; ++loIdx) \
      for(int byteOffset = 0; byteOffset < numBytesWritten; byteOffset += alignmentBytes) \
      { \
        char valueStr[256]; \
        char outStr[256]; \
        align1_int64 *out = (align1_int64*)((uintptr_t)getTempOutIntStore(16) + byteOffset); \
        int64_t value = (int64_t)(((uint64_t)interesting_ints[hiIdx]) << 32 | (uint64_t)interesting_ints[loIdx]); \
        func((Ptr_type)out, value); \
        tostr(&value, valueStr); \
        tostr(out, numBytesWritten/sizeof(int64_t), outStr); \
        printf("%s(p:align=%d, %s) = %s\n", #func, byteOffset, valueStr, outStr); \
      }
744
// Exercises a store func(__m128i, __m128i, Ptr_type). Both vector operands are
// built from interesting_ints; the target is the freshly zeroed
// 16-byte-aligned scratch buffer plus a byte offset stepping by alignmentBytes
// below numBytesWritten. Prints both operands, the offset and the
// numBytesWritten/sizeof(int) ints found at the target.
// The target address is cast to align1_int* (not int*), matching the other
// store macros, because base + offset need not be int-aligned.
#define void_M128i_M128i_OutIntPtr(func, Ptr_type, numBytesWritten, alignmentBytes) \
  for(int i = 0; i < numInterestingInts / 4; ++i) \
    for(int j = 0; j < numInterestingInts / 4; ++j) \
      for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \
        for(int k = 0; k < 4; ++k) \
        { \
          uintptr_t base = (uintptr_t)getTempOutIntStore(16); \
          __m128i m1 = (__m128i)E1_Int(interesting_ints, i*4+k, numInterestingInts); \
          __m128i m2 = (__m128i)E2_Int(interesting_ints, j*4, numInterestingInts); \
          align1_int *out = (align1_int*)(base + offset); \
          func(m1, m2, (Ptr_type)out); \
          char str[256]; tostr(&m1, str); \
          char str2[256]; tostr(&m2, str2); \
          char str3[256]; tostr(out, numBytesWritten/sizeof(int), str3); \
          printf("%s(%s, %s, p:align=%d) = %s\n", #func, str, str2, offset, str3); \
        }
761
// Exercises func(__m128) over vectors built from interesting_floats and prints
// the operand and the result of type Ret_type.
#define Ret_M128(Ret_type, func) \
  for(int row = 0; row < numInterestingFloats / 4; ++row) \
    for(int shift = 0; shift < 4; ++shift) \
    { \
      char argStr[256]; \
      char resStr[256]; \
      __m128 arg = E1(interesting_floats, row*4+shift, numInterestingFloats); \
      Ret_type res = func(arg); \
      tostr(&arg, argStr); \
      tostr(&res, resStr); \
      printf("%s(%s) = %s\n", #func, argStr, resStr); \
    }
772
// Like Ret_M128, except that the result is formatted with tostr_approx() in
// approximate mode while the operand is still printed exactly.
#define Ret_M128approx(Ret_type, func) \
  for(int row = 0; row < numInterestingFloats / 4; ++row) \
    for(int shift = 0; shift < 4; ++shift) \
    { \
      char argStr[256]; \
      char resStr[256]; \
      __m128 arg = E1(interesting_floats, row*4+shift, numInterestingFloats); \
      Ret_type res = func(arg); \
      tostr(&arg, argStr); \
      tostr_approx(&res, resStr, true/*approximate*/); \
      printf("%s(%s) = %s\n", #func, argStr, resStr); \
    }
783
// Exercises func(float*) on windows of interesting_floats: the window starts
// advance by inc and stop while numElemsAccessed elements still fit. Prints
// the elements of the window and the result of type Ret_type.
#define Ret_FloatPtr(Ret_type, func, numElemsAccessed, inc) \
  for(int start = 0; start+numElemsAccessed <= numInterestingFloats; start += inc) \
  { \
    char inputStr[256]; \
    char resStr[256]; \
    float *input = interesting_floats + start; \
    Ret_type res = func(input); \
    tostr(input, numElemsAccessed, inputStr); \
    tostr(&res, resStr); \
    printf("%s(%s) = %s\n", #func, inputStr, resStr); \
  }
793
// Exercises func(float*, __m128i): windows of interesting_floats combined with
// vectors built from interesting_ints. Only the window elements and the result
// are printed; the __m128i operand does not appear in the output.
#define Ret_FloatPtr_M128i(Ret_type, func, numElemsAccessed, inc) \
  for(int start = 0; start+numElemsAccessed <= numInterestingFloats; start += inc) \
    for(int col = 0; col < numInterestingInts / 4; ++col) \
    { \
      char inputStr[256]; \
      char resStr[256]; \
      float *input = interesting_floats + start; \
      __m128i vec = (__m128i)E1_Int(interesting_ints, col*4, numInterestingInts); \
      Ret_type res = func(input, vec); \
      tostr(input, numElemsAccessed, inputStr); \
      tostr(&res, resStr); \
      printf("%s(%s) = %s\n", #func, inputStr, resStr); \
    }
805
// Ret_Float4(Ret_type, func, inc): drives a four-scalar call
// func(float, float, float, float). Walks interesting_floats in steps of inc,
// passing four consecutive elements starting at the current offset, and
// prints "func(inputs) = result" for each call. Iteration ends when fewer
// than four elements remain.
#define Ret_Float4(Ret_type, func, inc) \
  for(int at = 0; at + 4 <= numInterestingFloats; at += inc) \
  { \
    char inText[256]; \
    char outText[256]; \
    float *quad = &interesting_floats[at]; \
    Ret_type result = func(quad[0], quad[1], quad[2], quad[3]); \
    tostr(quad, 4, inText); \
    tostr(&result, outText); \
    printf("%s(%s) = %s\n", #func, inText, outText); \
  }
815
// Ret_Float(Ret_type, func, inc): drives a single-scalar call func(float).
// Visits interesting_floats in steps of inc, calls func on the element at the
// current offset, and prints "func(input) = result" for each call.
#define Ret_Float(Ret_type, func, inc) \
  for(int at = 0; at + 1 <= numInterestingFloats; at += inc) \
  { \
    char inText[256]; \
    char outText[256]; \
    float *elem = &interesting_floats[at]; \
    Ret_type result = func(elem[0]); \
    tostr(elem, 1, inText); \
    tostr(&result, outText); \
    printf("%s(%s) = %s\n", #func, inText, outText); \
  }
825
// Ret_IntPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc): drives
// func(Ptr_type) where the pointer refers into interesting_ints.
//   Ret_type          type used to hold the value returned by func
//   func              callable invoked with the pointer cast to Ptr_type
//   Ptr_type          pointer type the uint32_t* is cast to before the call
//   numElemsAccessed  number of ints formatted for the log line; the loop
//                     stops once fewer than this many elements remain
//   inc               step between successive start offsets
// numElemsAccessed and inc are parenthesized at every use so that expression
// arguments keep their intended grouping.
#define Ret_IntPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \
  for(int i = 0; i + (numElemsAccessed) <= numInterestingInts; i += (inc)) \
  { \
    uint32_t *ptr = interesting_ints + i; \
    Ret_type ret = func((Ptr_type)ptr); \
    char str[256]; tostr((int*)ptr, (numElemsAccessed), str); \
    char str2[256]; tostr(&ret, str2); \
    printf("%s(%s) = %s\n", #func, str, str2); \
  }
835
// Ret_M128_FloatPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc): drives
// func(__m128, Ptr_type).
//   Ret_type          type used to hold the value returned by func
//   func              callable under test
//   Ptr_type          pointer type the float* into interesting_floats is cast to
//   numElemsAccessed  number of floats formatted for the log line; the inner
//                     loop stops once fewer than this many elements remain
//   inc               step between successive pointer offsets
// The __m128 operand comes from E1(interesting_floats, i*4+k,
// numInterestingFloats). numElemsAccessed and inc are parenthesized at every
// use so that expression arguments keep their intended grouping.
#define Ret_M128_FloatPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \
  for(int i = 0; i < numInterestingFloats / 4; ++i) \
    for(int k = 0; k < 4; ++k) \
      for(int j = 0; j + (numElemsAccessed) <= numInterestingFloats; j += (inc)) \
      { \
        __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \
        float *ptr = interesting_floats + j; \
        Ret_type ret = func(m1, (Ptr_type)ptr); \
        char str[256]; tostr(&m1, str); \
        char str2[256]; tostr(ptr, (numElemsAccessed), str2); \
        char str3[256]; tostr(&ret, str3); \
        printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \
      }
849
// Ret_M128d_DoublePtr(Ret_type, func, Ptr_type, numElemsAccessed, inc): drives
// func(__m128d, Ptr_type).
//   Ret_type          type used to hold the value returned by func
//   func              callable under test
//   Ptr_type          pointer type the double* into interesting_doubles is cast to
//   numElemsAccessed  number of doubles formatted for the log line; the inner
//                     loop stops once fewer than this many elements remain
//   inc               step between successive pointer offsets
// The __m128d operand comes from E1_Double(interesting_doubles, i*2+k,
// numInterestingDoubles). numElemsAccessed and inc are parenthesized at every
// use so that expression arguments keep their intended grouping.
#define Ret_M128d_DoublePtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \
  for(int i = 0; i < numInterestingDoubles / 2; ++i) \
    for(int k = 0; k < 2; ++k) \
      for(int j = 0; j + (numElemsAccessed) <= numInterestingDoubles; j += (inc)) \
      { \
        __m128d m1 = E1_Double(interesting_doubles, i*2+k, numInterestingDoubles); \
        double *ptr = interesting_doubles + j; \
        Ret_type ret = func(m1, (Ptr_type)ptr); \
        char str[256]; tostr(&m1, str); \
        char str2[256]; tostr(ptr, (numElemsAccessed), str2); \
        char str3[256]; tostr(&ret, str3); \
        printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \
      }
863
// Ret_M128i(Ret_type, func): drives a one-operand call func(__m128i).
// For every start index in [0, (numInterestingInts / 4) * 4) the operand is
// obtained from E1_Int(interesting_ints, start, numInterestingInts) and cast
// to __m128i; each call is logged as "func(operand) = result".
#define Ret_M128i(Ret_type, func) \
  for(int start = 0; start < (numInterestingInts / 4) * 4; ++start) \
  { \
    char argText[256]; \
    char retText[256]; \
    __m128i arg = (__m128i)E1_Int(interesting_ints, start, numInterestingInts); \
    Ret_type result = func(arg); \
    tostr(&arg, argText); \
    tostr(&result, retText); \
    printf("%s(%s) = %s\n", #func, argText, retText); \
  }
874
// Ret_M128i_M128i(Ret_type, func): drives a two-operand call
// func(__m128i, __m128i).
// The first operand comes from E1_Int at every index in
// [0, (numInterestingInts / 4) * 4); the second comes from E2_Int at every
// multiple of four below that bound. Each combination is logged as
// "func(lhs, rhs) = result".
#define Ret_M128i_M128i(Ret_type, func) \
  for(int first = 0; first < (numInterestingInts / 4) * 4; ++first) \
    for(int group = 0; group < numInterestingInts / 4; ++group) \
    { \
      char lhsText[256]; \
      char rhsText[256]; \
      char retText[256]; \
      __m128i lhs = (__m128i)E1_Int(interesting_ints, first, numInterestingInts); \
      __m128i rhs = (__m128i)E2_Int(interesting_ints, group*4, numInterestingInts); \
      Ret_type result = func(lhs, rhs); \
      tostr(&lhs, lhsText); \
      tostr(&rhs, rhsText); \
      tostr(&result, retText); \
      printf("%s(%s, %s) = %s\n", #func, lhsText, rhsText, retText); \
    }
888
// Ret_M128i_M128i_M128i(Ret_type, func): drives a three-operand call
// func(__m128i, __m128i, __m128i).
// Operand a comes from E1_Int at every index in
// [0, (numInterestingInts / 4) * 4); operand b from E2_Int and operand c from
// E1_Int, each at every multiple of four below that bound. Every combination
// is logged as "func(a, b, c) = result".
#define Ret_M128i_M128i_M128i(Ret_type, func) \
  for(int first = 0; first < (numInterestingInts / 4) * 4; ++first) \
    for(int second = 0; second < numInterestingInts / 4; ++second) \
      for(int third = 0; third < numInterestingInts / 4; ++third) \
      { \
        char aText[256]; \
        char bText[256]; \
        char cText[256]; \
        char retText[256]; \
        __m128i a = (__m128i)E1_Int(interesting_ints, first, numInterestingInts); \
        __m128i b = (__m128i)E2_Int(interesting_ints, second*4, numInterestingInts); \
        __m128i c = (__m128i)E1_Int(interesting_ints, third*4, numInterestingInts); \
        Ret_type result = func(a, b, c); \
        tostr(&a, aText); \
        tostr(&b, bText); \
        tostr(&c, cText); \
        tostr(&result, retText); \
        printf("%s(%s, %s, %s) = %s\n", #func, aText, bText, cText, retText); \
      }
905
// Ret_int(Ret_type, func): drives a one-operand call func(int-like scalar).
// Calls func on each element of interesting_ints in order and logs
// "func(input) = result"; the input is formatted through an int* view of the
// element.
#define Ret_int(Ret_type, func) \
  for(int at = 0; at < numInterestingInts; ++at) \
  { \
    char inText[256]; \
    char outText[256]; \
    Ret_type result = func(interesting_ints[at]); \
    tostr((int*)(interesting_ints + at), inText); \
    tostr(&result, outText); \
    printf("%s(%s) = %s\n", #func, inText, outText); \
  }
914
// Ret_int64(Ret_type, func): drives a one-operand call func(int64_t).
// Every ordered pair of elements from interesting_ints is packed into one
// 64-bit value (first element in the upper 32 bits, second in the lower 32
// bits), passed to func, and logged as "func(input) = result".
#define Ret_int64(Ret_type, func) \
  for(int hi = 0; hi < numInterestingInts; ++hi) \
    for(int lo = 0; lo < numInterestingInts; ++lo) \
    { \
      char inText[256]; \
      char outText[256]; \
      uint64_t upperBits = ((uint64_t)interesting_ints[hi]) << 32; \
      uint64_t lowerBits = (uint64_t)interesting_ints[lo]; \
      int64_t packed = (int64_t)(upperBits | lowerBits); \
      Ret_type result = func(packed); \
      tostr(&packed, inText); \
      tostr(&result, outText); \
      printf("%s(%s) = %s\n", #func, inText, outText); \
    }
925
// Ret_M128_M128(Ret_type, func): drives a two-operand call
// func(__m128, __m128).
// The first operand comes from E1 at every index in
// [0, (numInterestingFloats / 4) * 4); the second comes from E2 at every
// multiple of four below that bound. Each combination is logged as
// "func(lhs, rhs) = result".
#define Ret_M128_M128(Ret_type, func) \
  for(int first = 0; first < (numInterestingFloats / 4) * 4; ++first) \
    for(int group = 0; group < numInterestingFloats / 4; ++group) \
    { \
      char lhsText[256]; \
      char rhsText[256]; \
      char retText[256]; \
      __m128 lhs = E1(interesting_floats, first, numInterestingFloats); \
      __m128 rhs = E2(interesting_floats, group*4, numInterestingFloats); \
      Ret_type result = func(lhs, rhs); \
      tostr(&lhs, lhsText); \
      tostr(&rhs, rhsText); \
      tostr(&result, retText); \
      printf("%s(%s, %s) = %s\n", #func, lhsText, rhsText, retText); \
    }
939
// Ret_M128_M128_M128(Ret_type, func): drives a three-operand call
// func(__m128, __m128, __m128).
// Operand a comes from E1 at every index in
// [0, (numInterestingFloats / 4) * 4); operand b from E2 and operand c from
// E1, each at every multiple of four below that bound. Every combination is
// logged as "func(a, b, c) = result".
#define Ret_M128_M128_M128(Ret_type, func) \
  for(int first = 0; first < (numInterestingFloats / 4) * 4; ++first) \
    for(int second = 0; second < numInterestingFloats / 4; ++second) \
      for(int third = 0; third < numInterestingFloats / 4; ++third) \
      { \
        char aText[256]; \
        char bText[256]; \
        char cText[256]; \
        char retText[256]; \
        __m128 a = E1(interesting_floats, first, numInterestingFloats); \
        __m128 b = E2(interesting_floats, second*4, numInterestingFloats); \
        __m128 c = E1(interesting_floats, third*4, numInterestingFloats); \
        Ret_type result = func(a, b, c); \
        tostr(&a, aText); \
        tostr(&b, bText); \
        tostr(&c, cText); \
        tostr(&result, retText); \
        printf("%s(%s, %s, %s) = %s\n", #func, aText, bText, cText, retText); \
      }
956
// Ret_M128_int(Ret_type, func): drives a two-operand call func(__m128, int).
// The vector operand comes from E1 at every index in
// [0, (numInterestingFloats / 4) * 4); the scalar operand is each element of
// interesting_ints stored into an int. Each combination is logged as
// "func(vec, scalar) = result".
#define Ret_M128_int(Ret_type, func) \
  for(int first = 0; first < (numInterestingFloats / 4) * 4; ++first) \
    for(int pick = 0; pick < numInterestingInts; ++pick) \
    { \
      char vecText[256]; \
      char scalarText[256]; \
      char retText[256]; \
      __m128 vec = E1(interesting_floats, first, numInterestingFloats); \
      int scalar = interesting_ints[pick]; \
      Ret_type result = func(vec, scalar); \
      tostr(&vec, vecText); \
      tostr(&scalar, scalarText); \
      tostr(&result, retText); \
      printf("%s(%s, %s) = %s\n", #func, vecText, scalarText, retText); \
    }
970