1 /*
2  * Included file to common source float/double checking
3  * The following macros should be defined:
4  *	TYPE	   -- floating point type
5  *	NAME	   -- convert a name to include the type
6  *	UNS_TYPE   -- type to hold TYPE as an unsigned number
7  *	EXP_SIZE   -- size in bits of the exponent
8  *	MAN_SIZE   -- size in bits of the mantissa
9  *	UNS_ABS	   -- absolute value for UNS_TYPE
10  *	FABS	   -- absolute value function for TYPE
11  *	FMAX	   -- maximum function for TYPE
12  *	FMIN	   -- minimum function for TYPE
13  *	SQRT	   -- square root function for TYPE
14  *	RMIN	   -- minimum random number to generate
15  *	RMAX	   -- maximum random number to generate
16  *	ASMDIV	   -- assembler instruction to do divide
17  *	ASMSQRT	   -- assembler instruction to do square root
18  *	BDIV	   -- # of bits of inaccuracy to allow for division
19  *	BRSQRT	   -- # of bits of inaccuracy to allow for 1/sqrt
20  *	INIT_DIV   -- Initial values to test 1/x against
21  *	INIT_RSQRT -- Initial values to test 1/sqrt(x) against
22  */
23 
24 typedef union
25 {
26   UNS_TYPE i;
27   TYPE x;
28 } NAME (union);
29 
30 /*
31  * Input/output arrays.
32  */
33 
34 static NAME (union) NAME (div_input)  [] __attribute__((__aligned__(32))) = INIT_DIV;
35 static NAME (union) NAME (rsqrt_input)[] __attribute__((__aligned__(32))) = INIT_RSQRT;
36 
37 #define DIV_SIZE   (sizeof (NAME (div_input))   / sizeof (TYPE))
38 #define RSQRT_SIZE (sizeof (NAME (rsqrt_input)) / sizeof (TYPE))
39 
40 static TYPE NAME (div_expected)[DIV_SIZE] __attribute__((__aligned__(32)));
41 static TYPE NAME (div_output)  [DIV_SIZE] __attribute__((__aligned__(32)));
42 
43 static TYPE NAME (rsqrt_expected)[RSQRT_SIZE] __attribute__((__aligned__(32)));
44 static TYPE NAME (rsqrt_output)  [RSQRT_SIZE] __attribute__((__aligned__(32)));
45 
46 
47 /*
48  * Crack a floating point number into sign bit, exponent, and mantissa.
49  */
50 
51 static void
NAME(crack)52 NAME (crack) (TYPE number, unsigned int *p_sign, unsigned *p_exponent, UNS_TYPE *p_mantissa)
53 {
54   NAME (union) u;
55   UNS_TYPE bits;
56 
57   u.x = number;
58   bits = u.i;
59 
60   *p_sign = (unsigned int)((bits >> (EXP_SIZE + MAN_SIZE)) & 0x1);
61   *p_exponent = (unsigned int)((bits >> MAN_SIZE) & ((((UNS_TYPE)1) << EXP_SIZE) - 1));
62   *p_mantissa = bits & ((((UNS_TYPE)1) << MAN_SIZE) - 1);
63   return;
64 }
65 
66 
67 /*
68  * Prevent optimizer from eliminating + 0.0 to remove -0.0.
69  */
70 
71 volatile TYPE NAME (math_diff_0) = ((TYPE) 0.0);
72 
73 /*
74  * Return negative if two numbers are significanly different or return the
75  * number of bits that are different in the mantissa.
76  */
77 
78 static int
NAME(math_diff)79 NAME (math_diff) (TYPE a, TYPE b, int bits)
80 {
81   TYPE zero = NAME (math_diff_0);
82   unsigned int sign_a, sign_b;
83   unsigned int exponent_a, exponent_b;
84   UNS_TYPE mantissa_a, mantissa_b, diff;
85   int i;
86 
87   /* eliminate signed zero.  */
88   a += zero;
89   b += zero;
90 
91   /* special case Nan.  */
92   if (__builtin_isnan (a))
93     return (__builtin_isnan (b) ? 0 : -1);
94 
95   if (a == b)
96     return 0;
97 
98   /* special case infinity.  */
99   if (__builtin_isinf (a))
100     return (__builtin_isinf (b) ? 0 : -1);
101 
102   /* punt on denormal numbers.  */
103   if (!__builtin_isnormal (a) || !__builtin_isnormal (b))
104     return -1;
105 
106   NAME (crack) (a, &sign_a, &exponent_a, &mantissa_a);
107   NAME (crack) (b, &sign_b, &exponent_b, &mantissa_b);
108 
109   /* If the sign is different, there is no hope.  */
110   if (sign_a != sign_b)
111     return -1;
112 
113   /* If the exponent is off by 1, see if the values straddle the power of two,
114      and adjust things to do the mantassa check if we can.  */
115   if ((exponent_a == (exponent_b+1)) || (exponent_a == (exponent_b-1)))
116     {
117       TYPE big = FMAX (a, b);
118       TYPE small = FMIN (a, b);
119       TYPE diff = FABS (a - b);
120       unsigned int sign_big, sign_small, sign_test;
121       unsigned int exponent_big, exponent_small, exponent_test;
122       UNS_TYPE mantissa_big, mantissa_small, mantissa_test;
123 
124       NAME (crack) (big, &sign_big, &exponent_big, &mantissa_big);
125       NAME (crack) (small, &sign_small, &exponent_small, &mantissa_small);
126 
127       NAME (crack) (small - diff, &sign_test, &exponent_test, &mantissa_test);
128       if ((sign_test == sign_small) && (exponent_test == exponent_small))
129 	{
130 	  mantissa_a = mantissa_small;
131 	  mantissa_b = mantissa_test;
132 	}
133 
134       else
135 	{
136 	  NAME (crack) (big + diff, &sign_test, &exponent_test, &mantissa_test);
137 	  if ((sign_test == sign_big) && (exponent_test == exponent_big))
138 	    {
139 	      mantissa_a = mantissa_big;
140 	      mantissa_b = mantissa_test;
141 	    }
142 
143 	  else
144 	    return -1;
145 	}
146     }
147 
148   else if (exponent_a != exponent_b)
149     return -1;
150 
151   diff = UNS_ABS (mantissa_a - mantissa_b);
152   for (i = MAN_SIZE; i > 0; i--)
153     {
154       if ((diff & ((UNS_TYPE)1) << (i-1)) != 0)
155 	return i;
156     }
157 
158   return -1;
159 }
160 
161 
162 /*
163  * Turn off inlining to make code inspection easier.
164  */
165 
166 static void NAME (asm_div) (void) __attribute__((__noinline__));
167 static void NAME (vector_div) (void) __attribute__((__noinline__));
168 static void NAME (scalar_div) (void) __attribute__((__noinline__));
169 static void NAME (asm_rsqrt) (void) __attribute__((__noinline__));
170 static void NAME (vector_rsqrt) (void) __attribute__((__noinline__));
171 static void NAME (scalar_rsqrt) (void) __attribute__((__noinline__));
172 static void NAME (check_div) (const char *) __attribute__((__noinline__));
173 static void NAME (check_rsqrt) (const char *) __attribute__((__noinline__));
174 static void NAME (run) (void) __attribute__((__noinline__));
175 
176 
177 /*
178  * Division function that might be vectorized.
179  */
180 
181 static void
NAME(vector_div)182 NAME (vector_div) (void)
183 {
184   size_t i;
185 
186   for (i = 0; i < DIV_SIZE; i++)
187     NAME (div_output)[i] = ((TYPE) 1.0) / NAME (div_input)[i].x;
188 }
189 
190 /*
191  * Division function that is not vectorized.
192  */
193 
194 static void
NAME(scalar_div)195 NAME (scalar_div) (void)
196 {
197   size_t i;
198 
199   for (i = 0; i < DIV_SIZE; i++)
200     {
201       TYPE x = ((TYPE) 1.0) / NAME (div_input)[i].x;
202       TYPE y;
203       __asm__ ("" : "=d" (y) : "0" (x));
204       NAME (div_output)[i] = y;
205     }
206 }
207 
208 /*
209  * Generate the division instruction via asm.
210  */
211 
212 static void
NAME(asm_div)213 NAME (asm_div) (void)
214 {
215   size_t i;
216 
217   for (i = 0; i < DIV_SIZE; i++)
218     {
219       TYPE x;
220       __asm__ (ASMDIV " %0,%1,%2"
221 	       : "=d" (x)
222 	       : "d" ((TYPE) 1.0), "d" (NAME (div_input)[i].x));
223       NAME (div_expected)[i] = x;
224     }
225 }
226 
227 /*
228  * Reciprocal square root function that might be vectorized.
229  */
230 
231 static void
NAME(vector_rsqrt)232 NAME (vector_rsqrt) (void)
233 {
234   size_t i;
235 
236   for (i = 0; i < RSQRT_SIZE; i++)
237     NAME (rsqrt_output)[i] = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x);
238 }
239 
240 /*
241  * Reciprocal square root function that is not vectorized.
242  */
243 
244 static void
NAME(scalar_rsqrt)245 NAME (scalar_rsqrt) (void)
246 {
247   size_t i;
248 
249   for (i = 0; i < RSQRT_SIZE; i++)
250     {
251       TYPE x = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x);
252       TYPE y;
253       __asm__ ("" : "=d" (y) : "0" (x));
254       NAME (rsqrt_output)[i] = y;
255     }
256 }
257 
258 /*
259  * Generate the 1/sqrt instructions via asm.
260  */
261 
262 static void
NAME(asm_rsqrt)263 NAME (asm_rsqrt) (void)
264 {
265   size_t i;
266 
267   for (i = 0; i < RSQRT_SIZE; i++)
268     {
269       TYPE x;
270       TYPE y;
271       __asm__ (ASMSQRT " %0,%1" : "=d" (x) : "d" (NAME (rsqrt_input)[i].x));
272       __asm__ (ASMDIV " %0,%1,%2" : "=d" (y) : "d" ((TYPE) 1.0), "d" (x));
273       NAME (rsqrt_expected)[i] = y;
274     }
275 }
276 
277 
278 /*
279  * Functions to abort or report errors.
280  */
281 
282 static int NAME (error_count) = 0;
283 
284 #ifdef VERBOSE
285 static int NAME (max_bits_div)   = 0;
286 static int NAME (max_bits_rsqrt) = 0;
287 #endif
288 
289 
290 /*
291  * Compare the expected value with the value we got.
292  */
293 
294 static void
NAME(check_div)295 NAME (check_div) (const char *test)
296 {
297   size_t i;
298   int b;
299 
300   for (i = 0; i < DIV_SIZE; i++)
301     {
302       TYPE exp = NAME (div_expected)[i];
303       TYPE out = NAME (div_output)[i];
304       b = NAME (math_diff) (exp, out, BDIV);
305 
306 #ifdef VERBOSE
307       if (b != 0)
308 	{
309 	  NAME (union) u_in = NAME (div_input)[i];
310 	  NAME (union) u_exp;
311 	  NAME (union) u_out;
312 	  char explanation[64];
313 	  const char *p_exp;
314 
315 	  if (b < 0)
316 	    p_exp = "failed";
317 	  else
318 	    {
319 	      p_exp = explanation;
320 	      sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : "");
321 	    }
322 
323 	  u_exp.x = exp;
324 	  u_out.x = out;
325 	  printf ("%s %s %s for 1.0 / %g [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n",
326 		  TNAME (TYPE), test, p_exp,
327 		  (double) u_in.x, (unsigned long long) u_in.i,
328 		  (double) exp,    (unsigned long long) u_exp.i,
329 		  (double) out,    (unsigned long long) u_out.i);
330 	}
331 #endif
332 
333       if (b < 0 || b > BDIV)
334 	NAME (error_count)++;
335 
336 #ifdef VERBOSE
337       if (b > NAME (max_bits_div))
338 	NAME (max_bits_div) = b;
339 #endif
340     }
341 }
342 
343 static void
NAME(check_rsqrt)344 NAME (check_rsqrt) (const char *test)
345 {
346   size_t i;
347   int b;
348 
349   for (i = 0; i < RSQRT_SIZE; i++)
350     {
351       TYPE exp = NAME (rsqrt_expected)[i];
352       TYPE out = NAME (rsqrt_output)[i];
353       b = NAME (math_diff) (exp, out, BRSQRT);
354 
355 #ifdef VERBOSE
356       if (b != 0)
357 	{
358 	  NAME (union) u_in = NAME (rsqrt_input)[i];
359 	  NAME (union) u_exp;
360 	  NAME (union) u_out;
361 	  char explanation[64];
362 	  const char *p_exp;
363 
364 	  if (b < 0)
365 	    p_exp = "failed";
366 	  else
367 	    {
368 	      p_exp = explanation;
369 	      sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : "");
370 	    }
371 
372 	  u_exp.x = exp;
373 	  u_out.x = out;
374 	  printf ("%s %s %s for 1 / sqrt (%g) [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n",
375 		  TNAME (TYPE), test, p_exp,
376 		  (double) u_in.x, (unsigned long long) u_in.i,
377 		  (double) exp,    (unsigned long long) u_exp.i,
378 		  (double) out,    (unsigned long long) u_out.i);
379 	}
380 #endif
381 
382       if (b < 0 || b > BRSQRT)
383 	NAME (error_count)++;
384 
385 #ifdef VERBOSE
386       if (b > NAME (max_bits_rsqrt))
387 	NAME (max_bits_rsqrt) = b;
388 #endif
389     }
390 }
391 
392 
393 /*
394  * Now do everything.
395  */
396 
397 static void
NAME(run)398 NAME (run) (void)
399 {
400 #ifdef VERBOSE
401   printf ("start run_%s, divide size = %ld, rsqrt size = %ld, %d bit%s for a/b, %d bit%s for 1/sqrt(a)\n",
402 	  TNAME (TYPE),
403 	  (long)DIV_SIZE,
404 	  (long)RSQRT_SIZE,
405 	  BDIV, (BDIV == 1) ? "" : "s",
406 	  BRSQRT, (BRSQRT == 1) ? "" : "s");
407 #endif
408 
409   NAME (asm_div) ();
410 
411   NAME (scalar_div) ();
412   NAME (check_div) ("scalar");
413 
414   NAME (vector_div) ();
415   NAME (check_div) ("vector");
416 
417   NAME (asm_rsqrt) ();
418 
419   NAME (scalar_rsqrt) ();
420   NAME (check_rsqrt) ("scalar");
421 
422   NAME (vector_rsqrt) ();
423   NAME (check_rsqrt) ("vector");
424 
425 #ifdef VERBOSE
426   printf ("end run_%s, errors = %d, max div bits = %d, max rsqrt bits = %d\n",
427 	  TNAME (TYPE),
428 	  NAME (error_count),
429 	  NAME (max_bits_div),
430 	  NAME (max_bits_rsqrt));
431 #endif
432 }
433