1 
2 /*  Copyright (C) 2006 Dave Nomura
3        dcnltc@us.ibm.com
4 
5     This program is free software; you can redistribute it and/or
6     modify it under the terms of the GNU General Public License as
7     published by the Free Software Foundation; either version 2 of the
8     License, or (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful, but
11     WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18     02111-1307, USA.
19 
20     The GNU General Public License is contained in the file COPYING.
21 */
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <limits.h>
26 
27 typedef enum { FALSE=0, TRUE } bool_t;
28 
29 typedef enum {
30 	FADDS, FSUBS, FMULS, FDIVS,
31 	FMADDS, FMSUBS, FNMADDS, FNMSUBS,
32 	FADD, FSUB, FMUL, FDIV, FMADD,
33 	FMSUB, FNMADD, FNMSUB, FSQRT
34 } flt_op_t;
35 
36 typedef enum {
37 	TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
39 
40 const char *flt_op_names[] = {
41 	"fadds", "fsubs", "fmuls", "fdivs",
42 	"fmadds", "fmsubs", "fnmadds", "fnmsubs",
43 	"fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
44 	"fnmsub", "fsqrt"
45 };
46 
47 typedef unsigned int fpscr_t;
48 
49 typedef union {
50 	float flt;
51 	struct {
52 #if defined(VGP_ppc64le_linux)
53       unsigned int frac:23;
54       unsigned int exp:8;
55       unsigned int sign:1;
56 #else
57 		unsigned int sign:1;
58 		unsigned int exp:8;
59 		unsigned int frac:23;
60 #endif
61 	} layout;
62 } flt_overlay;
63 
64 typedef union {
65 	double dbl;
66 	struct {
67 #if defined(VGP_ppc64le_linux)
68       unsigned int frac_lo:32;
69       unsigned int frac_hi:20;
70       unsigned int exp:11;
71       unsigned int sign:1;
72 #else
73 		unsigned int sign:1;
74 		unsigned int exp:11;
75 		unsigned int frac_hi:20;
76 		unsigned int frac_lo:32;
77 #endif
78 	} layout;
79 	struct {
80 		unsigned int hi;
81 		unsigned int lo;
82 	} dbl_pair;
83 } dbl_overlay;
84 
85 void assert_fail(const char *msg,
86 	const char* expr, const char* file, int line, const char*fn);
87 
88 #define STRING(__str)  #__str
89 #define assert(msg, expr)                                           \
90   ((void) ((expr) ? 0 :                                         \
91            (assert_fail (msg, STRING(expr),                  \
92                              __FILE__, __LINE__,                \
93                              __PRETTY_FUNCTION__), 0)))
94 float denorm_small;
95 double dbl_denorm_small;
96 float norm_small;
97 bool_t debug = FALSE;
98 bool_t long_is_64_bits = sizeof(long) == 8;
99 
assert_fail(msg,expr,file,line,fn)100 void assert_fail (msg, expr, file, line, fn)
101 const char* msg;
102 const char* expr;
103 const char* file;
104 int line;
105 const char*fn;
106 {
107    printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
108                msg, file, line, fn, expr );
109    exit( 1 );
110 }
set_rounding_mode(round_mode_t mode)111 void set_rounding_mode(round_mode_t mode)
112 {
113 	switch(mode) {
114 	case TO_NEAREST:
115 		asm volatile("mtfsfi 7, 0");
116 		break;
117 	case TO_ZERO:
118 		asm volatile("mtfsfi 7, 1");
119 		break;
120 	case TO_PLUS_INFINITY:
121 		asm volatile("mtfsfi 7, 2");
122 		break;
123 	case TO_MINUS_INFINITY:
124 		asm volatile("mtfsfi 7, 3");
125 		break;
126 	}
127 }
128 
print_double(char * msg,double dbl)129 void print_double(char *msg, double dbl)
130 {
131 	dbl_overlay D;
132 	D.dbl = dbl;
133 
134 	printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
135 			msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
136 			D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
137 }
138 
print_single(char * msg,float * flt)139 void print_single(char *msg, float *flt)
140 {
141 	flt_overlay F;
142 	F.flt = *flt;
143 
144 	/* NOTE: for the purposes of comparing the fraction of a single with
145 	**       a double left shift the .frac so that hex digits are grouped
146 	**	     from left to right.  this is necessary because the size of a
147 	**		 single mantissa (23) bits is not a multiple of 4
148 	*/
149 	printf("%15s : flt %-20a = %c(%4d, %06x)\n",
150 		msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
151 }
152 
check_dbl_to_flt_round(round_mode_t mode,double dbl,float * expected)153 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
154 {
155 	int status = 0;
156 	flt_overlay R, E;
157 	char *result;
158 
159 	set_rounding_mode(mode);
160 
161 	E.flt = *expected;
162 	R.flt = (float)dbl;
163 
164 	if ((R.layout.sign != E.layout.sign) ||
165 		(R.layout.exp != E.layout.exp) ||
166 		(R.layout.frac != E.layout.frac)) {
167 		result = "FAILED";
168 		status = 1;
169 	} else {
170 		result = "PASSED";
171 		status = 0;
172 	}
173 	printf("%s:%s:(double)(%-20a) = %20a",
174 		round_mode_name[mode], result, R.flt, dbl);
175 	if (status) {
176 		print_single("\n\texpected", &E.flt);
177 		print_single("\n\trounded ", &R.flt);
178 	}
179 	putchar('\n');
180 	return status;
181 }
182 
test_dbl_to_float_convert(char * msg,float * base)183 int test_dbl_to_float_convert(char *msg, float *base)
184 {
185 	int status = 0;
186 	double half = (double)denorm_small/2;
187 	double qtr = half/2;
188 	double D_hi = (double)*base + half + qtr;
189 	double D_lo = (double)*base + half - qtr;
190 	float F_lo = *base;
191 	float F_hi = F_lo + denorm_small;
192 
193 
194 	/*
195 	** .....+-----+-----+-----+-----+---....
196 	**      ^F_lo ^           ^     ^
197 	**            D_lo
198 	**                        D_hi
199 	**                              F_hi
200 	** F_lo and F_hi are two consecutive single float model numbers
201 	** denorm_small distance apart. D_lo and D_hi are two numbers
202 	** within that range that are not representable as single floats
203 	** and will be rounded to either F_lo or F_hi.
204 	*/
205 	printf("-------------------------- %s --------------------------\n", msg);
206 	if (debug) {
207 		print_double("D_lo", D_lo);
208 		print_double("D_hi", D_hi);
209 		print_single("F_lo", &F_lo);
210 		print_single("F_hi", &F_hi);
211 	}
212 
213 	/* round to nearest */
214 	status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
215 	status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
216 
217 	/* round to zero */
218 	status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
219 	status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
220 
221 	/* round to +inf */
222 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
223 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
224 
225 	/* round to -inf */
226 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
227 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
228 	return status;
229 }
230 
231 void
init()232 init()
233 {
234 	flt_overlay F;
235 	dbl_overlay D;
236 
237 	/* small is the smallest denormalized single float number */
238 	F.layout.sign = 0;
239 	F.layout.exp = 0;
240 	F.layout.frac = 1;
241 	denorm_small = F.flt;	/* == 2^(-149) */
242 	if (debug) {
243 		print_single("float small", &F.flt);
244 	}
245 
246 	D.layout.sign = 0;
247 	D.layout.exp = 0;
248 	D.layout.frac_hi = 0;
249 	D.layout.frac_lo = 1;
250 	dbl_denorm_small = D.dbl;	/* == 2^(-1022) */
251 	if (debug) {
252 		print_double("double small", D.dbl);
253 	}
254 
255 	/* n_small is the smallest normalized single precision float */
256 	F.layout.exp = 1;
257 	norm_small = F.flt;
258 }
259 
check_int_to_flt_round(round_mode_t mode,long L,float * expected)260 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
261 {
262 	int status = 0;
263 	int I = L;
264 	char *int_name = "int";
265 	flt_overlay R, E;
266 	char *result;
267 	int iter;
268 
269 	set_rounding_mode(mode);
270 	E.flt = *expected;
271 
272 	for (iter = 0; iter < 2; iter++) {
273 		int stat = 0;
274 		R.flt = (iter == 0 ? (float)I : (float)L);
275 
276 		if ((R.layout.sign != E.layout.sign) ||
277 			(R.layout.exp != E.layout.exp) ||
278 			(R.layout.frac != E.layout.frac)) {
279 			result = "FAILED";
280 			stat = 1;
281 		} else {
282 			result = "PASSED";
283 			stat = 0;
284 		}
285 		printf("%s:%s:(float)(%4s)%9d = %11.1f",
286 			round_mode_name[mode], result, int_name, I, R.flt);
287 		if (stat) {
288 			print_single("\n\texpected: %.1f ", &E.flt);
289 			print_single("\n\trounded ", &R.flt);
290 		}
291 		putchar('\n');
292 		status |= stat;
293 
294 		if (!long_is_64_bits) break;
295 		int_name = "long";
296 	}
297 	return status;
298 }
299 
check_long_to_dbl_round(round_mode_t mode,long L,double * expected)300 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
301 {
302 	int status = 0;
303 	dbl_overlay R, E;
304 	char *result;
305 
306 	set_rounding_mode(mode);
307 	E.dbl = *expected;
308 
309 	R.dbl = (double)L;
310 
311 	if ((R.layout.sign != E.layout.sign) ||
312 		(R.layout.exp != E.layout.exp) ||
313 		(R.layout.frac_lo != E.layout.frac_lo) ||
314 		(R.layout.frac_hi != E.layout.frac_hi)) {
315 		result = "FAILED";
316 		status = 1;
317 	} else {
318 		result = "PASSED";
319 		status = 0;
320 	}
321 	printf("%s:%s:(double)(%18ld) = %20.1f",
322 		round_mode_name[mode], result, L, R.dbl);
323 	if (status) {
324 		printf("\n\texpected %.1f : ", E.dbl);
325 	}
326 	putchar('\n');
327 	return status;
328 }
329 
test_int_to_float_convert(char * msg)330 int test_int_to_float_convert(char *msg)
331 {
332 	int status = 0;
333 	int int24_hi = 0x03ff0fff;
334 	int int24_lo = 0x03ff0ffd;
335 	float pos_flt_lo = 67047420.0;
336 	float pos_flt_hi = 67047424.0;
337 	float neg_flt_lo = -67047420.0;
338 	float neg_flt_hi = -67047424.0;
339 
340 	printf("-------------------------- %s --------------------------\n", msg);
341 	status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
342 	status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
343 	status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
344 	status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
345 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
346 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
347 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
348 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
349 
350 	status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
351 	status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
352 	status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
353 	status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
354 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
355 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
356 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
357 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
358 	return status;
359 }
360 
361 #ifdef __powerpc64__
test_long_to_double_convert(char * msg)362 int test_long_to_double_convert(char *msg)
363 {
364 	int status = 0;
365 	long long55_hi = 0x07ff0ffffffffff;
366 	long long55_lo = 0x07ff0fffffffffd;
367 	double pos_dbl_lo = 36012304344547324.0;
368 	double pos_dbl_hi = 36012304344547328.0;
369 	double neg_dbl_lo = -36012304344547324.0;
370 	double neg_dbl_hi = -36012304344547328.0;
371 
372 	printf("-------------------------- %s --------------------------\n", msg);
373 	status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
374 	status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
375 	status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
376 	status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
377 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
378 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
379 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
380 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
381 
382 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
383 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
384 	status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
385 	status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
386 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
387 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
388 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
389 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
390 	return status;
391 }
392 #endif
393 
check_single_arithmetic_op(flt_op_t op)394 int check_single_arithmetic_op(flt_op_t op)
395 {
396 		char *result;
397         int status = 0;
398         dbl_overlay R, E;
399         double qtr, half, fA, fB, fD;
400 		round_mode_t mode;
401 		int q, s;
402 		bool_t two_args = TRUE;
403 		float whole = denorm_small;
404 
405 #define BINOP(op) \
406         __asm__ volatile( \
407 					op" %0, %1, %2\n\t" \
408 					: "=f"(fD) : "f"(fA) , "f"(fB));
409 #define UNOP(op) \
410         __asm__ volatile( \
411 					op" %0, %1\n\t" \
412 					: "=f"(fD) : "f"(fA));
413 
414 		half = (double)whole/2;
415 		qtr = half/2;
416 
417 		if (debug) {
418 			print_double("qtr", qtr);
419 			print_double("whole", whole);
420 			print_double("2*whole", 2*whole);
421 		}
422 
423 		for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
424 		for (s = -1; s < 2; s += 2)
425 		for (q = 1; q < 4; q += 2) {
426 			double expected;
427 			double lo = s*whole;
428 			double hi = s*2*whole;
429 
430 			switch(op) {
431 			case FADDS:
432 				fA = s*whole;
433 				fB = s*q*qtr;
434 				break;
435 			case FSUBS:
436 				fA = s*2*whole;
437 				fB = s*(q == 1 ? 3 : 1)*qtr;
438 				break;
439 			case FMULS:
440 				fA = 0.5;
441 				fB = s*(4+q)*half;
442 				break;
443 			case FDIVS:
444 				fA = s*(4+q)*half;
445 				fB = 2.0;
446 				break;
447 			default:
448 				assert("check_single_arithmetic_op: unexpected op",
449 					FALSE);
450 				break;
451 			}
452 
453 			switch(mode) {
454 			case TO_NEAREST:
455 				expected = (q == 1 ? lo : hi);
456 				break;
457 			case TO_ZERO:
458 				expected = lo;
459 				break;
460 			case TO_PLUS_INFINITY:
461 				expected = (s == 1 ? hi : lo);
462 				break;
463 			case TO_MINUS_INFINITY:
464 				expected = (s == 1 ? lo : hi);
465 				break;
466 			}
467 
468 			set_rounding_mode(mode);
469 
470 			/*
471 			** do the double precision dual operation just for comparison
472 			** when debugging
473 			*/
474 			switch(op) {
475 			case FADDS:
476 				BINOP("fadds");
477 				R.dbl = fD;
478 				BINOP("fadd");
479 				break;
480 			case FSUBS:
481 				BINOP("fsubs");
482 				R.dbl = fD;
483 				BINOP("fsub");
484 				break;
485 			case FMULS:
486 				BINOP("fmuls");
487 				R.dbl = fD;
488 				BINOP("fmul");
489 				break;
490 			case FDIVS:
491 				BINOP("fdivs");
492 				R.dbl = fD;
493 				BINOP("fdiv");
494 				break;
495 			default:
496 				assert("check_single_arithmetic_op: unexpected op",
497 					FALSE);
498 				break;
499 			}
500 #undef UNOP
501 #undef BINOP
502 
503 			E.dbl = expected;
504 
505 			if ((R.layout.sign != E.layout.sign) ||
506 				(R.layout.exp != E.layout.exp) ||
507 				(R.layout.frac_lo != E.layout.frac_lo) ||
508 				(R.layout.frac_hi != E.layout.frac_hi)) {
509 				result = "FAILED";
510 				status = 1;
511 			} else {
512 				result = "PASSED";
513 				status = 0;
514 			}
515 
516 			printf("%s:%s:%s(%-13a",
517 				round_mode_name[mode], result, flt_op_names[op], fA);
518 			if (two_args) printf(", %-13a", fB);
519 			printf(") = %-13a", R.dbl);
520 			if (status) printf("\n\texpected %a", E.dbl);
521 			putchar('\n');
522 
523 			if (debug) {
524 				print_double("hi", hi);
525 				print_double("lo", lo);
526 				print_double("expected", expected);
527 				print_double("got", R.dbl);
528 				print_double("double result", fD);
529 			}
530 		}
531 
532 		return status;
533 }
534 
check_single_guarded_arithmetic_op(flt_op_t op)535 int check_single_guarded_arithmetic_op(flt_op_t op)
536 {
537 		typedef struct {
538 			int num, den, frac;
539 		} fdivs_t;
540 
541 		char *result;
542         int status = 0;
543         flt_overlay A, B, Z;
544         dbl_overlay Res, Exp;
545         double fA, fB, fC, fD;
546 		round_mode_t mode;
547 		int g, s;
548 		int arg_count;
549 
550 		fdivs_t divs_guard_cases[16] = {
551 			{ 105, 56, 0x700000 },  /* : 0 */
552 			{ 100, 57, 0x608FB8 },  /* : 1 */
553 			{ 000, 00, 0x000000 },  /* : X */
554 			{ 100, 52, 0x762762 },  /* : 3 */
555 			{ 000, 00, 0x000000 },  /* : X */
556 			{ 100, 55, 0x68BA2E },  /* : 5 */
557 			{ 000, 00, 0x000000 },  /* : X */
558 			{ 100, 51, 0x7AFAFA },  /* : 7 */
559 			{ 000, 00, 0x000000 },  /* : X */
560 			{ 100, 56, 0x649249 },  /* : 9 */
561 			{ 000, 00, 0x000000 },  /* : X */
562 			{ 100, 54, 0x6D097B },  /* : B */
563 			{ 000, 00, 0x000000 },  /* : X */
564 			{ 100, 59, 0x58F2FB },  /* : D */
565 			{ 000, 00, 0x000000 },  /* : X */
566 			{ 101, 52, 0x789D89 }  /* : F */
567 		};
568 
569 		/*	0x1.00000 00000000p-3 */
570 		/* set up the invariant fields of B, the arg to cause rounding */
571 		B.flt = 0.0;
572 		B.layout.exp = 124;  /* -3 */
573 
574 		/* set up args so result is always Z = 1.200000000000<g>p+0 */
575 		Z.flt = 1.0;
576 		Z.layout.sign = 0;
577 
578 #define TERNOP(op) \
579 		arg_count = 3; \
580         __asm__ volatile( \
581 					op" %0, %1, %2, %3\n\t" \
582 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
583 #define BINOP(op) \
584 		arg_count = 2; \
585         __asm__ volatile( \
586 					op" %0, %1, %2\n\t" \
587 					: "=f"(fD) : "f"(fA) , "f"(fB));
588 #define UNOP(op) \
589 		arg_count = 1; \
590         __asm__ volatile( \
591 					op" %0, %1\n\t" \
592 					: "=f"(fD) : "f"(fA));
593 
594 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
595 	for (s = -1; s < 2; s += 2)
596 	for (g = 0; g < 16; g += 1) {
597 		double lo, hi, expected;
598 		int LSB;
599 		int guard = 0;
600 		int z_sign = s;
601 
602 		/*
603 		** one argument will have exponent = 0 as will the result (by
604 		** design) so choose the other argument with exponent -3 to
605 		** force a 3 bit shift for scaling leaving us with 3 guard bits
606 		** and the LSB bit at the bottom of the manitssa.
607 		*/
608 		switch(op) {
609 		case FADDS:
610 			/* 1p+0 + 1.00000<g>p-3 */
611 			B.layout.frac = g;
612 
613 			fB = s*B.flt;
614 			fA = s*1.0;
615 
616 			/* set up Z to be truncated result */
617 
618 			/* mask off LSB from resulting guard bits */
619 			guard = g & 7;
620 
621 			Z.layout.frac = 0x100000 | (g >> 3);
622 			break;
623 		case FSUBS:
624 			/* 1.200002p+0 - 1.000000000000<g>p-3 */
625 			A.flt = 1.125;
626 			/* add enough to avoid scaling of the result */
627 			A.layout.frac |= 0x2;
628 			fA = s*A.flt;
629 
630 			B.layout.frac = g;
631 			fB = s*B.flt;
632 
633 			/* set up Z to be truncated result */
634 			guard = (0x10-g);
635 			Z.layout.frac = guard>>3;
636 
637 			/* mask off LSB from resulting guard bits */
638 			guard &= 7;
639 			break;
640 		case FMULS:
641 			/* 1 + g*2^-23 */
642 			A.flt = 1.0;
643 			A.layout.frac = g;
644 			fA = s*A.flt;
645 			fB = 1.125;
646 
647 			/* set up Z to be truncated result */
648 			Z.flt = 1.0;
649 			Z.layout.frac = 0x100000;
650 			Z.layout.frac |= g + (g>>3);
651 			guard = g & 7;
652 			break;
653 		case FDIVS:
654 			/* g >> 3 == LSB, g & 7 == guard bits */
655 			guard = g & 7;
656 			if ((guard & 1) == 0) {
657 				/* special case: guard bit X = 0 */
658 				A.flt = denorm_small;
659 				A.layout.frac = g;
660 				fA = A.flt;
661 				fB = s*8.0;
662 				Z.flt = 0.0;
663 				Z.layout.frac |= (g >> 3);
664 			} else {
665 				fA = s*divs_guard_cases[g].num;
666 				fB = divs_guard_cases[g].den;
667 
668 				Z.flt = 1.0;
669 				Z.layout.frac = divs_guard_cases[g].frac;
670 			}
671 			break;
672 		case FMADDS:
673 		case FMSUBS:
674 		case FNMADDS:
675 		case FNMSUBS:
676 			/* 1 + g*2^-23 */
677 			A.flt = 1.0;
678 			A.layout.frac = g;
679 			fA = s*A.flt;
680 			fB = 1.125;
681 
682 			/* 1.000001p-1 */
683 			A.flt = 0.5;
684 			A.layout.frac = 1;
685 			fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
686 
687 			/* set up Z to be truncated result */
688 			z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
689 			guard = ((g & 7) + 0x4) & 7;
690 			Z.flt = 1.0;
691 			Z.layout.frac = 0x500000;
692 			Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
693 			break;
694 		default:
695 			assert("check_single_arithmetic_op: unexpected op",
696 				FALSE);
697 			break;
698 		}
699 
700 		/* get LSB for tie breaking */
701 		LSB = Z.layout.frac & 1;
702 
703 		/* set up hi and lo */
704 		lo = z_sign*Z.flt;
705 		Z.layout.frac += 1;
706 		hi = z_sign*Z.flt;
707 
708 		switch(mode) {
709 		case TO_NEAREST:
710 			/* look at 3 guard bits to determine expected rounding */
711 			switch(guard) {
712 			case 0:
713 			case 1: case 2: case 3:
714 				expected = lo;
715 				break;
716 			case 4:	/* tie: round to even */
717 				if (debug) printf("tie: LSB = %d\n", LSB);
718 				expected = (LSB == 0 ? lo : hi);
719 				break;
720 			case 5: case 6: case 7:
721 				expected = hi;
722 				break;
723 			default:
724 				assert("check_single_guarded_arithmetic_op: unexpected guard",
725 					FALSE);
726 			}
727 			break;
728 		case TO_ZERO:
729 			expected = lo;
730 			break;
731 		case TO_PLUS_INFINITY:
732 			if (guard == 0) {
733 				/* no rounding */
734 				expected = lo;
735 			} else {
736 				expected = (s == 1 ? hi : lo);
737 			}
738 			break;
739 		case TO_MINUS_INFINITY:
740 			if (guard == 0) {
741 				/* no rounding */
742 				expected = lo;
743 			} else {
744 				expected = (s == 1 ? lo : hi);
745 			}
746 			break;
747 		}
748 
749 		set_rounding_mode(mode);
750 
751 		/*
752 		** do the double precision dual operation just for comparison
753 		** when debugging
754 		*/
755 		switch(op) {
756 		case FADDS:
757 			BINOP("fadds");
758 			Res.dbl = fD;
759 			break;
760 		case FSUBS:
761 			BINOP("fsubs");
762 			Res.dbl = fD;
763 			break;
764 		case FMULS:
765 			BINOP("fmuls");
766 			Res.dbl = fD;
767 			break;
768 		case FDIVS:
769 			BINOP("fdivs");
770 			Res.dbl = fD;
771 			break;
772 		case FMADDS:
773 			TERNOP("fmadds");
774 			Res.dbl = fD;
775 			break;
776 		case FMSUBS:
777 			TERNOP("fmsubs");
778 			Res.dbl = fD;
779 			break;
780 		case FNMADDS:
781 			TERNOP("fnmadds");
782 			Res.dbl = fD;
783 			break;
784 		case FNMSUBS:
785 			TERNOP("fnmsubs");
786 			Res.dbl = fD;
787 			break;
788 		default:
789 			assert("check_single_guarded_arithmetic_op: unexpected op",
790 				FALSE);
791 			break;
792 		}
793 #undef UNOP
794 #undef BINOP
795 #undef TERNOP
796 
797 		Exp.dbl = expected;
798 
799 		if ((Res.layout.sign != Exp.layout.sign) ||
800 			(Res.layout.exp != Exp.layout.exp) ||
801 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
802 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
803 			result = "FAILED";
804 			status = 1;
805 		} else {
806 			result = "PASSED";
807 			status = 0;
808 		}
809 
810 		/* There seems to be some noise in the lower bits. The value
811 		* on the least significant digit seems to vary when printing
812 		* based on the rounding mode of the compiler.  Just trying
813 		* to get rid of the noise in the least significant bits when
814 		* printing the operand.
815 		*/
816 
817 		fA = ((long int)(fA*10000))/10000.0;
818 		/* Change -0.0 to a positive 0.0.  Some compilers print -0.0
819 		 * others do not.  Make it consistent.
820 		 */
821 		if (fA == -0.0)
822 		  fA = 0.0;
823 
824 		printf("%s:%s:%s(%-13.6f",
825 			round_mode_name[mode], result, flt_op_names[op], fA);
826 		if (arg_count > 1) printf(", %-13a", fB);
827 		if (arg_count > 2) printf(", %-13a", fC);
828 		printf(") = %-13a", Res.dbl);
829 		if (status) printf("\n\texpected %a", Exp.dbl);
830 		putchar('\n');
831 
832 		if (debug) {
833 			print_double("hi", hi);
834 			print_double("lo", lo);
835 			print_double("expected", expected);
836 			print_double("got", Res.dbl);
837 		}
838 	}
839 
840 	return status;
841 }
842 
check_double_guarded_arithmetic_op(flt_op_t op)843 int check_double_guarded_arithmetic_op(flt_op_t op)
844 {
845 	typedef struct {
846 		int num, den, hi, lo;
847 	} fdiv_t;
848 	typedef struct {
849 		double arg;
850 		int exp, hi, lo;
851 	} fsqrt_t;
852 
853 	char *result;
854 	int status = 0;
855 	dbl_overlay A, B, Z;
856 	dbl_overlay Res, Exp;
857 	double fA, fB, fC, fD;
858 	round_mode_t mode;
859 	int g, s;
860 	int arg_count;
861 	fdiv_t div_guard_cases[16] = {
862 		{ 62, 62, 0x00000, 0x00000000 },	/* 0 */
863 		{ 64, 62, 0x08421, 0x08421084 },	/* 1 */
864 		{ 66, 62, 0x10842, 0x10842108 },	/* 2 */
865 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* 3 */
866 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* X */
867 		{ 102, 62, 0xa5294, 0xa5294a52 },	/* 5 */
868 		{ 106, 62, 0xb5ad6, 0xb5ad6b5a },	/* 6 */
869 		{ 108, 62, 0xbdef7, 0xbdef7bde },	/* 7 */
870 		{ 108, 108, 0x00000, 0x00000000 },	/* 8 */
871 		{ 112, 62, 0xce739, 0xce739ce7 },	/* 9 */
872 		{ 114, 62, 0xd6b5a, 0xd6b5ad6b },	/* A */
873 		{ 116, 62, 0xdef7b, 0xdef7bdef },	/* B */
874 		{ 84, 62, 0x5ad6b, 0x5ad6b5ad },	/* X */
875 		{ 118, 62, 0xe739c, 0xe739ce73 },	/* D */
876 		{ 90, 62, 0x739ce, 0x739ce739 },	/* E */
877 		{ 92, 62, 0x7bdef, 0x7bdef7bd }		/* F */
878 	};
879 
880 
881 	fsqrt_t sqrt_guard_cases[16] = {
882 		{ 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
883 		{ 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
884 		{ 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
885 		{ 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
886 		{ 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
887 		{ 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
888 		{ 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
889 		{ 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
890 		{ 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
891 		{ 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
892 		{ 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
893 		{ 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
894 		{ 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
895 		{ 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
896 		{ 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
897 		{ 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
898 	};
899 
900 	/*	0x1.00000 00000000p-3 */
901 	/* set up the invariant fields of B, the arg to cause rounding */
902 	B.dbl = 0.0;
903 	B.layout.exp = 1020;
904 
905 	/* set up args so result is always Z = 1.200000000000<g>p+0 */
906 	Z.dbl = 1.0;
907 	Z.layout.sign = 0;
908 
909 #define TERNOP(op) \
910 		arg_count = 3; \
911         __asm__ volatile( \
912 					op" %0, %1, %2, %3\n\t" \
913 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
914 #define BINOP(op) \
915 		arg_count = 2; \
916         __asm__ volatile( \
917 					op" %0, %1, %2\n\t" \
918 					: "=f"(fD) : "f"(fA) , "f"(fB));
919 #define UNOP(op) \
920 		arg_count = 1; \
921         __asm__ volatile( \
922 					op" %0, %1\n\t" \
923 					: "=f"(fD) : "f"(fA));
924 
925 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
926 	for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
927 	for (g = 0; g < 16; g += 1) {
928 		double lo, hi, expected;
929 		int LSB;
930 		int guard;
931 		int z_sign = s;
932 
933 		/*
934 		** one argument will have exponent = 0 as will the result (by
935 		** design) so choose the other argument with exponent -3 to
936 		** force a 3 bit shift for scaling leaving us with 3 guard bits
937 		** and the LSB bit at the bottom of the manitssa.
938 		*/
939 		switch(op) {
940 		case FADD:
941 			/* 1p+0 + 1.000000000000<g>p-3 */
942 			B.layout.frac_lo = g;
943 
944 			fB = s*B.dbl;
945 			fA = s*1.0;
946 
947 			/* set up Z to be truncated result */
948 
949 			/* mask off LSB from resulting guard bits */
950 			guard = g & 7;
951 
952 			Z.layout.frac_hi = 0x20000;
953 			Z.layout.frac_lo = g >> 3;
954 
955 			break;
956 		case FSUB:
957 			/* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
958 			A.dbl = 1.125;
959 			/* add enough to avoid scaling of the result */
960 			A.layout.frac_lo = 0x2;
961 			fA = s*A.dbl;
962 
963 			B.layout.frac_lo = g;
964 			fB = s*B.dbl;
965 
966 			/* set up Z to be truncated result */
967 			guard = (0x10-g);
968 			Z.layout.frac_hi = 0x0;
969 			Z.layout.frac_lo = guard>>3;
970 
971 			/* mask off LSB from resulting guard bits */
972 			guard &= 7;
973 			break;
974 		case FMUL:
975 			/* 1 + g*2^-52 */
976 			A.dbl = 1.0;
977 			A.layout.frac_lo = g;
978 			fA = s*A.dbl;
979 			fB = 1.125;
980 
981 			/* set up Z to be truncated result */
982 			Z.dbl = 1.0;
983 			Z.layout.frac_hi = 0x20000;
984 			Z.layout.frac_lo = g + (g>>3);
985 			guard = g & 7;
986 			break;
987 		case FMADD:
988 		case FMSUB:
989 		case FNMADD:
990 		case FNMSUB:
991 			/* 1 + g*2^-52 */
992 			A.dbl = 1.0;
993 			A.layout.frac_lo = g;
994 			fA = s*A.dbl;
995 			fB = 1.125;
996 
997 			/* 1.0000000000001p-1 */
998 			A.dbl = 0.5;
999 			A.layout.frac_lo = 1;
1000 			fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
1001 
1002 			/* set up Z to be truncated result */
1003 			z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
1004 			guard = ((g & 7) + 0x4) & 7;
1005 			Z.dbl = 1.0;
1006 			Z.layout.frac_hi = 0xa0000;
1007 			Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
1008 			break;
1009 		case FDIV:
1010 			/* g >> 3 == LSB, g & 7 == guard bits */
1011 			guard = g & 7;
1012 			if (guard == 0x4) {
1013 				/* special case guard bits == 4, inexact tie */
1014 				fB = s*2.0;
1015 				Z.dbl = 0.0;
1016 				if (g >> 3) {
1017 					fA = dbl_denorm_small + 2*dbl_denorm_small;
1018 					Z.layout.frac_lo = 0x1;
1019 				} else {
1020 					fA = dbl_denorm_small;
1021 				}
1022 			} else {
1023 				fA = s*div_guard_cases[g].num;
1024 				fB = div_guard_cases[g].den;
1025 
1026 				printf("%d/%d\n",
1027 					s*div_guard_cases[g].num,
1028 					div_guard_cases[g].den);
1029 				Z.dbl = 1.0;
1030 				Z.layout.frac_hi = div_guard_cases[g].hi;
1031 				Z.layout.frac_lo = div_guard_cases[g].lo;
1032 			}
1033 			break;
1034 		case FSQRT:
1035 			fA = s*sqrt_guard_cases[g].arg;
1036 			Z.dbl = 1.0;
1037 			Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1038 			Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1039 			Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1040 			guard = g >> 1;
1041 			if (g & 1) guard |= 1;
1042 			/* don't have test cases for when X bit = 0 */
1043 			if (guard == 0 || guard == 4) continue;
1044 			break;
1045 		default:
1046 			assert("check_double_guarded_arithmetic_op: unexpected op",
1047 				FALSE);
1048 			break;
1049 		}
1050 
1051 		/* get LSB for tie breaking */
1052 		LSB = Z.layout.frac_lo & 1;
1053 
1054 		/* set up hi and lo */
1055 		lo = z_sign*Z.dbl;
1056 		Z.layout.frac_lo += 1;
1057 		hi = z_sign*Z.dbl;
1058 
1059 		switch(mode) {
1060 		case TO_NEAREST:
1061 			/* look at 3 guard bits to determine expected rounding */
1062 			switch(guard) {
1063 			case 0:
1064 			case 1: case 2: case 3:
1065 				expected = lo;
1066 				break;
1067 			case 4:	/* tie: round to even */
1068 				if (debug) printf("tie: LSB = %d\n", LSB);
1069 				expected = (LSB == 0 ? lo : hi);
1070 				break;
1071 			case 5: case 6: case 7:
1072 				expected = hi;
1073 				break;
1074 			default:
1075 				assert("check_double_guarded_arithmetic_op: unexpected guard",
1076 					FALSE);
1077 			}
1078 			break;
1079 		case TO_ZERO:
1080 			expected = lo;
1081 			break;
1082 		case TO_PLUS_INFINITY:
1083 			if (guard == 0) {
1084 				/* no rounding */
1085 				expected = lo;
1086 			} else {
1087 				expected = (s == 1 ? hi : lo);
1088 			}
1089 			break;
1090 		case TO_MINUS_INFINITY:
1091 			if (guard == 0) {
1092 				/* no rounding */
1093 				expected = lo;
1094 			} else {
1095 				expected = (s == 1 ? lo : hi);
1096 			}
1097 			break;
1098 		}
1099 
1100 		set_rounding_mode(mode);
1101 
1102 		/*
1103 		** do the double precision dual operation just for comparison
1104 		** when debugging
1105 		*/
1106 		switch(op) {
1107 		case FADD:
1108 			BINOP("fadd");
1109 			Res.dbl = fD;
1110 			break;
1111 		case FSUB:
1112 			BINOP("fsub");
1113 			Res.dbl = fD;
1114 			break;
1115 		case FMUL:
1116 			BINOP("fmul");
1117 			Res.dbl = fD;
1118 			break;
1119 		case FMADD:
1120 			TERNOP("fmadd");
1121 			Res.dbl = fD;
1122 			break;
1123 		case FMSUB:
1124 			TERNOP("fmsub");
1125 			Res.dbl = fD;
1126 			break;
1127 		case FNMADD:
1128 			TERNOP("fnmadd");
1129 			Res.dbl = fD;
1130 			break;
1131 		case FNMSUB:
1132 			TERNOP("fnmsub");
1133 			Res.dbl = fD;
1134 			break;
1135 		case FDIV:
1136 			BINOP("fdiv");
1137 			Res.dbl = fD;
1138 			break;
1139 		case FSQRT:
1140 			UNOP("fsqrt");
1141 			Res.dbl = fD;
1142 			break;
1143 		default:
1144 			assert("check_double_guarded_arithmetic_op: unexpected op",
1145 				FALSE);
1146 			break;
1147 		}
1148 #undef UNOP
1149 #undef BINOP
1150 #undef TERNOP
1151 
1152 		Exp.dbl = expected;
1153 
1154 		if ((Res.layout.sign != Exp.layout.sign) ||
1155 			(Res.layout.exp != Exp.layout.exp) ||
1156 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
1157 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
1158 			result = "FAILED";
1159 			status = 1;
1160 		} else {
1161 			result = "PASSED";
1162 			status = 0;
1163 		}
1164 
1165 		printf("%s:%s:%s(%-13a",
1166 			round_mode_name[mode], result, flt_op_names[op], fA);
1167 		if (arg_count > 1) printf(", %-13a", fB);
1168 		if (arg_count > 2) printf(", %-13a", fC);
1169 		printf(") = %-13a", Res.dbl);
1170 		if (status) printf("\n\texpected %a", Exp.dbl);
1171 		putchar('\n');
1172 
1173 		if (debug) {
1174 			print_double("hi", hi);
1175 			print_double("lo", lo);
1176 			print_double("expected", expected);
1177 			print_double("got", Res.dbl);
1178 		}
1179 	}
1180 
1181 	return status;
1182 }
1183 
test_float_arithmetic_ops()1184 int test_float_arithmetic_ops()
1185 {
1186 	int status = 0;
1187 	flt_op_t op;
1188 
1189 	/*
1190 	** choose FP operands whose result should be rounded to either
1191 	** lo or hi.
1192 	*/
1193 
1194 	printf("-------------------------- %s --------------------------\n",
1195 		"test rounding of float operators without guard bits");
1196 	for (op = FADDS; op <= FDIVS; op++) {
1197 		status |= check_single_arithmetic_op(op);
1198 	}
1199 
1200 	printf("-------------------------- %s --------------------------\n",
1201 		"test rounding of float operators with guard bits");
1202 	for (op = FADDS; op <= FNMSUBS; op++) {
1203 		status |= check_single_guarded_arithmetic_op(op);
1204 	}
1205 
1206 	printf("-------------------------- %s --------------------------\n",
1207 		"test rounding of double operators with guard bits");
1208 	for (op = FADD; op <= FSQRT; op++) {
1209 		status |= check_double_guarded_arithmetic_op(op);
1210 	}
1211 	return status;
1212 }
1213 
1214 
1215 int
main()1216 main()
1217 {
1218 	int status = 0;
1219 
1220 	init();
1221 
1222 	status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1223 	status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1224 	status |= test_int_to_float_convert("test (float)int convert");
1225 	status |= test_int_to_float_convert("test (float)int convert");
1226 
1227 #ifdef __powerpc64__
1228 	status |= test_long_to_double_convert("test (double)long convert");
1229 #endif
1230 	status |= test_float_arithmetic_ops();
1231 	return status;
1232 }
1233