1 /* Copyright (C) 2011 IBM
2
3 Author: Maynard Johnson <maynardj@us.ibm.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307, USA.
19
20 The GNU General Public License is contained in the file COPYING.
21 */
22
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <malloc.h>
28 #include <math.h>
29 #include <unistd.h> // getopt
30
31 #ifdef HAS_VSX
32
33 #include <altivec.h>
34
/* HWord_t: unsigned integer type used for the r14..r17 register variables
 * below.  It is 64 bits wide when __powerpc64__ is defined and 32 bits wide
 * otherwise.
 */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */
40
/* isLE is 1 when VGP_ppc64le_linux is defined and 0 otherwise. */
#if defined(VGP_ppc64le_linux)
#  define isLE 1
#else
#  define isLE 0
#endif

/* Minimal boolean type and its two values. */
typedef unsigned char Bool;
#define False 0
#define True  1
50 register HWord_t r14 __asm__ ("r14");
51 register HWord_t r15 __asm__ ("r15");
52 register HWord_t r16 __asm__ ("r16");
53 register HWord_t r17 __asm__ ("r17");
54 register double f14 __asm__ ("fr14");
55 register double f15 __asm__ ("fr15");
56 register double f16 __asm__ ("fr16");
57 register double f17 __asm__ ("fr17");
58
59 static volatile unsigned int div_flags, div_xer;
60
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* ---- Condition register ------------------------------------------------ */

/* Load the CR from _arg.  NOTE: the expansion already ends in ';'. */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR );

#define SET_CR_ZERO \
      SET_CR(0)

/* Copy the CR into _lval. */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* ---- XER --------------------------------------------------------------- */

/* Load the XER from _arg.  NOTE: the expansion already ends in ';'. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

#define SET_XER_ZERO \
      SET_XER(0)

/* Copy the XER into _lval. */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* ---- CR and XER together ----------------------------------------------- */

#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* ---- FPSCR ------------------------------------------------------------- */

/* Write 0.0 to every FPSCR field (field mask 0xFF). */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
91
92
/* Defines for the instruction groups; each group owns one bit so that
 * several groups can be combined in a bit field.
 */
#define SCALAR_DIV_INST  0x0001
#define OTHER_INST       0x0002

/* Every test routine takes no arguments and returns nothing. */
typedef void (*test_func_t)(void);

/* struct test_table is completed further down in this file. */
typedef struct test_table test_table_t;
99
100 /* These functions below that construct a table of floating point
101 * values were lifted from none/tests/ppc32/jm-insns.c.
102 */
103
/* AB_DPRINTF: printf-style trace to stderr while the argument tables are
 * being built.  It expands to an empty statement (arguments are not
 * evaluated) unless DEBUG_ARGS_BUILD is defined.
 */
#ifdef DEBUG_ARGS_BUILD
#  define AB_DPRINTF(fmt, ...) do { fprintf(stderr, fmt , ##__VA_ARGS__); } while (0)
#else
#  define AB_DPRINTF(fmt, ...) do { } while (0)
#endif
109
/* Assemble a 64-bit floating point bit pattern and store it at farg.
 *
 *   farg  - destination; must point to at least 8 writable bytes
 *           (callers in this file pass the address of a double)
 *   s     - sign, placed in bit 63
 *   _exp  - biased exponent, placed starting at bit 52
 *   mant  - fraction, OR-ed into the low bits unchanged
 *
 * The bits are copied with memcpy() rather than stored through a
 * (uint64_t *) cast: the destination is really a double, and writing it
 * through an incompatible pointer type violates the strict-aliasing rule.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t tmp;

   tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   memcpy(farg, &tmp, sizeof tmp);

#if defined (DEBUG_ARGS_BUILD)
   {
      /* Same trace as before, with arguments cast to match %llx exactly. */
      double val;
      memcpy(&val, &tmp, sizeof val);
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, _exp, (unsigned long long)mant,
                 (unsigned long long)tmp, val);
   }
#endif
}
120
/* Assemble a 32-bit floating point bit pattern and store it at farg.
 *
 *   farg  - destination; must point to at least 4 writable bytes
 *           (callers in this file pass the address of a float)
 *   s     - sign, placed in bit 31
 *   _exp  - biased exponent, placed starting at bit 23
 *   mant  - fraction, OR-ed into the low bits unchanged
 *
 * memcpy() is used instead of a store through a (uint32_t *) cast so that
 * the float object is not written through an incompatible pointer type.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   memcpy(farg, &tmp, sizeof tmp);
}
128
129
/* One pair of operands for a two-input test.  Both members are indexes
 * into the special-value tables (spec_fargs / spec_sp_fargs).
 */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first  (A) operand */
   int frb_idx;   /* index of the second (B) operand */
} fp_test_args_t;


/* Operand pairs for the two-argument tests.  The setup helpers in this file
 * consume two consecutive entries per double-precision vector and four per
 * single-precision vector, so the table is laid out four entries to a row.
 */
fp_test_args_t two_arg_fp_tests[] = {
   /*  0 */ { 8,  8}, { 8, 14}, {15, 16}, { 8,  5},
   /*  4 */ { 8,  4}, { 8,  7}, { 8,  9}, { 8, 11},
   /*  8 */ {14,  8}, {14, 14}, {14,  6}, {14,  5},
   /* 12 */ {14,  4}, {14,  7}, {14,  9}, {14, 11},
   /* 16 */ { 6,  8}, { 6, 14}, { 6,  6}, { 6,  5},
   /* 20 */ { 6,  4}, { 6,  7}, { 6,  9}, { 6, 11},
   /* 24 */ { 5,  8}, { 5, 14}, { 5,  6}, { 5,  5},
   /* 28 */ { 5,  4}, { 5,  7}, { 5,  9}, { 5, 11},
   /* 32 */ { 4,  8}, { 4, 14}, { 4,  6}, { 4,  5},
   /* 36 */ { 4,  1}, { 4,  7}, { 4,  9}, { 4, 11},
   /* 40 */ { 7,  8}, { 7, 14}, { 7,  6}, { 7,  5},
   /* 44 */ { 7,  4}, { 7,  7}, { 7,  9}, { 7, 11},
   /* 48 */ {10,  8}, {10, 14}, {12,  6}, {12,  5},
   /* 52 */ {10,  4}, {10,  7}, {10,  9}, {10, 11},
   /* 56 */ {12,  8}, {12, 14}, {12,  6}, {15, 16},
   /* 60 */ {15, 16}, { 9, 11}, {11, 11}, {11, 12},
   /* 64 */ {16, 18}, {17, 16}, {19, 19}, {19, 18}
};
206
207
/* Special-value operand tables, filled in once by
 * build_special_fargs_table().
 */
static int nb_special_fargs;      /* number of entries in each table      */
static double * spec_fargs;       /* double-precision values              */
static float * spec_sp_fargs;     /* single-precision counterparts        */

/* Allocate and fill spec_fargs / spec_sp_fargs and set nb_special_fargs.
 *
 * The call is a no-op when spec_fargs has already been built.  If either
 * allocation fails, a message is printed to stderr and the program exits
 * with status 1 (previously the NULL pointers were written through).
 */
static void build_special_fargs_table(void)
{
   /*
    * Entry  Sign Exp   fraction           Special value
    *   0      0   3fd  0x8000000000000ULL  Positive finite number
    *   1      0   404  0xf000000000000ULL  ...
    *   2      0   001  0x8000000b77501ULL  ...
    *   3      0   7fe  0x800000000051bULL  ...
    *   4      0   012  0x3214569900000ULL  ...
    *   5      0   000  0x0000000000000ULL  +0.0 (+zero)
    *   6      1   000  0x0000000000000ULL  -0.0 (-zero)
    *   7      0   7ff  0x0000000000000ULL  +infinity
    *   8      1   7ff  0x0000000000000ULL  -infinity
    *   9      0   7ff  0x7FFFFFFFFFFFFULL  +SNaN
    *  10      1   7ff  0x7FFFFFFFFFFFFULL  -SNaN
    *  11      0   7ff  0x8000000000000ULL  +QNaN
    *  12      1   7ff  0x8000000000000ULL  -QNaN
    *  13      1   000  0x8340000078000ULL  Denormalized val (zero exp and non-zero fraction)
    *  14      1   40d  0x0650f5a07b353ULL  Negative finite number
    *  15      0   412  0x32585a9900000ULL  A few more positive finite numbers
    *  16      0   413  0x82511a2000000ULL  ...
    *  17      0   403  0x12ef5a9300000ULL  ...
    *  18      0   405  0x14bf5d2300000ULL  ...
    *  19      0   409  0x76bf982440000ULL  ...
    */
   static const struct {
      int      s;
      uint16_t bexp;
      uint64_t mant;
   } dp_values[] = {
      /* Finite values */
      { 0, 0x3fd, 0x8000000000000ULL },   /* #0  */
      { 0, 0x404, 0xf000000000000ULL },   /* #1  */
      { 0, 0x001, 0x8000000b77501ULL },   /* #2  */
      { 0, 0x7fe, 0x800000000051bULL },   /* #3  */
      { 0, 0x012, 0x3214569900000ULL },   /* #4  */
      /* Special values */
      { 0, 0x000, 0x0000000000000ULL },   /* #5  +0.0      */
      { 1, 0x000, 0x0000000000000ULL },   /* #6  -0.0      */
      { 0, 0x7FF, 0x0000000000000ULL },   /* #7  +infinity */
      { 1, 0x7FF, 0x0000000000000ULL },   /* #8  -infinity */
      { 0, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #9  +SNaN     */
      { 1, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #10 -SNaN     */
      { 0, 0x7FF, 0x8000000000000ULL },   /* #11 +QNaN     */
      { 1, 0x7FF, 0x8000000000000ULL },   /* #12 -QNaN     */
      { 1, 0x000, 0x8340000078000ULL },   /* #13 denormalized value */
      { 1, 0x40d, 0x0650f5a07b353ULL },   /* #14 negative finite number */
      /* A few positive finite numbers ... */
      { 0, 0x412, 0x32585a9900000ULL },   /* #15 */
      { 0, 0x413, 0x82511a2000000ULL },   /* #16 */
      { 0, 0x403, 0x12ef5a9300000ULL },   /* #17 */
      { 0, 0x405, 0x14bf5d2300000ULL },   /* #18 */
      { 0, 0x409, 0x76bf982440000ULL }    /* #19 */
   };
   const int count = (int)(sizeof dp_values / sizeof dp_values[0]);
   int j;

   if (spec_fargs)
      return;

   spec_fargs = malloc( count * sizeof *spec_fargs );
   spec_sp_fargs = malloc( count * sizeof *spec_sp_fargs );
   if (!spec_fargs || !spec_sp_fargs) {
      fprintf(stderr, "Unable to allocate the special FP argument tables. Exiting\n");
      exit(1);
   }

   for (j = 0; j < count; j++) {
      register_farg(&spec_fargs[j],
                    dp_values[j].s, dp_values[j].bexp, dp_values[j].mant);

      if (j == 9 || j == 10) {
         /* When src is a SNaN, it's converted to a QNaN first before
          * rounding to single-precision, so the double-precision value
          * cannot simply be assigned to the single-precision slot.  The
          * bits are set by hand instead.
          */
         register_sp_farg(&spec_sp_fargs[j], dp_values[j].s, 0xff, 0x3FFFFF);
      } else {
         spec_sp_fargs[j] = spec_fargs[j];
      }
   }

   nb_special_fargs = count;
}
403
404
405 struct test_table
406 {
407 test_func_t test_category;
408 char * name;
409 unsigned int test_group;
410 };
411
/* Type of input for floating point operations. */
typedef enum {
   SINGLE_TEST = 0,
   DOUBLE_TEST = 1
} precision_type_t;

/* Classification attached to each floating point test entry. */
typedef enum {
   VX_SCALAR_CONV_TO_WORD = 0,
   VX_CONV_TO_SINGLE      = 1,
   VX_CONV_TO_DOUBLE      = 2,
   VX_ESTIMATE            = 3,
   VX_DEFAULT             = 4
} vx_fp_test_type;
425
426 static vector unsigned int vec_out, vec_inA, vec_inB;
427
428 /* This function is for checking the reciprocal and reciprocal square root
429 * estimate instructions.
430 */
check_estimate(precision_type_t type,Bool is_rsqrte,int idx,int output_vec_idx)431 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
432 {
433 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
434 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions
435 * does an actual reciprocal calculation versus estimation, so the answer we get back from
436 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
437 * precision) and the estimate may still be within expected tolerances. On top of that,
438 * we can't count on these estimates always being the same across implementations.
439 * For example, with the fre[s] instruction (which should be correct to within one part
440 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
441 * one implementation could return 1.0111_1111_0000 and another implementation could return
442 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a
443 * single bit in common.
444 *
445 * The upshot is we can't validate the VEX output for these instructions by comparing against
446 * stored bit patterns. We must check that the result is within expected tolerances.
447 */
448
449
450 /* A mask to be used for validation as a last resort.
451 * Only use 12 bits of precision for reasons discussed above.
452 */
453 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
454 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
455
456 Bool result = False;
457 Bool dp_test = type == DOUBLE_TEST;
458 double src_dp, res_dp;
459 float src_sp, res_sp;
460 src_dp = res_dp = 0;
461 src_sp = res_sp = 0;
462 #define SRC (dp_test ? src_dp : src_sp)
463 #define RES (dp_test ? res_dp : res_sp)
464 Bool src_is_negative = False;
465 Bool res_is_negative = False;
466 unsigned long long * dst_dp = NULL;
467 unsigned int * dst_sp = NULL;
468 if (dp_test) {
469 unsigned long long * src_dp_ull;
470 dst_dp = (unsigned long long *) &vec_out;
471 src_dp = spec_fargs[idx];
472 src_dp_ull = (unsigned long long *) &src_dp;
473 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
474 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
475 memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
476 } else {
477 unsigned int * src_sp_uint;
478 dst_sp = (unsigned int *) &vec_out;
479 src_sp = spec_sp_fargs[idx];
480 src_sp_uint = (unsigned int *) &src_sp;
481 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
482 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
483 memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
484 }
485
486 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
487 if (isnan(SRC))
488 return isnan(RES);
489 if (fpclassify(SRC) == FP_ZERO)
490 return isinf(RES);
491 if (!src_is_negative && isinf(SRC))
492 return !res_is_negative && (fpclassify(RES) == FP_ZERO);
493 if (is_rsqrte) {
494 if (src_is_negative)
495 return isnan(RES);
496 } else {
497 if (src_is_negative && isinf(SRC))
498 return res_is_negative && (fpclassify(RES) == FP_ZERO);
499 }
500 if (dp_test) {
501 double calc_diff;
502 double real_diff;
503 double recip_divisor;
504 double div_result;
505 double calc_diff_tmp;
506
507 if (is_rsqrte)
508 recip_divisor = sqrt(src_dp);
509 else
510 recip_divisor = src_dp;
511
512 div_result = 1.0/recip_divisor;
513 calc_diff_tmp = recip_divisor * 16384.0;
514 if (isnormal(calc_diff_tmp)) {
515 calc_diff = fabs(1.0/calc_diff_tmp);
516 real_diff = fabs(res_dp - div_result);
517 result = ( ( res_dp == div_result )
518 || ( real_diff <= calc_diff ) );
519 } else {
520 /* Unable to compute theoretical difference, so we fall back to masking out
521 * un-precise bits.
522 */
523 unsigned long long * div_result_dp = (unsigned long long *) &div_result;
524 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
525 }
526 /* For debug use . . .
527 if (!result) {
528 unsigned long long * dv = &div_result;
529 unsigned long long * rd = &real_diff;
530 unsigned long long * cd = &calc_diff;
531 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
532 *dv, *rd, *cd);
533 }
534 */
535 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
536 float calc_diff;
537 float real_diff;
538 float div_result;
539 float calc_diff_tmp;
540 float recip_divisor = sqrt(src_sp);
541
542 div_result = 1.0/recip_divisor;
543 calc_diff_tmp = recip_divisor * 16384.0;
544 if (isnormal(calc_diff_tmp)) {
545 calc_diff = fabsf(1.0/calc_diff_tmp);
546 real_diff = fabsf(res_sp - div_result);
547 result = ( ( res_sp == div_result )
548 || ( real_diff <= calc_diff ) );
549 } else {
550 /* Unable to compute theoretical difference, so we fall back to masking out
551 * un-precise bits.
552 */
553 unsigned int * div_result_sp = (unsigned int *) &div_result;
554 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
555 }
556 /* For debug use . . .
557 if (!result) {
558 unsigned long long * dv = &div_result;
559 unsigned long long * rd = &real_diff;
560 unsigned long long * cd = &calc_diff;
561 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
562 *dv, *rd, *cd);
563 }
564 */
565 }
566 return result;
567 }
568
569 typedef struct vx_fp_test
570 {
571 test_func_t test_func;
572 const char * name;
573 fp_test_args_t * targs;
574 int num_tests;
575 precision_type_t precision;
576 vx_fp_test_type type;
577 const char * op;
578 } vx_fp_test_t;
579
580
581 static Bool do_dot;
582
test_xvredp(void)583 static void test_xvredp(void)
584 {
585 __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
586 }
587
test_xsredp(void)588 static void test_xsredp(void)
589 {
590 __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
591 }
592
test_xvrsqrtedp(void)593 static void test_xvrsqrtedp(void)
594 {
595 __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
596 }
597
test_xsrsqrtedp(void)598 static void test_xsrsqrtedp(void)
599 {
600 __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
601 }
602
test_xvrsqrtesp(void)603 static void test_xvrsqrtesp(void)
604 {
605 __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
606 }
607
test_xstsqrtdp(void)608 static void test_xstsqrtdp(void)
609 {
610 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB));
611 }
612
test_xvtsqrtdp(void)613 static void test_xvtsqrtdp(void)
614 {
615 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB));
616 }
617
test_xvtsqrtsp(void)618 static void test_xvtsqrtsp(void)
619 {
620 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB));
621 }
622
test_xvsqrtdp(void)623 static void test_xvsqrtdp(void)
624 {
625 __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
626 }
627
test_xvsqrtsp(void)628 static void test_xvsqrtsp(void)
629 {
630 __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
631 }
632
test_xvtdivdp(void)633 static void test_xvtdivdp(void)
634 {
635 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
636 }
637
test_xvtdivsp(void)638 static void test_xvtdivsp(void)
639 {
640 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
641 }
642
test_xscvdpsp(void)643 static void test_xscvdpsp(void)
644 {
645 __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
646 }
647
test_xscvdpuxws(void)648 static void test_xscvdpuxws(void)
649 {
650 __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
651 }
652
test_xscvspdp(void)653 static void test_xscvspdp(void)
654 {
655 __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
656 }
657
test_xvcvdpsp(void)658 static void test_xvcvdpsp(void)
659 {
660 __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
661 }
662
test_xvcvdpuxds(void)663 static void test_xvcvdpuxds(void)
664 {
665 __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
666 }
667
test_xvcvdpuxws(void)668 static void test_xvcvdpuxws(void)
669 {
670 __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
671 }
672
test_xvcvspdp(void)673 static void test_xvcvspdp(void)
674 {
675 __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
676 }
677
test_xvcvspsxds(void)678 static void test_xvcvspsxds(void)
679 {
680 __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
681 }
682
test_xvcvspuxds(void)683 static void test_xvcvspuxds(void)
684 {
685 __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
686 }
687
test_xvcvdpsxds(void)688 static void test_xvcvdpsxds(void)
689 {
690 __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
691 }
692
test_xvcvspuxws(void)693 static void test_xvcvspuxws(void)
694 {
695 __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
696 }
697
test_xvcvsxddp(void)698 static void test_xvcvsxddp(void)
699 {
700 __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
701 }
702
test_xvcvuxddp(void)703 static void test_xvcvuxddp(void)
704 {
705 __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
706 }
707
test_xvcvsxdsp(void)708 static void test_xvcvsxdsp(void)
709 {
710 __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
711 }
712
test_xvcvuxdsp(void)713 static void test_xvcvuxdsp(void)
714 {
715 __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
716 }
717
test_xvcvsxwdp(void)718 static void test_xvcvsxwdp(void)
719 {
720 __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
721 }
722
test_xvcvuxwdp(void)723 static void test_xvcvuxwdp(void)
724 {
725 __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
726 }
727
test_xvcvsxwsp(void)728 static void test_xvcvsxwsp(void)
729 {
730 __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
731 }
732
test_xvcvuxwsp(void)733 static void test_xvcvuxwsp(void)
734 {
735 __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
736 }
737
test_xsrdpic(void)738 static void test_xsrdpic(void)
739 {
740 __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
741 }
742
test_xsrdpiz(void)743 static void test_xsrdpiz(void)
744 {
745 __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
746 }
747
test_xsrdpi(void)748 static void test_xsrdpi(void)
749 {
750 __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
751 }
752
test_xvabsdp(void)753 static void test_xvabsdp(void)
754 {
755 __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
756 }
757
test_xvnabsdp(void)758 static void test_xvnabsdp(void)
759 {
760 __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
761 }
762
test_xvnegdp(void)763 static void test_xvnegdp(void)
764 {
765 __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
766 }
767
test_xvnegsp(void)768 static void test_xvnegsp(void)
769 {
770 __asm__ __volatile__ ("xvnegsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
771 }
772
test_xvabssp(void)773 static void test_xvabssp(void)
774 {
775 __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
776 }
777
test_xvnabssp(void)778 static void test_xvnabssp(void)
779 {
780 __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
781 }
782
test_xvrdpi(void)783 static void test_xvrdpi(void)
784 {
785 __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
786 }
787
test_xvrdpic(void)788 static void test_xvrdpic(void)
789 {
790 __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
791 }
792
test_xvrdpim(void)793 static void test_xvrdpim(void)
794 {
795 __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
796 }
797
test_xvrdpip(void)798 static void test_xvrdpip(void)
799 {
800 __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
801 }
802
test_xvrdpiz(void)803 static void test_xvrdpiz(void)
804 {
805 __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
806 }
807
test_xvrspi(void)808 static void test_xvrspi(void)
809 {
810 __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
811 }
812
test_xvrspic(void)813 static void test_xvrspic(void)
814 {
815 __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
816 }
817
test_xvrspim(void)818 static void test_xvrspim(void)
819 {
820 __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
821 }
822
test_xvrspip(void)823 static void test_xvrspip(void)
824 {
825 __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
826 }
827
test_xvrspiz(void)828 static void test_xvrspiz(void)
829 {
830 __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
831 }
832
833 static vx_fp_test_t
834 vsx_one_fp_arg_tests[] = {
835 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
836 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
837 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
838 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
839 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
840 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
841 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
842 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
843 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
844 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
845 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
846 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
847 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
848 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
849 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
850 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
851 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
852 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
853 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
854 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
855 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
856 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
857 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
858 { &test_xvnegsp, "xvnegsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "neg"},
859 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
860 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
861 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
862 { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
863 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
864 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
865 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
866 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
867 { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
868 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
869 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
870 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
871 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
872 { NULL, NULL, NULL, 0, 0, 0, NULL}
873 };
874
875 static vx_fp_test_t
876 vx_tdivORtsqrt_tests[] = {
877 { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
878 { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
879 { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
880 { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
881 { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
882 { NULL, NULL, NULL, 0 , 0, 0, NULL}
883 };
884
/* 64-bit integer inputs for the doubleword integer-to-FP conversion tests. */
static unsigned long long doubleWord[] = {
   0x0000000000000000ULL,
   0xffffffff00000000ULL,
   0x00000000ffffffffULL,
   0xffffffffffffffffULL,
   0x89abcde123456789ULL,
   0x0102030405060708ULL,
   0x00000000a0b1c2d3ULL,
   0x1111222233334444ULL
};
894
/* 32-bit integer inputs for the word integer-to-FP conversion tests. */
static unsigned int singleWord[] = {
   0x00000000,
   0xffff0000,
   0x0000ffff,
   0xffffffff,
   0x89a73522,
   0x01020304,
   0x0000abcd,
   0x11223344
};
904
905 typedef struct vx_intToFp_test
906 {
907 test_func_t test_func;
908 const char * name;
909 void * targs;
910 int num_tests;
911 precision_type_t precision;
912 vx_fp_test_type type;
913 } vx_intToFp_test_t;
914
915 static vx_intToFp_test_t
916 intToFp_tests[] = {
917 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
918 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
919 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
920 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
921 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
922 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
923 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
924 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
925 { NULL, NULL, NULL, 0, 0 }
926 };
927
928 static Bool do_OE;
929 typedef enum {
930 DIV_BASE = 1,
931 DIV_OE = 2,
932 DIV_DOT = 4,
933 } div_type_t;
934 /* Possible divde type combinations are:
935 * - base
936 * - base+dot
937 * - base+OE
938 * - base+OE+dot
939 */
#ifdef __powerpc64__
/* Run one form of divdeu on r14 (dividend) and r15 (divisor), leaving the
 * result in r17 and the resulting CR / XER in div_flags / div_xer.
 *
 * do_OE and do_dot select the form: divdeu, divdeuo, divdeu. or divdeuo.
 *
 * CR and XER are zeroed immediately before, and captured immediately after,
 * the instruction inside each case so that no compiler-generated compare
 * for the switch can run in between.  Forms that write CR0 or XER list the
 * register as a clobber.
 */
static void test_divdeu(void)
{
   int divdeu_type = DIV_BASE;
   if (do_OE)
      divdeu_type |= DIV_OE;
   if (do_dot)
      divdeu_type |= DIV_DOT;

   switch (divdeu_type) {
      case DIV_BASE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15) : "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu. %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15) : "cr0");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo. %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15) : "cr0", "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
#endif
976
test_divwe(void)977 static void test_divwe(void)
978 {
979 int divwe_type = DIV_BASE;
980 if (do_OE)
981 divwe_type |= DIV_OE;
982 if (do_dot)
983 divwe_type |= DIV_DOT;
984
985 switch (divwe_type) {
986 case 1:
987 SET_CR_XER_ZERO;
988 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
989 GET_CR_XER(div_flags, div_xer);
990 break;
991 case 3:
992 SET_CR_XER_ZERO;
993 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
994 GET_CR_XER(div_flags, div_xer);
995 break;
996 case 5:
997 SET_CR_XER_ZERO;
998 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
999 GET_CR_XER(div_flags, div_xer);
1000 break;
1001 case 7:
1002 SET_CR_XER_ZERO;
1003 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1004 GET_CR_XER(div_flags, div_xer);
1005 break;
1006 default:
1007 fprintf(stderr, "Invalid divweu type. Exiting\n");
1008 exit(1);
1009 }
1010 }
1011
1012
1013 typedef struct simple_test {
1014 test_func_t test_func;
1015 char * name;
1016 precision_type_t precision;
1017 } simple_test_t;
1018
1019
/* Load four single-precision operand pairs into the input vectors.
 *
 *   targs        first of four consecutive operand-index pairs
 *   swap_inputs  when True, the B operands are written to vec_out instead
 *                of vec_inB
 *
 * Element i of vec_inA receives spec_sp_fargs[targs[i].fra_idx]; element i
 * of the B destination receives spec_sp_fargs[targs[i].frb_idx].
 *
 * Byte offsets are computed on unsigned char pointers; arithmetic on
 * void * is a GNU extension rather than standard C.
 */
static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   unsigned char * dst_a = (unsigned char *)&vec_inA;
   unsigned char * dst_b = swap_inputs ? (unsigned char *)&vec_out
                                       : (unsigned char *)&vec_inB;
   int i;

   for (i = 0; i < 4; i++, targs++) {
      // copy single precision FP into vector element i
      memcpy(dst_a + (i * 4), &spec_sp_fargs[targs->fra_idx], 4);
      memcpy(dst_b + (i * 4), &spec_sp_fargs[targs->frb_idx], 4);
   }
}
1037
/*
 * Load two double-precision operand pairs into the vector inputs.
 *
 * For each of the two consecutive entries starting at targs, the value
 * spec_fargs[fra_idx] is copied into 8-byte element 'lane' of vec_inA and
 * spec_fargs[frb_idx] into the same element of the second operand, which is
 * vec_out when swap_inputs is set and vec_inB otherwise.
 */
static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   char *first_operand = (char *)&vec_inA;
   char *second_operand = swap_inputs ? (char *)&vec_out : (char *)&vec_inB;
   int lane;

   for (lane = 0; lane < 2; lane++) {
      int a_idx = targs[lane].fra_idx;
      int b_idx = targs[lane].frb_idx;

      memcpy(first_operand + (lane * 8), &spec_fargs[a_idx], 8);
      memcpy(second_operand + (lane * 8), &spec_fargs[b_idx], 8);
   }
}
1055
1056 #define VX_NOT_CMP_OP 0xffffffff
print_vector_fp_result(unsigned int cc,vx_fp_test_t * test_group,int i,Bool print_vec_out)1057 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1058 {
1059 int a_idx, b_idx, k;
1060 char * name = malloc(20);
1061 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1062 int loops = dp ? 2 : 4;
1063 fp_test_args_t * targs = &test_group->targs[i];
1064 unsigned long long * frA_dp, * frB_dp, * dst_dp;
1065 unsigned int * frA_sp, *frB_sp, * dst_sp;
1066 strcpy(name, test_group->name);
1067 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1068 for (k = 0; k < loops; k++) {
1069 a_idx = targs->fra_idx;
1070 b_idx = targs->frb_idx;
1071 if (k)
1072 printf(" AND ");
1073 if (dp) {
1074 frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1075 frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1076 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1077 } else {
1078 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1079 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1080 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1081 }
1082 targs++;
1083 }
1084 if (cc != VX_NOT_CMP_OP)
1085 printf(" ? cc=%x", cc);
1086
1087 if (print_vec_out) {
1088 if (dp) {
1089 dst_dp = (unsigned long long *) &vec_out;
1090 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1091 } else {
1092 dst_sp = (unsigned int *) &vec_out;
1093 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1094 }
1095 } else {
1096 printf("\n");
1097 }
1098 free(name);
1099 }
1100
1101
1102
test_vsx_one_fp_arg(void)1103 static void test_vsx_one_fp_arg(void)
1104 {
1105 test_func_t func;
1106 int k;
1107 k = 0;
1108 build_special_fargs_table();
1109
1110 while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1111 int idx, i;
1112 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1113 Bool estimate = (test_group.type == VX_ESTIMATE);
1114 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1115 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1116 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1117 Bool sparse_sp = False;
1118 int stride = dp ? 2 : 4;
1119 int loops = is_scalar ? 1 : stride;
1120 stride = is_scalar ? 1: stride;
1121
1122 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1123 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op
1124 * or
1125 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op
1126 *
1127 * For the vector op case, we need to adjust stride from '4' to '2', since
1128 * we'll only be loading two values per loop into the input register.
1129 */
1130 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1131 sparse_sp = True;
1132 stride = 2;
1133 }
1134
1135 for (i = 0; i < test_group.num_tests; i+=stride) {
1136 unsigned int * pv;
1137 void * inB, * vecB_void_ptr = (void *)&vec_inB;
1138
1139 pv = (unsigned int *)&vec_out;
1140 // clear vec_out
1141 for (idx = 0; idx < 4; idx++, pv++)
1142 *pv = 0;
1143
1144 if (dp) {
1145 int j;
1146 unsigned long long * frB_dp, *dst_dp;
1147 for (j = 0; j < loops; j++) {
1148 inB = (void *)&spec_fargs[i + j];
1149 // copy double precision FP into vector element i
1150 if (isLE && is_scalar)
1151 vecB_void_ptr += 8;
1152 memcpy(vecB_void_ptr + (j * 8), inB, 8);
1153 }
1154 // execute test insn
1155 (*func)();
1156 dst_dp = (unsigned long long *) &vec_out;
1157 if (isLE && is_scalar)
1158 dst_dp++;
1159 printf("#%d: %s ", i/stride, test_group.name);
1160 for (j = 0; j < loops; j++) {
1161 if (j)
1162 printf("; ");
1163 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1164 printf("%s(%016llx)", test_group.op, *frB_dp);
1165 if (estimate) {
1166 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
1167 printf(" ==> %s)", res ? "PASS" : "FAIL");
1168 /* For debugging . . .
1169 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1170 */
1171 } else {
1172 vx_fp_test_type type = test_group.type;
1173 switch (type) {
1174 case VX_SCALAR_CONV_TO_WORD:
1175 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1176 break;
1177 case VX_CONV_TO_SINGLE:
1178 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1179 break;
1180 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1181 printf(" = %016llx", dst_dp[j]);
1182 }
1183 }
1184 }
1185 printf("\n");
1186 } else {
1187 int j;
1188 unsigned int * frB_sp, * dst_sp = NULL;
1189 unsigned long long * dst_dp = NULL;
1190 if (sparse_sp)
1191 loops = 2;
1192 for (j = 0; j < loops; j++) {
1193 inB = (void *)&spec_sp_fargs[i + j];
1194 // copy single precision FP into vector element i
1195 if (sparse_sp) {
1196 if (isLE)
1197 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1198 else
1199 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1200 } else {
1201 if (isLE && is_scalar)
1202 vecB_void_ptr += 12;
1203 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1204 }
1205 }
1206 // execute test insn
1207 (*func)();
1208 if (test_group.type == VX_CONV_TO_DOUBLE) {
1209 dst_dp = (unsigned long long *) &vec_out;
1210 if (isLE && is_scalar)
1211 dst_dp++;
1212 } else {
1213 dst_sp = (unsigned int *) &vec_out;
1214 if (isLE && is_scalar)
1215 dst_sp += 3;
1216 }
1217 // print result
1218 printf("#%d: %s ", i/stride, test_group.name);
1219 for (j = 0; j < loops; j++) {
1220 if (j)
1221 printf("; ");
1222 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1223 printf("%s(%08x)", test_group.op, *frB_sp);
1224 if (estimate) {
1225 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
1226 printf(" ==> %s)", res ? "PASS" : "FAIL");
1227 } else {
1228 if (test_group.type == VX_CONV_TO_DOUBLE)
1229 printf(" = %016llx", dst_dp[j]);
1230 else
1231 /* Special case: Current VEX implementation for fsqrts (single precision)
1232 * uses the same implementation as that used for double precision fsqrt.
1233 * However, I've found that for xvsqrtsp, the result from that implementation
1234 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the
1235 * output to appear very different if you end up with a carry. But for the given
1236 * inputs in this testcase, we can simply mask out these bits.
1237 */
1238 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1239 }
1240 }
1241 printf("\n");
1242 }
1243 }
1244 k++;
1245 printf( "\n" );
1246 }
1247 }
1248
test_int_to_fp_convert(void)1249 static void test_int_to_fp_convert(void)
1250 {
1251 test_func_t func;
1252 int k;
1253 k = 0;
1254
1255 while ((func = intToFp_tests[k].test_func)) {
1256 int idx, i;
1257 vx_intToFp_test_t test_group = intToFp_tests[k];
1258 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1259 Bool sparse_sp = False;
1260 int stride = dp ? 2 : 4;
1261 int loops = stride;
1262
1263 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1264 * |___ int___|_Unused_|___int___|__Unused__| // for vector op
1265 * or
1266 * We need to adjust stride from '4' to '2', since we'll only be loading
1267 * two values per loop into the input register.
1268 */
1269 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1270 sparse_sp = True;
1271 stride = 2;
1272 }
1273
1274 for (i = 0; i < test_group.num_tests; i+=stride) {
1275 unsigned int * pv;
1276 void * inB;
1277
1278 pv = (unsigned int *)&vec_out;
1279 // clear vec_out
1280 for (idx = 0; idx < 4; idx++, pv++)
1281 *pv = 0;
1282
1283 if (dp) {
1284 int j;
1285 unsigned long long *dst_dw, * targs = test_group.targs;
1286 for (j = 0; j < loops; j++) {
1287 inB = (void *)&targs[i + j];
1288 // copy doubleword into vector element i
1289 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1290 }
1291 // execute test insn
1292 (*func)();
1293 dst_dw = (unsigned long long *) &vec_out;
1294 printf("#%d: %s ", i/stride, test_group.name);
1295 for (j = 0; j < loops; j++) {
1296 if (j)
1297 printf("; ");
1298 printf("conv(%016llx)", targs[i + j]);
1299
1300 if (test_group.type == VX_CONV_TO_SINGLE)
1301 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1302 else
1303 printf(" = %016llx", dst_dw[j]);
1304 }
1305 printf("\n");
1306 } else {
1307 int j;
1308 unsigned int * dst_sp = NULL;
1309 unsigned int * targs = test_group.targs;
1310 unsigned long long * dst_dp = NULL;
1311 void * vecB_void_ptr = (void *)&vec_inB;
1312 if (sparse_sp)
1313 loops = 2;
1314 for (j = 0; j < loops; j++) {
1315 inB = (void *)&targs[i + j];
1316 // copy single word into vector element i
1317 if (sparse_sp) {
1318 if (isLE)
1319 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1320 else
1321 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1322 } else {
1323 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1324 }
1325 }
1326 // execute test insn
1327 (*func)();
1328 if (test_group.type == VX_CONV_TO_DOUBLE)
1329 dst_dp = (unsigned long long *) &vec_out;
1330 else
1331 dst_sp = (unsigned int *) &vec_out;
1332 // print result
1333 printf("#%d: %s ", i/stride, test_group.name);
1334 for (j = 0; j < loops; j++) {
1335 if (j)
1336 printf("; ");
1337 printf("conv(%08x)", targs[i + j]);
1338 if (test_group.type == VX_CONV_TO_DOUBLE)
1339 printf(" = %016llx", dst_dp[j]);
1340 else
1341 printf(" = %08x", dst_sp[j]);
1342 }
1343 printf("\n");
1344 }
1345 }
1346 k++;
1347 printf( "\n" );
1348 }
1349 }
1350
1351
1352
/*
 * Doubleword divide test data.  Each row is one operand pair; the test
 * driver loads element [0] into r14 and element [1] into r15.
 */
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
/* Number of operand pairs (rows) in div_dw_tdata. */
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1370
/*
 * Word divide test data.  Each row is one operand pair; the test driver
 * loads element [0] into r14 and element [1] into r15.
 */
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 },
};
/* Number of operand pairs (rows) in div_w_tdata. */
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1381
1382 typedef struct div_ext_test
1383 {
1384 test_func_t test_func;
1385 const char *name;
1386 int num_tests;
1387 div_type_t div_type;
1388 precision_type_t precision;
1389 } div_ext_test_t;
1390
1391 static div_ext_test_t div_tests[] = {
1392 #ifdef __powerpc64__
1393 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1394 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1395 #endif
1396 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1397 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1398 { NULL, NULL, 0, 0, 0 }
1399 };
1400
test_div_extensions(void)1401 static void test_div_extensions(void)
1402 {
1403 test_func_t func;
1404 int k;
1405 k = 0;
1406
1407 while ((func = div_tests[k].test_func)) {
1408 int i, repeat = 1;
1409 div_ext_test_t test_group = div_tests[k];
1410 do_dot = False;
1411
1412 again:
1413 for (i = 0; i < test_group.num_tests; i++) {
1414 unsigned int condreg;
1415
1416 if (test_group.div_type == DIV_OE)
1417 do_OE = True;
1418 else
1419 do_OE = False;
1420
1421 if (test_group.precision == DOUBLE_TEST) {
1422 r14 = div_dw_tdata[i][0];
1423 r15 = div_dw_tdata[i][1];
1424 } else {
1425 r14 = div_w_tdata[i][0];
1426 r15 = div_w_tdata[i][1];
1427 }
1428 // execute test insn
1429 (*func)();
1430 condreg = (div_flags & 0xf0000000) >> 28;
1431 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1432 if (test_group.precision == DOUBLE_TEST) {
1433 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1434 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1435 } else {
1436 printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1437 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1438 }
1439 printf(" CR=%x; XER=%x\n", condreg, div_xer);
1440 }
1441 printf("\n");
1442 if (repeat) {
1443 repeat = 0;
1444 do_dot = True;
1445 goto again;
1446 }
1447 k++;
1448 printf( "\n" );
1449 }
1450 }
1451
1452
test_vx_tdivORtsqrt(void)1453 static void test_vx_tdivORtsqrt(void)
1454 {
1455 test_func_t func;
1456 int k, crx;
1457 unsigned int flags;
1458 k = 0;
1459 do_dot = False;
1460 build_special_fargs_table();
1461
1462 while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1463 int idx, i;
1464 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1465 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1466 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1467 Bool two_args = test_group.targs ? True : False;
1468 int stride = dp ? 2 : 4;
1469 int loops = is_scalar ? 1 : stride;
1470 stride = is_scalar ? 1: stride;
1471
1472 for (i = 0; i < test_group.num_tests; i+=stride) {
1473 unsigned int * pv;
1474 void * inB, * vecB_void_ptr = (void *)&vec_inB;
1475
1476 pv = (unsigned int *)&vec_out;
1477 // clear vec_out
1478 for (idx = 0; idx < 4; idx++, pv++)
1479 *pv = 0;
1480
1481 if (dp) {
1482 int j;
1483 unsigned long long * frB_dp;
1484 if (two_args) {
1485 setup_dp_fp_args(&test_group.targs[i], False);
1486 } else {
1487 for (j = 0; j < loops; j++) {
1488 inB = (void *)&spec_fargs[i + j];
1489 // copy double precision FP into vector element i
1490 if (isLE && is_scalar)
1491 vecB_void_ptr += 8;
1492 memcpy(vecB_void_ptr + (j * 8), inB, 8);
1493 }
1494 }
1495 // execute test insn
1496 // Must do set/get of CRs immediately before/after calling the asm func
1497 // to avoid CRs being modified by other instructions.
1498 SET_FPSCR_ZERO;
1499 SET_CR_XER_ZERO;
1500 (*func)();
1501 GET_CR(flags);
1502 // assumes using CR1
1503 crx = (flags & 0x0f000000) >> 24;
1504 if (two_args) {
1505 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1506 } else {
1507 printf("#%d: %s ", i/stride, test_group.name);
1508 for (j = 0; j < loops; j++) {
1509 if (j)
1510 printf("; ");
1511 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1512 printf("%s(%016llx)", test_group.op, *frB_dp);
1513 }
1514 printf( " ? %x (CRx)\n", crx);
1515 }
1516 } else {
1517 int j;
1518 unsigned int * frB_sp;
1519 if (two_args) {
1520 setup_sp_fp_args(&test_group.targs[i], False);
1521 } else {
1522 for (j = 0; j < loops; j++) {
1523 inB = (void *)&spec_sp_fargs[i + j];
1524 // copy single precision FP into vector element i
1525 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1526 }
1527 }
1528 // execute test insn
1529 SET_FPSCR_ZERO;
1530 SET_CR_XER_ZERO;
1531 (*func)();
1532 GET_CR(flags);
1533 crx = (flags & 0x0f000000) >> 24;
1534 // print result
1535 if (two_args) {
1536 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1537 } else {
1538 printf("#%d: %s ", i/stride, test_group.name);
1539 for (j = 0; j < loops; j++) {
1540 if (j)
1541 printf("; ");
1542 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1543 printf("%s(%08x)", test_group.op, *frB_sp);
1544 }
1545 printf( " ? %x (CRx)\n", crx);
1546 }
1547 }
1548 }
1549 k++;
1550 printf( "\n" );
1551 }
1552 }
1553
1554
test_ftsqrt(void)1555 static void test_ftsqrt(void)
1556 {
1557 int i, crx;
1558 unsigned int flags;
1559 unsigned long long * frbp;
1560 build_special_fargs_table();
1561
1562
1563 for (i = 0; i < nb_special_fargs; i++) {
1564 f14 = spec_fargs[i];
1565 frbp = (unsigned long long *)&spec_fargs[i];
1566 SET_FPSCR_ZERO;
1567 SET_CR_XER_ZERO;
1568 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14));
1569 GET_CR(flags);
1570 crx = (flags & 0x0f000000) >> 24;
1571 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1572 }
1573 printf( "\n" );
1574 }
1575
1576 static void
test_popcntw(void)1577 test_popcntw(void)
1578 {
1579 #ifdef __powerpc64__
1580 uint64_t res;
1581 unsigned long long src = 0x9182736405504536ULL;
1582 r14 = src;
1583 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1584 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1585 #else
1586 uint32_t res;
1587 unsigned int src = 0x9182730E;
1588 r14 = src;
1589 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1590 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1591 #endif
1592 printf( "\n" );
1593 }
1594
1595
1596 static test_table_t
1597 all_tests[] =
1598 {
1599
1600 { &test_vsx_one_fp_arg,
1601 "Test VSX vector and scalar single argument instructions", OTHER_INST } ,
1602 { &test_int_to_fp_convert,
1603 "Test VSX vector integer to float conversion instructions", OTHER_INST },
1604 { &test_div_extensions,
1605 "Test div extensions", SCALAR_DIV_INST },
1606 { &test_ftsqrt,
1607 "Test ftsqrt instruction", OTHER_INST },
1608 { &test_vx_tdivORtsqrt,
1609 "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
1610 { &test_popcntw,
1611 "Test popcntw instruction", OTHER_INST },
1612 { NULL, NULL }
1613 };
1614 #endif // HAS_VSX
1615
/* Write the command-line help text to stderr. */
static void usage (void)
{
   static const char help_text[] =
      "Usage: test_isa_3_0 [OPTIONS]\n"
      "\t-d: test scalar division instructions (default)\n"
      "\t-o: test non scalar division instructions (default)\n"
      "\t-A: test all instructions (default)\n"
      "\t-h: display this help and exit\n";

   fputs(help_text, stderr);
}
1626
/*
 * Program entry point.
 *
 * Options:
 *    -d  run the scalar division tests (SCALAR_DIV_INST)
 *    -o  run the remaining tests (OTHER_INST)
 *    -A  run everything
 *    -h  print usage and exit successfully
 * When no selection option is given, every test is run, as the usage text
 * advertises.  An unrecognised option prints usage plus a diagnostic and
 * exits with status 1.
 *
 * Without HAS_VSX the program does nothing and returns 0.
 */
int main(int argc, char **argv)
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int c;
   int i = 0;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER.  These
    * bits are set by various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         /* getopt() returns '?' for an unknown option; the offending
          * character itself is reported in optopt. */
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   /* No group selected on the command line: fall back to the documented
    * default of running everything instead of silently running nothing. */
   if (test_run_mask == 0)
      test_run_mask = 0xFFFF;

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];

      if(test_run_mask & aTest.test_group) {
         /* Test group specified on command line */

         printf( "%s\n", aTest.name );
         (*func)();
      }
      i++;
   }

   /* free(NULL) is a no-op, so no guards are needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS_VSX

   return 0;
}
1687