1 /*  Copyright (C) 2011 IBM
2 
3  Author: Maynard Johnson <maynardj@us.ibm.com>
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License as
7  published by the Free Software Foundation; either version 2 of the
8  License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18  02111-1307, USA.
19 
20  The GNU General Public License is contained in the file COPYING.
21  */
22 
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <malloc.h>
28 #include <math.h>
29 #include <unistd.h>    // getopt
30 
31 #ifdef HAS_VSX
32 
33 #include <altivec.h>
34 
/* HWord_t is an unsigned integer that is 32 bits wide unless
 * __powerpc64__ is defined, in which case it is 64 bits wide.
 */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */

/* isLE is 1 when VGP_ppc64le_linux is defined (little-endian ppc64
 * platform build) and 0 otherwise.
 */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif

/* Boolean type and its two values. */
typedef unsigned char Bool;
#define True 1
#define False 0
/* Global register variables (GCC extension): each name is bound to the
 * PowerPC register named in its __asm__ label -- r14..r17 as HWord_t and
 * fr14..fr17 as double.
 */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

/* NOTE(review): presumably hold the CR and XER values captured after a
 * divide test; no use is visible in this part of the file -- confirm.
 */
static volatile unsigned int div_flags, div_xer;
60 
/* Clobber list naming all eight condition register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Load the condition register from _arg (mtcr).
 * Note: the expansion already ends with a semicolon.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Load the XER from _arg (mtxer).
 * Note: the expansion already ends with a semicolon.
 */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval (mfcr). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read the XER into _lval (mfxer). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR into _lval_cr, then XER into _lval_xer. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Clear the condition register. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Clear the XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Clear both the condition register and the XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Clear the FPSCR: mtfsf with field mask 0xFF and a source register
 * holding 0.0.
 */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
91 
92 
/* Pointer to a test routine: takes no arguments and returns nothing. */
typedef void (*test_func_t)(void);
typedef struct test_table test_table_t;

/* Defines for the instruction groups; each group is one bit of a bit field. */
#define SCALAR_DIV_INST    0x0001
#define OTHER_INST  0x0002
99 
/* The functions below, which construct a table of floating point
 * values, were lifted from none/tests/ppc32/jm-insns.c.
 */
103 
/* AB_DPRINTF: debug trace for the argument-table builders.  Prints to
 * stderr when DEBUG_ARGS_BUILD is defined; otherwise it expands to an
 * empty statement and its arguments are not evaluated.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit floating point bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least 8 writable bytes
 *   s     sign, shifted into bit 63
 *   _exp  exponent field, shifted into bits 52 and up
 *   mant  fraction bits, OR-ed in as given (not masked)
 *
 * The pattern is copied with memcpy instead of being stored through a
 * uint64_t* cast of the destination: callers pass the address of a
 * double, and writing it through an incompatible pointer type violates
 * the C aliasing rules.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits;
   double as_double;

   bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   memcpy(farg, &bits, sizeof bits);

   /* Only consumed by the debug trace below. */
   memcpy(&as_double, &bits, sizeof as_double);

   /* uint64_t is not necessarily unsigned long long, so cast explicitly
    * to match the %llx conversions.
    */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant,
              (unsigned long long)bits, as_double);
}
120 
/* Assemble a 32-bit floating point bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least 4 writable bytes
 *   s     sign, shifted into bit 31
 *   _exp  exponent field, shifted into bits 23 and up
 *   mant  fraction bits, OR-ed in as given (not masked)
 *
 * The pattern is copied with memcpy instead of being stored through a
 * uint32_t* cast of the destination: callers pass the address of a
 * float, and writing it through an incompatible pointer type violates
 * the C aliasing rules.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits;

   bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
   memcpy(farg, &bits, sizeof bits);
}
128 
129 
/* One operand pair for a two-argument floating point test.
 * NOTE(review): the fields are presumably indices into the special-value
 * tables (spec_fargs / spec_sp_fargs) -- confirm against the test runner.
 */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first (A) operand */
   int frb_idx;   /* index of the second (B) operand */
} fp_test_args_t;


/* Operand pairs for the two-argument tests, in execution order.
 * The order is significant; do not sort or de-duplicate.
 */
fp_test_args_t two_arg_fp_tests[] = {
   /* first operand 8 (one row entry uses 15) */
   { 8,  8}, { 8, 14}, {15, 16}, { 8,  5},
   { 8,  4}, { 8,  7}, { 8,  9}, { 8, 11},
   /* first operand 14 */
   {14,  8}, {14, 14}, {14,  6}, {14,  5},
   {14,  4}, {14,  7}, {14,  9}, {14, 11},
   /* first operand 6 */
   { 6,  8}, { 6, 14}, { 6,  6}, { 6,  5},
   { 6,  4}, { 6,  7}, { 6,  9}, { 6, 11},
   /* first operand 5 */
   { 5,  8}, { 5, 14}, { 5,  6}, { 5,  5},
   { 5,  4}, { 5,  7}, { 5,  9}, { 5, 11},
   /* first operand 4 */
   { 4,  8}, { 4, 14}, { 4,  6}, { 4,  5},
   { 4,  1}, { 4,  7}, { 4,  9}, { 4, 11},
   /* first operand 7 */
   { 7,  8}, { 7, 14}, { 7,  6}, { 7,  5},
   { 7,  4}, { 7,  7}, { 7,  9}, { 7, 11},
   /* first operand 10 or 12 */
   {10,  8}, {10, 14}, {12,  6}, {12,  5},
   {10,  4}, {10,  7}, {10,  9}, {10, 11},
   /* remaining mixed pairs */
   {12,  8}, {12, 14}, {12,  6}, {15, 16},
   {15, 16}, { 9, 11}, {11, 11}, {11, 12},
   {16, 18}, {17, 16}, {19, 19}, {19, 18}
};
206 
207 
/* Special-value operand tables filled in by build_special_fargs_table():
 * nb_special_fargs is the number of entries, spec_fargs holds them as
 * doubles and spec_sp_fargs holds the single-precision counterparts.
 */
static int nb_special_fargs;
static double * spec_fargs;
static float * spec_sp_fargs;
211 
build_special_fargs_table(void)212 static void build_special_fargs_table(void)
213 {
214 /*
215   Entry  Sign Exp   fraction                  Special value
216    0      0   3fd   0x8000000000000ULL         Positive finite number
217    1      0   404   0xf000000000000ULL         ...
218    2      0   001   0x8000000b77501ULL         ...
219    3      0   7fe   0x800000000051bULL         ...
220    4      0   012   0x3214569900000ULL         ...
221    5      0   000   0x0000000000000ULL         +0.0 (+zero)
222    6      1   000   0x0000000000000ULL         -0.0 (-zero)
223    7      0   7ff   0x0000000000000ULL         +infinity
224    8      1   7ff   0x0000000000000ULL         -infinity
225    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
226    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
227    11     0   7ff   0x8000000000000ULL         +QNaN
228    12     1   7ff   0x8000000000000ULL         -QNaN
229    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
230    14     1   40d   0x0650f5a07b353ULL         Negative finite number
231    15     0   412   0x32585a9900000ULL         A few more positive finite numbers
232    16     0   413   0x82511a2000000ULL         ...
233    17  . . . . . . . . . . . . . . . . . . . . . . .
234    18  . . . . . . . . . . . . . . . . . . . . . . .
235    19  . . . . . . . . . . . . . . . . . . . . . . .
236 */
237 
238    uint64_t mant;
239    uint32_t mant_sp;
240    uint16_t _exp;
241    int s;
242    int j, i = 0;
243 
244    if (spec_fargs)
245       return;
246 
247    spec_fargs = malloc( 20 * sizeof(double) );
248    spec_sp_fargs = malloc( 20 * sizeof(float) );
249 
250    // #0
251    s = 0;
252    _exp = 0x3fd;
253    mant = 0x8000000000000ULL;
254    register_farg(&spec_fargs[i++], s, _exp, mant);
255 
256    // #1
257    s = 0;
258    _exp = 0x404;
259    mant = 0xf000000000000ULL;
260    register_farg(&spec_fargs[i++], s, _exp, mant);
261 
262    // #2
263    s = 0;
264    _exp = 0x001;
265    mant = 0x8000000b77501ULL;
266    register_farg(&spec_fargs[i++], s, _exp, mant);
267 
268    // #3
269    s = 0;
270    _exp = 0x7fe;
271    mant = 0x800000000051bULL;
272    register_farg(&spec_fargs[i++], s, _exp, mant);
273 
274    // #4
275    s = 0;
276    _exp = 0x012;
277    mant = 0x3214569900000ULL;
278    register_farg(&spec_fargs[i++], s, _exp, mant);
279 
280 
281    /* Special values */
282    /* +0.0      : 0 0x000 0x0000000000000 */
283    // #5
284    s = 0;
285    _exp = 0x000;
286    mant = 0x0000000000000ULL;
287    register_farg(&spec_fargs[i++], s, _exp, mant);
288 
289    /* -0.0      : 1 0x000 0x0000000000000 */
290    // #6
291    s = 1;
292    _exp = 0x000;
293    mant = 0x0000000000000ULL;
294    register_farg(&spec_fargs[i++], s, _exp, mant);
295 
296    /* +infinity : 0 0x7FF 0x0000000000000  */
297    // #7
298    s = 0;
299    _exp = 0x7FF;
300    mant = 0x0000000000000ULL;
301    register_farg(&spec_fargs[i++], s, _exp, mant);
302 
303    /* -infinity : 1 0x7FF 0x0000000000000 */
304    // #8
305    s = 1;
306    _exp = 0x7FF;
307    mant = 0x0000000000000ULL;
308    register_farg(&spec_fargs[i++], s, _exp, mant);
309 
310    /*
311     * This comment applies to values #9 and #10 below:
312     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
313     * so we can't just copy the double-precision value to the corresponding slot in the
314     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
315     * have to manually set the bits using register_sp_farg().
316     */
317 
318    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
319    // #9
320    s = 0;
321    _exp = 0x7FF;
322    mant = 0x7FFFFFFFFFFFFULL;
323    register_farg(&spec_fargs[i++], s, _exp, mant);
324    _exp = 0xff;
325    mant_sp = 0x3FFFFF;
326    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
327 
328    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
329    // #10
330    s = 1;
331    _exp = 0x7FF;
332    mant = 0x7FFFFFFFFFFFFULL;
333    register_farg(&spec_fargs[i++], s, _exp, mant);
334    _exp = 0xff;
335    mant_sp = 0x3FFFFF;
336    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
337 
338    /* +QNaN     : 0 0x7FF 0x8000000000000 */
339    // #11
340    s = 0;
341    _exp = 0x7FF;
342    mant = 0x8000000000000ULL;
343    register_farg(&spec_fargs[i++], s, _exp, mant);
344 
345    /* -QNaN     : 1 0x7FF 0x8000000000000 */
346    // #12
347    s = 1;
348    _exp = 0x7FF;
349    mant = 0x8000000000000ULL;
350    register_farg(&spec_fargs[i++], s, _exp, mant);
351 
352    /* denormalized value */
353    // #13
354    s = 1;
355    _exp = 0x000;
356    mant = 0x8340000078000ULL;
357    register_farg(&spec_fargs[i++], s, _exp, mant);
358 
359    /* Negative finite number */
360    // #14
361    s = 1;
362    _exp = 0x40d;
363    mant = 0x0650f5a07b353ULL;
364    register_farg(&spec_fargs[i++], s, _exp, mant);
365 
366    /* A few positive finite numbers ... */
367    // #15
368    s = 0;
369    _exp = 0x412;
370    mant = 0x32585a9900000ULL;
371    register_farg(&spec_fargs[i++], s, _exp, mant);
372 
373    // #16
374    s = 0;
375    _exp = 0x413;
376    mant = 0x82511a2000000ULL;
377    register_farg(&spec_fargs[i++], s, _exp, mant);
378 
379    // #17
380    s = 0;
381    _exp = 0x403;
382    mant = 0x12ef5a9300000ULL;
383    register_farg(&spec_fargs[i++], s, _exp, mant);
384 
385    // #18
386    s = 0;
387    _exp = 0x405;
388    mant = 0x14bf5d2300000ULL;
389    register_farg(&spec_fargs[i++], s, _exp, mant);
390 
391    // #19
392    s = 0;
393    _exp = 0x409;
394    mant = 0x76bf982440000ULL;
395    register_farg(&spec_fargs[i++], s, _exp, mant);
396 
397    nb_special_fargs = i;
398    for (j = 0; j < i; j++) {
399       if (!(j == 9 || j == 10))
400          spec_sp_fargs[j] = spec_fargs[j];
401    }
402 }
403 
404 
/* One entry of a table of test categories. */
struct test_table
{
   test_func_t test_category;   /* routine that runs the category */
   char * name;                 /* category name */
   /* NOTE(review): presumably a mask built from SCALAR_DIV_INST /
    * OTHER_INST -- confirm against the table that uses this struct.
    */
   unsigned int test_group;
};
411 
412 /*  Type of input for floating point operations.*/
typedef enum {
   SINGLE_TEST = 0,   /* single-precision operands */
   DOUBLE_TEST = 1    /* double-precision operands */
} precision_type_t;
417 
/* Category of a VSX floating point test.
 * NOTE(review): how each category is handled is decided by the test
 * runner, which is not visible here -- confirm before relying on it.
 */
typedef enum {
   VX_SCALAR_CONV_TO_WORD = 0,
   VX_CONV_TO_SINGLE      = 1,
   VX_CONV_TO_DOUBLE      = 2,
   VX_ESTIMATE            = 3,   /* used for the estimate instructions */
   VX_DEFAULT             = 4
} vx_fp_test_type;
425 
/* Vector operands shared with the inline-asm test routines: vec_inA and
 * vec_inB are the inputs, vec_out receives the result.
 */
static vector unsigned int vec_out, vec_inA, vec_inB;
427 
428 /* This function is for checking the reciprocal and reciprocal square root
429  * estimate instructions.
430  */
check_estimate(precision_type_t type,Bool is_rsqrte,int idx,int output_vec_idx)431 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
432 {
433    /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
434     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
435     * does an actual reciprocal calculation versus estimation, so the answer we get back from
436     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
437     * precision) and the estimate may still be within expected tolerances.  On top of that,
438     * we can't count on these estimates always being the same across implementations.
439     * For example, with the fre[s] instruction (which should be correct to within one part
440     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
441     * one implementation could return 1.0111_1111_0000 and another implementation could return
442     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
443     * single bit in common.
444     *
445     * The upshot is we can't validate the VEX output for these instructions by comparing against
446     * stored bit patterns.  We must check that the result is within expected tolerances.
447     */
448 
449 
450    /* A mask to be used for validation as a last resort.
451     * Only use 12 bits of precision for reasons discussed above.
452     */
453 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
454 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
455 
456    Bool result = False;
457    Bool dp_test = type == DOUBLE_TEST;
458    double src_dp, res_dp;
459    float src_sp, res_sp;
460    src_dp = res_dp = 0;
461    src_sp = res_sp = 0;
462 #define SRC (dp_test ? src_dp : src_sp)
463 #define RES (dp_test ? res_dp : res_sp)
464    Bool src_is_negative = False;
465    Bool res_is_negative = False;
466    unsigned long long * dst_dp = NULL;
467    unsigned int * dst_sp = NULL;
468    if (dp_test) {
469       unsigned long long * src_dp_ull;
470       dst_dp = (unsigned long long *) &vec_out;
471       src_dp = spec_fargs[idx];
472       src_dp_ull = (unsigned long long *) &src_dp;
473       src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
474       res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
475       memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
476    } else {
477       unsigned int * src_sp_uint;
478       dst_sp = (unsigned int *) &vec_out;
479       src_sp = spec_sp_fargs[idx];
480       src_sp_uint = (unsigned int *) &src_sp;
481       src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
482       res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
483       memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
484    }
485 
486    // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
487    if (isnan(SRC))
488       return isnan(RES);
489    if (fpclassify(SRC) == FP_ZERO)
490       return isinf(RES);
491    if (!src_is_negative && isinf(SRC))
492       return !res_is_negative && (fpclassify(RES) == FP_ZERO);
493    if (is_rsqrte) {
494       if (src_is_negative)
495          return isnan(RES);
496    } else {
497       if (src_is_negative && isinf(SRC))
498          return res_is_negative && (fpclassify(RES) == FP_ZERO);
499    }
500    if (dp_test) {
501       double calc_diff;
502       double real_diff;
503       double recip_divisor;
504       double div_result;
505       double calc_diff_tmp;
506 
507       if (is_rsqrte)
508          recip_divisor = sqrt(src_dp);
509       else
510          recip_divisor = src_dp;
511 
512       div_result = 1.0/recip_divisor;
513       calc_diff_tmp = recip_divisor * 16384.0;
514       if (isnormal(calc_diff_tmp)) {
515          calc_diff = fabs(1.0/calc_diff_tmp);
516          real_diff = fabs(res_dp - div_result);
517          result = ( ( res_dp == div_result )
518                   || ( real_diff <= calc_diff ) );
519       } else {
520          /* Unable to compute theoretical difference, so we fall back to masking out
521           * un-precise bits.
522           */
523          unsigned long long * div_result_dp = (unsigned long long *) &div_result;
524          result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
525       }
526       /* For debug use . . .
527          if (!result) {
528              unsigned long long * dv = &div_result;
529              unsigned long long * rd = &real_diff;
530              unsigned long long * cd = &calc_diff;
531              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
532        *dv, *rd, *cd);
533           }
534        */
535    } else {  // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
536       float calc_diff;
537       float real_diff;
538       float div_result;
539       float calc_diff_tmp;
540       float recip_divisor = sqrt(src_sp);
541 
542       div_result = 1.0/recip_divisor;
543       calc_diff_tmp = recip_divisor * 16384.0;
544       if (isnormal(calc_diff_tmp)) {
545          calc_diff = fabsf(1.0/calc_diff_tmp);
546          real_diff = fabsf(res_sp - div_result);
547          result = ( ( res_sp == div_result )
548                   || ( real_diff <= calc_diff ) );
549       } else {
550          /* Unable to compute theoretical difference, so we fall back to masking out
551           * un-precise bits.
552           */
553          unsigned int * div_result_sp = (unsigned int *) &div_result;
554          result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
555       }
556       /* For debug use . . .
557          if (!result) {
558              unsigned long long * dv = &div_result;
559              unsigned long long * rd = &real_diff;
560              unsigned long long * cd = &calc_diff;
561              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
562        *dv, *rd, *cd);
563           }
564        */
565    }
566    return result;
567 }
568 
/* Description of one VSX floating point instruction test. */
typedef struct vx_fp_test
{
   test_func_t test_func;        /* routine that executes the instruction */
   const char * name;            /* instruction mnemonic */
   fp_test_args_t * targs;       /* operand pairs for two-argument tests, else NULL */
   int num_tests;                /* number of operand sets to run */
   precision_type_t precision;   /* precision of the input operands */
   vx_fp_test_type type;         /* test category */
   const char * op;              /* short operation label, e.g. "1/x" or "conv" */
} vx_fp_test_t;
579 
580 
/* NOTE(review): not referenced in this part of the file; presumably
 * selects the record ("dot") form of an instruction -- confirm against
 * its users.
 */
static Bool do_dot;
582 
test_xvredp(void)583 static void test_xvredp(void)
584 {
585    __asm__ __volatile__ ("xvredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
586 }
587 
test_xsredp(void)588 static void test_xsredp(void)
589 {
590    __asm__ __volatile__ ("xsredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
591 }
592 
test_xvrsqrtedp(void)593 static void test_xvrsqrtedp(void)
594 {
595    __asm__ __volatile__ ("xvrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
596 }
597 
test_xsrsqrtedp(void)598 static void test_xsrsqrtedp(void)
599 {
600    __asm__ __volatile__ ("xsrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
601 }
602 
test_xvrsqrtesp(void)603 static void test_xvrsqrtesp(void)
604 {
605    __asm__ __volatile__ ("xvrsqrtesp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
606 }
607 
test_xstsqrtdp(void)608 static void test_xstsqrtdp(void)
609 {
610    __asm__ __volatile__ ("xstsqrtdp   cr1, %x0" : : "wa" (vec_inB));
611 }
612 
test_xvtsqrtdp(void)613 static void test_xvtsqrtdp(void)
614 {
615    __asm__ __volatile__ ("xvtsqrtdp   cr1, %x0" : : "wa" (vec_inB));
616 }
617 
test_xvtsqrtsp(void)618 static void test_xvtsqrtsp(void)
619 {
620    __asm__ __volatile__ ("xvtsqrtsp   cr1, %x0" : : "wa" (vec_inB));
621 }
622 
test_xvsqrtdp(void)623 static void test_xvsqrtdp(void)
624 {
625    __asm__ __volatile__ ("xvsqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
626 }
627 
test_xvsqrtsp(void)628 static void test_xvsqrtsp(void)
629 {
630    __asm__ __volatile__ ("xvsqrtsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
631 }
632 
test_xvtdivdp(void)633 static void test_xvtdivdp(void)
634 {
635    __asm__ __volatile__ ("xvtdivdp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
636 }
637 
test_xvtdivsp(void)638 static void test_xvtdivsp(void)
639 {
640    __asm__ __volatile__ ("xvtdivsp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
641 }
642 
test_xscvdpsp(void)643 static void test_xscvdpsp(void)
644 {
645    __asm__ __volatile__ ("xscvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
646 }
647 
test_xscvdpuxws(void)648 static void test_xscvdpuxws(void)
649 {
650    __asm__ __volatile__ ("xscvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
651 }
652 
test_xscvspdp(void)653 static void test_xscvspdp(void)
654 {
655    __asm__ __volatile__ ("xscvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
656 }
657 
test_xvcvdpsp(void)658 static void test_xvcvdpsp(void)
659 {
660    __asm__ __volatile__ ("xvcvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
661 }
662 
test_xvcvdpuxds(void)663 static void test_xvcvdpuxds(void)
664 {
665    __asm__ __volatile__ ("xvcvdpuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
666 }
667 
test_xvcvdpuxws(void)668 static void test_xvcvdpuxws(void)
669 {
670    __asm__ __volatile__ ("xvcvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
671 }
672 
test_xvcvspdp(void)673 static void test_xvcvspdp(void)
674 {
675    __asm__ __volatile__ ("xvcvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
676 }
677 
test_xvcvspsxds(void)678 static void test_xvcvspsxds(void)
679 {
680    __asm__ __volatile__ ("xvcvspsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
681 }
682 
test_xvcvspuxds(void)683 static void test_xvcvspuxds(void)
684 {
685    __asm__ __volatile__ ("xvcvspuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
686 }
687 
test_xvcvdpsxds(void)688 static void test_xvcvdpsxds(void)
689 {
690    __asm__ __volatile__ ("xvcvdpsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
691 }
692 
test_xvcvspuxws(void)693 static void test_xvcvspuxws(void)
694 {
695    __asm__ __volatile__ ("xvcvspuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
696 }
697 
test_xvcvsxddp(void)698 static void test_xvcvsxddp(void)
699 {
700    __asm__ __volatile__ ("xvcvsxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
701 }
702 
test_xvcvuxddp(void)703 static void test_xvcvuxddp(void)
704 {
705    __asm__ __volatile__ ("xvcvuxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
706 }
707 
test_xvcvsxdsp(void)708 static void test_xvcvsxdsp(void)
709 {
710    __asm__ __volatile__ ("xvcvsxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
711 }
712 
test_xvcvuxdsp(void)713 static void test_xvcvuxdsp(void)
714 {
715    __asm__ __volatile__ ("xvcvuxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
716 }
717 
test_xvcvsxwdp(void)718 static void test_xvcvsxwdp(void)
719 {
720    __asm__ __volatile__ ("xvcvsxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
721 }
722 
test_xvcvuxwdp(void)723 static void test_xvcvuxwdp(void)
724 {
725    __asm__ __volatile__ ("xvcvuxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
726 }
727 
test_xvcvsxwsp(void)728 static void test_xvcvsxwsp(void)
729 {
730    __asm__ __volatile__ ("xvcvsxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
731 }
732 
test_xvcvuxwsp(void)733 static void test_xvcvuxwsp(void)
734 {
735    __asm__ __volatile__ ("xvcvuxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
736 }
737 
test_xsrdpic(void)738 static void test_xsrdpic(void)
739 {
740    __asm__ __volatile__ ("xsrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
741 }
742 
test_xsrdpiz(void)743 static void test_xsrdpiz(void)
744 {
745    __asm__ __volatile__ ("xsrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
746 }
747 
test_xsrdpi(void)748 static void test_xsrdpi(void)
749 {
750    __asm__ __volatile__ ("xsrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
751 }
752 
test_xvabsdp(void)753 static void test_xvabsdp(void)
754 {
755    __asm__ __volatile__ ("xvabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
756 }
757 
test_xvnabsdp(void)758 static void test_xvnabsdp(void)
759 {
760    __asm__ __volatile__ ("xvnabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
761 }
762 
test_xvnegdp(void)763 static void test_xvnegdp(void)
764 {
765    __asm__ __volatile__ ("xvnegdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
766 }
767 
test_xvnegsp(void)768 static void test_xvnegsp(void)
769 {
770    __asm__ __volatile__ ("xvnegsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
771 }
772 
test_xvabssp(void)773 static void test_xvabssp(void)
774 {
775    __asm__ __volatile__ ("xvabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
776 }
777 
test_xvnabssp(void)778 static void test_xvnabssp(void)
779 {
780    __asm__ __volatile__ ("xvnabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
781 }
782 
test_xvrdpi(void)783 static void test_xvrdpi(void)
784 {
785    __asm__ __volatile__ ("xvrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
786 }
787 
test_xvrdpic(void)788 static void test_xvrdpic(void)
789 {
790    __asm__ __volatile__ ("xvrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
791 }
792 
test_xvrdpim(void)793 static void test_xvrdpim(void)
794 {
795    __asm__ __volatile__ ("xvrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
796 }
797 
test_xvrdpip(void)798 static void test_xvrdpip(void)
799 {
800    __asm__ __volatile__ ("xvrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
801 }
802 
test_xvrdpiz(void)803 static void test_xvrdpiz(void)
804 {
805    __asm__ __volatile__ ("xvrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
806 }
807 
test_xvrspi(void)808 static void test_xvrspi(void)
809 {
810    __asm__ __volatile__ ("xvrspi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
811 }
812 
test_xvrspic(void)813 static void test_xvrspic(void)
814 {
815    __asm__ __volatile__ ("xvrspic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
816 }
817 
test_xvrspim(void)818 static void test_xvrspim(void)
819 {
820    __asm__ __volatile__ ("xvrspim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
821 }
822 
test_xvrspip(void)823 static void test_xvrspip(void)
824 {
825    __asm__ __volatile__ ("xvrspip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
826 }
827 
test_xvrspiz(void)828 static void test_xvrspiz(void)
829 {
830    __asm__ __volatile__ ("xvrspiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
831 }
832 
833 static vx_fp_test_t
834 vsx_one_fp_arg_tests[] = {
835                                 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
836                                 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
837                                 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
838                                 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
839                                 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
840                                 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
841                                 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
842                                 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
843                                 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
844                                 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
845                                 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
846                                 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
847                                 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
848                                 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
849                                 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
850                                 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
851                                 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
852                                 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
853                                 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
854                                 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
855                                 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
856                                 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
857                                 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
858                                 { &test_xvnegsp, "xvnegsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "neg"},
859                                 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
860                                 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
861                                 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
862                                 { &test_xvrdpi,  "xvrdpi",  NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
863                                 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
864                                 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
865                                 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
866                                 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
867                                 { &test_xvrspi,  "xvrspi",  NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
868                                 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
869                                 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
870                                 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
871                                 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
872                                 { NULL, NULL, NULL, 0, 0, 0, NULL}
873 };
874 
875 static vx_fp_test_t
876 vx_tdivORtsqrt_tests[] = {
877                           { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
878                           { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
879                           { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
880                           { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
881                           { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
882                           { NULL, NULL, NULL, 0 , 0, 0, NULL}
883 };
884 
/* Eight 64-bit integer inputs for the doubleword int-to-FP conversions. */
static unsigned long long doubleWord[] = {
   0x0000000000000000ULL,
   0xffffffff00000000ULL,
   0x00000000ffffffffULL,
   0xffffffffffffffffULL,
   0x89abcde123456789ULL,
   0x0102030405060708ULL,
   0x00000000a0b1c2d3ULL,
   0x1111222233334444ULL
};
894 
/* Eight 32-bit integer inputs for the word int-to-FP conversions. */
static unsigned int singleWord[] = {
   0x00000000U,
   0xffff0000U,
   0x0000ffffU,
   0xffffffffU,
   0x89a73522U,
   0x01020304U,
   0x0000abcdU,
   0x11223344U
};
904 
905 typedef struct vx_intToFp_test
906 {
907    test_func_t test_func;
908    const char * name;
909    void * targs;
910    int num_tests;
911    precision_type_t precision;
912    vx_fp_test_type type;
913 } vx_intToFp_test_t;
914 
915 static vx_intToFp_test_t
916 intToFp_tests[] = {
917                    { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
918                    { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
919                    { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
920                    { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
921                    { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
922                    { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
923                    { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
924                    { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
925                    { NULL, NULL, NULL, 0, 0 }
926 };
927 
928 static Bool do_OE;
/* Bit flags selecting which form of an extended-divide instruction to run. */
typedef enum {
   DIV_BASE = 1 << 0,   /* always set: the plain instruction */
   DIV_OE   = 1 << 1,   /* the 'o' form of the mnemonic */
   DIV_DOT  = 1 << 2    /* the '.' form of the mnemonic */
} div_type_t;
934 /* Possible divde type combinations are:
935  *   - base
936  *   - base+dot
937  *   - base+OE
938  *   - base+OE+dot
939  */
940 #ifdef __powerpc64__
test_divdeu(void)941 static void test_divdeu(void)
942 {
943    int divdeu_type = DIV_BASE;
944    if (do_OE)
945       divdeu_type |= DIV_OE;
946    if (do_dot)
947       divdeu_type |= DIV_DOT;
948 
949    switch (divdeu_type) {
950       case 1:
951         SET_CR_XER_ZERO;
952          __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
953          GET_CR_XER(div_flags, div_xer);
954          break;
955       case 3:
956         SET_CR_XER_ZERO;
957          __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
958          GET_CR_XER(div_flags, div_xer);
959          break;
960       case 5:
961         SET_CR_XER_ZERO;
962          __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
963          GET_CR_XER(div_flags, div_xer);
964          break;
965       case 7:
966         SET_CR_XER_ZERO;
967          __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
968          GET_CR_XER(div_flags, div_xer);
969          break;
970       default:
971          fprintf(stderr, "Invalid divdeu type. Exiting\n");
972          exit(1);
973    }
974 }
975 #endif
976 
test_divwe(void)977 static void test_divwe(void)
978 {
979    int divwe_type = DIV_BASE;
980    if (do_OE)
981       divwe_type |= DIV_OE;
982    if (do_dot)
983       divwe_type |= DIV_DOT;
984 
985    switch (divwe_type) {
986       case 1:
987         SET_CR_XER_ZERO;
988          __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
989          GET_CR_XER(div_flags, div_xer);
990          break;
991       case 3:
992         SET_CR_XER_ZERO;
993          __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
994          GET_CR_XER(div_flags, div_xer);
995          break;
996       case 5:
997         SET_CR_XER_ZERO;
998          __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
999          GET_CR_XER(div_flags, div_xer);
1000          break;
1001       case 7:
1002         SET_CR_XER_ZERO;
1003          __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1004          GET_CR_XER(div_flags, div_xer);
1005          break;
1006       default:
1007          fprintf(stderr, "Invalid divweu type. Exiting\n");
1008          exit(1);
1009    }
1010 }
1011 
1012 
1013 typedef struct simple_test {
1014    test_func_t test_func;
1015    char * name;
1016    precision_type_t precision;
1017 } simple_test_t;
1018 
1019 
/*
 * Fill the four 4-byte elements of the input vectors from spec_sp_fargs.
 *
 * For element n, targs[n].fra_idx selects the value copied into vec_inA
 * and targs[n].frb_idx selects the value copied into the second vector.
 * The second vector is vec_inB, or vec_out when swap_inputs is true.
 */
static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   char * first_vec = (char *)&vec_inA;
   char * second_vec = swap_inputs ? (char *)&vec_out : (char *)&vec_inB;
   int elem;

   for (elem = 0; elem < 4; elem++) {
      const fp_test_args_t * pair = &targs[elem];

      // copy single precision FP into vector element 'elem'
      memcpy(first_vec + elem * 4, &spec_sp_fargs[pair->fra_idx], 4);
      memcpy(second_vec + elem * 4, &spec_sp_fargs[pair->frb_idx], 4);
   }
}
1037 
/*
 * Fill the two 8-byte elements of the input vectors from spec_fargs.
 *
 * For element n, targs[n].fra_idx selects the value copied into vec_inA
 * and targs[n].frb_idx selects the value copied into the second vector.
 * The second vector is vec_inB, or vec_out when swap_inputs is true.
 */
static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   char * first_vec = (char *)&vec_inA;
   char * second_vec = swap_inputs ? (char *)&vec_out : (char *)&vec_inB;
   int elem;

   for (elem = 0; elem < 2; elem++) {
      const fp_test_args_t * pair = &targs[elem];

      // copy double precision FP into vector element 'elem'
      memcpy(first_vec + elem * 8, &spec_fargs[pair->fra_idx], 8);
      memcpy(second_vec + elem * 8, &spec_fargs[pair->frb_idx], 8);
   }
}
1055 
1056 #define VX_NOT_CMP_OP 0xffffffff
/*
 * Print one result line for a two-operand vector FP test.
 *
 *   cc             value appended as " ? cc=<hex>"; pass VX_NOT_CMP_OP to
 *                  omit it.
 *   test_group     descriptor supplying the name, operation label,
 *                  precision and operand-index table.
 *   i              index into test_group->targs of the first operand pair
 *                  of this vector (2 pairs for double, 4 for single).
 *   print_vec_out  when true, append the raw contents of vec_out.
 *
 * A "." is appended to the name when the global do_dot is set.
 */
static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
{
   int a_idx, b_idx, k;
   int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
   int loops = dp ? 2 : 4;
   fp_test_args_t * targs = &test_group->targs[i];
   unsigned long long * frA_dp, * frB_dp, * dst_dp;
   unsigned int * frA_sp, *frB_sp, * dst_sp;

   /* The name is only read, so print it in place instead of copying it
    * into a fixed-size heap buffer (which was unchecked and could overflow). */
   printf("#%d: %s%s ", dp? i/2 : i/4, test_group->name, (do_dot ? "." : ""));
   for (k = 0; k < loops; k++) {
      a_idx = targs->fra_idx;
      b_idx = targs->frb_idx;
      if (k)
         printf(" AND ");
      if (dp) {
         /* Show the operands as raw 64-bit patterns. */
         frA_dp = (unsigned long long *)&spec_fargs[a_idx];
         frB_dp = (unsigned long long *)&spec_fargs[b_idx];
         printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
      } else {
         /* Show the operands as raw 32-bit patterns. */
         frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
         frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
         printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
      }
      targs++;
   }
   if (cc != VX_NOT_CMP_OP)
      printf(" ? cc=%x", cc);

   if (print_vec_out) {
      if (dp) {
         dst_dp = (unsigned long long *) &vec_out;
         printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
      } else {
         dst_sp = (unsigned int *) &vec_out;
         printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
      }
   } else {
      printf("\n");
   }
}
1100 
1101 
1102 
test_vsx_one_fp_arg(void)1103 static void test_vsx_one_fp_arg(void)
1104 {
1105    test_func_t func;
1106    int k;
1107    k = 0;
1108    build_special_fargs_table();
1109 
1110    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1111       int idx, i;
1112       vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1113       Bool estimate = (test_group.type == VX_ESTIMATE);
1114       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1115       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1116       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1117       Bool sparse_sp = False;
1118       int stride = dp ? 2 : 4;
1119       int loops = is_scalar ? 1 : stride;
1120       stride = is_scalar ? 1: stride;
1121 
1122       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1123        *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
1124        *                     or
1125        *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
1126        *
1127        * For the vector op case, we need to adjust stride from '4' to '2', since
1128        * we'll only be loading two values per loop into the input register.
1129        */
1130       if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1131          sparse_sp = True;
1132          stride = 2;
1133       }
1134 
1135       for (i = 0; i < test_group.num_tests; i+=stride) {
1136          unsigned int * pv;
1137          void * inB, * vecB_void_ptr = (void *)&vec_inB;
1138 
1139          pv = (unsigned int *)&vec_out;
1140          // clear vec_out
1141          for (idx = 0; idx < 4; idx++, pv++)
1142             *pv = 0;
1143 
1144          if (dp) {
1145             int j;
1146             unsigned long long * frB_dp, *dst_dp;
1147             for (j = 0; j < loops; j++) {
1148                inB = (void *)&spec_fargs[i + j];
1149                // copy double precision FP into vector element i
1150                if (isLE && is_scalar)
1151                   vecB_void_ptr += 8;
1152                memcpy(vecB_void_ptr + (j * 8), inB, 8);
1153             }
1154             // execute test insn
1155             (*func)();
1156             dst_dp = (unsigned long long *) &vec_out;
1157             if (isLE && is_scalar)
1158                dst_dp++;
1159             printf("#%d: %s ", i/stride, test_group.name);
1160             for (j = 0; j < loops; j++) {
1161                if (j)
1162                   printf("; ");
1163                frB_dp = (unsigned long long *)&spec_fargs[i + j];
1164                printf("%s(%016llx)", test_group.op, *frB_dp);
1165                if (estimate) {
1166                   Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
1167                   printf(" ==> %s)", res ? "PASS" : "FAIL");
1168                   /* For debugging . . .
1169                    printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1170                    */
1171                } else {
1172                   vx_fp_test_type type = test_group.type;
1173                   switch (type) {
1174                      case VX_SCALAR_CONV_TO_WORD:
1175                         printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1176                         break;
1177                      case VX_CONV_TO_SINGLE:
1178                         printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1179                         break;
1180                      default:  // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1181                         printf(" = %016llx", dst_dp[j]);
1182                   }
1183                }
1184             }
1185             printf("\n");
1186          } else {
1187             int j;
1188             unsigned int * frB_sp, * dst_sp = NULL;
1189             unsigned long long * dst_dp = NULL;
1190             if (sparse_sp)
1191                loops = 2;
1192             for (j = 0; j < loops; j++) {
1193                inB = (void *)&spec_sp_fargs[i + j];
1194                // copy single precision FP into vector element i
1195                if (sparse_sp) {
1196                   if (isLE)
1197                      memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1198                   else
1199                      memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1200                } else {
1201                   if (isLE && is_scalar)
1202                      vecB_void_ptr += 12;
1203                   memcpy(vecB_void_ptr + (j * 4), inB, 4);
1204                }
1205             }
1206             // execute test insn
1207             (*func)();
1208             if (test_group.type == VX_CONV_TO_DOUBLE) {
1209                dst_dp = (unsigned long long *) &vec_out;
1210                if (isLE && is_scalar)
1211                   dst_dp++;
1212             } else {
1213                dst_sp = (unsigned int *) &vec_out;
1214                if (isLE && is_scalar)
1215                   dst_sp += 3;
1216             }
1217             // print result
1218             printf("#%d: %s ", i/stride, test_group.name);
1219             for (j = 0; j < loops; j++) {
1220                if (j)
1221                   printf("; ");
1222                frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1223                printf("%s(%08x)", test_group.op, *frB_sp);
1224                if (estimate) {
1225                   Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
1226                   printf(" ==> %s)", res ? "PASS" : "FAIL");
1227                } else {
1228                   if (test_group.type == VX_CONV_TO_DOUBLE)
1229                         printf(" = %016llx", dst_dp[j]);
1230                   else
1231                   /* Special case: Current VEX implementation for fsqrts (single precision)
1232                    * uses the same implementation as that used for double precision fsqrt.
1233                    * However, I've found that for xvsqrtsp, the result from that implementation
1234                    * may be off by the two LSBs.  Generally, even this small inaccuracy can cause the
1235                    * output to appear very different if you end up with a carry.  But for the given
1236                    * inputs in this testcase, we can simply mask out these bits.
1237                    */
1238                      printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1239                }
1240             }
1241             printf("\n");
1242          }
1243       }
1244       k++;
1245       printf( "\n" );
1246    }
1247 }
1248 
test_int_to_fp_convert(void)1249 static void test_int_to_fp_convert(void)
1250 {
1251    test_func_t func;
1252    int k;
1253    k = 0;
1254 
1255    while ((func = intToFp_tests[k].test_func)) {
1256       int idx, i;
1257       vx_intToFp_test_t test_group = intToFp_tests[k];
1258       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1259       Bool sparse_sp = False;
1260       int stride = dp ? 2 : 4;
1261       int loops = stride;
1262 
1263       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1264        *    |___ int___|_Unused_|___int___|__Unused__|   // for vector op
1265        *                     or
1266        * We need to adjust stride from '4' to '2', since we'll only be loading
1267        * two values per loop into the input register.
1268        */
1269       if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1270          sparse_sp = True;
1271          stride = 2;
1272       }
1273 
1274       for (i = 0; i < test_group.num_tests; i+=stride) {
1275          unsigned int * pv;
1276          void * inB;
1277 
1278          pv = (unsigned int *)&vec_out;
1279          // clear vec_out
1280          for (idx = 0; idx < 4; idx++, pv++)
1281             *pv = 0;
1282 
1283          if (dp) {
1284             int j;
1285             unsigned long long  *dst_dw, * targs = test_group.targs;
1286             for (j = 0; j < loops; j++) {
1287                inB = (void *)&targs[i + j];
1288                // copy doubleword into vector element i
1289                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1290             }
1291             // execute test insn
1292             (*func)();
1293             dst_dw = (unsigned long long *) &vec_out;
1294             printf("#%d: %s ", i/stride, test_group.name);
1295             for (j = 0; j < loops; j++) {
1296                if (j)
1297                   printf("; ");
1298                printf("conv(%016llx)", targs[i + j]);
1299 
1300                if (test_group.type == VX_CONV_TO_SINGLE)
1301                   printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1302                else
1303                   printf(" = %016llx", dst_dw[j]);
1304             }
1305             printf("\n");
1306          } else {
1307             int j;
1308             unsigned int * dst_sp = NULL;
1309             unsigned int * targs = test_group.targs;
1310             unsigned long long * dst_dp = NULL;
1311             void * vecB_void_ptr = (void *)&vec_inB;
1312             if (sparse_sp)
1313                loops = 2;
1314             for (j = 0; j < loops; j++) {
1315                inB = (void *)&targs[i + j];
1316                // copy single word into vector element i
1317                if (sparse_sp) {
1318                   if (isLE)
1319                      memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1320                   else
1321                      memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1322                } else {
1323                   memcpy(vecB_void_ptr + (j * 4), inB, 4);
1324                }
1325             }
1326             // execute test insn
1327             (*func)();
1328             if (test_group.type == VX_CONV_TO_DOUBLE)
1329                dst_dp = (unsigned long long *) &vec_out;
1330             else
1331                dst_sp = (unsigned int *) &vec_out;
1332             // print result
1333             printf("#%d: %s ", i/stride, test_group.name);
1334             for (j = 0; j < loops; j++) {
1335                if (j)
1336                   printf("; ");
1337                printf("conv(%08x)", targs[i + j]);
1338                if (test_group.type == VX_CONV_TO_DOUBLE)
1339                   printf(" = %016llx", dst_dp[j]);
1340                else
1341                   printf(" = %08x", dst_sp[j]);
1342             }
1343             printf("\n");
1344          }
1345       }
1346       k++;
1347       printf( "\n" );
1348    }
1349 }
1350 
1351 
1352 
// The div doubleword test data.
// Each row is { value loaded into r14, value loaded into r15 }; the output
// prints the first as the upper half of the dividend and the second as the
// divisor.
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
// Number of rows in div_dw_tdata.
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1370 
// The div word test data.
// Each row is { value loaded into r14, value loaded into r15 }; the output
// prints the first as the upper half of the dividend and the second as the
// divisor.
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 }
};
// Number of rows in div_w_tdata.
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1381 
1382 typedef struct div_ext_test
1383 {
1384    test_func_t test_func;
1385    const char *name;
1386    int num_tests;
1387    div_type_t div_type;
1388    precision_type_t precision;
1389 } div_ext_test_t;
1390 
1391 static div_ext_test_t div_tests[] = {
1392 #ifdef __powerpc64__
1393                                    { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1394                                    { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1395 #endif
1396                                    { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1397                                    { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1398                                    { NULL, NULL, 0, 0, 0 }
1399 };
1400 
test_div_extensions(void)1401 static void test_div_extensions(void)
1402 {
1403    test_func_t func;
1404    int k;
1405    k = 0;
1406 
1407    while ((func = div_tests[k].test_func)) {
1408       int i, repeat = 1;
1409       div_ext_test_t test_group = div_tests[k];
1410       do_dot = False;
1411 
1412 again:
1413       for (i = 0; i < test_group.num_tests; i++) {
1414          unsigned int condreg;
1415 
1416          if (test_group.div_type == DIV_OE)
1417             do_OE = True;
1418          else
1419             do_OE = False;
1420 
1421          if (test_group.precision == DOUBLE_TEST) {
1422             r14 = div_dw_tdata[i][0];
1423             r15 = div_dw_tdata[i][1];
1424          } else {
1425             r14 = div_w_tdata[i][0];
1426             r15 = div_w_tdata[i][1];
1427          }
1428          // execute test insn
1429          (*func)();
1430          condreg = (div_flags & 0xf0000000) >> 28;
1431          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1432          if (test_group.precision == DOUBLE_TEST) {
1433             printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1434                    div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1435          } else {
1436             printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1437                    div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1438          }
1439          printf(" CR=%x; XER=%x\n", condreg, div_xer);
1440       }
1441       printf("\n");
1442       if (repeat) {
1443          repeat = 0;
1444          do_dot = True;
1445          goto again;
1446       }
1447       k++;
1448       printf( "\n" );
1449    }
1450 }
1451 
1452 
test_vx_tdivORtsqrt(void)1453 static void test_vx_tdivORtsqrt(void)
1454 {
1455    test_func_t func;
1456    int k, crx;
1457    unsigned int flags;
1458    k = 0;
1459    do_dot = False;
1460    build_special_fargs_table();
1461 
1462    while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1463       int idx, i;
1464       vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1465       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1466       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1467       Bool two_args = test_group.targs ?  True : False;
1468       int stride = dp ? 2 : 4;
1469       int loops = is_scalar ? 1 : stride;
1470       stride = is_scalar ? 1: stride;
1471 
1472       for (i = 0; i < test_group.num_tests; i+=stride) {
1473          unsigned int * pv;
1474          void * inB, * vecB_void_ptr = (void *)&vec_inB;
1475 
1476          pv = (unsigned int *)&vec_out;
1477          // clear vec_out
1478          for (idx = 0; idx < 4; idx++, pv++)
1479             *pv = 0;
1480 
1481          if (dp) {
1482             int j;
1483             unsigned long long * frB_dp;
1484             if (two_args) {
1485                setup_dp_fp_args(&test_group.targs[i], False);
1486             } else {
1487                for (j = 0; j < loops; j++) {
1488                   inB = (void *)&spec_fargs[i + j];
1489                   // copy double precision FP into vector element i
1490                   if (isLE && is_scalar)
1491                      vecB_void_ptr += 8;
1492                   memcpy(vecB_void_ptr + (j * 8), inB, 8);
1493                }
1494             }
1495             // execute test insn
1496             // Must do set/get of CRs immediately before/after calling the asm func
1497             // to avoid CRs being modified by other instructions.
1498             SET_FPSCR_ZERO;
1499             SET_CR_XER_ZERO;
1500             (*func)();
1501             GET_CR(flags);
1502             // assumes using CR1
1503             crx = (flags & 0x0f000000) >> 24;
1504             if (two_args) {
1505                print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1506             } else {
1507                printf("#%d: %s ", i/stride, test_group.name);
1508                for (j = 0; j < loops; j++) {
1509                   if (j)
1510                      printf("; ");
1511                   frB_dp = (unsigned long long *)&spec_fargs[i + j];
1512                   printf("%s(%016llx)", test_group.op, *frB_dp);
1513                }
1514                printf( " ? %x (CRx)\n", crx);
1515             }
1516          } else {
1517             int j;
1518             unsigned int * frB_sp;
1519             if (two_args) {
1520                setup_sp_fp_args(&test_group.targs[i], False);
1521             } else {
1522                for (j = 0; j < loops; j++) {
1523                   inB = (void *)&spec_sp_fargs[i + j];
1524                   // copy single precision FP into vector element i
1525                   memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1526                }
1527             }
1528             // execute test insn
1529             SET_FPSCR_ZERO;
1530             SET_CR_XER_ZERO;
1531             (*func)();
1532             GET_CR(flags);
1533             crx = (flags & 0x0f000000) >> 24;
1534             // print result
1535             if (two_args) {
1536                print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1537             } else {
1538                printf("#%d: %s ", i/stride, test_group.name);
1539                for (j = 0; j < loops; j++) {
1540                   if (j)
1541                      printf("; ");
1542                   frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1543                   printf("%s(%08x)", test_group.op, *frB_sp);
1544                }
1545                printf( " ? %x (CRx)\n", crx);
1546             }
1547          }
1548       }
1549       k++;
1550       printf( "\n" );
1551    }
1552 }
1553 
1554 
test_ftsqrt(void)1555 static void test_ftsqrt(void)
1556 {
1557    int i, crx;
1558    unsigned int flags;
1559    unsigned long long * frbp;
1560    build_special_fargs_table();
1561 
1562 
1563    for (i = 0; i < nb_special_fargs; i++) {
1564       f14 = spec_fargs[i];
1565       frbp = (unsigned long long *)&spec_fargs[i];
1566       SET_FPSCR_ZERO;
1567       SET_CR_XER_ZERO;
1568       __asm__ __volatile__ ("ftsqrt           cr1, %0" : : "d" (f14));
1569       GET_CR(flags);
1570       crx = (flags & 0x0f000000) >> 24;
1571       printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1572    }
1573    printf( "\n" );
1574 }
1575 
1576 static void
test_popcntw(void)1577 test_popcntw(void)
1578 {
1579 #ifdef __powerpc64__
1580    uint64_t res;
1581    unsigned long long src = 0x9182736405504536ULL;
1582    r14 = src;
1583    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1584    printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1585 #else
1586    uint32_t res;
1587    unsigned int src = 0x9182730E;
1588    r14 = src;
1589    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1590    printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1591 #endif
1592    printf( "\n" );
1593 }
1594 
1595 
1596 static test_table_t
1597          all_tests[] =
1598 {
1599 
1600                     { &test_vsx_one_fp_arg,
1601                       "Test VSX vector and scalar single argument instructions", OTHER_INST } ,
1602                     { &test_int_to_fp_convert,
1603                       "Test VSX vector integer to float conversion instructions", OTHER_INST },
1604                     { &test_div_extensions,
1605 		      "Test div extensions", SCALAR_DIV_INST },
1606                     { &test_ftsqrt,
1607 		      "Test ftsqrt instruction", OTHER_INST },
1608                     { &test_vx_tdivORtsqrt,
1609 		      "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
1610                     { &test_popcntw,
1611 		      "Test popcntw instruction", OTHER_INST },
1612                     { NULL, NULL }
1613 };
1614 #endif // HAS_VSX
1615 
/* Write the command-line help text to stderr. */
static void usage (void)
{
   static const char help_text[] =
      "Usage: test_isa_3_0 [OPTIONS]\n"
      "\t-d: test scalar division instructions (default)\n"
      "\t-o: test non scalar division instructions (default)\n"
      "\t-A: test all instructions (default)\n"
      "\t-h: display this help and exit\n";

   fputs(help_text, stderr);
}
1626 
/*
 * Entry point.  Parses the options, builds a mask of test groups and runs
 * every entry of all_tests whose group bit is in the mask.
 *
 *   -d  select SCALAR_DIV_INST tests
 *   -o  select OTHER_INST tests
 *   -A  select every group
 *   -h  print the help text and exit with status 0
 *
 * Returns 1 on an unrecognised option, 0 otherwise.  Without HAS_VSX the
 * program does nothing and returns 0.
 */
int main(int argc, char **argv)
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int c;
   int i = 0;
   /* NOTE(review): usage() labels -d, -o and -A as "(default)", but the
    * mask starts at 0, so with no option no test group is selected. */
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER (the
    * earlier wording of this note said FPSCR).  These bits are set by
    * various arithmetic instructions.  This means this test generates
    * different XER output for pre ISA 3.0 versus ISA 3.0 hardware.  The
    * tests have been grouped so that the tests that generate different
    * results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         /* getopt() returns '?' here; the offending option character is
          * in optopt. */
         usage();
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];

      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */

         printf( "%s\n", aTest.name );
         (*func)();
      }
      i++;
   }
   /* free(NULL) is a no-op, so no guard is needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS _VSX

   return 0;
}
1687