1 /*  Copyright (C) 2011 IBM
2 
3  Author: Maynard Johnson <maynardj@us.ibm.com>
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License as
7  published by the Free Software Foundation; either version 2 of the
8  License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18  02111-1307, USA.
19 
20  The GNU General Public License is contained in the file COPYING.
21  */
22 
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <malloc.h>
28 
29 #ifdef HAS_VSX
30 
31 #include <altivec.h>
32 
/* HWord_t: unsigned integer type selected at compile time -- 64 bits wide
 * when the compiler defines __powerpc64__, 32 bits wide otherwise. */
#if defined(__powerpc64__)
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */
38 
/* isLE: 1 when the build defines VGP_ppc64le_linux, 0 for any other build. */
#if !defined(VGP_ppc64le_linux)
#define isLE 0
#else
#define isLE 1
#endif
44 
45 register HWord_t r14 __asm__ ("r14");
46 register HWord_t r15 __asm__ ("r15");
47 register HWord_t r16 __asm__ ("r16");
48 register HWord_t r17 __asm__ ("r17");
49 register double f14 __asm__ ("fr14");
50 register double f15 __asm__ ("fr15");
51 register double f16 __asm__ ("fr16");
52 register double f17 __asm__ ("fr17");
53 
/* File-local scratch word, zero-initialised as a static object; volatile so
 * that every access is performed as written.
 * NOTE(review): presumably holds a condition-register snapshot -- confirm
 * against the users further down the file. */
static volatile unsigned cond_reg;
55 
/* Clobber list naming all eight condition-register fields cr0..cr7. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Issue "mtcr" with `value` as its input operand, declaring every CR field
 * clobbered.  The expansion already ends in ';'. */
#define SET_CR(value) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(value) : ALLCR );

/* Issue "mtxer" with `value` as its input operand, declaring xer clobbered.
 * The expansion already ends in ';'. */
#define SET_XER(value) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(value) : "xer" );

/* Issue "mfcr" with the lvalue `dest` as its output operand.
 * No trailing ';' in the expansion. */
#define GET_CR(dest) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(dest) )

/* Issue "mfxer" with the lvalue `dest` as its output operand.
 * No trailing ';' in the expansion. */
#define GET_XER(dest) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(dest) )

/* GET_CR followed by GET_XER, wrapped as a single statement. */
#define GET_CR_XER(cr_dest,xer_dest) \
   do { GET_CR(cr_dest); GET_XER(xer_dest); } while (0)

/* SET_CR / SET_XER with a constant 0 operand. */
#define SET_CR_ZERO SET_CR(0)
#define SET_XER_ZERO SET_XER(0)

/* SET_CR_ZERO followed by SET_XER_ZERO, wrapped as a single statement. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
81 
/* Issue "mtfsf 0xFF" with a local double holding 0.0 as its operand.
 * Wrapped in do/while so it is used as one statement. */
#define SET_FPSCR_ZERO \
   do { double _fpscr_zero = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_fpscr_zero) ); \
   } while (0)
86 
87 
/* Pointer to a function that takes no arguments and returns nothing. */
typedef void (*test_func_t)(void);

/* Short aliases for struct types.  None of the struct bodies has been
 * defined at this point, so these are incomplete types here. */
typedef struct ldst_test         ldst_test_t;
typedef struct vsx_logic_test    logic_test_t;
typedef struct xs_conv_test      xs_conv_test_t;
typedef struct p7_fp_test        fp_test_t;
typedef struct vx_fp_test        vx_fp_test_t;
typedef struct vsx_move_test     move_test_t;
typedef struct vsx_permute_test  permute_test_t;
typedef struct test_table        test_table_t;
97 
/* Table of double-precision test values and the number of entries stored in
 * it.  Both start out empty (null pointer / zero) until
 * build_fargs_table() fills them in. */
static double *fargs;
static int nb_fargs = 0;
100 
/* The functions below, which construct a table of floating point
 * values, were lifted from none/tests/ppc32/jm-insns.c.
 */
104 
/* AB_DPRINTF: printf-style tracing to stderr while the argument table is
 * built.  Active only when DEBUG_ARGS_BUILD is defined; otherwise it expands
 * to an empty statement and its arguments are never evaluated. */
#ifdef DEBUG_ARGS_BUILD
#define AB_DPRINTF(fmt, ...) do { fprintf(stderr, fmt , ##__VA_ARGS__); } while (0)
#else
#define AB_DPRINTF(fmt, ...) do { } while (0)
#endif
110 
/*
 * Assemble a 64-bit double-precision bit pattern from its fields and store
 * it in the 8 bytes that farg points to.
 *
 *   farg - destination; must point to storage for one double
 *   s    - sign, shifted to bit 63
 *   _exp - exponent field, shifted to bit 52
 *   mant - mantissa bits, OR-ed into the low end as given
 *
 * The three fields are combined with plain ORs and no masking, so a value
 * wider than its field spills into the neighbouring one.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   double value;

   /* Move the bit pattern with memcpy rather than through casted pointers:
    * accessing a double object through a uint64_t lvalue (or the reverse)
    * breaks the aliasing rules, and memcpy keeps every bit unchanged. */
   memcpy(&value, &bits, sizeof value);
   memcpy(farg, &value, sizeof value);

   /* Casts make the arguments match the %x / %llx conversions regardless of
    * how uint16_t and uint64_t are defined on the target. */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, (unsigned int)_exp, (unsigned long long)mant,
              (unsigned long long)bits, value);
}
121 
build_fargs_table(void)122 static void build_fargs_table(void)
123 /*
124  * Double precision:
125  * Sign goes from zero to one               (1 bit)
126  * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
127  * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
128  * + special values:
129  * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
130  * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
131  * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
132  * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
133  * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
134  * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
135  * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
136  * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
137  * (8 values)
138  *
139  * Single precision
140  * Sign:     1 bit
141  * Exponent: 8 bits
142  * Mantissa: 23 bits
143  * +0.0      : 0 0x00 0x000000 => 0x00000000
144  * -0.0      : 1 0x00 0x000000 => 0x80000000
145  * +infinity : 0 0xFF 0x000000 => 0x7F800000
146  * -infinity : 1 0xFF 0x000000 => 0xFF800000
147  * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
148  * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
149  * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
150  * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
151 */
152 {
153    uint64_t mant;
154    uint16_t _exp, e1;
155    int s;
156    int i=0;
157 
158    if (nb_fargs)
159       return;
160 
161    fargs = malloc( 16 * sizeof(double) );
162    for (s = 0; s < 2; s++) {
163       for (e1 = 0x001;; e1 = ((e1 + 1) << 13) + 7) {
164          if (e1 >= 0x400)
165             e1 = 0x3fe;
166          _exp = e1;
167          for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
168          /* Add 'random' bits */
169          mant = ((mant + 0x4A6) << 29) + 0x359) {
170             register_farg( &fargs[i++], s, _exp, mant );
171          }
172          if (e1 == 0x3fe)
173             break;
174       }
175    }
176    // add a few smaller values to fargs . . .
177    s = 0;
178    _exp = 0x002;
179    mant = 0x0000000000b01ULL;
180    register_farg(&fargs[i++], s, _exp, mant);
181 
182    _exp = 0x000;
183    mant = 0x00000203f0b3dULL;
184    register_farg(&fargs[i++], s, _exp, mant);
185 
186    mant = 0x00000005a203dULL;
187    register_farg(&fargs[i++], s, _exp, mant);
188 
189    s = 1;
190    _exp = 0x002;
191    mant = 0x0000000000b01ULL;
192    register_farg(&fargs[i++], s, _exp, mant);
193 
194    _exp = 0x000;
195    mant = 0x00000203f0b3dULL;
196    register_farg(&fargs[i++], s, _exp, mant);
197 
198    nb_fargs = i;
199 }
200 
201 
/* One row of a floating-point test table: two integer indices and a flags
 * value.
 * NOTE(review): the indices presumably select operands from a table of
 * doubles and cr_flags the expected condition-register result -- confirm
 * against the code that consumes these tables. */
struct fp_test_args {
   int fra_idx;
   int frb_idx;
   int cr_flags;
};
typedef struct fp_test_args fp_test_args_t;
207 
208 
209 fp_test_args_t ftdiv_tests[] = {
210                               {0, 1, 0x8},
211                               {9, 1, 0xa},
212                               {1, 12, 0xa},
213                               {0, 2, 0xa},
214                               {1, 3, 0xa},
215                               {3, 0, 0xa},
216                               {0, 3, 0xa},
217                               {4, 0, 0xa},
218                               {7, 1, 0xe},
219                               {8, 1, 0xe},
220                               {1, 7, 0xe},
221                               {0, 13, 0xe},
222                               {5, 5, 0xe},
223                               {5, 6, 0xe},
224 };
225 
226 fp_test_args_t xscmpX_tests[] = {
227                                    {8, 8, 0x2},
228                                    {8, 14, 0x8},
229                                    {8, 6, 0x8},
230                                    {8, 5, 0x8},
231                                    {8, 4, 0x8},
232                                    {8, 7, 0x8},
233                                    {8, 9, 0x1},
234                                    {8, 11, 0x1},
235                                    {14, 8, 0x4},
236                                    {14, 14, 0x2},
237                                    {14, 6, 0x8},
238                                    {14, 5, 0x8},
239                                    {14, 4, 0x8},
240                                    {14, 7, 0x8},
241                                    {14, 9, 0x1},
242                                    {14, 11, 0x1},
243                                    {6, 8, 0x4},
244                                    {6, 14, 0x4},
245                                    {6, 6, 0x2},
246                                    {6, 5, 0x2},
247                                    {6, 4, 0x8},
248                                    {6, 7, 0x8},
249                                    {6, 9, 0x1},
250                                    {6, 11, 0x1},
251                                    {5, 8, 0x4},
252                                    {5, 14, 0x4},
253                                    {5, 6, 0x2},
254                                    {5, 5, 0x2},
255                                    {5, 4, 0x8},
256                                    {5, 7, 0x8},
257                                    {5, 9, 0x1},
258                                    {5, 11, 0x1},
259                                    {4, 8, 0x4},
260                                    {4, 14, 0x4},
261                                    {4, 6, 0x4},
262                                    {4, 5, 0x4},
263                                    {4, 1, 0x8},
264                                    {4, 7, 0x8},
265                                    {4, 9, 0x1},
266                                    {4, 11, 0x1},
267                                    {7, 8, 0x4},
268                                    {7, 14, 0x4},
269                                    {7, 6, 0x4},
270                                    {7, 5, 0x4},
271                                    {7, 4, 0x4},
272                                    {7, 7, 0x2},
273                                    {7, 9, 0x1},
274                                    {7, 11, 0x1},
275                                    {10, 8, 0x1},
276                                    {10, 14, 0x1},
277                                    {10, 6, 0x1},
278                                    {10, 5, 0x1},
279                                    {10, 4, 0x1},
280                                    {10, 7, 0x1},
281                                    {10, 9, 0x1},
282                                    {10, 11, 0x1},
283                                    {12, 8, 0x1},
284                                    {12, 14, 0x1},
285                                    {12, 6, 0x1},
286                                    {12, 5, 0x1},
287                                    {12, 4, 0x1},
288                                    {12, 7, 0x1},
289                                    {12, 9, 0x1},
290                                    {12, 11, 0x1},
291 };
292 
293 fp_test_args_t xsadddp_tests[] = {
294                                    {8, 8, 0x0},
295                                    {8, 14, 0x0},
296                                    {8, 6, 0x0},
297                                    {8, 5, 0x0},
298                                    {8, 4, 0x0},
299                                    {8, 7, 0x0},
300                                    {8, 9, 0x0},
301                                    {8, 11, 0x0},
302                                    {14, 8, 0x0},
303                                    {14, 14, 0x0},
304                                    {14, 6, 0x0},
305                                    {14, 5, 0x0},
306                                    {14, 4, 0x0},
307                                    {14, 7, 0x0},
308                                    {14, 9, 0x0},
309                                    {14, 11, 0x0},
310                                    {6, 8, 0x0},
311                                    {6, 14, 0x0},
312                                    {6, 6, 0x0},
313                                    {6, 5, 0x0},
314                                    {6, 4, 0x0},
315                                    {6, 7, 0x0},
316                                    {6, 9, 0x0},
317                                    {6, 11, 0x0},
318                                    {5, 8, 0x0},
319                                    {5, 14, 0x0},
320                                    {5, 6, 0x0},
321                                    {5, 5, 0x0},
322                                    {5, 4, 0x0},
323                                    {5, 7, 0x0},
324                                    {5, 9, 0x0},
325                                    {5, 11, 0x0},
326                                    {4, 8, 0x0},
327                                    {4, 14, 0x0},
328                                    {4, 6, 0x0},
329                                    {4, 5, 0x0},
330                                    {4, 1, 0x0},
331                                    {4, 7, 0x0},
332                                    {4, 9, 0x0},
333                                    {4, 11, 0x0},
334                                    {7, 8, 0x0},
335                                    {7, 14, 0x0},
336                                    {7, 6, 0x0},
337                                    {7, 5, 0x0},
338                                    {7, 4, 0x0},
339                                    {7, 7, 0x0},
340                                    {7, 9, 0x0},
341                                    {7, 11, 0x0},
342                                    {10, 8, 0x0},
343                                    {10, 14, 0x0},
344                                    {10, 6, 0x0},
345                                    {10, 5, 0x0},
346                                    {10, 4, 0x0},
347                                    {10, 7, 0x0},
348                                    {10, 9, 0x0},
349                                    {10, 11, 0x0},
350                                    {12, 8, 0x0},
351                                    {12, 14, 0x0},
352                                    {12, 6, 0x0},
353                                    {12, 5, 0x0},
354                                    {12, 4, 0x0},
355                                    {12, 7, 0x0},
356                                    {12, 9, 0x0},
357                                    {12, 11, 0x0},
358 };
359 
360 fp_test_args_t xsdivdp_tests[] = {
361                                    {8, 8, 0x0},
362                                    {8, 14, 0x0},
363                                    {8, 6, 0x0},
364                                    {8, 5, 0x0},
365                                    {8, 4, 0x0},
366                                    {8, 7, 0x0},
367                                    {8, 9, 0x0},
368                                    {8, 11, 0x0},
369                                    {14, 8, 0x0},
370                                    {14, 14, 0x0},
371                                    {14, 6, 0x0},
372                                    {14, 5, 0x0},
373                                    {14, 4, 0x0},
374                                    {14, 7, 0x0},
375                                    {14, 9, 0x0},
376                                    {14, 11, 0x0},
377                                    {6, 8, 0x0},
378                                    {6, 14, 0x0},
379                                    {6, 6, 0x0},
380                                    {6, 5, 0x0},
381                                    {6, 4, 0x0},
382                                    {6, 7, 0x0},
383                                    {6, 9, 0x0},
384                                    {6, 11, 0x0},
385                                    {5, 8, 0x0},
386                                    {5, 14, 0x0},
387                                    {5, 6, 0x0},
388                                    {5, 5, 0x0},
389                                    {5, 4, 0x0},
390                                    {5, 7, 0x0},
391                                    {5, 9, 0x0},
392                                    {5, 11, 0x0},
393                                    {4, 8, 0x0},
394                                    {4, 14, 0x0},
395                                    {4, 6, 0x0},
396                                    {4, 5, 0x0},
397                                    {4, 1, 0x0},
398                                    {4, 7, 0x0},
399                                    {4, 9, 0x0},
400                                    {4, 11, 0x0},
401                                    {7, 8, 0x0},
402                                    {7, 14, 0x0},
403                                    {7, 6, 0x0},
404                                    {7, 5, 0x0},
405                                    {7, 4, 0x0},
406                                    {7, 7, 0x0},
407                                    {7, 9, 0x0},
408                                    {7, 11, 0x0},
409                                    {10, 8, 0x0},
410                                    {10, 14, 0x0},
411                                    {10, 6, 0x0},
412                                    {10, 5, 0x0},
413                                    {10, 4, 0x0},
414                                    {10, 7, 0x0},
415                                    {10, 9, 0x0},
416                                    {10, 11, 0x0},
417                                    {12, 8, 0x0},
418                                    {12, 14, 0x0},
419                                    {12, 6, 0x0},
420                                    {12, 5, 0x0},
421                                    {12, 4, 0x0},
422                                    {12, 7, 0x0},
423                                    {12, 9, 0x0},
424                                    {12, 11, 0x0},
425 };
426 
427 fp_test_args_t xsmaddXdp_tests[] = {
428                                    {8, 8, 0x0},
429                                    {8, 14, 0x0},
430                                    {8, 6, 0x0},
431                                    {8, 5, 0x0},
432                                    {8, 4, 0x0},
433                                    {8, 7, 0x0},
434                                    {8, 9, 0x0},
435                                    {8, 11, 0x0},
436                                    {14, 8, 0x0},
437                                    {14, 14, 0x0},
438                                    {14, 6, 0x0},
439                                    {14, 5, 0x0},
440                                    {14, 4, 0x0},
441                                    {14, 7, 0x0},
442                                    {14, 9, 0x0},
443                                    {14, 11, 0x0},
444                                    {6, 8, 0x0},
445                                    {6, 14, 0x0},
446                                    {6, 6, 0x0},
447                                    {6, 5, 0x0},
448                                    {6, 4, 0x0},
449                                    {6, 7, 0x0},
450                                    {6, 9, 0x0},
451                                    {6, 11, 0x0},
452                                    {5, 8, 0x0},
453                                    {5, 14, 0x0},
454                                    {5, 6, 0x0},
455                                    {5, 5, 0x0},
456                                    {5, 4, 0x0},
457                                    {5, 7, 0x0},
458                                    {5, 9, 0x0},
459                                    {5, 11, 0x0},
460                                    {4, 8, 0x0},
461                                    {4, 14, 0x0},
462                                    {4, 6, 0x0},
463                                    {4, 5, 0x0},
464                                    {4, 1, 0x0},
465                                    {4, 7, 0x0},
466                                    {4, 9, 0x0},
467                                    {4, 11, 0x0},
468                                    {7, 8, 0x0},
469                                    {7, 14, 0x0},
470                                    {7, 6, 0x0},
471                                    {7, 5, 0x0},
472                                    {7, 4, 0x0},
473                                    {7, 7, 0x0},
474                                    {7, 9, 0x0},
475                                    {7, 11, 0x0},
476                                    {10, 8, 0x0},
477                                    {10, 14, 0x0},
478                                    {10, 6, 0x0},
479                                    {10, 5, 0x0},
480                                    {10, 4, 0x0},
481                                    {10, 7, 0x0},
482                                    {10, 9, 0x0},
483                                    {10, 11, 0x0},
484                                    {12, 8, 0x0},
485                                    {12, 14, 0x0},
486                                    {12, 6, 0x0},
487                                    {12, 5, 0x0},
488                                    {12, 4, 0x0},
489                                    {12, 7, 0x0},
490                                    {12, 9, 0x0},
491                                    {12, 11, 0x0},
492 };
493 
494 fp_test_args_t xsmsubXdp_tests[] = {
495                                    {8, 8, 0x0},
496                                    {8, 14, 0x0},
497                                    {8, 6, 0x0},
498                                    {8, 5, 0x0},
499                                    {8, 4, 0x0},
500                                    {8, 7, 0x0},
501                                    {8, 9, 0x0},
502                                    {8, 11, 0x0},
503                                    {14, 8, 0x0},
504                                    {14, 14, 0x0},
505                                    {14, 6, 0x0},
506                                    {14, 5, 0x0},
507                                    {14, 4, 0x0},
508                                    {14, 7, 0x0},
509                                    {14, 9, 0x0},
510                                    {14, 11, 0x0},
511                                    {6, 8, 0x0},
512                                    {6, 14, 0x0},
513                                    {6, 6, 0x0},
514                                    {6, 5, 0x0},
515                                    {6, 4, 0x0},
516                                    {6, 7, 0x0},
517                                    {6, 9, 0x0},
518                                    {6, 11, 0x0},
519                                    {5, 8, 0x0},
520                                    {5, 14, 0x0},
521                                    {5, 6, 0x0},
522                                    {5, 5, 0x0},
523                                    {5, 4, 0x0},
524                                    {5, 7, 0x0},
525                                    {5, 9, 0x0},
526                                    {5, 11, 0x0},
527                                    {4, 8, 0x0},
528                                    {4, 14, 0x0},
529                                    {4, 6, 0x0},
530                                    {4, 5, 0x0},
531                                    {4, 1, 0x0},
532                                    {4, 7, 0x0},
533                                    {4, 9, 0x0},
534                                    {4, 11, 0x0},
535                                    {7, 8, 0x0},
536                                    {7, 14, 0x0},
537                                    {7, 6, 0x0},
538                                    {7, 5, 0x0},
539                                    {7, 4, 0x0},
540                                    {7, 7, 0x0},
541                                    {7, 9, 0x0},
542                                    {7, 11, 0x0},
543                                    {10, 8, 0x0},
544                                    {10, 14, 0x0},
545                                    {10, 6, 0x0},
546                                    {10, 5, 0x0},
547                                    {10, 4, 0x0},
548                                    {10, 7, 0x0},
549                                    {10, 9, 0x0},
550                                    {10, 11, 0x0},
551                                    {12, 8, 0x0},
552                                    {12, 14, 0x0},
553                                    {12, 6, 0x0},
554                                    {12, 5, 0x0},
555                                    {12, 4, 0x0},
556                                    {12, 7, 0x0},
557                                    {12, 9, 0x0},
558                                    {12, 11, 0x0},
559 };
560 
561 fp_test_args_t xsnmaddXdp_tests[] = {
562                                      {8, 8, 0x0},
563                                      {8, 14, 0x0},
564                                      {8, 6, 0x0},
565                                      {8, 5, 0x0},
566                                      {8, 4, 0x0},
567                                      {8, 7, 0x0},
568                                      {8, 9, 0x0},
569                                      {8, 11, 0x0},
570                                      {14, 8, 0x0},
571                                      {14, 14, 0x0},
572                                      {14, 6, 0x0},
573                                      {14, 5, 0x0},
574                                      {14, 4, 0x0},
575                                      {14, 7, 0x0},
576                                      {14, 9, 0x0},
577                                      {14, 11, 0x0},
578                                      {6, 8, 0x0},
579                                      {6, 14, 0x0},
580                                      {6, 6, 0x0},
581                                      {6, 5, 0x0},
582                                      {6, 4, 0x0},
583                                      {6, 7, 0x0},
584                                      {6, 9, 0x0},
585                                      {6, 11, 0x0},
586                                      {5, 8, 0x0},
587                                      {5, 14, 0x0},
588                                      {5, 6, 0x0},
589                                      {5, 5, 0x0},
590                                      {5, 4, 0x0},
591                                      {5, 7, 0x0},
592                                      {5, 9, 0x0},
593                                      {5, 11, 0x0},
594                                      {4, 8, 0x0},
595                                      {4, 14, 0x0},
596                                      {4, 6, 0x0},
597                                      {4, 5, 0x0},
598                                      {4, 1, 0x0},
599                                      {4, 7, 0x0},
600                                      {4, 9, 0x0},
601                                      {4, 11, 0x0},
602                                      {7, 8, 0x0},
603                                      {7, 14, 0x0},
604                                      {7, 6, 0x0},
605                                      {7, 5, 0x0},
606                                      {7, 4, 0x0},
607                                      {7, 7, 0x0},
608                                      {7, 9, 0x0},
609                                      {7, 11, 0x0},
610                                      {10, 8, 0x0},
611                                      {10, 14, 0x0},
612                                      {10, 6, 0x0},
613                                      {10, 5, 0x0},
614                                      {10, 4, 0x0},
615                                      {10, 7, 0x0},
616                                      {10, 9, 0x0},
617                                      {10, 11, 0x0},
618                                      {12, 8, 0x0},
619                                      {12, 14, 0x0},
620                                      {12, 6, 0x0},
621                                      {12, 5, 0x0},
622                                      {12, 4, 0x0},
623                                      {12, 7, 0x0},
624                                      {12, 9, 0x0},
625                                      {12, 11, 0x0},
626 };
627 
628 fp_test_args_t xsmuldp_tests[] = {
629                                   {8, 8, 0x0},
630                                   {8, 14, 0x0},
631                                   {8, 6, 0x0},
632                                   {8, 5, 0x0},
633                                   {8, 4, 0x0},
634                                   {8, 7, 0x0},
635                                   {8, 9, 0x0},
636                                   {8, 11, 0x0},
637                                   {14, 8, 0x0},
638                                   {14, 14, 0x0},
639                                   {14, 6, 0x0},
640                                   {14, 5, 0x0},
641                                   {14, 4, 0x0},
642                                   {14, 7, 0x0},
643                                   {14, 9, 0x0},
644                                   {14, 11, 0x0},
645                                   {6, 8, 0x0},
646                                   {6, 14, 0x0},
647                                   {6, 6, 0x0},
648                                   {6, 5, 0x0},
649                                   {6, 4, 0x0},
650                                   {6, 7, 0x0},
651                                   {6, 9, 0x0},
652                                   {6, 11, 0x0},
653                                   {5, 8, 0x0},
654                                   {5, 14, 0x0},
655                                   {5, 6, 0x0},
656                                   {5, 5, 0x0},
657                                   {5, 4, 0x0},
658                                   {5, 7, 0x0},
659                                   {5, 9, 0x0},
660                                   {5, 11, 0x0},
661                                   {4, 8, 0x0},
662                                   {4, 14, 0x0},
663                                   {4, 6, 0x0},
664                                   {4, 5, 0x0},
665                                   {4, 1, 0x0},
666                                   {4, 7, 0x0},
667                                   {4, 9, 0x0},
668                                   {4, 11, 0x0},
669                                   {7, 8, 0x0},
670                                   {7, 14, 0x0},
671                                   {7, 6, 0x0},
672                                   {7, 5, 0x0},
673                                   {7, 4, 0x0},
674                                   {7, 7, 0x0},
675                                   {7, 9, 0x0},
676                                   {7, 11, 0x0},
677                                   {10, 8, 0x0},
678                                   {10, 14, 0x0},
679                                   {10, 6, 0x0},
680                                   {10, 5, 0x0},
681                                   {10, 4, 0x0},
682                                   {10, 7, 0x0},
683                                   {10, 9, 0x0},
684                                   {10, 11, 0x0},
685                                   {12, 8, 0x0},
686                                   {12, 14, 0x0},
687                                   {12, 6, 0x0},
688                                   {12, 5, 0x0},
689                                   {12, 4, 0x0},
690                                   {12, 7, 0x0},
691                                   {12, 9, 0x0},
692                                   {12, 11, 0x0},
693 };
694 
695 fp_test_args_t xssubdp_tests[] = {
696                                   {8, 8, 0x0},
697                                   {8, 14, 0x0},
698                                   {8, 6, 0x0},
699                                   {8, 5, 0x0},
700                                   {8, 4, 0x0},
701                                   {8, 7, 0x0},
702                                   {8, 9, 0x0},
703                                   {8, 11, 0x0},
704                                   {14, 8, 0x0},
705                                   {14, 14, 0x0},
706                                   {14, 6, 0x0},
707                                   {14, 5, 0x0},
708                                   {14, 4, 0x0},
709                                   {14, 7, 0x0},
710                                   {14, 9, 0x0},
711                                   {14, 11, 0x0},
712                                   {6, 8, 0x0},
713                                   {6, 14, 0x0},
714                                   {6, 6, 0x0},
715                                   {6, 5, 0x0},
716                                   {6, 4, 0x0},
717                                   {6, 7, 0x0},
718                                   {6, 9, 0x0},
719                                   {6, 11, 0x0},
720                                   {5, 8, 0x0},
721                                   {5, 14, 0x0},
722                                   {5, 6, 0x0},
723                                   {5, 5, 0x0},
724                                   {5, 4, 0x0},
725                                   {5, 7, 0x0},
726                                   {5, 9, 0x0},
727                                   {5, 11, 0x0},
728                                   {4, 8, 0x0},
729                                   {4, 14, 0x0},
730                                   {4, 6, 0x0},
731                                   {4, 5, 0x0},
732                                   {4, 1, 0x0},
733                                   {4, 7, 0x0},
734                                   {4, 9, 0x0},
735                                   {4, 11, 0x0},
736                                   {7, 8, 0x0},
737                                   {7, 14, 0x0},
738                                   {7, 6, 0x0},
739                                   {7, 5, 0x0},
740                                   {7, 4, 0x0},
741                                   {7, 7, 0x0},
742                                   {7, 9, 0x0},
743                                   {7, 11, 0x0},
744                                   {10, 8, 0x0},
745                                   {10, 14, 0x0},
746                                   {10, 6, 0x0},
747                                   {10, 5, 0x0},
748                                   {10, 4, 0x0},
749                                   {10, 7, 0x0},
750                                   {10, 9, 0x0},
751                                   {10, 11, 0x0},
752                                   {12, 8, 0x0},
753                                   {12, 14, 0x0},
754                                   {12, 6, 0x0},
755                                   {12, 5, 0x0},
756                                   {12, 4, 0x0},
757                                   {12, 7, 0x0},
758                                   {12, 9, 0x0},
759                                   {12, 11, 0x0},
760 };
761 
762 
763 
/* Number of entries stored in spec_fargs[]; set by build_special_fargs_table(). */
static int nb_special_fargs;
/* Heap-allocated table of special double values.  It stays NULL until
 * build_special_fargs_table() has run, which that function uses as its
 * "already built" check. */
static double * spec_fargs;
766 
build_special_fargs_table(void)767 static void build_special_fargs_table(void)
768 {
769    /* The special floating point values created below are for
770     * use in the ftdiv tests for setting the fe_flag and fg_flag,
771     * but they can also be used for other tests (e.g., xscmpudp).
772     *
773     * Note that fl_flag is 'always '1' on ppc64 Linux.
774     *
775   Entry  Sign Exp   fraction                  Special value
776    0      0   3fd   0x8000000000000ULL         Positive finite number
777    1      0   404   0xf000000000000ULL         ...
778    2      0   001   0x8000000b77501ULL         ...
779    3      0   7fe   0x800000000051bULL         ...
780    4      0   012   0x3214569900000ULL         ...
781    5      0   000   0x0000000000000ULL         +0.0 (+zero)
782    6      1   000   0x0000000000000ULL         -0.0 (-zero)
783    7      0   7ff   0x0000000000000ULL         +infinity
784    8      1   7ff   0x0000000000000ULL         -infinity
785    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
786    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
787    11     0   7ff   0x8000000000000ULL         +QNaN
788    12     1   7ff   0x8000000000000ULL         -QNaN
789    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
790    14     1   40d   0x0650f5a07b353ULL         Negative finite number
791     */
792 
793    uint64_t mant;
794    uint16_t _exp;
795    int s;
796    int i = 0;
797 
798    if (spec_fargs)
799       return;
800 
801    spec_fargs = malloc( 16 * sizeof(double) );
802 
803    // #0
804    s = 0;
805    _exp = 0x3fd;
806    mant = 0x8000000000000ULL;
807    register_farg(&spec_fargs[i++], s, _exp, mant);
808 
809    // #1
810    s = 0;
811    _exp = 0x404;
812    mant = 0xf000000000000ULL;
813    register_farg(&spec_fargs[i++], s, _exp, mant);
814 
815    /* None of the ftdiv tests succeed.
816     * FRA = value #0; FRB = value #1
817     * ea_ = -2; e_b = 5
818     * fl_flag || fg_flag || fe_flag = 100
819     */
820 
821    /*************************************************
822     *     fe_flag tests
823     *
824     *************************************************/
825 
826    /* fe_flag <- 1 if FRA is a NaN
827     * FRA = value #9; FRB = value #1
828     * e_a = 1024; e_b = 5
829     * fl_flag || fg_flag || fe_flag = 101
830     */
831 
832    /* fe_flag <- 1 if FRB is a NaN
833     * FRA = value #1; FRB = value #12
834     * e_a = 5; e_b = 1024
835     * fl_flag || fg_flag || fe_flag = 101
836     */
837 
838    /* fe_flag <- 1 if e_b <= -1022
839     * FRA = value #0; FRB = value #2
840     * e_a = -2; e_b = -1022
841     * fl_flag || fg_flag || fe_flag = 101
842     *
843     */
844    // #2
845    s = 0;
846    _exp = 0x001;
847    mant = 0x8000000b77501ULL;
848    register_farg(&spec_fargs[i++], s, _exp, mant);
849 
850    /* fe_flag <- 1 if e_b >= 1021
851     * FRA = value #1; FRB = value #3
852     * e_a = 5; e_b = 1023
853     * fl_flag || fg_flag || fe_flag = 101
854     */
855    // #3
856    s = 0;
857    _exp = 0x7fe;
858    mant = 0x800000000051bULL;
859    register_farg(&spec_fargs[i++], s, _exp, mant);
860 
861    /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
862     * Let FRA = value #3 and FRB be value #0.
863     * e_a = 1023; e_b = -2
864     * fl_flag || fg_flag || fe_flag = 101
865     */
866 
867    /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
868     * Let FRA = value #0 above and FRB be value #3 above
869     * e_a = -2; e_b = 1023
870     * fl_flag || fg_flag || fe_flag = 101
871     */
872 
873    /* fe_flag <- 1 if FRA != 0 && e_a <= -970
874     * Let FRA = value #4 and FRB be value #0
875     * e_a = -1005; e_b = -2
876     * fl_flag || fg_flag || fe_flag = 101
877    */
878    // #4
879    s = 0;
880    _exp = 0x012;
881    mant = 0x3214569900000ULL;
882    register_farg(&spec_fargs[i++], s, _exp, mant);
883 
884    /*************************************************
885     *     fg_flag tests
886     *
887     *************************************************/
888    /* fg_flag <- 1 if FRA is an Infinity
889     * NOTE: FRA = Inf also sets fe_flag
890     * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
891     * Test 1:
892     *   Let FRA be value #7 and FRB be value #1
893     *   e_a = 1024; e_b = 5
894     *   fl_flag || fg_flag || fe_flag = 111
895     *
896     * Test 2:
897     *   Let FRA be value #8 and FRB be value #1
898     *   e_a = 1024; e_b = 5
899     *   fl_flag || fg_flag || fe_flag = 111
900     *
901     */
902 
903    /* fg_flag <- 1 if FRB is an Infinity
904     * NOTE: FRB = Inf also sets fe_flag
905     * Let FRA be value #1 and FRB be value #7
906     * e_a = 5; e_b = 1024
907     * fl_flag || fg_flag || fe_flag = 111
908     */
909 
910    /* fg_flag <- 1 if FRB is denormalized
911     * NOTE: e_b < -1022 ==> fe_flag <- 1
912     * Let FRA be value #0 and FRB be value #13
913     * e_a = -2; e_b = -1023
914     * fl_flag || fg_flag || fe_flag = 111
915     */
916 
917    /* fg_flag <- 1 if FRB is +zero
918     * NOTE: FRA = Inf also sets fe_flag
919     * Let FRA = val #5; FRB = val #5
920     * ea_ = -1023; e_b = -1023
921     * fl_flag || fg_flag || fe_flag = 111
922     */
923 
924    /* fg_flag <- 1 if FRB is -zero
925     * NOTE: FRA = Inf also sets fe_flag
926     * Let FRA = val #5; FRB = val #6
927     * ea_ = -1023; e_b = -1023
928     * fl_flag || fg_flag || fe_flag = 111
929     */
930 
931    /* Special values */
932    /* +0.0      : 0 0x000 0x0000000000000 */
933    // #5
934    s = 0;
935    _exp = 0x000;
936    mant = 0x0000000000000ULL;
937    register_farg(&spec_fargs[i++], s, _exp, mant);
938 
939    /* -0.0      : 1 0x000 0x0000000000000 */
940    // #6
941    s = 1;
942    _exp = 0x000;
943    mant = 0x0000000000000ULL;
944    register_farg(&spec_fargs[i++], s, _exp, mant);
945 
946    /* +infinity : 0 0x7FF 0x0000000000000  */
947    // #7
948    s = 0;
949    _exp = 0x7FF;
950    mant = 0x0000000000000ULL;
951    register_farg(&spec_fargs[i++], s, _exp, mant);
952 
953    /* -infinity : 1 0x7FF 0x0000000000000 */
954    // #8
955    s = 1;
956    _exp = 0x7FF;
957    mant = 0x0000000000000ULL;
958    register_farg(&spec_fargs[i++], s, _exp, mant);
959 
960    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
961    // #9
962    s = 0;
963    _exp = 0x7FF;
964    mant = 0x7FFFFFFFFFFFFULL;
965    register_farg(&spec_fargs[i++], s, _exp, mant);
966 
967    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
968    // #10
969    s = 1;
970    _exp = 0x7FF;
971    mant = 0x7FFFFFFFFFFFFULL;
972    register_farg(&spec_fargs[i++], s, _exp, mant);
973 
974    /* +QNaN     : 0 0x7FF 0x8000000000000 */
975    // #11
976    s = 0;
977    _exp = 0x7FF;
978    mant = 0x8000000000000ULL;
979    register_farg(&spec_fargs[i++], s, _exp, mant);
980 
981    /* -QNaN     : 1 0x7FF 0x8000000000000 */
982    // #12
983    s = 1;
984    _exp = 0x7FF;
985    mant = 0x8000000000000ULL;
986    register_farg(&spec_fargs[i++], s, _exp, mant);
987 
988    /* denormalized value */
989    // #13
990    s = 1;
991    _exp = 0x000;
992    mant = 0x8340000078000ULL;
993    register_farg(&spec_fargs[i++], s, _exp, mant);
994 
995    /* Negative finite number */
996    // #14
997    s = 1;
998    _exp = 0x40d;
999    mant = 0x0650f5a07b353ULL;
1000    register_farg(&spec_fargs[i++], s, _exp, mant);
1001 
1002    nb_special_fargs = i;
1003 }
1004 
1005 
/* Pairs a test entry point (test_category) with a printable name.
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably consumed by the test driver -- confirm. */
struct test_table
{
   test_func_t test_category;
   char * name;
};
1011 
/* One floating point conversion test: the routine to run, its mnemonic,
 * and whether the result is single or double precision.
 * NOTE(review): presumably the type behind fp_test_t / fp_tests[] -- confirm. */
struct p7_fp_test
{
   test_func_t test_func;
   const char *name;
   int single;  // 1=single precision result; 0=double precision result
};
1018 
/* Category of a VSX scalar FP test group; test_vx_fp_ops() switches on it
 * to pick the printed test name and the result handling. */
typedef enum {
   VX_FP_CMP,    /* compare group ("xscmp"); the CR field is checked */
   VX_FP_SMA,    /* "xsmadd" group (needs a third source operand) */
   VX_FP_SMS,    /* "xsmsub" group (needs a third source operand) */
   VX_FP_SNMA,   /* "xsnmadd" group (needs a third source operand) */
   VX_FP_OTHER   /* everything else; the table's own name is printed */
} vx_fp_test_type;
1026 
/* One VSX scalar FP test group as listed in vx_fp_tests[]. */
struct vx_fp_test
{
   test_func_t test_func;       /* routine that executes the instruction */
   const char *name;            /* base mnemonic used in the output */
   fp_test_args_t * targs;      /* operand-index table for this group */
   int num_tests;               /* number of entries of targs to run */
   vx_fp_test_type test_type;   /* selects naming and result handling */
};
1035 
/* One VSX scalar conversion test: routine, mnemonic and the number of
 * test iterations.
 * NOTE(review): presumably the type behind xs_conv_test_t -- confirm. */
struct xs_conv_test
{
   test_func_t test_func;
   const char *name;
   int num_tests;
};
1042 
/* Kind of a VSX load/store test.  Numbering starts at 1, so the 0 used in
 * the table's terminating entry is not a valid kind. */
typedef enum {
   VSX_LOAD =1,
   VSX_LOAD_SPLAT,
   VSX_STORE
} vsx_ldst_type;
1048 
/* One VSX load/store test.
 * NOTE(review): presumably the type behind ldst_test_t / ldst_tests[] -- confirm. */
struct ldst_test
{
   test_func_t test_func;      /* routine that executes the instruction */
   const char *name;           /* mnemonic used in the output */
   void * base_addr;           /* buffer the instruction reads or writes */
   uint32_t offset;            /* presumably the index added to base_addr -- confirm */
   int num_words_to_process;   /* 2 or 4 in ldst_tests[] */
   vsx_ldst_type type;         /* load, load-and-splat, or store */
};
1058 
/* Logical operation tag attached to each entry of logic_tests[].
 * Numbering starts at 1, so the terminating entry's 0 is not a valid op. */
typedef enum {
   VSX_AND = 1,
   VSX_XOR,
   VSX_ANDC,
   VSX_OR,
   VSX_NOR
} vsx_log_op;
1066 
/* One VSX logical-operation test: routine, mnemonic and the operation tag.
 * NOTE(review): presumably the type behind logic_test_t -- confirm. */
struct vsx_logic_test
{
   test_func_t test_func;
   const char *name;
   vsx_log_op op;
};
1073 
/* One VSX sign-manipulation ("move") test: routine and mnemonic.
 * NOTE(review): presumably the type behind move_test_t -- confirm. */
struct vsx_move_test
{
   test_func_t test_func;
   const char *name;
};
1079 
/* One VSX permute/merge/shift test with its two four-word inputs.
 * NOTE(review): presumably the type behind permute_test_t -- confirm. */
struct vsx_permute_test
{
   test_func_t test_func;
   const char *name;
   unsigned int xa[4];   /* XA input words */
   unsigned int xb[4];   /* XB input words */
};
1087 
/* Shared operands of the inline-asm test routines: vec_inA and vec_inB are
 * bound as inputs, vec_out as the output (and as an in/out operand by the
 * multiply-add style routines). */
static vector unsigned int vec_out, vec_inA, vec_inB;
1089 
test_lxsdx(void)1090 static void test_lxsdx(void)
1091 {
1092    __asm__ __volatile__ ("lxsdx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1093 }
1094 
1095 static void
test_lxvd2x(void)1096 test_lxvd2x(void)
1097 {
1098    __asm__ __volatile__ ("lxvd2x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1099 }
1100 
test_lxvdsx(void)1101 static void test_lxvdsx(void)
1102 {
1103    __asm__ __volatile__ ("lxvdsx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1104 }
1105 
test_lxvw4x(void)1106 static void test_lxvw4x(void)
1107 {
1108    __asm__ __volatile__ ("lxvw4x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1109 }
1110 
test_stxsdx(void)1111 static void test_stxsdx(void)
1112 {
1113    __asm__ __volatile__ ("stxsdx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1114 }
1115 
test_stxvd2x(void)1116 static void test_stxvd2x(void)
1117 {
1118    __asm__ __volatile__ ("stxvd2x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1119 }
1120 
test_stxvw4x(void)1121 static void test_stxvw4x(void)
1122 {
1123    __asm__ __volatile__ ("stxvw4x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1124 }
1125 
test_xxlxor(void)1126 static void test_xxlxor(void)
1127 {
1128    __asm__ __volatile__ ("xxlxor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1129 }
1130 
test_xxlor(void)1131 static void test_xxlor(void)
1132 {
1133    __asm__ __volatile__ ("xxlor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1134 }
1135 
test_xxlnor(void)1136 static void test_xxlnor(void)
1137 {
1138    __asm__ __volatile__ ("xxlnor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1139 }
1140 
test_xxland(void)1141 static void test_xxland(void)
1142 {
1143    __asm__ __volatile__ ("xxland          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1144 }
1145 
test_xxlandc(void)1146 static void test_xxlandc(void)
1147 {
1148    __asm__ __volatile__ ("xxlandc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1149 }
1150 
test_xxmrghw(void)1151 static void test_xxmrghw(void)
1152 {
1153    __asm__ __volatile__ ("xxmrghw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1154 }
1155 
test_xxmrglw(void)1156 static void test_xxmrglw(void)
1157 {
1158    __asm__ __volatile__ ("xxmrglw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1159 }
1160 
test_xxpermdi_00(void)1161 static void test_xxpermdi_00(void)
1162 {
1163    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1164 }
1165 
test_xxpermdi_01(void)1166 static void test_xxpermdi_01(void)
1167 {
1168    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1169 }
1170 
test_xxpermdi_10(void)1171 static void test_xxpermdi_10(void)
1172 {
1173    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1174 }
1175 
test_xxpermdi_11(void)1176 static void test_xxpermdi_11(void)
1177 {
1178    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1179 }
1180 
test_xxsldwi_0(void)1181 static void test_xxsldwi_0(void)
1182 {
1183    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1184 }
1185 
test_xxsldwi_1(void)1186 static void test_xxsldwi_1(void)
1187 {
1188    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1189 }
1190 
test_xxsldwi_2(void)1191 static void test_xxsldwi_2(void)
1192 {
1193    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1194 }
1195 
test_xxsldwi_3(void)1196 static void test_xxsldwi_3(void)
1197 {
1198    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1199 }
1200 
test_fcfids(void)1201 static void test_fcfids (void)
1202 {
1203     __asm__ __volatile__ ("fcfids          %0, %1" : "=f" (f17): "d" (f14));
1204 }
1205 
test_fcfidus(void)1206 static void test_fcfidus (void)
1207 {
1208     __asm__ __volatile__ ("fcfidus          %0, %1" : "=f" (f17): "d" (f14));
1209 }
1210 
test_fcfidu(void)1211 static void test_fcfidu (void)
1212 {
1213     __asm__ __volatile__ ("fcfidu          %0, %1" : "=f" (f17): "d" (f14));
1214 }
1215 
test_xsabsdp(void)1216 static void test_xsabsdp (void)
1217 {
1218    __asm__ __volatile__ ("xsabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1219 }
1220 
test_xscpsgndp(void)1221 static void test_xscpsgndp (void)
1222 {
1223    __asm__ __volatile__ ("xscpsgndp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1224 }
1225 
test_xsnabsdp(void)1226 static void test_xsnabsdp (void)
1227 {
1228    __asm__ __volatile__ ("xsnabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1229 }
1230 
test_xsnegdp(void)1231 static void test_xsnegdp (void)
1232 {
1233    __asm__ __volatile__ ("xsnegdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1234 }
1235 
1236 static int do_cmpudp;
test_xscmp(void)1237 static void test_xscmp (void)
1238 {
1239    if (do_cmpudp)
1240       __asm__ __volatile__ ("xscmpudp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
1241    else
1242       __asm__ __volatile__ ("xscmpodp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
1243 }
1244 
test_xsadddp(void)1245 static void test_xsadddp(void)
1246 {
1247    __asm__ __volatile__ ("xsadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1248 }
1249 
test_xsdivdp(void)1250 static void test_xsdivdp(void)
1251 {
1252    __asm__ __volatile__ ("xsdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1253 }
1254 
1255 static int do_adp;
test_xsmadd(void)1256 static void test_xsmadd(void)
1257 {
1258    if (do_adp)
1259       __asm__ __volatile__ ("xsmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1260    else
1261       __asm__ __volatile__ ("xsmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1262 }
1263 
test_xsmsub(void)1264 static void test_xsmsub(void)
1265 {
1266    if (do_adp)
1267       __asm__ __volatile__ ("xsmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1268    else
1269       __asm__ __volatile__ ("xsmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1270 }
1271 
test_xsnmadd(void)1272 static void test_xsnmadd(void)
1273 {
1274    if (do_adp)
1275       __asm__ __volatile__ ("xsnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1276    else
1277       __asm__ __volatile__ ("xsnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1278 }
1279 
test_xsmuldp(void)1280 static void test_xsmuldp(void)
1281 {
1282    __asm__ __volatile__ ("xsmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1283 }
1284 
test_xssubdp(void)1285 static void test_xssubdp(void)
1286 {
1287    __asm__ __volatile__ ("xssubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1288 }
1289 
test_xscvdpsxds(void)1290 static void test_xscvdpsxds (void)
1291 {
1292    __asm__ __volatile__ ("xscvdpsxds          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1293 }
1294 
test_xscvsxddp(void)1295 static void test_xscvsxddp (void)
1296 {
1297    __asm__ __volatile__ ("xscvsxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1298 }
1299 
test_xscvuxddp(void)1300 static void test_xscvuxddp (void)
1301 {
1302    __asm__ __volatile__ ("xscvuxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1303 }
1304 
/* 16-byte aligned, zero-filled buffer of eight words; it is the base
 * address of the store entries in ldst_tests[]. */
static unsigned int vstg[8] __attribute__ ((aligned (16))) = { 0 };

/* Element count of vstg, and the number of whole 4-word vectors it holds. */
#define NUM_VSTG_INTS (sizeof vstg/sizeof vstg[0])
#define NUM_VSTG_VECS (NUM_VSTG_INTS/4)
1310 
/* 16-byte aligned table of eleven input words; it is the base address of
 * the load entries in ldst_tests[] and of the ldbrx / lfiwzx tests. */
static unsigned int viargs[] __attribute__ ((aligned (16))) = {
   0x01234567, 0x89abcdef, 0x00112233, 0x44556677,
   0x8899aabb, 0x91929394, 0xa1a2a3a4, 0xb1b2b3b4,
   0xc1c2c3c4, 0xd1d2d3d4, 0x7a6b5d3e
};

/* Element count of viargs, and the number of whole 4-word vectors it holds
 * (integer division, so the trailing partial vector is not counted). */
#define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
#define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
1325 
1326 static ldst_test_t ldst_tests[] = { { &test_lxsdx, "lxsdx", viargs, 0, 2, VSX_LOAD },
1327                                      { &test_lxsdx, "lxsdx", viargs, 4, 2, VSX_LOAD },
1328                                      { &test_lxvd2x, "lxvd2x", viargs, 0, 4, VSX_LOAD },
1329                                      { &test_lxvd2x, "lxvd2x", viargs, 4, 4, VSX_LOAD },
1330                                      { &test_lxvdsx, "lxvdsx", viargs, 0, 4, VSX_LOAD_SPLAT },
1331                                      { &test_lxvdsx, "lxvdsx", viargs, 4, 4, VSX_LOAD_SPLAT },
1332                                      { &test_lxvw4x, "lxvw4x", viargs, 0, 4, VSX_LOAD },
1333                                      { &test_lxvw4x, "lxvw4x", viargs, 4, 4, VSX_LOAD },
1334                                      { &test_stxsdx, "stxsdx", vstg, 0, 2, VSX_STORE },
1335                                      { &test_stxsdx, "stxsdx", vstg, 4, 2, VSX_STORE },
1336                                      { &test_stxvd2x, "stxvd2x", vstg, 0, 4, VSX_STORE },
1337                                      { &test_stxvd2x, "stxvd2x", vstg, 4, 4, VSX_STORE },
1338                                      { &test_stxvw4x, "stxvw4x", vstg, 0, 4, VSX_STORE },
1339                                      { &test_stxvw4x, "stxvw4x", vstg, 4, 4, VSX_STORE },
1340                                      { NULL, NULL, NULL, 0, 0, 0 } };
1341 
1342 static logic_test_t logic_tests[] = { { &test_xxlxor, "xxlxor", VSX_XOR },
1343                                       { &test_xxlor, "xxlor", VSX_OR } ,
1344                                       { &test_xxlnor, "xxlnor", VSX_NOR },
1345                                       { &test_xxland, "xxland", VSX_AND },
1346                                       { &test_xxlandc, "xxlandc", VSX_ANDC },
1347                                       { NULL, NULL, 0}};
1348 
1349 static move_test_t move_tests[] = { { &test_xsabsdp, "xsabsdp" },
1350                                     { &test_xscpsgndp, "xscpsgndp" },
1351                                     { &test_xsnabsdp, "xsnabsdp" },
1352                                     { &test_xsnegdp, "xsnegdp" },
1353                                     { NULL, NULL }
1354 
1355 };
1356 
1357 static permute_test_t permute_tests[] =
1358 {
1359   { &test_xxmrghw, "xxmrghw",
1360     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1361     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1362   },
1363   { &test_xxmrghw, "xxmrghw",
1364     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */
1365     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */
1366   },
1367   { &test_xxmrglw, "xxmrglw",
1368     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1369     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1370   },
1371   { &test_xxmrglw, "xxmrglw",
1372     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */
1373     { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */
1374   },
1375   { &test_xxpermdi_00, "xxpermdi DM=00",
1376     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1377     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1378   },
1379   { &test_xxpermdi_01, "xxpermdi DM=01",
1380     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1381     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1382   },
1383   { &test_xxpermdi_10, "xxpermdi DM=10",
1384     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1385     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1386   },
1387   { &test_xxpermdi_11, "xxpermdi DM=11",
1388     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1389     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1390   },
1391   { &test_xxsldwi_0, "xxsldwi SHW=0",
1392     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1393     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1394   },
1395   { &test_xxsldwi_1, "xxsldwi SHW=1",
1396     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1397     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1398   },
1399   { &test_xxsldwi_2, "xxsldwi SHW=2",
1400     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1401     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1402   },
1403   { &test_xxsldwi_3, "xxsldwi SHW=3",
1404     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1405     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1406   },
1407   { NULL, NULL }
1408 };
1409 
1410 static fp_test_t fp_tests[] = { { &test_fcfids, "fcfids", 1 },
1411                                 { &test_fcfidus, "fcfidus", 1 },
1412                                 { &test_fcfidu, "fcfidu", 1 },
1413                                 { NULL, NULL, 0 },
1414 
1415 };
1416 
1417 static vx_fp_test_t vx_fp_tests[] = {
1418                                      { &test_xscmp, "xscmp", xscmpX_tests, 64, VX_FP_CMP},
1419                                      { &test_xsadddp, "xsadddp", xsadddp_tests, 64, VX_FP_OTHER},
1420                                      { &test_xsdivdp, "xsdivdp", xsdivdp_tests, 64, VX_FP_OTHER},
1421                                      { &test_xsmadd, "xsmadd", xsmaddXdp_tests, 64, VX_FP_SMA},
1422                                      { &test_xsmsub, "xsmsub", xsmsubXdp_tests, 64, VX_FP_SMS},
1423                                      { &test_xsnmadd, "xsnmadd", xsnmaddXdp_tests, 64, VX_FP_SNMA},
1424                                      { & test_xsmuldp, "xsmuldp", xsmuldp_tests, 64, VX_FP_OTHER},
1425                                      { & test_xssubdp, "xssubdp", xssubdp_tests, 64, VX_FP_OTHER},
1426                                      { NULL, NULL, NULL, 0, 0 }
1427 };
1428 
1429 static xs_conv_test_t xs_conv_tests[] = {
1430                                          { &test_xscvdpsxds, "xscvdpsxds", 15},
1431                                          { &test_xscvsxddp, "xscvsxddp", 15},
1432                                          { &test_xscvuxddp, "xscvuxddp", 15},
1433                                          { NULL, NULL, 0}
1434 };
1435 
1436 #ifdef __powerpc64__
test_ldbrx(void)1437 static void test_ldbrx(void)
1438 {
1439    int i;
1440    HWord_t reg_out;
1441    unsigned char * byteIn, * byteOut;
1442    r14 = (HWord_t)viargs;
1443    // Just try the instruction an arbitrary number of times at different r15 offsets.
1444    for (i = 0; i < 3; i++) {
1445       int j, k;
1446       reg_out = 0;
1447       r15 = i * 4;
1448       __asm__ __volatile__ ("ldbrx          %0, %1, %2" : "=r" (reg_out): "b" (r14),"r" (r15));
1449       byteIn = ((unsigned char *)(r14 + r15));
1450       byteOut = (unsigned char *)&reg_out;
1451 
1452       printf("ldbrx:");
1453       for (k = 0; k < 8; k++) {
1454          printf( " %02x", (byteIn[k]));
1455       }
1456       printf(" (reverse) =>");
1457       for (j = 0; j < 8; j++) {
1458          printf( " %02x", (byteOut[j]));
1459       }
1460       printf("\n");
1461    }
1462    printf( "\n" );
1463 }
1464 
1465 static void
test_popcntd(void)1466 test_popcntd(void)
1467 {
1468    uint64_t res;
1469    unsigned long long src = 0x9182736405504536ULL;
1470    r14 = src;
1471    __asm__ __volatile__ ("popcntd          %0, %1" : "=r" (res): "r" (r14));
1472    printf("popcntd: 0x%llx => %d\n", src, (int)res);
1473    printf( "\n" );
1474 }
1475 #endif
1476 
1477 static void
test_lfiwzx(void)1478 test_lfiwzx(void)
1479 {
1480    unsigned int i;
1481    unsigned int * src;
1482    uint64_t reg_out;
1483    r14 = (HWord_t)viargs;
1484    // Just try the instruction an arbitrary number of times at different r15 offsets.
1485    for (i = 0; i < 3; i++) {
1486       reg_out = 0;
1487       r15 = i * 4;
1488       __asm__ __volatile__ ("lfiwzx          %0, %1, %2" : "=d" (reg_out): "b" (r14),"r" (r15));
1489       src = ((unsigned int *)(r14 + r15));
1490       printf("lfiwzx: %u => %llu.00\n", *src, (unsigned long long)reg_out);
1491 
1492    }
1493    printf( "\n" );
1494 }
1495 
test_vx_fp_ops(void)1496 static void test_vx_fp_ops(void)
1497 {
1498 
1499    test_func_t func;
1500    int k;
1501    char * test_name = (char *)malloc(20);
1502    k = 0;
1503 
1504    build_special_fargs_table();
1505    while ((func = vx_fp_tests[k].test_func)) {
1506       int i, condreg, repeat = 0;
1507       unsigned int flags;
1508       unsigned long long * frap, * frbp, * dst;
1509       vx_fp_test_t test_group = vx_fp_tests[k];
1510       vx_fp_test_type test_type = test_group.test_type;
1511 
1512       switch (test_type) {
1513          case VX_FP_CMP:
1514             strcpy(test_name, "xscmp");
1515             if (!repeat) {
1516                repeat = 1;
1517                strcat(test_name, "udp");
1518                do_cmpudp = 1;
1519             }
1520             break;
1521          case VX_FP_SMA:
1522          case VX_FP_SMS:
1523          case VX_FP_SNMA:
1524             if (test_type == VX_FP_SMA)
1525                strcpy(test_name, "xsmadd");
1526             else if (test_type == VX_FP_SMS)
1527                strcpy(test_name, "xsmsub");
1528             else
1529                strcpy(test_name, "xsnmadd");
1530             if (!repeat) {
1531                repeat = 1;
1532                strcat(test_name, "adp");
1533                do_adp = 1;
1534             }
1535             break;
1536          case VX_FP_OTHER:
1537             strcpy(test_name, test_group.name);
1538             break;
1539          default:
1540             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
1541             exit(1);
1542       }
1543 
1544 again:
1545       for (i = 0; i < test_group.num_tests; i++) {
1546          unsigned int * inA, * inB, * pv;
1547          double * dpA = (double *)&vec_inA;
1548          double * dpB = (double *)&vec_inB;
1549          double * dpT = (double *)&vec_out;
1550 
1551          fp_test_args_t aTest = test_group.targs[i];
1552          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
1553          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
1554          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1555          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1556          // Only need to copy one doubleword into each vector's element 0
1557          if (isLE) {
1558             // With LE, vector element 0 is the second doubleword from the left
1559             memset(dpA, 0, 8);
1560             memset(dpB, 0, 8);
1561             dpA++;
1562             dpB++;
1563          }
1564          memcpy(dpA, inA, 8);
1565          memcpy(dpB, inB, 8);
1566 
1567          switch (test_type) {
1568             case VX_FP_CMP:
1569                SET_FPSCR_ZERO;
1570                SET_CR_XER_ZERO;
1571                (*func)();
1572                GET_CR(flags);
1573                condreg = (flags & 0x0f000000) >> 24;
1574                printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i, test_name, *frap, *frbp, condreg);
1575               // printf("\tFRA: %e;  FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]);
1576                if ( condreg != aTest.cr_flags) {
1577                   printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, condreg);
1578                }
1579                break;
1580             case VX_FP_SMA:
1581             case VX_FP_SMS:
1582             case VX_FP_SNMA:
1583             case VX_FP_OTHER:
1584             {
1585                int idx;
1586                unsigned long long vsr_XT;
1587                pv = (unsigned int *)&vec_out;
1588                // clear vec_out
1589                for (idx = 0; idx < 4; idx++, pv++)
1590                   *pv = 0;
1591 
1592                if (test_type != VX_FP_OTHER) {
1593                   /* Then we need a third src argument, which is stored in element 0 of
1594                    * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
1595                    * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
1596                    * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
1597                    * data (input args) contain only two inputs, so I arbitrarily
1598                    * use spec_fargs elements 4 and 14 (alternating) for the third source
1599                    * argument.  We can use the same input data for a given pair of
1600                    * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
1601                    * the expected result should be the same.
1602                    */
1603                   int extra_arg_idx;
1604                   if (i % 2)
1605                      extra_arg_idx = 4;
1606                   else
1607                      extra_arg_idx = 14;
1608 
1609                   if (repeat) {
1610                      /* We're on the first time through of one of the VX_FP_SMx
1611                       * test types, meaning we're testing a xs<ZZZ>adp case, thus we
1612                       * have to swap inputs as described above:
1613                       *    src2 <= VSX[XT]
1614                       *    src3 <= VSX[XB]
1615                       */
1616                      if (isLE)
1617                         dpT++;
1618                      memcpy(dpT, inB, 8);  // src2
1619                      memcpy(dpB, &spec_fargs[extra_arg_idx], 8);  //src3
1620                      frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
1621                   } else {
1622                      // Don't need to init src2, as it's done before the switch()
1623                      if (isLE)
1624                         dpT++;
1625                      memcpy(dpT, &spec_fargs[extra_arg_idx], 8);  //src3
1626                   }
1627                   memcpy(&vsr_XT, dpT, 8);
1628                }
1629 
1630                (*func)();
1631                dst = (unsigned long long *) &vec_out;
1632                if (isLE)
1633                   dst++;
1634                if (test_type == VX_FP_OTHER)
1635                   printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, *frap, *frbp, *dst);
1636                else
1637                   printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
1638                           test_name, vsr_XT, *frap, *frbp, *dst );
1639 
1640                /*
1641               {
1642                   // Debug code.  Keep this block commented out except when debugging.
1643                   double result, expected;
1644                   memcpy(&result, dst, 8);
1645                   memcpy(&expected, &aTest.dp_bin_result, 8);
1646                   printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1647                           spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1648                           expected, result );
1649                }
1650               */
1651                break;
1652             }
1653          }
1654 
1655 
1656       }
1657       printf( "\n" );
1658 
1659       if (repeat) {
1660          repeat = 0;
1661          switch (test_type) {
1662             case VX_FP_CMP:
1663                strcpy(test_name, "xscmp");
1664                strcat(test_name, "odp");
1665                do_cmpudp = 0;
1666                break;
1667             case VX_FP_SMA:
1668             case VX_FP_SMS:
1669             case VX_FP_SNMA:
1670                if (test_type == VX_FP_SMA)
1671                   strcpy(test_name, "xsmadd");
1672                else if (test_type == VX_FP_SMS)
1673                   strcpy(test_name, "xsmsub");
1674                else
1675                   strcpy(test_name, "xsnmadd");
1676                strcat(test_name, "mdp");
1677                do_adp = 0;
1678                break;
1679             case VX_FP_OTHER:
1680                break;
1681          }
1682          goto again;
1683       }
1684       k++;
1685    }
1686    printf( "\n" );
1687    free(test_name);
1688 }
1689 
test_xs_conv_ops(void)1690 static void test_xs_conv_ops(void)
1691 {
1692 
1693    test_func_t func;
1694    int k = 0;
1695    double * dpB = (double *)&vec_inB;
1696    if (isLE) {
1697       memset(dpB, 0, 8);
1698       dpB++;
1699    }
1700 
1701    build_special_fargs_table();
1702    while ((func = xs_conv_tests[k].test_func)) {
1703       int i;
1704       unsigned long long * frbp, * dst;
1705       xs_conv_test_t test_group = xs_conv_tests[k];
1706       for (i = 0; i < test_group.num_tests; i++) {
1707          unsigned int * inB, * pv;
1708          int idx;
1709          inB = (unsigned int *)&spec_fargs[i];
1710          frbp = (unsigned long long *)&spec_fargs[i];
1711 
1712          memcpy(dpB, inB, 8);
1713          pv = (unsigned int *)&vec_out;
1714          // clear vec_out
1715          for (idx = 0; idx < 4; idx++, pv++)
1716             *pv = 0;
1717          (*func)();
1718          dst = (unsigned long long *) &vec_out;
1719          if (isLE)
1720             dst++;
1721          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, *dst);
1722 
1723       }
1724       k++;
1725       printf("\n");
1726    }
1727    printf( "\n" );
1728 }
1729 
/*
 * Run one VSX load test and print the source words next to the loaded words.
 *
 * loadTest supplies the test function, the base address of the source data,
 * an optional byte offset, the number of 32-bit words to print, and the load
 * type (VSX_LOAD_SPLAT selects the splat print pattern).
 *
 * The outer loop runs NUM_VIARGS_VECS times.  Each pass runs the instruction
 * with offset 0 and, when loadTest.offset is non-zero, a second time with
 * that offset.  r14 and r15 are set to the base address and offset before
 * the call (presumably consumed by the test function -- its body is not
 * visible here).
 */
static void do_load_test(ldst_test_t loadTest)
{
   test_func_t func;
   unsigned int *src, *dst;
   int splat = loadTest.type == VSX_LOAD_SPLAT ? 1: 0;
   int i, j, m, k;

   func = loadTest.test_func;
   for (i = 0, r14 = (HWord_t) loadTest.base_addr; i < NUM_VIARGS_VECS; i++) {
      int again;
      j = 0;
      /* NOTE(review): this increment accumulates, so the base advances by
       * 0, 16, 48, ... bytes rather than a constant 16 per pass.  Left
       * unchanged because it determines what the test prints -- confirm
       * that it is intended. */
      r14 += i * 16;
      do {
         unsigned int * pv = (unsigned int *)&vec_out;
         int idx;
         // clear vec_out: all four words, one word per step.  (Stepping the
         // pointer by idx skipped word 2 and wrote past the end of vec_out.)
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         again = 0;
         r15 = j;

         // execute test insn
         (*func)();

         src = (unsigned int*) (((unsigned char *)r14) + j);
         dst = (unsigned int*) &vec_out;

         printf( "%s:", loadTest.name);
         for (m = 0; m < loadTest.num_words_to_process; m++) {
            // For splat loads the same two source words are shown repeatedly.
            printf( " %08x", src[splat ? m % 2 : m]);
         }
         printf( " =>");
         m = 0;
         k = loadTest.num_words_to_process;
         if (isLE) {
            // On LE a two-word result is printed from words 2..3 of vec_out.
            if (loadTest.num_words_to_process == 2) {
               m = 2;
               k += 2;
            }
         }

         for (; m < k; m++) {
            printf( " %08x", dst[m]);
         }
         printf("\n");
         // Run once more with the test's offset, if it has one.
         if (j == 0 && loadTest.offset) {
            again = 1;
            j += loadTest.offset;
         }
      }
      while (again);
   }
}
1785 
1786 static void
do_store_test(ldst_test_t storeTest)1787 do_store_test ( ldst_test_t storeTest )
1788 {
1789    test_func_t func;
1790    unsigned int *src, *dst;
1791    int m;
1792 
1793    func = storeTest.test_func;
1794    r14 = (HWord_t) storeTest.base_addr;
1795    r15 = (HWord_t) storeTest.offset;
1796    unsigned int * pv = (unsigned int *) storeTest.base_addr;
1797    int idx;
1798    // clear out storage destination
1799    for (idx = 0; idx < 4; idx++, pv += idx)
1800       *pv = 0;
1801 
1802    memcpy(&vec_inA, &viargs[0], sizeof(vector unsigned char));
1803 
1804    // execute test insn
1805    (*func)();
1806    src = &viargs[0];
1807    dst = (unsigned int*) (((unsigned char *) r14) + storeTest.offset);
1808 
1809    printf( "%s:", storeTest.name );
1810    for (m = 0; m < storeTest.num_words_to_process; m++) {
1811       printf( " %08x", src[m] );
1812    }
1813    printf( " =>" );
1814    for (m = 0; m < storeTest.num_words_to_process; m++) {
1815       printf( " %08x", dst[m] );
1816    }
1817    printf( "\n" );
1818 }
1819 
1820 
test_ldst(void)1821 static void test_ldst(void)
1822 {
1823    int k = 0;
1824 
1825    while (ldst_tests[k].test_func) {
1826       if (ldst_tests[k].type == VSX_STORE)
1827          do_store_test(ldst_tests[k]);
1828       else
1829          do_load_test(ldst_tests[k]);
1830       k++;
1831       printf("\n");
1832    }
1833 }
1834 
test_ftdiv(void)1835 static void test_ftdiv(void)
1836 {
1837    int i, num_tests, crx;
1838    unsigned int flags;
1839    unsigned long long * frap, * frbp;
1840    build_special_fargs_table();
1841 
1842    num_tests = sizeof ftdiv_tests/sizeof ftdiv_tests[0];
1843 
1844    for (i = 0; i < num_tests; i++) {
1845       fp_test_args_t aTest = ftdiv_tests[i];
1846       f14 = spec_fargs[aTest.fra_idx];
1847       f15 = spec_fargs[aTest.frb_idx];
1848       frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1849       frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1850       SET_FPSCR_ZERO;
1851       SET_CR_XER_ZERO;
1852       __asm__ __volatile__ ("ftdiv           cr1, %0, %1" : : "d" (f14), "d" (f15));
1853       GET_CR(flags);
1854       crx = (flags & 0x0f000000) >> 24;
1855       printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap, *frbp, crx);
1856 //      printf("\tFRA: %e;  FRB: %e\n", f14, f15);
1857       if ( crx != aTest.cr_flags) {
1858          printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, crx);
1859       }
1860    }
1861    printf( "\n" );
1862 }
1863 
1864 
test_p7_fpops(void)1865 static void test_p7_fpops ( void )
1866 {
1867    int k = 0;
1868    test_func_t func;
1869 
1870    build_fargs_table();
1871    while ((func = fp_tests[k].test_func)) {
1872       float res;
1873       double resd;
1874       unsigned long long u0;
1875       int i;
1876       int res32 = strcmp(fp_tests[k].name, "fcfidu");
1877 
1878       for (i = 0; i < nb_fargs; i++) {
1879          u0 = *(unsigned long long *) (&fargs[i]);
1880          f14 = fargs[i];
1881          (*func)();
1882          if (res32) {
1883             res = f17;
1884             printf( "%s %016llx => (raw sp) %08x)",
1885                     fp_tests[k].name, u0, *((unsigned int *)&res));
1886          } else {
1887             resd = f17;
1888             printf( "%s %016llx => (raw sp) %016llx)",
1889                     fp_tests[k].name, u0, *(unsigned long long *)(&resd));
1890          }
1891          printf( "\n" );
1892       }
1893 
1894       k++;
1895       printf( "\n" );
1896    }
1897 }
1898 
test_vsx_logic(void)1899 static void test_vsx_logic(void)
1900 {
1901    logic_test_t aTest;
1902    test_func_t func;
1903    int k;
1904    k = 0;
1905 
1906    while ((func = logic_tests[k].test_func)) {
1907       unsigned int * pv;
1908       int startA, startB;
1909       unsigned int * inA, * inB, * dst;
1910       int idx, i;
1911       startA = 0;
1912       aTest = logic_tests[k];
1913       for (i = 0; i <= (NUM_VIARGS_INTS - (NUM_VIARGS_VECS * sizeof(int))); i++, startA++) {
1914          startB = startA + 4;
1915          pv = (unsigned int *)&vec_out;
1916          inA = &viargs[startA];
1917          inB = &viargs[startB];
1918          memcpy(&vec_inA, inA, sizeof(vector unsigned char));
1919          memcpy(&vec_inB, inB, sizeof(vector unsigned char));
1920          // clear vec_out
1921          for (idx = 0; idx < 4; idx++, pv++)
1922             *pv = 0;
1923 
1924          // execute test insn
1925          (*func)();
1926          dst = (unsigned int*) &vec_out;
1927 
1928          printf( "%s:", aTest.name);
1929          printf( " %08x %08x %08x %08x %s", inA[0], inA[1], inA[2], inA[3], aTest.name);
1930          printf( " %08x %08x %08x %08x", inB[0], inB[1], inB[2], inB[3]);
1931          printf(" => %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1932 
1933       }
1934       k++;
1935    }
1936    printf( "\n" );
1937 }
1938 
1939 static vector unsigned long long vec_args[] __attribute__ ((aligned (16))) =
1940 {
1941  { 0x0123456789abcdefULL, 0x0011223344556677ULL},
1942  { 0x8899aabb19293942ULL, 0xa1a2a3a4b1b2b3b4ULL},
1943  { 0xc1c2c3c4d1d2d3d4ULL, 0x7a6b5d3efc032778ULL}
1944 };
1945 #define NUM_VEC_ARGS_LONGS (sizeof vec_args/sizeof vec_args[0])
1946 
test_move_ops(void)1947 static void test_move_ops (void)
1948 {
1949    move_test_t aTest;
1950    test_func_t func;
1951    int k;
1952    k = 0;
1953 
1954    while ((func = move_tests[k].test_func)) {
1955       unsigned int * pv;
1956       int startA, startB;
1957       unsigned long long * inA, * inB, * dst;
1958       int use_vecA = (strcmp(move_tests[k].name, "xscpsgndp") == 0);
1959       int idx;
1960       inA = NULL;
1961       aTest = move_tests[k];
1962       for (startB = 0; startB < NUM_VEC_ARGS_LONGS; startB++) {
1963          inB = (unsigned long long *)&vec_args[startB];
1964          memcpy(&vec_inB, inB, sizeof(vector unsigned char));
1965          if (isLE)
1966             inB++;
1967          startA = 0;
1968 repeat:
1969          if (use_vecA) {
1970             inA = (unsigned long long *)&vec_args[startA];
1971             memcpy(&vec_inA, inA, sizeof(vector unsigned char));
1972             startA++;
1973          }
1974          pv = (unsigned int *)&vec_out;
1975          // clear vec_out
1976          for (idx = 0; idx < 4; idx++, pv++)
1977             *pv = 0;
1978 
1979          // execute test insn
1980          (*func)();
1981          dst = (unsigned long long *) &vec_out;
1982          if (isLE) {
1983             dst++;
1984             inA++;
1985          }
1986 
1987          printf( "%s:", aTest.name);
1988          if (use_vecA)
1989             printf( " X[A]: %016llx ", *inA);
1990          printf( " X[B]: %016llx", *inB);
1991          printf(" => %016llx\n", *dst);
1992 
1993          if (use_vecA && startA < NUM_VEC_ARGS_LONGS)
1994             goto repeat;
1995       }
1996       k++;
1997       printf( "\n" );
1998    }
1999 }
2000 
test_permute_ops(void)2001 static void test_permute_ops (void)
2002 {
2003   permute_test_t *aTest;
2004   unsigned int *dst = (unsigned int *) &vec_out;
2005 
2006   for (aTest = &(permute_tests[0]); aTest->test_func != NULL; aTest++)
2007     {
2008       /* Grab test input and clear output vector.  */
2009       memcpy(&vec_inA, aTest->xa, sizeof(vec_inA));
2010       memcpy(&vec_inB, aTest->xb, sizeof(vec_inB));
2011       memset(dst, 0, sizeof(vec_out));
2012 
2013       /* execute test insn */
2014       aTest->test_func();
2015 
2016       printf( "%s:\n", aTest->name);
2017       printf( "        XA[%08x,%08x,%08x,%08x]\n",
2018               aTest->xa[0], aTest->xa[1], aTest->xa[2], aTest->xa[3]);
2019       printf( "        XB[%08x,%08x,%08x,%08x]\n",
2020               aTest->xb[0], aTest->xb[1], aTest->xb[2], aTest->xb[3]);
2021       printf( "   =>   XT[%08x,%08x,%08x,%08x]\n",
2022               dst[0], dst[1], dst[2], dst[3]);
2023 
2024     }
2025   printf( "\n" );
2026 }
2027 
2028 static test_table_t all_tests[] = { { &test_ldst,
2029                                        "Test VSX load/store instructions" },
2030                                      { &test_vsx_logic,
2031                                        "Test VSX logic instructions" },
2032 #ifdef __powerpc64__
2033                                      { &test_ldbrx,
2034                                        "Test ldbrx instruction" },
2035                                      { &test_popcntd,
2036                                        "Test popcntd instruction" },
2037 #endif
2038                                      { &test_lfiwzx,
2039                                        "Test lfiwzx instruction" },
2040                                      { &test_p7_fpops,
2041                                        "Test P7 floating point convert instructions"},
2042                                      { &test_ftdiv,
2043                                        "Test ftdiv instruction" },
2044                                      { &test_move_ops,
2045                                        "Test VSX move instructions"},
2046                                      { &test_permute_ops,
2047                                        "Test VSX permute instructions"},
2048                                      { &test_vx_fp_ops,
2049                                        "Test VSX floating point instructions"},
2050                                      { &test_xs_conv_ops,
2051                                        "Test VSX scalar integer conversion instructions" },
2052                                      { NULL, NULL }
2053 };
2054 #endif // HAS_VSX
2055 
/*
 * Program entry point.  When built with HAS_VSX, prints the heading of each
 * category in all_tests[] and runs it, stopping at the entry whose
 * test_category is NULL.  Without HAS_VSX it does nothing.  Always returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   test_func_t func;
   int idx;

   for (idx = 0; (func = all_tests[idx].test_category); idx++) {
      printf( "%s\n", all_tests[idx].name );
      (*func)();
   }

#endif // HAS_VSX

   return 0;
}
2075