1 
2 /*
3 gcc -o v8fpsimd_a v8fpsimd_a.c -march=armv8-a -mfpu=crypto-neon-fp-armv8 \
4        -I../../.. -Wall -g -marm
5 
6 gcc -o v8fpsimd_t v8fpsimd_a.c -march=armv8-a -mfpu=crypto-neon-fp-armv8 \
7        -I../../.. -Wall -g
8 */
9 
10 #include <stdio.h>
11 #include <assert.h>
12 #include <malloc.h>  // memalign
13 #include <string.h>  // memset
14 #include "tests/malloc.h"
15 #include <math.h>    // isnormal
16 
/* Short aliases for the basic C types used throughout this file.
   Each alias is declared exactly once: repeating a typedef is only
   permitted from C11 onwards, and older compilers reject it. */
typedef  unsigned char           UChar;
typedef  unsigned short int      UShort;
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  double                  Double;
typedef  float                   Float;

/* Minimal boolean type; False and True are Bool-typed 0 and 1. */
typedef  unsigned char           Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
30 
31 
/* Number of times each macro-generated test_XXX function repeats its
   randomise / execute / print cycle per call. */
#define ITERS 1
33 
/* Lane interpretation passed to the test functions and on to randV128().
   randV128() treats TySF and TyS as 32-bit lanes and TyDF and TyD as
   64-bit lanes; it does not distinguish the remaining values.
   Enumerators count upwards from 1234. */
typedef enum {
   TyHF = 1234,
   TySF,
   TyDF,
   TyB,
   TyH,
   TyS,
   TyD,
   TyNONE
} LaneTy;
37 
38 union _V128 {
39    UChar  u8[16];
40    UShort u16[8];
41    UInt   u32[4];
42    ULong  u64[2];
43    Float  f32[4];
44    Double f64[2];
45 };
46 typedef  union _V128   V128;
47 
randUChar(void)48 static inline UChar randUChar ( void )
49 {
50    static UInt seed = 80021;
51    seed = 1103515245 * seed + 12345;
52    return (seed >> 17) & 0xFF;
53 }
54 
55 //static ULong randULong ( LaneTy ty )
56 //{
57 //   Int i;
58 //   ULong r = 0;
59 //   for (i = 0; i < 8; i++) {
60 //      r = (r << 8) | (ULong)(0xFF & randUChar());
61 //   }
62 //   return r;
63 //}
64 
65 /* Generates a random V128.  Ensures that that it contains normalised
66    FP numbers when viewed as either F32x4 or F64x2, so that it is
67    reasonable to use in FP test cases. */
randV128(V128 * v,LaneTy ty)68 static void randV128 ( /*OUT*/V128* v, LaneTy ty )
69 {
70    static UInt nCalls = 0, nIters = 0;
71    Int i;
72    nCalls++;
73    while (1) {
74       nIters++;
75       for (i = 0; i < 16; i++) {
76          v->u8[i] = randUChar();
77       }
78       if (randUChar() < 32) {
79          /* once every 8 times, clone one of the lanes */
80          switch (ty) {
81             case TySF: case TyS: {
82                UInt l1, l2;
83                while (1) {
84                   l1 = randUChar() & 3;
85                   l2 = randUChar() & 3;
86                   if (l1 != l2) break;
87                }
88                assert(l1 < 4 && l2 < 4);
89                v->u32[l1] = v->u32[l2];
90                printf("randV128: doing v->u32[%u] = v->u32[%u]\n", l1, l2);
91                break;
92             }
93             case TyDF: case TyD: {
94                UInt l1, l2;
95                while (1) {
96                   l1 = randUChar() & 1;
97                   l2 = randUChar() & 1;
98                   if (l1 != l2) break;
99                }
100                assert(l1 < 2 && l2 < 2);
101                printf("randV128: doing v->u64[%u] = v->u64[%u]\n", l1, l2);
102                v->u64[l1] = v->u64[l2];
103                break;
104             }
105             default:
106                break;
107          }
108       }
109       if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
110           && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
111         break;
112    }
113    if (0 == (nCalls & 0xFF))
114       printf("randV128: %u calls, %u iters\n", nCalls, nIters);
115 }
116 
showV128(V128 * v)117 static void showV128 ( V128* v )
118 {
119    Int i;
120    for (i = 15; i >= 0; i--)
121       printf("%02x", (Int)v->u8[i]);
122 }
123 
124 //static void showBlock ( const char* msg, V128* block, Int nBlock )
125 //{
126 //   Int i;
127 //   printf("%s\n", msg);
128 //   for (i = 0; i < nBlock; i++) {
129 //      printf("  ");
130 //      showV128(&block[i]);
131 //      printf("\n");
132 //   }
133 //}
134 
135 
136 /* ---------------------------------------------------------------- */
137 /* -- Parameterisable test macros                                -- */
138 /* ---------------------------------------------------------------- */
139 
/* Execute _action 50 times in a row.  The do { } while (0) wrapper makes
   the expansion a single statement, so it can follow an unbraced `if`.
   The loop counter is called _qq; an _action that itself mentions _qq
   would refer to this counter. */
#define DO50(_action) \
   do { \
      Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
   } while (0)
144 
145 
/* Instruction prefixes for the inline assembly below.  When compiling
   for thumb encodings, IT_EQ and IT_NE expand to an "it eq" / "it ne"
   instruction; for arm encodings they expand to nothing.  IT_AL is
   empty in both cases. */

#define IT_AL /* */

#if !defined(__thumb__) && !defined(__thumb2__)
#  define IT_EQ /* */
#  define IT_NE /* */
#else
#  define IT_EQ "it eq ; "
#  define IT_NE "it ne ; "
#endif
158 
159 
/* Generate a test that involves two vector regs,
   with no bias as to which is input or output.
   It's OK for the insn under test to use r8 as scratch: the asm
   declares r8 clobbered, and uses r9 for its own address arithmetic.
163 
164    Note that the insn doesn't *have* to use Q (128 bit) registers --
165    it can instead mention D (64 bit) and S (32-bit) registers.
166    However, in that case callers of this macro must be very careful to
167    specify QVECREG1NO and QVECREG2NO in such a way as to cover all of
168    the mentioned D and S registers, using the relations
169 
170      D<n> == S<2n+1> and S<2n>
171      Q<n> == D<2n+1> and D<2n>
172 
173    Failing to do so correctly will make the test meaningless, because
174    it will potentially load test data into the wrong registers before
175    the test, and/or show the values of the wrong registers after the
176    test.  The allowed register values are:
177       S: 0 .. 31
178       D: 0 .. 31
179       Q: 0 .. 15
180    Note that Q[15..0] == D[31..0] but S[31..0] only overlaps Q[0..7],
181    so a Q value of 8 or above is definitely invalid for a S register.
182    None of this is checked, though, so be careful when creating the
183    Q numbers.
184 
185    It would be clearer and easier to write the Q numbers using integer
186    division.  For example, in
187 
      GEN_TWOVEC_QDS_TEST(vcvtn_s32_f64, IT_AL, "vcvtn.s32.f64 s27, d5",  6,2)
189 
190    instead of writing "6, 2" at the end, write "(27/4), (5/2)".  This
191    would make clear the connection between the register numbers and
   the Q numbers.  Unfortunately those expressions would need to be
   evaluated to literal numbers at C-preprocessing time (they are
   stringified into register names), and cpp won't do that.  So
   we have to do it the hard and error-prone way.
195 */
/* GEN_TWOVEC_QDS_TEST(TESTNAME, INSN_PRE, INSN, QVECREG1NO, QVECREG2NO)
   defines a non-inlined function  static void test_TESTNAME(LaneTy ty).

     TESTNAME    suffix of the generated function's name
     INSN_PRE    assembly text emitted directly in front of INSN (one of
                 the IT_xx macros); never printed
     INSN        string literal with the instruction under test; it is
                 also the start of the printf format string, so it must
                 not contain '%'
     QVECREGnNO  literal Q register numbers, 0 .. 15, loaded before INSN
                 and stored after it

   Layout of block[] in each iteration:
     block[0], block[1]   values loaded into q<QVECREG1NO>, q<QVECREG2NO>
     block[2], block[3]   the same registers as stored after INSN
     block[4].u32[0]      FPSCR as read after INSN
   If both register numbers are the same, the second load overwrites the
   first, so the insn only sees block[1].

   One output line is printed per iteration: INSN, then block[0] ..
   block[3] in hex, then the masked FPSCR value. */
#define GEN_TWOVEC_QDS_TEST(TESTNAME,INSN_PRE,INSN, \
                            QVECREG1NO,QVECREG2NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     assert(QVECREG1NO >= 0 && QVECREG1NO <= 15); \
     assert(QVECREG2NO >= 0 && QVECREG2NO <= 15); \
     for (i = 0; i < ITERS; i++) { \
        V128 block[4+1]; \
        /* Start every byte, including the FPSCR slot, at 0x55. */ \
        memset(block, 0x55, sizeof(block)); \
        /* block[2] and block[3] are randomised as well, although the */ \
        /* asm below overwrites them with the stored registers. */ \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           /* Zero FPSCR and the APSR N,Z,C,V,Q flags. */ \
           "mov r9, #0 ; vmsr fpscr, r9 ; " \
           "msr apsr_nzcvq, r9 ; " \
           /* Load the two registers from block[0] and block[1]. */ \
           "add r9, %0, #0  ; vld1.8 { q"#QVECREG1NO" }, [r9] ; " \
           "add r9, %0, #16 ; vld1.8 { q"#QVECREG2NO" }, [r9] ; " \
           INSN_PRE INSN " ; " \
           /* Store both registers to block[2] and block[3]. */ \
           "add r9, %0, #32 ; vst1.8 { q"#QVECREG1NO" }, [r9] ; " \
           "add r9, %0, #48 ; vst1.8 { q"#QVECREG2NO" }, [r9] ; " \
           /* Save the resulting FPSCR in block[4].u32[0]. */ \
           "vmrs r9, fpscr ; str r9, [%0, #64] " \
           : : "r"(&block[0]) \
             : "cc", "memory", "q"#QVECREG1NO, "q"#QVECREG2NO, "r8", "r9" \
        ); \
        /* Don't use INSN_PRE in printing, since that differs */ \
        /* between ARM and Thumb and hence makes their outputs differ. */ \
        printf(INSN   "   "); \
        /* The low five FPSCR bits are dropped before printing. */ \
        /* NOTE(review): presumably the cumulative exception flags -- */ \
        /* confirm against the architecture manual. */ \
        UInt fpscr = 0xFFFFFFE0 & block[4].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf(" fpscr=%08x\n", fpscr); \
     } \
  }
232 
233 
/* Generate a test that involves three vector regs,
   with no bias as to which is input or output.  It's also OK for the
   insn under test to use r8 as scratch (the asm declares it clobbered). */
/* GEN_THREEVEC_QDS_TEST(TESTNAME, INSN_PRE, INSN,
                         QVECREG1NO, QVECREG2NO, QVECREG3NO)
   defines a non-inlined function  static void test_TESTNAME(LaneTy ty).

     TESTNAME    suffix of the generated function's name
     INSN_PRE    assembly text emitted directly in front of INSN (one of
                 the IT_xx macros); never printed
     INSN        string literal with the instruction under test; it is
                 also the start of the printf format string, so it must
                 not contain '%'
     QVECREGnNO  literal Q register numbers, 0 .. 15, loaded before INSN
                 and stored after it

   Layout of block[] in each iteration:
     block[0] .. block[2]   values loaded into the three Q registers
     block[3] .. block[5]   the same registers as stored after INSN
     block[6].u32[0]        FPSCR as read after INSN

   One output line is printed per iteration: INSN, then block[0] ..
   block[5] in hex, then the masked FPSCR value. */
#define GEN_THREEVEC_QDS_TEST(TESTNAME,INSN_PRE, \
                              INSN,QVECREG1NO,QVECREG2NO,QVECREG3NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     assert(QVECREG1NO >= 0 && QVECREG1NO <= 15); \
     assert(QVECREG2NO >= 0 && QVECREG2NO <= 15); \
     assert(QVECREG3NO >= 0 && QVECREG3NO <= 15); \
     for (i = 0; i < ITERS; i++) { \
        V128 block[6+1]; \
        /* Start every byte, including the FPSCR slot, at 0x55. */ \
        memset(block, 0x55, sizeof(block)); \
        /* block[3] .. block[5] are randomised as well, although the */ \
        /* asm below overwrites them with the stored registers. */ \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        __asm__ __volatile__( \
           /* Zero FPSCR and the APSR N,Z,C,V,Q flags. */ \
           "mov r9, #0 ; vmsr fpscr, r9 ; " \
           "msr apsr_nzcvq, r9 ; " \
           /* Load the three registers from block[0] .. block[2]. */ \
           "add r9, %0, #0  ; vld1.8 { q"#QVECREG1NO" }, [r9] ; " \
           "add r9, %0, #16 ; vld1.8 { q"#QVECREG2NO" }, [r9] ; " \
           "add r9, %0, #32 ; vld1.8 { q"#QVECREG3NO" }, [r9] ; " \
           INSN_PRE INSN " ; " \
           /* Store the three registers to block[3] .. block[5]. */ \
           "add r9, %0, #48 ; vst1.8 { q"#QVECREG1NO" }, [r9] ; " \
           "add r9, %0, #64 ; vst1.8 { q"#QVECREG2NO" }, [r9] ; " \
           "add r9, %0, #80 ; vst1.8 { q"#QVECREG3NO" }, [r9] ; " \
           /* Save the resulting FPSCR in block[6].u32[0]. */ \
           "vmrs r9, fpscr ; str r9, [%0, #96] " \
           : : "r"(&block[0]) \
           : "cc", "memory", "q"#QVECREG1NO, "q"#QVECREG2NO, "q"#QVECREG3NO, \
             "r8", "r9" \
        ); \
        /* Don't use INSN_PRE in printing, since that differs */ \
        /* between ARM and Thumb and hence makes their outputs differ. */ \
        printf(INSN   "   "); \
        /* The low five FPSCR bits are dropped before printing. */ \
        /* NOTE(review): presumably the cumulative exception flags -- */ \
        /* confirm against the architecture manual. */ \
        UInt fpscr = 0xFFFFFFE0 & block[6].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf("  "); \
        showV128(&block[4]); printf("  "); \
        showV128(&block[5]); printf(" fpscr=%08x\n", fpscr); \
     } \
  }
281 
/* Scalar three-register tests: vsel{ge,gt,eq,vs}, vmaxnm and vminnm in
   .f32 and .f64 forms.  Every line uses s15/d7, s16/d8 and s20/d10,
   which lie in q3, q4 and q5 respectively (q<n> holds d<2n>..d<2n+1>,
   i.e. s<4n>..s<4n+3>), hence the trailing 3,4,5. */
GEN_THREEVEC_QDS_TEST(vselge_f32, IT_AL, "vselge.f32 s15,s16,s20", 3,4,5)
GEN_THREEVEC_QDS_TEST(vselge_f64, IT_AL, "vselge.f64 d7, d8, d10", 3,4,5)

GEN_THREEVEC_QDS_TEST(vselgt_f32, IT_AL, "vselgt.f32 s15,s16,s20", 3,4,5)
GEN_THREEVEC_QDS_TEST(vselgt_f64, IT_AL, "vselgt.f64 d7, d8, d10", 3,4,5)

GEN_THREEVEC_QDS_TEST(vseleq_f32, IT_AL, "vseleq.f32 s15,s16,s20", 3,4,5)
GEN_THREEVEC_QDS_TEST(vseleq_f64, IT_AL, "vseleq.f64 d7, d8, d10", 3,4,5)

GEN_THREEVEC_QDS_TEST(vselvs_f32, IT_AL, "vselvs.f32 s15,s16,s20", 3,4,5)
GEN_THREEVEC_QDS_TEST(vselvs_f64, IT_AL, "vselvs.f64 d7, d8, d10", 3,4,5)

GEN_THREEVEC_QDS_TEST(vmaxnm_f32, IT_AL, "vmaxnm.f32 s15,s16,s20", 3,4,5)
GEN_THREEVEC_QDS_TEST(vmaxnm_f64, IT_AL, "vmaxnm.f64 d7, d8, d10", 3,4,5)

GEN_THREEVEC_QDS_TEST(vminnm_f32, IT_AL, "vminnm.f32 s15,s16,s20", 3,4,5)
GEN_THREEVEC_QDS_TEST(vminnm_f64, IT_AL, "vminnm.f64 d7, d8, d10", 3,4,5)
299 
/* Scalar vcvt{n,a,p,m} tests.  The two trailing numbers are the Q
   registers containing the first and second operand: s<k> lies in
   q<k/4> and d<k> in q<k/2> (integer division). */

/* .s32.f64: S destination, D source. */
GEN_TWOVEC_QDS_TEST(vcvtn_s32_f64, IT_AL, "vcvtn.s32.f64 s27, d5",  6,2)
GEN_TWOVEC_QDS_TEST(vcvta_s32_f64, IT_AL, "vcvta.s32.f64 s4,  d20", 1,10)
GEN_TWOVEC_QDS_TEST(vcvtp_s32_f64, IT_AL, "vcvtp.s32.f64 s7,  d31", 1,15)
GEN_TWOVEC_QDS_TEST(vcvtm_s32_f64, IT_AL, "vcvtm.s32.f64 s1,  d0",  0,0)

/* .s32.f32: S destination, S source. */
GEN_TWOVEC_QDS_TEST(vcvtn_s32_f32, IT_AL, "vcvtn.s32.f32 s27, s5",  6,1)
GEN_TWOVEC_QDS_TEST(vcvta_s32_f32, IT_AL, "vcvta.s32.f32 s4,  s20", 1,5)
GEN_TWOVEC_QDS_TEST(vcvtp_s32_f32, IT_AL, "vcvtp.s32.f32 s7,  s31", 1,7)
GEN_TWOVEC_QDS_TEST(vcvtm_s32_f32, IT_AL, "vcvtm.s32.f32 s1,  s0",  0,0)

/* .u32.f64: same operands as the .s32.f64 group. */
GEN_TWOVEC_QDS_TEST(vcvtn_u32_f64, IT_AL, "vcvtn.u32.f64 s27, d5",  6,2)
GEN_TWOVEC_QDS_TEST(vcvta_u32_f64, IT_AL, "vcvta.u32.f64 s4,  d20", 1,10)
GEN_TWOVEC_QDS_TEST(vcvtp_u32_f64, IT_AL, "vcvtp.u32.f64 s7,  d31", 1,15)
GEN_TWOVEC_QDS_TEST(vcvtm_u32_f64, IT_AL, "vcvtm.u32.f64 s1,  d0",  0,0)

/* .u32.f32: same operands as the .s32.f32 group. */
GEN_TWOVEC_QDS_TEST(vcvtn_u32_f32, IT_AL, "vcvtn.u32.f32 s27, s5",  6,1)
GEN_TWOVEC_QDS_TEST(vcvta_u32_f32, IT_AL, "vcvta.u32.f32 s4,  s20", 1,5)
GEN_TWOVEC_QDS_TEST(vcvtp_u32_f32, IT_AL, "vcvtp.u32.f32 s7,  s31", 1,7)
GEN_TWOVEC_QDS_TEST(vcvtm_u32_f32, IT_AL, "vcvtm.u32.f32 s1,  s0",  0,0)

/* vcvtb/vcvtt between .f64 and .f16.  These four are defined here but
   are disabled in main(). */
GEN_TWOVEC_QDS_TEST(vcvtb_f64_f16, IT_AL, "vcvtb.f64.f16 d27, s18", 13, 4)
GEN_TWOVEC_QDS_TEST(vcvtt_f64_f16, IT_AL, "vcvtt.f64.f16 d28, s17", 14, 4)

GEN_TWOVEC_QDS_TEST(vcvtb_f16_f64, IT_AL, "vcvtb.f16.f64 s9, d17", 2, 8)
GEN_TWOVEC_QDS_TEST(vcvtt_f16_f64, IT_AL, "vcvtt.f16.f64 s8, d27", 2, 13)
325 
/* Scalar vrint{z,r,x} tests in three flavours each: an "eq" and an "ne"
   conditional form, passed IT_EQ / IT_NE as the prefix, and an
   unconditional form (test name suffix "al") that uses the plain
   mnemonic with IT_AL.  The generated function zeroes the APSR flags
   before the insn.  NOTE(review): presumably that means the eq forms
   are skipped and the ne forms execute -- confirm against the expected
   output.  As elsewhere, d<k> lies in q<k/2> and s<k> in q<k/4>. */
GEN_TWOVEC_QDS_TEST(vrintzeq_f64_f64, IT_EQ, "vrintzeq.f64.f64 d0, d9",  0, 4)
GEN_TWOVEC_QDS_TEST(vrintzne_f64_f64, IT_NE, "vrintzne.f64.f64 d1, d10", 0, 5)
GEN_TWOVEC_QDS_TEST(vrintzal_f64_f64, IT_AL,   "vrintz.f64.f64 d2, d11", 1, 5)

GEN_TWOVEC_QDS_TEST(vrintreq_f64_f64, IT_EQ, "vrintreq.f64.f64 d3, d12", 1, 6)
GEN_TWOVEC_QDS_TEST(vrintrne_f64_f64, IT_NE, "vrintrne.f64.f64 d4, d13", 2, 6)
GEN_TWOVEC_QDS_TEST(vrintral_f64_f64, IT_AL,   "vrintr.f64.f64 d5, d14", 2, 7)

GEN_TWOVEC_QDS_TEST(vrintxeq_f64_f64, IT_EQ, "vrintxeq.f64.f64 d6, d15", 3, 7)
GEN_TWOVEC_QDS_TEST(vrintxne_f64_f64, IT_NE, "vrintxne.f64.f64 d7, d16", 3, 8)
GEN_TWOVEC_QDS_TEST(vrintxal_f64_f64, IT_AL,   "vrintx.f64.f64 d8, d8",  4, 4)

/* The same nine tests on S registers. */
GEN_TWOVEC_QDS_TEST(vrintzeq_f32_f32, IT_EQ, "vrintzeq.f32.f32 s0, s9",  0, 2)
GEN_TWOVEC_QDS_TEST(vrintzne_f32_f32, IT_NE, "vrintzne.f32.f32 s1, s10", 0, 2)
GEN_TWOVEC_QDS_TEST(vrintzal_f32_f32, IT_AL,   "vrintz.f32.f32 s2, s11", 0, 2)

GEN_TWOVEC_QDS_TEST(vrintreq_f32_f32, IT_EQ, "vrintreq.f32.f32 s3, s12", 0, 3)
GEN_TWOVEC_QDS_TEST(vrintrne_f32_f32, IT_NE, "vrintrne.f32.f32 s4, s13", 1, 3)
GEN_TWOVEC_QDS_TEST(vrintral_f32_f32, IT_AL,   "vrintr.f32.f32 s5, s14", 1, 3)

GEN_TWOVEC_QDS_TEST(vrintxeq_f32_f32, IT_EQ, "vrintxeq.f32.f32 s6, s15", 1, 3)
GEN_TWOVEC_QDS_TEST(vrintxne_f32_f32, IT_NE, "vrintxne.f32.f32 s7, s16", 1, 4)
GEN_TWOVEC_QDS_TEST(vrintxal_f32_f32, IT_AL,   "vrintx.f32.f32 s8, s8",  2, 2)

/* Scalar vrint{n,a,p,m}: unconditional only, D then S registers. */
GEN_TWOVEC_QDS_TEST(vrintn_f64_f64, IT_AL, "vrintn.f64.f64 d3,  d15",  1,  7)
GEN_TWOVEC_QDS_TEST(vrinta_f64_f64, IT_AL, "vrinta.f64.f64 d6,  d18",  3,  9)
GEN_TWOVEC_QDS_TEST(vrintp_f64_f64, IT_AL, "vrintp.f64.f64 d9,  d21",  4, 10)
GEN_TWOVEC_QDS_TEST(vrintm_f64_f64, IT_AL, "vrintm.f64.f64 d12, d12",  6,  6)

GEN_TWOVEC_QDS_TEST(vrintn_f32_f32, IT_AL, "vrintn.f32.f32 s3,  s15",  0,  3)
GEN_TWOVEC_QDS_TEST(vrinta_f32_f32, IT_AL, "vrinta.f32.f32 s6,  s18",  1,  4)
GEN_TWOVEC_QDS_TEST(vrintp_f32_f32, IT_AL, "vrintp.f32.f32 s9,  s21",  2,  5)
GEN_TWOVEC_QDS_TEST(vrintm_f32_f32, IT_AL, "vrintm.f32.f32 s12, s12",  3,  3)
359 
/* Vector forms.  Test names ending in _vec64 use D registers (d<k> lies
   in q<k/2>, integer division); names ending in _vec128 use Q registers,
   whose numbers are passed through unchanged. */

/* vmaxnm / vminnm, .f32 lanes. */
GEN_THREEVEC_QDS_TEST(vmaxnm_f32_vec64,
                      IT_AL, "vmaxnm.f32 d15,d16,d20", 7,8,10)
GEN_THREEVEC_QDS_TEST(vmaxnm_f32_vec128,
                      IT_AL, "vmaxnm.f32 q7, q8, q10", 7,8,10)

GEN_THREEVEC_QDS_TEST(vminnm_f32_vec64,
                      IT_AL, "vminnm.f32 d15,d16,d20", 7,8,10)
GEN_THREEVEC_QDS_TEST(vminnm_f32_vec128,
                      IT_AL, "vminnm.f32 q7, q8, q10", 7,8,10)

/* vcvt{n,a,p,m}.s32.f32 */
GEN_TWOVEC_QDS_TEST(vcvtn_s32_f32_vec64,
                    IT_AL, "vcvtn.s32.f32 d0,  d20",  0, 10)
GEN_TWOVEC_QDS_TEST(vcvta_s32_f32_vec64,
                    IT_AL, "vcvta.s32.f32 d5,  d25",  2, 12)
GEN_TWOVEC_QDS_TEST(vcvtp_s32_f32_vec64,
                    IT_AL, "vcvtp.s32.f32 d10, d30",  5, 15)
GEN_TWOVEC_QDS_TEST(vcvtm_s32_f32_vec64,
                    IT_AL, "vcvtm.s32.f32 d15, d15",  7, 7)

GEN_TWOVEC_QDS_TEST(vcvtn_s32_f32_vec128,
                    IT_AL, "vcvtn.s32.f32 q15, q0",  15, 0)
GEN_TWOVEC_QDS_TEST(vcvta_s32_f32_vec128,
                    IT_AL, "vcvta.s32.f32 q14, q1",  14, 1)
GEN_TWOVEC_QDS_TEST(vcvtp_s32_f32_vec128,
                    IT_AL, "vcvtp.s32.f32 q13, q2",  13, 2)
GEN_TWOVEC_QDS_TEST(vcvtm_s32_f32_vec128,
                    IT_AL, "vcvtm.s32.f32 q12, q3",  12, 3)

/* vcvt{n,a,p,m}.u32.f32 */
GEN_TWOVEC_QDS_TEST(vcvtn_u32_f32_vec64,
                    IT_AL, "vcvtn.u32.f32 d0,  d20", 0, 10)
GEN_TWOVEC_QDS_TEST(vcvta_u32_f32_vec64,
                    IT_AL, "vcvta.u32.f32 d5,  d25", 2, 12)
GEN_TWOVEC_QDS_TEST(vcvtp_u32_f32_vec64,
                    IT_AL, "vcvtp.u32.f32 d10, d30", 5, 15)
GEN_TWOVEC_QDS_TEST(vcvtm_u32_f32_vec64,
                    IT_AL, "vcvtm.u32.f32 d15, d15", 7, 7)

GEN_TWOVEC_QDS_TEST(vcvtn_u32_f32_vec128,
                    IT_AL, "vcvtn.u32.f32 q15, q0",  15, 0)
GEN_TWOVEC_QDS_TEST(vcvta_u32_f32_vec128,
                    IT_AL, "vcvta.u32.f32 q14, q1",  14, 1)
GEN_TWOVEC_QDS_TEST(vcvtp_u32_f32_vec128,
                    IT_AL, "vcvtp.u32.f32 q13, q2",  13, 2)
GEN_TWOVEC_QDS_TEST(vcvtm_u32_f32_vec128,
                    IT_AL, "vcvtm.u32.f32 q12, q3",  12, 3)

/* vrint{n,a,p,m,z,x}.f32.f32 */
GEN_TWOVEC_QDS_TEST(vrintn_f32_f32_vec64,
                    IT_AL, "vrintn.f32.f32 d0,  d18", 0, 9)
GEN_TWOVEC_QDS_TEST(vrinta_f32_f32_vec64,
                    IT_AL, "vrinta.f32.f32 d3,  d21", 1, 10)
GEN_TWOVEC_QDS_TEST(vrintp_f32_f32_vec64,
                    IT_AL, "vrintp.f32.f32 d6,  d24", 3, 12)
GEN_TWOVEC_QDS_TEST(vrintm_f32_f32_vec64,
                    IT_AL, "vrintm.f32.f32 d9,  d27", 4, 13)
GEN_TWOVEC_QDS_TEST(vrintz_f32_f32_vec64,
                    IT_AL, "vrintz.f32.f32 d12, d30", 6, 15)
GEN_TWOVEC_QDS_TEST(vrintx_f32_f32_vec64,
                    IT_AL, "vrintx.f32.f32 d15, d15", 7, 7)

GEN_TWOVEC_QDS_TEST(vrintn_f32_f32_vec128,
                    IT_AL, "vrintn.f32.f32 q0,  q2",   0, 2)
GEN_TWOVEC_QDS_TEST(vrinta_f32_f32_vec128,
                    IT_AL, "vrinta.f32.f32 q3,  q5",   3, 5)
GEN_TWOVEC_QDS_TEST(vrintp_f32_f32_vec128,
                    IT_AL, "vrintp.f32.f32 q6,  q8",   6, 8)
GEN_TWOVEC_QDS_TEST(vrintm_f32_f32_vec128,
                    IT_AL, "vrintm.f32.f32 q9,  q11",  9, 11)
GEN_TWOVEC_QDS_TEST(vrintz_f32_f32_vec128,
                    IT_AL, "vrintz.f32.f32 q12, q14",  12, 14)
GEN_TWOVEC_QDS_TEST(vrintx_f32_f32_vec128,
                    IT_AL, "vrintx.f32.f32 q15, q15",  15, 15)
431 
main(void)432 int main ( void )
433 {
434    if (1) DO50( test_vselge_f32(TySF) );
435    if (1) DO50( test_vselge_f64(TyDF) );
436 
437    if (1) DO50( test_vselgt_f32(TySF) );
438    if (1) DO50( test_vselgt_f64(TyDF) );
439 
440    if (1) DO50( test_vseleq_f32(TySF) );
441    if (1) DO50( test_vseleq_f64(TyDF) );
442 
443    if (1) DO50( test_vselvs_f32(TySF) );
444    if (1) DO50( test_vselvs_f64(TyDF) );
445 
446    if (1) DO50( test_vmaxnm_f32(TySF) );
447    if (1) DO50( test_vmaxnm_f64(TyDF) );
448 
449    if (1) DO50( test_vminnm_f32(TySF) );
450    if (1) DO50( test_vminnm_f64(TyDF) );
451 
452    if (1) DO50( test_vcvtn_s32_f64(TyDF) );
453    if (1) DO50( test_vcvta_s32_f64(TyDF) );
454    if (1) DO50( test_vcvtp_s32_f64(TyDF) );
455    if (1) DO50( test_vcvtm_s32_f64(TyDF) );
456 
457    if (1) DO50( test_vcvtn_s32_f32(TySF) );
458    if (1) DO50( test_vcvta_s32_f32(TySF) );
459    if (1) DO50( test_vcvtp_s32_f32(TySF) );
460    if (1) DO50( test_vcvtm_s32_f32(TySF) );
461 
462    if (1) DO50( test_vcvtn_u32_f64(TyDF) );
463    if (1) DO50( test_vcvta_u32_f64(TyDF) );
464    if (1) DO50( test_vcvtp_u32_f64(TyDF) );
465    if (1) DO50( test_vcvtm_u32_f64(TyDF) );
466 
467    if (1) DO50( test_vcvtn_u32_f32(TySF) );
468    if (1) DO50( test_vcvta_u32_f32(TySF) );
469    if (1) DO50( test_vcvtp_u32_f32(TySF) );
470    if (1) DO50( test_vcvtm_u32_f32(TySF) );
471 
472    if (0) DO50( test_vcvtb_f64_f16(TyDF) );
473    if (0) DO50( test_vcvtt_f64_f16(TyDF) );
474 
475    if (0) DO50( test_vcvtb_f16_f64(TyHF) );
476    if (0) DO50( test_vcvtt_f16_f64(TyHF) );
477 
478    if (1) DO50( test_vrintzeq_f64_f64(TyDF) );
479    if (1) DO50( test_vrintzne_f64_f64(TyDF) );
480    if (1) DO50( test_vrintzal_f64_f64(TyDF) );
481 
482    if (1) DO50( test_vrintreq_f64_f64(TyDF) );
483    if (1) DO50( test_vrintrne_f64_f64(TyDF) );
484    if (1) DO50( test_vrintral_f64_f64(TyDF) );
485 
486    if (1) DO50( test_vrintxeq_f64_f64(TyDF) );
487    if (1) DO50( test_vrintxne_f64_f64(TyDF) );
488    if (1) DO50( test_vrintxal_f64_f64(TyDF) );
489 
490    if (1) DO50( test_vrintzeq_f32_f32(TySF) );
491    if (1) DO50( test_vrintzne_f32_f32(TySF) );
492    if (1) DO50( test_vrintzal_f32_f32(TySF) );
493 
494    if (1) DO50( test_vrintreq_f32_f32(TySF) );
495    if (1) DO50( test_vrintrne_f32_f32(TySF) );
496    if (1) DO50( test_vrintral_f32_f32(TySF) );
497 
498    if (1) DO50( test_vrintxeq_f32_f32(TySF) );
499    if (1) DO50( test_vrintxne_f32_f32(TySF) );
500    if (1) DO50( test_vrintxal_f32_f32(TySF) );
501 
502    if (1) DO50( test_vrintn_f64_f64(TyDF) );
503    if (1) DO50( test_vrinta_f64_f64(TyDF) );
504    if (1) DO50( test_vrintp_f64_f64(TyDF) );
505    if (1) DO50( test_vrintm_f64_f64(TyDF) );
506 
507    if (1) DO50( test_vrintn_f32_f32(TySF) );
508    if (1) DO50( test_vrinta_f32_f32(TySF) );
509    if (1) DO50( test_vrintp_f32_f32(TySF) );
510    if (1) DO50( test_vrintm_f32_f32(TySF) );
511 
512    if (1) DO50( test_vmaxnm_f32_vec64(TySF) );
513    if (1) DO50( test_vmaxnm_f32_vec128(TySF) );
514 
515    if (1) DO50( test_vminnm_f32_vec64(TySF) );
516    if (1) DO50( test_vminnm_f32_vec128(TySF) );
517 
518    if (1) DO50( test_vcvtn_s32_f32_vec64(TySF) );
519    if (1) DO50( test_vcvta_s32_f32_vec64(TySF) );
520    if (1) DO50( test_vcvtp_s32_f32_vec64(TySF) );
521    if (1) DO50( test_vcvtm_s32_f32_vec64(TySF) );
522 
523    if (1) DO50( test_vcvtn_s32_f32_vec128(TySF) );
524    if (1) DO50( test_vcvta_s32_f32_vec128(TySF) );
525    if (1) DO50( test_vcvtp_s32_f32_vec128(TySF) );
526    if (1) DO50( test_vcvtm_s32_f32_vec128(TySF) );
527 
528    if (1) DO50( test_vcvtn_u32_f32_vec64(TySF) );
529    if (1) DO50( test_vcvta_u32_f32_vec64(TySF) );
530    if (1) DO50( test_vcvtp_u32_f32_vec64(TySF) );
531    if (1) DO50( test_vcvtm_u32_f32_vec64(TySF) );
532 
533    if (1) DO50( test_vcvtn_u32_f32_vec128(TySF) );
534    if (1) DO50( test_vcvta_u32_f32_vec128(TySF) );
535    if (1) DO50( test_vcvtp_u32_f32_vec128(TySF) );
536    if (1) DO50( test_vcvtm_u32_f32_vec128(TySF) );
537 
538    if (1) DO50( test_vrintn_f32_f32_vec64(TySF) );
539    if (1) DO50( test_vrinta_f32_f32_vec64(TySF) );
540    if (1) DO50( test_vrintp_f32_f32_vec64(TySF) );
541    if (1) DO50( test_vrintm_f32_f32_vec64(TySF) );
542    if (1) DO50( test_vrintz_f32_f32_vec64(TySF) );
543    if (1) DO50( test_vrintx_f32_f32_vec64(TySF) );
544 
545    if (1) DO50( test_vrintn_f32_f32_vec128(TySF) );
546    if (1) DO50( test_vrinta_f32_f32_vec128(TySF) );
547    if (1) DO50( test_vrintp_f32_f32_vec128(TySF) );
548    if (1) DO50( test_vrintm_f32_f32_vec128(TySF) );
549    if (1) DO50( test_vrintz_f32_f32_vec128(TySF) );
550    if (1) DO50( test_vrintx_f32_f32_vec128(TySF) );
551 
552    return 0;
553 }
554