1 
2 #include <stdio.h>
3 #include <assert.h>
4 #include <malloc.h>  // memalign
5 #include <string.h>  // memset
6 #include "tests/malloc.h"
7 #include <math.h>    // isnormal
8 
/* Short scalar aliases used throughout this file.  UChar used to be
   declared twice; repeating a typedef is only valid from C11 onwards,
   so the redundant copy has been dropped. */
typedef  unsigned char           UChar;
typedef  unsigned short int      UShort;
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  double                  Double;
typedef  float                   Float;

/* Minimal boolean type together with its two values. */
typedef  unsigned char           Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
22 
23 
/* Number of rounds each generated test function performs. */
#define ITERS 1

/* Lane-type tag passed to the test functions and to the random-data
   helpers.  The enumerators are numbered consecutively from 1234. */
typedef enum {
   TyHF   = 1234,
   TySF   = 1235,
   TyDF   = 1236,
   TyB    = 1237,
   TyH    = 1238,
   TyS    = 1239,
   TyD    = 1240,
   TyNONE = 1241
} LaneTy;
29 
30 union _V128 {
31    UChar  u8[16];
32    UShort u16[8];
33    UInt   u32[4];
34    ULong  u64[2];
35    Float  f32[4];
36    Double f64[2];
37 };
38 typedef  union _V128   V128;
39 
randUChar(void)40 static inline UChar randUChar ( void )
41 {
42    static UInt seed = 80021;
43    seed = 1103515245 * seed + 12345;
44    return (seed >> 17) & 0xFF;
45 }
46 
randULong(LaneTy ty)47 static ULong randULong ( LaneTy ty )
48 {
49    Int i;
50    ULong r = 0;
51    for (i = 0; i < 8; i++) {
52       r = (r << 8) | (ULong)(0xFF & randUChar());
53    }
54    return r;
55 }
56 
57 /* Generates a random V128.  Ensures that that it contains normalised
58    FP numbers when viewed as either F32x4 or F64x2, so that it is
59    reasonable to use in FP test cases. */
randV128(V128 * v,LaneTy ty)60 static void randV128 ( /*OUT*/V128* v, LaneTy ty )
61 {
62    static UInt nCalls = 0, nIters = 0;
63    Int i;
64    nCalls++;
65    while (1) {
66       nIters++;
67       for (i = 0; i < 16; i++) {
68          v->u8[i] = randUChar();
69       }
70       if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
71           && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
72         break;
73    }
74    if (0 == (nCalls & 0xFF))
75       printf("randV128: %u calls, %u iters\n", nCalls, nIters);
76 }
77 
showV128(V128 * v)78 static void showV128 ( V128* v )
79 {
80    Int i;
81    for (i = 15; i >= 0; i--)
82       printf("%02x", (Int)v->u8[i]);
83 }
84 
/* Prints |msg| on a line of its own, then each of the first |nBlock|
   vectors of |block| on its own line, indented by two spaces and
   formatted by showV128. */
static void showBlock ( const char* msg, V128* block, Int nBlock )
{
   Int idx = 0;
   printf("%s\n", msg);
   while (idx < nBlock) {
      printf("  ");
      showV128(&block[idx]);
      printf("\n");
      idx++;
   }
}
95 
dup4x16(UInt x)96 static ULong dup4x16 ( UInt x )
97 {
98    ULong r = x & 0xF;
99    r |= (r << 4);
100    r |= (r << 8);
101    r |= (r << 16);
102    r |= (r << 32);
103    return r;
104 }
105 
106 // Generate a random double-precision number.  About 1 time in 2,
107 // instead return a special value (+/- Inf, +/-Nan, denorm).
108 // This ensures that many of the groups of 4 calls here will
109 // return a special value.
110 
111 static Double special_values[10];
112 static Bool   special_values_initted = False;
113 
114 static __attribute__((noinline))
negate(Double d)115 Double negate ( Double d ) { return -d; }
116 static __attribute__((noinline))
divf64(Double x,Double y)117 Double divf64 ( Double x, Double y ) { return x/y; }
118 
119 static __attribute__((noinline))
plusZero(void)120 Double plusZero  ( void ) { return 0.0; }
121 static __attribute__((noinline))
minusZero(void)122 Double minusZero ( void ) { return negate(plusZero()); }
123 
124 static __attribute__((noinline))
plusOne(void)125 Double plusOne  ( void ) { return 1.0; }
126 static __attribute__((noinline))
minusOne(void)127 Double minusOne ( void ) { return negate(plusOne()); }
128 
129 static __attribute__((noinline))
plusInf(void)130 Double plusInf   ( void ) { return 1.0 / 0.0; }
131 static __attribute__((noinline))
minusInf(void)132 Double minusInf  ( void ) { return negate(plusInf()); }
133 
134 static __attribute__((noinline))
plusNaN(void)135 Double plusNaN  ( void ) { return divf64(plusInf(),plusInf()); }
136 static __attribute__((noinline))
minusNaN(void)137 Double minusNaN ( void ) { return negate(plusNaN()); }
138 
139 static __attribute__((noinline))
plusDenorm(void)140 Double plusDenorm  ( void ) { return 1.23e-315 / 1e3; }
141 static __attribute__((noinline))
minusDenorm(void)142 Double minusDenorm ( void ) { return negate(plusDenorm()); }
143 
144 
ensure_special_values_initted(void)145 static void ensure_special_values_initted ( void )
146 {
147    if (special_values_initted) return;
148    special_values[0] = plusZero();
149    special_values[1] = minusZero();
150    special_values[2] = plusOne();
151    special_values[3] = minusOne();
152    special_values[4] = plusInf();
153    special_values[5] = minusInf();
154    special_values[6] = plusNaN();
155    special_values[7] = minusNaN();
156    special_values[8] = plusDenorm();
157    special_values[9] = minusDenorm();
158    special_values_initted = True;
159    int i;
160    printf("\n");
161    for (i = 0; i < 10; i++) {
162       printf("special value %d = %e\n", i, special_values[i]);
163    }
164    printf("\n");
165 }
166 
randDouble(void)167 static Double randDouble ( void )
168 {
169    ensure_special_values_initted();
170    UChar c = randUChar();
171    if (c >= 128) {
172       // return a normal number most of the time.
173       // 0 .. 2^63-1
174       ULong u64 = randULong(TyDF);
175       // -2^62 .. 2^62-1
176       Long s64 = (Long)u64;
177       // -2^55 .. 2^55-1
178       s64 >>= (62-55);
179       // and now as a float
180       return (Double)s64;
181    }
182    c = randUChar() % 10;
183    return special_values[c];
184 }
185 
randFloat(void)186 static Float randFloat ( void )
187 {
188    ensure_special_values_initted();
189    UChar c = randUChar();
190    if (c >= 128) {
191       // return a normal number most of the time.
192       // 0 .. 2^63-1
193       ULong u64 = randULong(TyDF);
194       // -2^62 .. 2^62-1
195       Long s64 = (Long)u64;
196       // -2^25 .. 2^25-1
197       s64 >>= (62-25);
198       // and now as a float
199       return (Float)s64;
200    }
201    c = randUChar() % 10;
202    return special_values[c];
203 }
204 
/* Overwrites both F64 lanes of each of the first |nBlock| vectors in
   |block| with values from randDouble(), lane 0 before lane 1. */
void randBlock_Doubles ( V128* block, Int nBlock )
{
   Int done;
   for (done = 0; done < nBlock; done++) {
      V128* cur = &block[done];
      Int   lane;
      for (lane = 0; lane < 2; lane++)
         cur->f64[lane] = randDouble();
   }
}
213 
/* Overwrites all four F32 lanes of each of the first |nBlock| vectors
   in |block| with values from randFloat(), in ascending lane order. */
void randBlock_Floats ( V128* block, Int nBlock )
{
   Int done;
   for (done = 0; done < nBlock; done++) {
      V128* cur = &block[done];
      Int   lane;
      for (lane = 0; lane < 4; lane++)
         cur->f32[lane] = randFloat();
   }
}
224 
225 
226 /* ---------------------------------------------------------------- */
227 /* -- Parameterisable test macros                                -- */
228 /* ---------------------------------------------------------------- */
229 
/* Executes |_action| fifty times in a row, as a single statement. */
#define DO50(_action)                      \
   do {                                    \
      Int _qq;                             \
      for (_qq = 0; _qq < 50; _qq++) {     \
         _action ;                         \
      }                                    \
   } while (0)
234 
235 
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>(ty), which runs
      <INSN> v8.<SUFFIXD>, v7.<SUFFIXN>
   ITERS times.  Per round: vec[0] and vec[1] get fresh randV128 data
   and are loaded into v7 and v8 (the destination is preloaded because
   it can sometimes be an input to the instruction too), FPSR is zeroed
   beforehand, v8 is written back to vec[1] and FPSR to vec[2]
   afterwards.  The mnemonic, both vectors and FPSR masked with
   0xFFFFFF60 are printed. */
#define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 vec[3]; \
        UInt fpsr; \
        memset(vec, 0x55, sizeof(vec)); \
        randV128(&vec[0], ty); \
        randV128(&vec[1], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q7, [%0, #0]   ; " \
           "ldr   q8, [%0, #16]   ; " \
           #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
           "str   q8, [%0, #16] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #32] " \
           : : "r"(&vec[0]) : "memory", "v7", "v8", "x30" \
        ); \
        fpsr = vec[2].u32[0] & 0xFFFFFF60; \
        printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN); \
        showV128(&vec[0]); \
        printf("  "); \
        showV128(&vec[1]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
262 
263 
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>_<SUFFIXM>(ty), which runs
      <INSN> v9.<SUFFIXD>, v7.<SUFFIXN>, v8.<SUFFIXM>
   ITERS times.  Per round: vec[0..2] get fresh randV128 data and are
   loaded into v7, v8 and v9 (the destination is preloaded because it
   can sometimes be an input to the instruction too), FPSR is zeroed
   beforehand, v9 is written back to vec[2] and FPSR to vec[3]
   afterwards.  The mnemonic, the three vectors and FPSR masked with
   0xFFFFFF60 are printed. */
#define GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM)  \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##SUFFIXM ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 vec[4]; \
        UInt fpsr; \
        memset(vec, 0x55, sizeof(vec)); \
        randV128(&vec[0], ty); \
        randV128(&vec[1], ty); \
        randV128(&vec[2], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q7, [%0, #0]   ; " \
           "ldr   q8, [%0, #16]   ; " \
           "ldr   q9, [%0, #32]   ; " \
           #INSN " v9." #SUFFIXD ", v7." #SUFFIXN ", v8." #SUFFIXM " ; " \
           "str   q9, [%0, #32] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #48] " \
           : : "r"(&vec[0]) : "memory", "v7", "v8", "v9", "x30" \
        ); \
        fpsr = vec[3].u32[0] & 0xFFFFFF60; \
        printf(#INSN   " v9." #SUFFIXD \
               ", v7." #SUFFIXN ", v8." #SUFFIXM "  ");   \
        showV128(&vec[0]); \
        printf("  "); \
        showV128(&vec[1]); \
        printf("  "); \
        showV128(&vec[2]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
294 
295 
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>_<AMOUNT>(ty), which runs
      <INSN> v8.<SUFFIXD>, v7.<SUFFIXN>, #<AMOUNT>
   ITERS times.  Per round: vec[0] and vec[1] get fresh randV128 data
   and are loaded into v7 and v8 (the destination is preloaded because
   it can sometimes be an input to the instruction too), FPSR is zeroed
   beforehand, v8 is written back to vec[1] and FPSR to vec[2]
   afterwards.  The mnemonic, both vectors and FPSR masked with
   0xFFFFFF60 are printed. */
#define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 vec[3]; \
        UInt fpsr; \
        memset(vec, 0x55, sizeof(vec)); \
        randV128(&vec[0], ty); \
        randV128(&vec[1], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q7, [%0, #0]   ; " \
           "ldr   q8, [%0, #16]   ; " \
           #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
           "str   q8, [%0, #16] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #32] " \
           : : "r"(&vec[0]) : "memory", "v7", "v8", "x30" \
        ); \
        fpsr = vec[2].u32[0] & 0xFFFFFF60; \
        printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT "  "); \
        showV128(&vec[0]); \
        printf("  "); \
        showV128(&vec[1]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
322 
323 
/* Generate a test that involves one integer reg and one vector reg,
   with no bias towards either being the input or the output.
   Defines test_<TESTNAME>(ty).  Per round: block[0..3] get fresh
   randV128 data; q<VECREGNO> is loaded from block[0] and x<INTREGNO>
   from the first 8 bytes of block[1]; FPSR is zeroed; INSN runs; then
   q<VECREGNO> is stored to block[2], x<INTREGNO> to the first 8 bytes
   of block[3] and FPSR to block[4].  x30 serves as scratch for the
   FPSR accesses, hence the assertion that INTREGNO is not 30.
   INSN must be a string literal holding the instruction text.  It is
   printed through "%s" rather than used as the printf format itself,
   so its contents can never be interpreted as conversion specs. */
#define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     assert(INTREGNO != 30); \
     for (i = 0; i < ITERS; i++) { \
        V128 block[4+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREGNO", [%0, #0]  ; " \
           "ldr   x"#INTREGNO", [%0, #16] ; " \
           INSN " ; " \
           "str   q"#VECREGNO", [%0, #32] ; " \
           "str   x"#INTREGNO", [%0, #48] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #64] " \
           : : "r"(&block[0]) : "memory", "v"#VECREGNO, "x"#INTREGNO, "x30" \
        ); \
        printf("%s", INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
356 
357 
/* Generate a test that involves two vector regs, with no bias towards
   either being the input or the output.  It's OK for INSN to use x10
   as scratch (it is listed as clobbered).
   Defines test_<TESTNAME>(ty).  Per round: block[0..3] get fresh
   randV128 data; q<VECREG1NO> and q<VECREG2NO> are loaded from
   block[0] and block[1]; FPSR is zeroed; INSN runs; then the two
   registers are stored to block[2] and block[3] and FPSR to block[4].
   INSN must be a string literal holding the instruction text.  It is
   printed through "%s" rather than used as the printf format itself,
   so its contents can never be interpreted as conversion specs. */
#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[4+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREG1NO", [%0, #0]  ; " \
           "ldr   q"#VECREG2NO", [%0, #16] ; " \
           INSN " ; " \
           "str   q"#VECREG1NO", [%0, #32] ; " \
           "str   q"#VECREG2NO", [%0, #48] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #64] " \
           : : "r"(&block[0]) \
             : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "x10", "x30" \
        ); \
        printf("%s", INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
391 
392 
/* Generate a test that involves three vector regs, with no bias
   towards any of them being input or output.  It's also OK for INSN
   to use v16, v17, v18 as scratch (they are listed as clobbered).
   Defines test_<TESTNAME>(ty).  Per round: block[0..5] get fresh
   randV128 data; the three registers are loaded from block[0..2];
   FPSR is zeroed; INSN runs; then the registers are stored to
   block[3..5] and FPSR to block[6].
   INSN must be a string literal holding the instruction text.  It is
   printed through "%s" rather than used as the printf format itself,
   so its contents can never be interpreted as conversion specs. */
#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO)  \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[6+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREG1NO", [%0, #0]  ; " \
           "ldr   q"#VECREG2NO", [%0, #16] ; " \
           "ldr   q"#VECREG3NO", [%0, #32] ; " \
           INSN " ; " \
           "str   q"#VECREG1NO", [%0, #48] ; " \
           "str   q"#VECREG2NO", [%0, #64] ; " \
           "str   q"#VECREG3NO", [%0, #80] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #96] " \
           : : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
             "v16", "v17", "v18", "x30" \
        ); \
        printf("%s", INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf("  "); \
        showV128(&block[4]); printf("  "); \
        showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
433 
434 
/* Generate a test that involves four vector regs, with no bias
   towards any of them being input or output.  It's also OK for INSN
   to use v16, v17, v18 as scratch (they are listed as clobbered).
   Defines test_<TESTNAME>(ty).  Per round: block[0..7] get fresh
   randV128 data; the four registers are loaded from block[0..3];
   FPSR is zeroed; INSN runs; then the registers are stored to
   block[4..7] and FPSR to block[8].
   INSN must be a string literal holding the instruction text.  It is
   printed through "%s" rather than used as the printf format itself,
   so its contents can never be interpreted as conversion specs. */
#define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
                                       VECREG3NO,VECREG4NO)  \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[8+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        randV128(&block[6], ty); \
        randV128(&block[7], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREG1NO", [%0, #0]  ; " \
           "ldr   q"#VECREG2NO", [%0, #16] ; " \
           "ldr   q"#VECREG3NO", [%0, #32] ; " \
           "ldr   q"#VECREG4NO", [%0, #48] ; " \
           INSN " ; " \
           "str   q"#VECREG1NO", [%0, #64] ; " \
           "str   q"#VECREG2NO", [%0, #80] ; " \
           "str   q"#VECREG3NO", [%0, #96] ; " \
           "str   q"#VECREG4NO", [%0, #112] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #128] " \
           : : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
                       "v"#VECREG3NO, "v"#VECREG4NO, \
             "v16", "v17", "v18", "x30" \
        ); \
        printf("%s", INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[8].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf("  "); \
        showV128(&block[4]); printf("  "); \
        showV128(&block[5]); printf("  "); \
        showV128(&block[6]); printf("  "); \
        showV128(&block[7]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
483 
484 
485 /* ---------------------------------------------------------------- */
486 /* -- Test functions and non-parameterisable test macros         -- */
487 /* ---------------------------------------------------------------- */
488 
test_UMINV(void)489 void test_UMINV ( void )
490 {
491   int i;
492   V128 block[2];
493 
494   /* -- 4s -- */
495 
496   for (i = 0; i < 10; i++) {
497     memset(&block, 0x55, sizeof(block));
498     randV128(&block[0], TyS);
499     randV128(&block[1], TyS);
500     __asm__ __volatile__(
501        "ldr   q7, [%0, #0]   ; "
502        "uminv s8, v7.4s   ; "
503        "str   q8, [%0, #16] "
504        : : "r"(&block[0]) : "memory", "v7", "v8"
505                          );
506     printf("UMINV v8, v7.4s  ");
507     showV128(&block[0]); printf("  ");
508     showV128(&block[1]); printf("\n");
509   }
510 
511   /* -- 8h -- */
512 
513   for (i = 0; i < 10; i++) {
514     memset(&block, 0x55, sizeof(block));
515     randV128(&block[0], TyH);
516     randV128(&block[1], TyH);
517     __asm__ __volatile__(
518        "ldr   q7, [%0, #0]   ; "
519        "uminv h8, v7.8h   ; "
520        "str   q8, [%0, #16] "
521        : : "r"(&block[0]) : "memory", "v7", "v8"
522                          );
523     printf("UMINV h8, v7.8h  ");
524     showV128(&block[0]); printf("  ");
525     showV128(&block[1]); printf("\n");
526   }
527 
528   /* -- 4h -- */
529 
530   for (i = 0; i < 10; i++) {
531     memset(&block, 0x55, sizeof(block));
532     randV128(&block[0], TyH);
533     randV128(&block[1], TyH);
534     __asm__ __volatile__(
535        "ldr   q7, [%0, #0]   ; "
536        "uminv h8, v7.4h   ; "
537        "str   q8, [%0, #16] "
538        : : "r"(&block[0]) : "memory", "v7", "v8"
539                          );
540     printf("UMINV h8, v7.4h  ");
541     showV128(&block[0]); printf("  ");
542     showV128(&block[1]); printf("\n");
543   }
544 
545   /* -- 16b -- */
546 
547   for (i = 0; i < 10; i++) {
548     memset(&block, 0x55, sizeof(block));
549     randV128(&block[0], TyB);
550     randV128(&block[1], TyB);
551     __asm__ __volatile__(
552        "ldr   q7, [%0, #0]   ; "
553        "uminv b8, v7.16b   ; "
554        "str   q8, [%0, #16] "
555        : : "r"(&block[0]) : "memory", "v7", "v8"
556                          );
557     printf("UMINV b8, v7.16b  ");
558     showV128(&block[0]); printf("  ");
559     showV128(&block[1]); printf("\n");
560   }
561 
562   /* -- 8b -- */
563 
564   for (i = 0; i < 10; i++) {
565     memset(&block, 0x55, sizeof(block));
566     randV128(&block[0], TyB);
567     randV128(&block[1], TyB);
568     __asm__ __volatile__(
569        "ldr   q7, [%0, #0]   ; "
570        "uminv b8, v7.8b   ; "
571        "str   q8, [%0, #16] "
572        : : "r"(&block[0]) : "memory", "v7", "v8"
573                          );
574     printf("UMINV b8, v7.8b  ");
575     showV128(&block[0]); printf("  ");
576     showV128(&block[1]); printf("\n");
577   }
578 
579 }
580 
581 
test_UMAXV(void)582 void test_UMAXV ( void )
583 {
584   int i;
585   V128 block[2];
586 
587   /* -- 4s -- */
588 
589   for (i = 0; i < 10; i++) {
590     memset(&block, 0x55, sizeof(block));
591     randV128(&block[0], TyS);
592     randV128(&block[1], TyS);
593     __asm__ __volatile__(
594        "ldr   q7, [%0, #0]   ; "
595        "umaxv s8, v7.4s   ; "
596        "str   q8, [%0, #16] "
597        : : "r"(&block[0]) : "memory", "v7", "v8"
598                          );
599     printf("UMAXV v8, v7.4s  ");
600     showV128(&block[0]); printf("  ");
601     showV128(&block[1]); printf("\n");
602   }
603 
604   /* -- 8h -- */
605 
606   for (i = 0; i < 10; i++) {
607     memset(&block, 0x55, sizeof(block));
608     randV128(&block[0], TyH);
609     randV128(&block[1], TyH);
610     __asm__ __volatile__(
611        "ldr   q7, [%0, #0]   ; "
612        "umaxv h8, v7.8h   ; "
613        "str   q8, [%0, #16] "
614        : : "r"(&block[0]) : "memory", "v7", "v8"
615                          );
616     printf("UMAXV h8, v7.8h  ");
617     showV128(&block[0]); printf("  ");
618     showV128(&block[1]); printf("\n");
619   }
620 
621   /* -- 4h -- */
622 
623   for (i = 0; i < 10; i++) {
624     memset(&block, 0x55, sizeof(block));
625     randV128(&block[0], TyH);
626     randV128(&block[1], TyH);
627     __asm__ __volatile__(
628        "ldr   q7, [%0, #0]   ; "
629        "umaxv h8, v7.4h   ; "
630        "str   q8, [%0, #16] "
631        : : "r"(&block[0]) : "memory", "v7", "v8"
632                          );
633     printf("UMAXV h8, v7.4h  ");
634     showV128(&block[0]); printf("  ");
635     showV128(&block[1]); printf("\n");
636   }
637 
638   /* -- 16b -- */
639 
640   for (i = 0; i < 10; i++) {
641     memset(&block, 0x55, sizeof(block));
642     randV128(&block[0], TyB);
643     randV128(&block[1], TyB);
644     __asm__ __volatile__(
645        "ldr   q7, [%0, #0]   ; "
646        "umaxv b8, v7.16b   ; "
647        "str   q8, [%0, #16] "
648        : : "r"(&block[0]) : "memory", "v7", "v8"
649                          );
650     printf("UMAXV b8, v7.16b  ");
651     showV128(&block[0]); printf("  ");
652     showV128(&block[1]); printf("\n");
653   }
654 
655   /* -- 8b -- */
656 
657   for (i = 0; i < 10; i++) {
658     memset(&block, 0x55, sizeof(block));
659     randV128(&block[0], TyB);
660     randV128(&block[1], TyB);
661     __asm__ __volatile__(
662        "ldr   q7, [%0, #0]   ; "
663        "umaxv b8, v7.8b   ; "
664        "str   q8, [%0, #16] "
665        : : "r"(&block[0]) : "memory", "v7", "v8"
666                          );
667     printf("UMAXV b8, v7.8b  ");
668     showV128(&block[0]); printf("  ");
669     showV128(&block[1]); printf("\n");
670   }
671 
672 }
673 
674 
test_INS_general(void)675 void test_INS_general ( void )
676 {
677   V128 block[3];
678 
679   /* -- D[0..1] -- */
680 
681   memset(&block, 0x55, sizeof(block));
682   block[1].u64[0] = randULong(TyD);
683   __asm__ __volatile__(
684      "ldr q7, [%0, #0]   ; "
685      "ldr x19, [%0, #16] ; "
686      "ins v7.d[0], x19   ; "
687      "str q7, [%0, #32] "
688      : : "r"(&block[0]) : "memory", "x19", "v7"
689   );
690   printf("INS v7.u64[0],x19  ");
691   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
692   showV128(&block[2]); printf("\n");
693 
694   memset(&block, 0x55, sizeof(block));
695   block[1].u64[0] = randULong(TyD);
696   __asm__ __volatile__(
697      "ldr q7, [%0, #0]   ; "
698      "ldr x19, [%0, #16] ; "
699      "ins v7.d[1], x19   ; "
700      "str q7, [%0, #32] "
701      : : "r"(&block[0]) : "memory", "x19", "v7"
702   );
703   printf("INS v7.d[1],x19  ");
704   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
705   showV128(&block[2]); printf("\n");
706 
707   /* -- S[0..3] -- */
708 
709   memset(&block, 0x55, sizeof(block));
710   block[1].u64[0] = randULong(TyS);
711   __asm__ __volatile__(
712      "ldr q7, [%0, #0]   ; "
713      "ldr x19, [%0, #16] ; "
714      "ins v7.s[0], w19   ; "
715      "str q7, [%0, #32] "
716      : : "r"(&block[0]) : "memory", "x19", "v7"
717   );
718   printf("INS v7.s[0],x19  ");
719   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
720   showV128(&block[2]); printf("\n");
721 
722   memset(&block, 0x55, sizeof(block));
723   block[1].u64[0] = randULong(TyS);
724   __asm__ __volatile__(
725      "ldr q7, [%0, #0]   ; "
726      "ldr x19, [%0, #16] ; "
727      "ins v7.s[1], w19   ; "
728      "str q7, [%0, #32] "
729      : : "r"(&block[0]) : "memory", "x19", "v7"
730   );
731   printf("INS v7.s[1],x19  ");
732   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
733   showV128(&block[2]); printf("\n");
734 
735   memset(&block, 0x55, sizeof(block));
736   block[1].u64[0] = randULong(TyS);
737   __asm__ __volatile__(
738      "ldr q7, [%0, #0]   ; "
739      "ldr x19, [%0, #16] ; "
740      "ins v7.s[2], w19   ; "
741      "str q7, [%0, #32] "
742      : : "r"(&block[0]) : "memory", "x19", "v7"
743   );
744   printf("INS v7.s[2],x19  ");
745   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
746   showV128(&block[2]); printf("\n");
747 
748   memset(&block, 0x55, sizeof(block));
749   block[1].u64[0] = randULong(TyS);
750   __asm__ __volatile__(
751      "ldr q7, [%0, #0]   ; "
752      "ldr x19, [%0, #16] ; "
753      "ins v7.s[3], w19   ; "
754      "str q7, [%0, #32] "
755      : : "r"(&block[0]) : "memory", "x19", "v7"
756   );
757   printf("INS v7.s[3],x19  ");
758   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
759   showV128(&block[2]); printf("\n");
760 
761   /* -- H[0..7] -- */
762 
763   memset(&block, 0x55, sizeof(block));
764   block[1].u64[0] = randULong(TyH);
765   __asm__ __volatile__(
766      "ldr q7, [%0, #0]   ; "
767      "ldr x19, [%0, #16] ; "
768      "ins v7.h[0], w19   ; "
769      "str q7, [%0, #32] "
770      : : "r"(&block[0]) : "memory", "x19", "v7"
771   );
772   printf("INS v7.h[0],x19  ");
773   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
774   showV128(&block[2]); printf("\n");
775 
776   memset(&block, 0x55, sizeof(block));
777   block[1].u64[0] = randULong(TyH);
778   __asm__ __volatile__(
779      "ldr q7, [%0, #0]   ; "
780      "ldr x19, [%0, #16] ; "
781      "ins v7.h[1], w19   ; "
782      "str q7, [%0, #32] "
783      : : "r"(&block[0]) : "memory", "x19", "v7"
784   );
785   printf("INS v7.h[1],x19  ");
786   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
787   showV128(&block[2]); printf("\n");
788 
789   memset(&block, 0x55, sizeof(block));
790   block[1].u64[0] = randULong(TyH);
791   __asm__ __volatile__(
792      "ldr q7, [%0, #0]   ; "
793      "ldr x19, [%0, #16] ; "
794      "ins v7.h[2], w19   ; "
795      "str q7, [%0, #32] "
796      : : "r"(&block[0]) : "memory", "x19", "v7"
797   );
798   printf("INS v7.h[2],x19  ");
799   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
800   showV128(&block[2]); printf("\n");
801 
802   memset(&block, 0x55, sizeof(block));
803   block[1].u64[0] = randULong(TyH);
804   __asm__ __volatile__(
805      "ldr q7, [%0, #0]   ; "
806      "ldr x19, [%0, #16] ; "
807      "ins v7.h[3], w19   ; "
808      "str q7, [%0, #32] "
809      : : "r"(&block[0]) : "memory", "x19", "v7"
810   );
811   printf("INS v7.h[3],x19  ");
812   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
813   showV128(&block[2]); printf("\n");
814 
815   memset(&block, 0x55, sizeof(block));
816   block[1].u64[0] = randULong(TyH);
817   __asm__ __volatile__(
818      "ldr q7, [%0, #0]   ; "
819      "ldr x19, [%0, #16] ; "
820      "ins v7.h[4], w19   ; "
821      "str q7, [%0, #32] "
822      : : "r"(&block[0]) : "memory", "x19", "v7"
823   );
824   printf("INS v7.h[4],x19  ");
825   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
826   showV128(&block[2]); printf("\n");
827 
828   memset(&block, 0x55, sizeof(block));
829   block[1].u64[0] = randULong(TyH);
830   __asm__ __volatile__(
831      "ldr q7, [%0, #0]   ; "
832      "ldr x19, [%0, #16] ; "
833      "ins v7.h[5], w19   ; "
834      "str q7, [%0, #32] "
835      : : "r"(&block[0]) : "memory", "x19", "v7"
836   );
837   printf("INS v7.h[5],x19  ");
838   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
839   showV128(&block[2]); printf("\n");
840 
841   memset(&block, 0x55, sizeof(block));
842   block[1].u64[0] = randULong(TyH);
843   __asm__ __volatile__(
844      "ldr q7, [%0, #0]   ; "
845      "ldr x19, [%0, #16] ; "
846      "ins v7.h[6], w19   ; "
847      "str q7, [%0, #32] "
848      : : "r"(&block[0]) : "memory", "x19", "v7"
849   );
850   printf("INS v7.h[6],x19  ");
851   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
852   showV128(&block[2]); printf("\n");
853 
854   memset(&block, 0x55, sizeof(block));
855   block[1].u64[0] = randULong(TyH);
856   __asm__ __volatile__(
857      "ldr q7, [%0, #0]   ; "
858      "ldr x19, [%0, #16] ; "
859      "ins v7.h[7], w19   ; "
860      "str q7, [%0, #32] "
861      : : "r"(&block[0]) : "memory", "x19", "v7"
862   );
863   printf("INS v7.h[7],x19  ");
864   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
865   showV128(&block[2]); printf("\n");
866 
867   /* -- B[0,15] -- */
868 
869   memset(&block, 0x55, sizeof(block));
870   block[1].u64[0] = randULong(TyB);
871   __asm__ __volatile__(
872      "ldr q7, [%0, #0]   ; "
873      "ldr x19, [%0, #16] ; "
874      "ins v7.b[0], w19   ; "
875      "str q7, [%0, #32] "
876      : : "r"(&block[0]) : "memory", "x19", "v7"
877   );
878   printf("INS v7.b[0],x19  ");
879   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
880   showV128(&block[2]); printf("\n");
881 
882   memset(&block, 0x55, sizeof(block));
883   block[1].u64[0] = randULong(TyB);
884   __asm__ __volatile__(
885      "ldr q7, [%0, #0]   ; "
886      "ldr x19, [%0, #16] ; "
887      "ins v7.b[15], w19   ; "
888      "str q7, [%0, #32] "
889      : : "r"(&block[0]) : "memory", "x19", "v7"
890   );
891   printf("INS v7.b[15],x19 ");
892   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
893   showV128(&block[2]); printf("\n");
894 }
895 
896 
897 
test_SMINV(void)898 void test_SMINV ( void )
899 {
900   int i;
901   V128 block[2];
902 
903   /* -- 4s -- */
904 
905   for (i = 0; i < 10; i++) {
906     memset(&block, 0x55, sizeof(block));
907     randV128(&block[0], TyS);
908     randV128(&block[1], TyS);
909     __asm__ __volatile__(
910        "ldr   q7, [%0, #0]   ; "
911        "sminv s8, v7.4s   ; "
912        "str   q8, [%0, #16] "
913        : : "r"(&block[0]) : "memory", "v7", "v8"
914                          );
915     printf("SMINV v8, v7.4s  ");
916     showV128(&block[0]); printf("  ");
917     showV128(&block[1]); printf("\n");
918   }
919 
920   /* -- 8h -- */
921 
922   for (i = 0; i < 10; i++) {
923     memset(&block, 0x55, sizeof(block));
924     randV128(&block[0], TyH);
925     randV128(&block[1], TyH);
926     __asm__ __volatile__(
927        "ldr   q7, [%0, #0]   ; "
928        "sminv h8, v7.8h   ; "
929        "str   q8, [%0, #16] "
930        : : "r"(&block[0]) : "memory", "v7", "v8"
931                          );
932     printf("SMINV h8, v7.8h  ");
933     showV128(&block[0]); printf("  ");
934     showV128(&block[1]); printf("\n");
935   }
936 
937   /* -- 4h -- */
938 
939   for (i = 0; i < 10; i++) {
940     memset(&block, 0x55, sizeof(block));
941     randV128(&block[0], TyH);
942     randV128(&block[1], TyH);
943     __asm__ __volatile__(
944        "ldr   q7, [%0, #0]   ; "
945        "sminv h8, v7.4h   ; "
946        "str   q8, [%0, #16] "
947        : : "r"(&block[0]) : "memory", "v7", "v8"
948                          );
949     printf("SMINV h8, v7.4h  ");
950     showV128(&block[0]); printf("  ");
951     showV128(&block[1]); printf("\n");
952   }
953 
954   /* -- 16b -- */
955 
956   for (i = 0; i < 10; i++) {
957     memset(&block, 0x55, sizeof(block));
958     randV128(&block[0], TyB);
959     randV128(&block[1], TyB);
960     __asm__ __volatile__(
961        "ldr   q7, [%0, #0]   ; "
962        "sminv b8, v7.16b   ; "
963        "str   q8, [%0, #16] "
964        : : "r"(&block[0]) : "memory", "v7", "v8"
965                          );
966     printf("SMINV b8, v7.16b  ");
967     showV128(&block[0]); printf("  ");
968     showV128(&block[1]); printf("\n");
969   }
970 
971   /* -- 8b -- */
972 
973   for (i = 0; i < 10; i++) {
974     memset(&block, 0x55, sizeof(block));
975     randV128(&block[0], TyB);
976     randV128(&block[1], TyB);
977     __asm__ __volatile__(
978        "ldr   q7, [%0, #0]   ; "
979        "sminv b8, v7.8b   ; "
980        "str   q8, [%0, #16] "
981        : : "r"(&block[0]) : "memory", "v7", "v8"
982                          );
983     printf("SMINV b8, v7.8b  ");
984     showV128(&block[0]); printf("  ");
985     showV128(&block[1]); printf("\n");
986   }
987 
988 }
989 
990 
test_SMAXV(void)991 void test_SMAXV ( void )
992 {
993   int i;
994   V128 block[2];
995 
996   /* -- 4s -- */
997 
998   for (i = 0; i < 10; i++) {
999     memset(&block, 0x55, sizeof(block));
1000     randV128(&block[0], TyS);
1001     randV128(&block[1], TyS);
1002     __asm__ __volatile__(
1003        "ldr   q7, [%0, #0]   ; "
1004        "smaxv s8, v7.4s   ; "
1005        "str   q8, [%0, #16] "
1006        : : "r"(&block[0]) : "memory", "v7", "v8"
1007                          );
1008     printf("SMAXV v8, v7.4s  ");
1009     showV128(&block[0]); printf("  ");
1010     showV128(&block[1]); printf("\n");
1011   }
1012 
1013   /* -- 8h -- */
1014 
1015   for (i = 0; i < 10; i++) {
1016     memset(&block, 0x55, sizeof(block));
1017     randV128(&block[0], TyH);
1018     randV128(&block[1], TyH);
1019     __asm__ __volatile__(
1020        "ldr   q7, [%0, #0]   ; "
1021        "smaxv h8, v7.8h   ; "
1022        "str   q8, [%0, #16] "
1023        : : "r"(&block[0]) : "memory", "v7", "v8"
1024                          );
1025     printf("SMAXV h8, v7.8h  ");
1026     showV128(&block[0]); printf("  ");
1027     showV128(&block[1]); printf("\n");
1028   }
1029 
1030   /* -- 4h -- */
1031 
1032   for (i = 0; i < 10; i++) {
1033     memset(&block, 0x55, sizeof(block));
1034     randV128(&block[0], TyH);
1035     randV128(&block[1], TyH);
1036     __asm__ __volatile__(
1037        "ldr   q7, [%0, #0]   ; "
1038        "smaxv h8, v7.4h   ; "
1039        "str   q8, [%0, #16] "
1040        : : "r"(&block[0]) : "memory", "v7", "v8"
1041                          );
1042     printf("SMAXV h8, v7.4h  ");
1043     showV128(&block[0]); printf("  ");
1044     showV128(&block[1]); printf("\n");
1045   }
1046 
1047   /* -- 16b -- */
1048 
1049   for (i = 0; i < 10; i++) {
1050     memset(&block, 0x55, sizeof(block));
1051     randV128(&block[0], TyB);
1052     randV128(&block[1], TyB);
1053     __asm__ __volatile__(
1054        "ldr   q7, [%0, #0]   ; "
1055        "smaxv b8, v7.16b   ; "
1056        "str   q8, [%0, #16] "
1057        : : "r"(&block[0]) : "memory", "v7", "v8"
1058                          );
1059     printf("SMAXV b8, v7.16b  ");
1060     showV128(&block[0]); printf("  ");
1061     showV128(&block[1]); printf("\n");
1062   }
1063 
1064   /* -- 8b -- */
1065 
1066   for (i = 0; i < 10; i++) {
1067     memset(&block, 0x55, sizeof(block));
1068     randV128(&block[0], TyB);
1069     randV128(&block[1], TyB);
1070     __asm__ __volatile__(
1071        "ldr   q7, [%0, #0]   ; "
1072        "smaxv b8, v7.8b   ; "
1073        "str   q8, [%0, #16] "
1074        : : "r"(&block[0]) : "memory", "v7", "v8"
1075                          );
1076     printf("SMAXV b8, v7.8b  ");
1077     showV128(&block[0]); printf("  ");
1078     showV128(&block[1]); printf("\n");
1079   }
1080 
1081 }
1082 
1083 
1084 //======== FCCMP_D ========//
1085 
/* Generator for test_FCCMP_D_D_0xF_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp d29, d11, #0xf, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_D_D_0xF_EQ \
  __attribute__((noinline)) static void test_FCCMP_D_D_0xF_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_D_D_0xF_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp d29, d11, #0xf, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_D_D_0xF_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1104 
/* Generator for test_FCCMP_D_D_0xF_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp d29, d11, #0xf, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_D_D_0xF_NE \
  __attribute__((noinline)) static void test_FCCMP_D_D_0xF_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_D_D_0xF_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp d29, d11, #0xf, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_D_D_0xF_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1123 
/* Generator for test_FCCMP_D_D_0x0_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp d29, d11, #0x0, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_D_D_0x0_EQ \
  __attribute__((noinline)) static void test_FCCMP_D_D_0x0_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_D_D_0x0_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp d29, d11, #0x0, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_D_D_0x0_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1142 
/* Generator for test_FCCMP_D_D_0x0_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp d29, d11, #0x0, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_D_D_0x0_NE \
  __attribute__((noinline)) static void test_FCCMP_D_D_0x0_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_D_D_0x0_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp d29, d11, #0x0, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_D_D_0x0_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1161 
1162 //======== FCCMP_S ========//
1163 
/* Generator for test_FCCMP_S_S_0xF_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp s29, s11, #0xf, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_S_S_0xF_EQ \
  __attribute__((noinline)) static void test_FCCMP_S_S_0xF_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_S_S_0xF_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp s29, s11, #0xf, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_S_S_0xF_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1182 
/* Generator for test_FCCMP_S_S_0xF_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp s29, s11, #0xf, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_S_S_0xF_NE \
  __attribute__((noinline)) static void test_FCCMP_S_S_0xF_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_S_S_0xF_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp s29, s11, #0xf, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_S_S_0xF_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1201 
/* Generator for test_FCCMP_S_S_0x0_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp s29, s11, #0x0, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_S_S_0x0_EQ \
  __attribute__((noinline)) static void test_FCCMP_S_S_0x0_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_S_S_0x0_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp s29, s11, #0x0, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_S_S_0x0_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1220 
/* Generator for test_FCCMP_S_S_0x0_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmp s29, s11, #0x0, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMP_S_S_0x0_NE \
  __attribute__((noinline)) static void test_FCCMP_S_S_0x0_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMP_S_S_0x0_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmp s29, s11, #0x0, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMP_S_S_0x0_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1239 
1240 //======== FCCMPE_D ========//
1241 
/* Generator for test_FCCMPE_D_D_0xF_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe d29, d11, #0xf, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMPE_D_D_0xF_EQ \
  __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_D_D_0xF_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe d29, d11, #0xf, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_D_D_0xF_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1260 
/* Generator for test_FCCMPE_D_D_0xF_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe d29, d11, #0xf, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMPE_D_D_0xF_NE \
  __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_D_D_0xF_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe d29, d11, #0xf, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_D_D_0xF_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1279 
/* Generator for test_FCCMPE_D_D_0x0_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe d29, d11, #0x0, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMPE_D_D_0x0_EQ \
  __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_D_D_0x0_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe d29, d11, #0x0, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_D_D_0x0_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1298 
/* Generator for test_FCCMPE_D_D_0x0_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe d29, d11, #0x0, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMPE_D_D_0x0_NE \
  __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_D_D_0x0_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe d29, d11, #0x0, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_D_D_0x0_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1317 
1318 //======== FCCMPE_S ========//
1319 
/* Generator for test_FCCMPE_S_S_0xF_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe s29, s11, #0xf, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after.
   The "before" label used to read "FCCMP_S_S_0xF_EQ" (copied from the
   FCCMP variant); it now names FCCMPE like the "after" label does.
   NOTE(review): if this program's stdout is compared against a recorded
   baseline, that baseline needs regenerating. */
#define GEN_test_FCCMPE_S_S_0xF_EQ \
  __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_S_S_0xF_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe s29, s11, #0xf, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_S_S_0xF_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1338 
/* Generator for test_FCCMPE_S_S_0xF_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe s29, s11, #0xf, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after. */
#define GEN_test_FCCMPE_S_S_0xF_NE \
  __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_S_S_0xF_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe s29, s11, #0xf, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_S_S_0xF_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1357 
/* Generator for test_FCCMPE_S_S_0x0_EQ(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe s29, s11, #0x0, eq" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after.
   The "before" label used to read "FCCMP_S_S_0x0_EQ" (copied from the
   FCCMP variant); it now names FCCMPE like the "after" label does.
   NOTE(review): if this program's stdout is compared against a recorded
   baseline, that baseline needs regenerating. */
#define GEN_test_FCCMPE_S_S_0x0_EQ \
  __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_EQ ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_S_S_0x0_EQ before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe s29, s11, #0x0, eq; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_S_S_0x0_EQ after", &blk[0], 4); \
     printf("\n"); \
  }
1376 
/* Generator for test_FCCMPE_S_S_0x0_NE(): NZCV is preloaded from the low
   64 bits of blk[3], "fccmpe s29, s11, #0x0, ne" runs on registers filled
   from blk[0..2], and the registers plus the resulting NZCV are stored
   back.  showBlock is called on all four vectors before and after.
   The "before" label used to read "FCCMP_S_S_0x0_NE" (copied from the
   FCCMP variant); it now names FCCMPE like the "after" label does.
   NOTE(review): if this program's stdout is compared against a recorded
   baseline, that baseline needs regenerating. */
#define GEN_test_FCCMPE_S_S_0x0_NE \
  __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_NE ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCCMPE_S_S_0x0_NE before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fccmpe s29, s11, #0x0, ne; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCCMPE_S_S_0x0_NE after", &blk[0], 4); \
     printf("\n"); \
  }
1395 
1396 //======== FCMEQ_D_D_D ========//
1397 
/* Generator for test_FCMEQ_D_D_D(): loads q29/q11/q9 from blk[0..2],
   runs "fcmeq d29, d11, d9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FCMEQ_D_D_D \
  __attribute__((noinline)) static void test_FCMEQ_D_D_D ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMEQ_D_D_D before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmeq d29, d11, d9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMEQ_D_D_D after", &blk[0], 4); \
     printf("\n"); \
  }
1416 
1417 //======== FCMEQ_S_S_S ========//
1418 
/* Generator for test_FCMEQ_S_S_S(): loads q29/q11/q9 from blk[0..2],
   runs "fcmeq s29, s11, s9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FCMEQ_S_S_S \
  __attribute__((noinline)) static void test_FCMEQ_S_S_S ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMEQ_S_S_S before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmeq s29, s11, s9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMEQ_S_S_S after", &blk[0], 4); \
     printf("\n"); \
  }
1437 
1438 //======== FCMGE_D_D_D ========//
1439 
/* Generator for test_FCMGE_D_D_D(): loads q29/q11/q9 from blk[0..2],
   runs "fcmge d29, d11, d9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FCMGE_D_D_D \
  __attribute__((noinline)) static void test_FCMGE_D_D_D ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMGE_D_D_D before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmge d29, d11, d9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMGE_D_D_D after", &blk[0], 4); \
     printf("\n"); \
  }
1458 
1459 //======== FCMGE_S_S_S ========//
1460 
/* Generator for test_FCMGE_S_S_S(): loads q29/q11/q9 from blk[0..2],
   runs "fcmge s29, s11, s9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FCMGE_S_S_S \
  __attribute__((noinline)) static void test_FCMGE_S_S_S ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMGE_S_S_S before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmge s29, s11, s9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMGE_S_S_S after", &blk[0], 4); \
     printf("\n"); \
  }
1479 
1480 //======== FCMGT_D_D_D ========//
1481 
/* Generator for test_FCMGT_D_D_D(): loads q29/q11/q9 from blk[0..2],
   runs "fcmgt d29, d11, d9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FCMGT_D_D_D \
  __attribute__((noinline)) static void test_FCMGT_D_D_D ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMGT_D_D_D before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmgt d29, d11, d9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMGT_D_D_D after", &blk[0], 4); \
     printf("\n"); \
  }
1500 
1501 //======== FCMGT_S_S_S ========//
1502 
/* Generator for test_FCMGT_S_S_S(): loads q29/q11/q9 from blk[0..2],
   runs "fcmgt s29, s11, s9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FCMGT_S_S_S \
  __attribute__((noinline)) static void test_FCMGT_S_S_S ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMGT_S_S_S before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmgt s29, s11, s9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMGT_S_S_S after", &blk[0], 4); \
     printf("\n"); \
  }
1521 
1522 //======== FACGT_D_D_D ========//
1523 
/* Generator for test_FACGT_D_D_D(): loads q29/q11/q9 from blk[0..2],
   runs "facgt d29, d11, d9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FACGT_D_D_D \
  __attribute__((noinline)) static void test_FACGT_D_D_D ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FACGT_D_D_D before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "facgt d29, d11, d9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FACGT_D_D_D after", &blk[0], 4); \
     printf("\n"); \
  }
1542 
1543 //======== FACGT_S_S_S ========//
1544 
/* Generator for test_FACGT_S_S_S(): loads q29/q11/q9 from blk[0..2],
   runs "facgt s29, s11, s9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FACGT_S_S_S \
  __attribute__((noinline)) static void test_FACGT_S_S_S ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FACGT_S_S_S before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "facgt s29, s11, s9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FACGT_S_S_S after", &blk[0], 4); \
     printf("\n"); \
  }
1563 
1564 //======== FACGE_D_D_D ========//
1565 
/* Generator for test_FACGE_D_D_D(): loads q29/q11/q9 from blk[0..2],
   runs "facge d29, d11, d9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FACGE_D_D_D \
  __attribute__((noinline)) static void test_FACGE_D_D_D ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FACGE_D_D_D before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "facge d29, d11, d9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FACGE_D_D_D after", &blk[0], 4); \
     printf("\n"); \
  }
1584 
1585 //======== FACGE_S_S_S ========//
1586 
/* Generator for test_FACGE_S_S_S(): loads q29/q11/q9 from blk[0..2],
   runs "facge s29, s11, s9", and stores all three registers back so the
   result in register 29 shows up in blk[0].  NZCV is loaded from and
   written back to the low 64 bits of blk[3] around the instruction.
   showBlock is called on all four vectors before and after. */
#define GEN_test_FACGE_S_S_S \
  __attribute__((noinline)) static void test_FACGE_S_S_S ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FACGE_S_S_S before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "facge s29, s11, s9; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FACGE_S_S_S after", &blk[0], 4); \
     printf("\n"); \
  }
1605 
1606 //======== FCMEQ_D_D_Z ========//
1607 
/* Generator for test_FCMEQ_D_D_Z(): loads q29/q11/q9 from blk[0..2],
   runs the compare-against-zero form "fcmeq d29, d11, #0", and stores
   all three registers back so the result in register 29 shows up in
   blk[0].  NZCV is loaded from and written back to the low 64 bits of
   blk[3].  showBlock is called on all four vectors before and after. */
#define GEN_test_FCMEQ_D_D_Z \
  __attribute__((noinline)) static void test_FCMEQ_D_D_Z ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMEQ_D_D_Z before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmeq d29, d11, #0; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMEQ_D_D_Z after", &blk[0], 4); \
     printf("\n"); \
  }
1626 
1627 //======== FCMEQ_S_S_Z ========//
1628 
/* Generator for test_FCMEQ_S_S_Z(): loads q29/q11/q9 from blk[0..2],
   runs the compare-against-zero form "fcmeq s29, s11, #0", and stores
   all three registers back so the result in register 29 shows up in
   blk[0].  NZCV is loaded from and written back to the low 64 bits of
   blk[3].  showBlock is called on all four vectors before and after. */
#define GEN_test_FCMEQ_S_S_Z \
  __attribute__((noinline)) static void test_FCMEQ_S_S_Z ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMEQ_S_S_Z before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmeq s29, s11, #0; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMEQ_S_S_Z after", &blk[0], 4); \
     printf("\n"); \
  }
1647 
1648 //======== FCMGE_D_D_Z ========//
1649 
/* Generator for test_FCMGE_D_D_Z(): loads q29/q11/q9 from blk[0..2],
   runs the compare-against-zero form "fcmge d29, d11, #0", and stores
   all three registers back so the result in register 29 shows up in
   blk[0].  NZCV is loaded from and written back to the low 64 bits of
   blk[3].  showBlock is called on all four vectors before and after. */
#define GEN_test_FCMGE_D_D_Z \
  __attribute__((noinline)) static void test_FCMGE_D_D_Z ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMGE_D_D_Z before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmge d29, d11, #0; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMGE_D_D_Z after", &blk[0], 4); \
     printf("\n"); \
  }
1668 
1669 //======== FCMGE_S_S_Z ========//
1670 
/* Generator for test_FCMGE_S_S_Z(): loads q29/q11/q9 from blk[0..2],
   runs the compare-against-zero form "fcmge s29, s11, #0", and stores
   all three registers back so the result in register 29 shows up in
   blk[0].  NZCV is loaded from and written back to the low 64 bits of
   blk[3].  showBlock is called on all four vectors before and after. */
#define GEN_test_FCMGE_S_S_Z \
  __attribute__((noinline)) static void test_FCMGE_S_S_Z ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Floats(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMGE_S_S_Z before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmge s29, s11, #0; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMGE_S_S_Z after", &blk[0], 4); \
     printf("\n"); \
  }
1689 
1690 //======== FCMGT_D_D_Z ========//
1691 
/* Generator for test_FCMGT_D_D_Z(): loads q29/q11/q9 from blk[0..2],
   runs the compare-against-zero form "fcmgt d29, d11, #0", and stores
   all three registers back so the result in register 29 shows up in
   blk[0].  NZCV is loaded from and written back to the low 64 bits of
   blk[3].  showBlock is called on all four vectors before and after. */
#define GEN_test_FCMGT_D_D_Z \
  __attribute__((noinline)) static void test_FCMGT_D_D_Z ( void ) \
  { \
     V128 blk[4]; \
     randBlock_Doubles(&blk[0], 3); \
     blk[3].u64[0] = dup4x16(0x5); \
     blk[3].u64[1] = dup4x16(0xA); \
     showBlock("FCMGT_D_D_Z before", &blk[0], 4); \
     __asm__ __volatile__( \
        "ldr x9,  [%0, 48];  " \
        "msr nzcv, x9; " \
        "ldr q29, [%0, #0];  " \
        "ldr q11, [%0, #16];  " \
        "ldr q9, [%0, #32]; " \
        "fcmgt d29, d11, #0; " \
        "mrs x9, nzcv; " \
        "str x9, [%0, 48]; " \
        "str q29, [%0, #0];  " \
        "str q11, [%0, #16];  " \
        "str q9, [%0, #32]; " \
        : /* no outputs */ \
        : "r"(&blk[0]) \
        : "x9", "cc", "memory", "v9", "v11", "v29" \
     ); \
     showBlock("FCMGT_D_D_Z after", &blk[0], 4); \
     printf("\n"); \
  }
1710 
1711 //======== FCMGT_S_S_Z ========//
1712 
/* GEN_test_FCMGT_S_S_Z expands to test_FCMGT_S_S_Z(), which exercises the
   scalar compare-against-zero form "fcmgt s29, s11, #0".
   - block[0..2] are filled by randBlock_Floats(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   q9 is loaded and stored although the instruction does not name it. */
1713 #define GEN_test_FCMGT_S_S_Z \
1714   __attribute__((noinline)) static void test_FCMGT_S_S_Z ( void ) \
1715   { \
1716      V128 block[4]; \
1717      randBlock_Floats(&block[0], 3); \
1718      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1719      showBlock("FCMGT_S_S_Z before", &block[0], 4); \
1720      __asm__ __volatile__( \
1721         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1722         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1723         "fcmgt s29, s11, #0; " \
1724         "mrs x9, nzcv; str x9, [%0, 48]; " \
1725         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1726         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1727      ); \
1728      showBlock("FCMGT_S_S_Z after", &block[0], 4); \
1729      printf("\n"); \
1730   }
1731 
1732 //======== FCMLE_D_D_Z ========//
1733 
/* GEN_test_FCMLE_D_D_Z expands to test_FCMLE_D_D_Z(), which exercises the
   scalar compare-against-zero form "fcmle d29, d11, #0".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   q9 is loaded and stored although the instruction does not name it. */
1734 #define GEN_test_FCMLE_D_D_Z \
1735   __attribute__((noinline)) static void test_FCMLE_D_D_Z ( void ) \
1736   { \
1737      V128 block[4]; \
1738      randBlock_Doubles(&block[0], 3); \
1739      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1740      showBlock("FCMLE_D_D_Z before", &block[0], 4); \
1741      __asm__ __volatile__( \
1742         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1743         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1744         "fcmle d29, d11, #0; " \
1745         "mrs x9, nzcv; str x9, [%0, 48]; " \
1746         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1747         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1748      ); \
1749      showBlock("FCMLE_D_D_Z after", &block[0], 4); \
1750      printf("\n"); \
1751   }
1752 
1753 //======== FCMLE_S_S_Z ========//
1754 
/* GEN_test_FCMLE_S_S_Z expands to test_FCMLE_S_S_Z(), which exercises the
   scalar compare-against-zero form "fcmle s29, s11, #0".
   - block[0..2] are filled by randBlock_Floats(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   q9 is loaded and stored although the instruction does not name it. */
1755 #define GEN_test_FCMLE_S_S_Z \
1756   __attribute__((noinline)) static void test_FCMLE_S_S_Z ( void ) \
1757   { \
1758      V128 block[4]; \
1759      randBlock_Floats(&block[0], 3); \
1760      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1761      showBlock("FCMLE_S_S_Z before", &block[0], 4); \
1762      __asm__ __volatile__( \
1763         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1764         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1765         "fcmle s29, s11, #0; " \
1766         "mrs x9, nzcv; str x9, [%0, 48]; " \
1767         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1768         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1769      ); \
1770      showBlock("FCMLE_S_S_Z after", &block[0], 4); \
1771      printf("\n"); \
1772   }
1773 
1774 //======== FCMLT_D_D_Z ========//
1775 
/* GEN_test_FCMLT_D_D_Z expands to test_FCMLT_D_D_Z(), which exercises the
   scalar compare-against-zero form "fcmlt d29, d11, #0".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   q9 is loaded and stored although the instruction does not name it. */
1776 #define GEN_test_FCMLT_D_D_Z \
1777   __attribute__((noinline)) static void test_FCMLT_D_D_Z ( void ) \
1778   { \
1779      V128 block[4]; \
1780      randBlock_Doubles(&block[0], 3); \
1781      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1782      showBlock("FCMLT_D_D_Z before", &block[0], 4); \
1783      __asm__ __volatile__( \
1784         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1785         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1786         "fcmlt d29, d11, #0; " \
1787         "mrs x9, nzcv; str x9, [%0, 48]; " \
1788         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1789         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1790      ); \
1791      showBlock("FCMLT_D_D_Z after", &block[0], 4); \
1792      printf("\n"); \
1793   }
1794 
1795 //======== FCMLT_S_S_Z ========//
1796 
/* GEN_test_FCMLT_S_S_Z expands to test_FCMLT_S_S_Z(), which exercises the
   scalar compare-against-zero form "fcmlt s29, s11, #0".
   - block[0..2] are filled by randBlock_Floats(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   q9 is loaded and stored although the instruction does not name it. */
1797 #define GEN_test_FCMLT_S_S_Z \
1798   __attribute__((noinline)) static void test_FCMLT_S_S_Z ( void ) \
1799   { \
1800      V128 block[4]; \
1801      randBlock_Floats(&block[0], 3); \
1802      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1803      showBlock("FCMLT_S_S_Z before", &block[0], 4); \
1804      __asm__ __volatile__( \
1805         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1806         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1807         "fcmlt s29, s11, #0; " \
1808         "mrs x9, nzcv; str x9, [%0, 48]; " \
1809         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1810         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1811      ); \
1812      showBlock("FCMLT_S_S_Z after", &block[0], 4); \
1813      printf("\n"); \
1814   }
1815 
1816 //======== FCMP_D_D ========//
1817 
/* GEN_test_FCMP_D_D expands to test_FCMP_D_D(), which exercises the
   register-register compare "fcmp d29, d11".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   fcmp delivers its result in NZCV, so the flag word written back to
   block[3] is the value of interest; q9 is loaded and stored although
   the instruction does not name it. */
1818 #define GEN_test_FCMP_D_D \
1819   __attribute__((noinline)) static void test_FCMP_D_D ( void ) \
1820   { \
1821      V128 block[4]; \
1822      randBlock_Doubles(&block[0], 3); \
1823      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1824      showBlock("FCMP_D_D before", &block[0], 4); \
1825      __asm__ __volatile__( \
1826         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1827         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1828         "fcmp d29, d11; " \
1829         "mrs x9, nzcv; str x9, [%0, 48]; " \
1830         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1831         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1832      ); \
1833      showBlock("FCMP_D_D after", &block[0], 4); \
1834      printf("\n"); \
1835   }
1836 
1837 //======== FCMP_S_S ========//
1838 
/* GEN_test_FCMP_S_S expands to test_FCMP_S_S(), which exercises the
   register-register compare "fcmp s29, s11".
   - block[0..2] are filled by randBlock_Floats(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   fcmp delivers its result in NZCV, so the flag word written back to
   block[3] is the value of interest; q9 is loaded and stored although
   the instruction does not name it. */
1839 #define GEN_test_FCMP_S_S \
1840   __attribute__((noinline)) static void test_FCMP_S_S ( void ) \
1841   { \
1842      V128 block[4]; \
1843      randBlock_Floats(&block[0], 3); \
1844      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1845      showBlock("FCMP_S_S before", &block[0], 4); \
1846      __asm__ __volatile__( \
1847         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1848         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1849         "fcmp s29, s11; " \
1850         "mrs x9, nzcv; str x9, [%0, 48]; " \
1851         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1852         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1853      ); \
1854      showBlock("FCMP_S_S after", &block[0], 4); \
1855      printf("\n"); \
1856   }
1857 
1858 //======== FCMPE_D_D ========//
1859 
/* GEN_test_FCMPE_D_D expands to test_FCMPE_D_D(), which exercises the
   register-register compare "fcmpe d29, d11" (the signalling variant of
   fcmp in the Arm A64 instruction set).
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   The comparison result is delivered in NZCV, so the flag word written
   back to block[3] is the value of interest; q9 is loaded and stored
   although the instruction does not name it. */
1860 #define GEN_test_FCMPE_D_D \
1861   __attribute__((noinline)) static void test_FCMPE_D_D ( void ) \
1862   { \
1863      V128 block[4]; \
1864      randBlock_Doubles(&block[0], 3); \
1865      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1866      showBlock("FCMPE_D_D before", &block[0], 4); \
1867      __asm__ __volatile__( \
1868         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1869         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1870         "fcmpe d29, d11; " \
1871         "mrs x9, nzcv; str x9, [%0, 48]; " \
1872         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1873         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1874      ); \
1875      showBlock("FCMPE_D_D after", &block[0], 4); \
1876      printf("\n"); \
1877   }
1878 
1879 //======== FCMPE_S_S ========//
1880 
/* GEN_test_FCMPE_S_S expands to test_FCMPE_S_S(), which exercises the
   register-register compare "fcmpe s29, s11" (the signalling variant of
   fcmp in the Arm A64 instruction set).
   - block[0..2] are filled by randBlock_Floats(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   The comparison result is delivered in NZCV, so the flag word written
   back to block[3] is the value of interest; q9 is loaded and stored
   although the instruction does not name it. */
1881 #define GEN_test_FCMPE_S_S \
1882   __attribute__((noinline)) static void test_FCMPE_S_S ( void ) \
1883   { \
1884      V128 block[4]; \
1885      randBlock_Floats(&block[0], 3); \
1886      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1887      showBlock("FCMPE_S_S before", &block[0], 4); \
1888      __asm__ __volatile__( \
1889         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1890         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1891         "fcmpe s29, s11; " \
1892         "mrs x9, nzcv; str x9, [%0, 48]; " \
1893         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1894         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1895      ); \
1896      showBlock("FCMPE_S_S after", &block[0], 4); \
1897      printf("\n"); \
1898   }
1899 
1900 //======== FCMP_D_Z ========//
1901 
/* GEN_test_FCMP_D_Z expands to test_FCMP_D_Z(), which exercises the
   compare-with-zero form "fcmp d29, #0".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   fcmp delivers its result in NZCV, so the flag word written back to
   block[3] is the value of interest; q11 and q9 are loaded and stored
   although the instruction does not name them. */
1902 #define GEN_test_FCMP_D_Z \
1903   __attribute__((noinline)) static void test_FCMP_D_Z ( void ) \
1904   { \
1905      V128 block[4]; \
1906      randBlock_Doubles(&block[0], 3); \
1907      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1908      showBlock("FCMP_D_Z before", &block[0], 4); \
1909      __asm__ __volatile__( \
1910         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1911         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1912         "fcmp d29, #0; " \
1913         "mrs x9, nzcv; str x9, [%0, 48]; " \
1914         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1915         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1916      ); \
1917      showBlock("FCMP_D_Z after", &block[0], 4); \
1918      printf("\n"); \
1919   }
1920 
1921 //======== FCMP_S_Z ========//
1922 
/* GEN_test_FCMP_S_Z expands to test_FCMP_S_Z(), which exercises the
   compare-with-zero form "fcmp s29, #0".
   - block[0..2] are filled by randBlock_Floats(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   fcmp delivers its result in NZCV, so the flag word written back to
   block[3] is the value of interest; q11 and q9 are loaded and stored
   although the instruction does not name them. */
1923 #define GEN_test_FCMP_S_Z \
1924   __attribute__((noinline)) static void test_FCMP_S_Z ( void ) \
1925   { \
1926      V128 block[4]; \
1927      randBlock_Floats(&block[0], 3); \
1928      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1929      showBlock("FCMP_S_Z before", &block[0], 4); \
1930      __asm__ __volatile__( \
1931         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1932         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1933         "fcmp s29, #0; " \
1934         "mrs x9, nzcv; str x9, [%0, 48]; " \
1935         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1936         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1937      ); \
1938      showBlock("FCMP_S_Z after", &block[0], 4); \
1939      printf("\n"); \
1940   }
1941 
1942 //======== FCMPE_D_Z ========//
1943 
/* GEN_test_FCMPE_D_Z expands to test_FCMPE_D_Z(), which exercises the
   compare-with-zero form "fcmpe d29, #0" (the signalling variant of fcmp
   in the Arm A64 instruction set).
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   The comparison result is delivered in NZCV, so the flag word written
   back to block[3] is the value of interest; q11 and q9 are loaded and
   stored although the instruction does not name them. */
1944 #define GEN_test_FCMPE_D_Z \
1945   __attribute__((noinline)) static void test_FCMPE_D_Z ( void ) \
1946   { \
1947      V128 block[4]; \
1948      randBlock_Doubles(&block[0], 3); \
1949      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1950      showBlock("FCMPE_D_Z before", &block[0], 4); \
1951      __asm__ __volatile__( \
1952         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1953         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1954         "fcmpe d29, #0; " \
1955         "mrs x9, nzcv; str x9, [%0, 48]; " \
1956         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1957         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1958      ); \
1959      showBlock("FCMPE_D_Z after", &block[0], 4); \
1960      printf("\n"); \
1961   }
1962 
1963 //======== FCMPE_S_Z ========//
1964 
/* GEN_test_FCMPE_S_Z expands to test_FCMPE_S_Z(), which exercises the
   compare-with-zero form "fcmpe s29, #0" (the signalling variant of fcmp
   in the Arm A64 instruction set).
   - block[0..2] are filled by randBlock_Floats(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, loads q29/q11/q9 from block[0]/block[1]/block[2], runs the
     instruction, then stores NZCV and the three registers back into the
     same slots.
   - showBlock() is called on all four entries before and after the asm.
   The comparison result is delivered in NZCV, so the flag word written
   back to block[3] is the value of interest; q11 and q9 are loaded and
   stored although the instruction does not name them. */
1965 #define GEN_test_FCMPE_S_Z \
1966   __attribute__((noinline)) static void test_FCMPE_S_Z ( void ) \
1967   { \
1968      V128 block[4]; \
1969      randBlock_Floats(&block[0], 3); \
1970      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1971      showBlock("FCMPE_S_Z before", &block[0], 4); \
1972      __asm__ __volatile__( \
1973         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1974         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1975         "fcmpe s29, #0; " \
1976         "mrs x9, nzcv; str x9, [%0, 48]; " \
1977         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1978         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
1979      ); \
1980      showBlock("FCMPE_S_Z after", &block[0], 4); \
1981      printf("\n"); \
1982   }
1983 
1984 //======== FCSEL_D_D_D_EQ ========//
1985 
/* GEN_test_FCSEL_D_D_D_EQ expands to test_FCSEL_D_D_D_EQ(), which
   exercises the conditional select "fcsel d29, d11, d9, eq".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, so the condition tested by fcsel comes from that seeded flag
     value; it then loads q29/q11/q9 from block[0]/block[1]/block[2],
     runs the instruction, and stores NZCV and the three registers back
     into the same slots.
   - showBlock() is called on all four entries before and after the asm.
   All three loaded registers take part: d29 is the destination, d11 and
   d9 are the two candidates selected between. */
1986 #define GEN_test_FCSEL_D_D_D_EQ \
1987   __attribute__((noinline)) static void test_FCSEL_D_D_D_EQ ( void ) \
1988   { \
1989      V128 block[4]; \
1990      randBlock_Doubles(&block[0], 3); \
1991      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
1992      showBlock("FCSEL_D_D_D_EQ before", &block[0], 4); \
1993      __asm__ __volatile__( \
1994         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
1995         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
1996         "fcsel d29, d11, d9, eq; " \
1997         "mrs x9, nzcv; str x9, [%0, 48]; " \
1998         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
1999         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
2000      ); \
2001      showBlock("FCSEL_D_D_D_EQ after", &block[0], 4); \
2002      printf("\n"); \
2003   }
2004 
2005 //======== FCSEL_D_D_D_NE ========//
2006 
/* GEN_test_FCSEL_D_D_D_NE expands to test_FCSEL_D_D_D_NE(), which
   exercises the conditional select "fcsel d29, d11, d9, ne".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, so the condition tested by fcsel comes from that seeded flag
     value; it then loads q29/q11/q9 from block[0]/block[1]/block[2],
     runs the instruction, and stores NZCV and the three registers back
     into the same slots.
   - showBlock() is called on all four entries before and after the asm.
   All three loaded registers take part: d29 is the destination, d11 and
   d9 are the two candidates selected between. */
2007 #define GEN_test_FCSEL_D_D_D_NE \
2008   __attribute__((noinline)) static void test_FCSEL_D_D_D_NE ( void ) \
2009   { \
2010      V128 block[4]; \
2011      randBlock_Doubles(&block[0], 3); \
2012      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
2013      showBlock("FCSEL_D_D_D_NE before", &block[0], 4); \
2014      __asm__ __volatile__( \
2015         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
2016         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
2017         "fcsel d29, d11, d9, ne; " \
2018         "mrs x9, nzcv; str x9, [%0, 48]; " \
2019         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
2020         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
2021      ); \
2022      showBlock("FCSEL_D_D_D_NE after", &block[0], 4); \
2023      printf("\n"); \
2024   }
2025 
2026 //======== FCSEL_S_S_S_EQ ========//
2027 
/* GEN_test_FCSEL_S_S_S_EQ expands to test_FCSEL_S_S_S_EQ(), which
   exercises the conditional select "fcsel s29, s11, s9, eq".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, so the condition tested by fcsel comes from that seeded flag
     value; it then loads q29/q11/q9 from block[0]/block[1]/block[2],
     runs the instruction, and stores NZCV and the three registers back
     into the same slots.
   - showBlock() is called on all four entries before and after the asm.
   NOTE(review): this single-precision test fills the block with
   randBlock_Doubles(), whereas the other S-lane tests in this file
   (e.g. FCMP_S_S) use randBlock_Floats().  fcsel only selects between
   two registers, so this looks harmless, but switching helpers would
   presumably change the generated data and any recorded expected
   output -- confirm before changing. */
2028 #define GEN_test_FCSEL_S_S_S_EQ \
2029   __attribute__((noinline)) static void test_FCSEL_S_S_S_EQ ( void ) \
2030   { \
2031      V128 block[4]; \
2032      randBlock_Doubles(&block[0], 3); \
2033      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
2034      showBlock("FCSEL_S_S_S_EQ before", &block[0], 4); \
2035      __asm__ __volatile__( \
2036         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
2037         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
2038         "fcsel s29, s11, s9, eq; " \
2039         "mrs x9, nzcv; str x9, [%0, 48]; " \
2040         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
2041         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
2042      ); \
2043      showBlock("FCSEL_S_S_S_EQ after", &block[0], 4); \
2044      printf("\n"); \
2045   }
2046 
2047 //======== FCSEL_S_S_S_NE ========//
2048 
/* GEN_test_FCSEL_S_S_S_NE expands to test_FCSEL_S_S_S_NE(), which
   exercises the conditional select "fcsel s29, s11, s9, ne".
   - block[0..2] are filled by randBlock_Doubles(); block[3] receives the
     dup4x16(0x5) / dup4x16(0xA) pattern.
   - The asm moves the 8 bytes at offset 48 (the start of block[3]) into
     NZCV, so the condition tested by fcsel comes from that seeded flag
     value; it then loads q29/q11/q9 from block[0]/block[1]/block[2],
     runs the instruction, and stores NZCV and the three registers back
     into the same slots.
   - showBlock() is called on all four entries before and after the asm.
   NOTE(review): this single-precision test fills the block with
   randBlock_Doubles(), whereas the other S-lane tests in this file
   (e.g. FCMP_S_S) use randBlock_Floats().  fcsel only selects between
   two registers, so this looks harmless, but switching helpers would
   presumably change the generated data and any recorded expected
   output -- confirm before changing. */
2049 #define GEN_test_FCSEL_S_S_S_NE \
2050   __attribute__((noinline)) static void test_FCSEL_S_S_S_NE ( void ) \
2051   { \
2052      V128 block[4]; \
2053      randBlock_Doubles(&block[0], 3); \
2054      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
2055      showBlock("FCSEL_S_S_S_NE before", &block[0], 4); \
2056      __asm__ __volatile__( \
2057         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
2058         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
2059         "fcsel s29, s11, s9, ne; " \
2060         "mrs x9, nzcv; str x9, [%0, 48]; " \
2061         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
2062         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
2063      ); \
2064      showBlock("FCSEL_S_S_S_NE after", &block[0], 4); \
2065      printf("\n"); \
2066   }
2067 
2068 
2069 /* ---------------------------------------------------------------- */
2070 /* -- Tests, in the same order that they appear in main()        -- */
2071 /* ---------------------------------------------------------------- */
2072 
2073 // ======================== FP ========================
2074 
/* Unary FP tests instantiated through GEN_TWOVEC_TEST (defined elsewhere
   in this file).  Each line passes a test name, the exact instruction
   text, and two numbers that match the vector register numbers named in
   that text (22 and 23).  Each mnemonic is covered in scalar d/s form
   and in vector 2d/4s/2s form. */
/* fabs */
2075 GEN_TWOVEC_TEST(fabs_d_d,   "fabs d22,    d23",    22, 23)
2076 GEN_TWOVEC_TEST(fabs_s_s,   "fabs s22,    s23",    22, 23)
2077 GEN_TWOVEC_TEST(fabs_2d_2d, "fabs v22.2d, v23.2d", 22, 23)
2078 GEN_TWOVEC_TEST(fabs_4s_4s, "fabs v22.4s, v23.4s", 22, 23)
2079 GEN_TWOVEC_TEST(fabs_2s_2s, "fabs v22.2s, v23.2s", 22, 23)
2080 
/* fneg */
2081 GEN_TWOVEC_TEST(fneg_d_d,   "fneg d22, d23",       22, 23)
2082 GEN_TWOVEC_TEST(fneg_s_s,   "fneg s22, s23",       22, 23)
2083 GEN_TWOVEC_TEST(fneg_2d_2d, "fneg v22.2d, v23.2d", 22, 23)
2084 GEN_TWOVEC_TEST(fneg_4s_4s, "fneg v22.4s, v23.4s", 22, 23)
2085 GEN_TWOVEC_TEST(fneg_2s_2s, "fneg v22.2s, v23.2s", 22, 23)
2086 
/* fsqrt */
2087 GEN_TWOVEC_TEST(fsqrt_d_d,   "fsqrt d22, d23",       22, 23)
2088 GEN_TWOVEC_TEST(fsqrt_s_s,   "fsqrt s22, s23",       22, 23)
2089 GEN_TWOVEC_TEST(fsqrt_2d_2d, "fsqrt v22.2d, v23.2d", 22, 23)
2090 GEN_TWOVEC_TEST(fsqrt_4s_4s, "fsqrt v22.4s, v23.4s", 22, 23)
2091 GEN_TWOVEC_TEST(fsqrt_2s_2s, "fsqrt v22.2s, v23.2s", 22, 23)
2092 
/* fadd / fsub, scalar forms: GEN_THREEVEC_TEST takes a test name, the
   instruction text, and three numbers matching the vector registers
   named in that text (2, 11, 29). */
2093 GEN_THREEVEC_TEST(fadd_d_d_d,  "fadd d2, d11, d29", 2, 11, 29)
2094 GEN_THREEVEC_TEST(fadd_s_s_s,  "fadd s2, s11, s29", 2, 11, 29)
2095 GEN_THREEVEC_TEST(fsub_d_d_d,  "fsub d2, d11, d29", 2, 11, 29)
2096 GEN_THREEVEC_TEST(fsub_s_s_s,  "fsub s2, s11, s29", 2, 11, 29)
2097 
/* fadd / fsub, vector forms: GEN_BINARY_TEST is given only the mnemonic
   and three lane-arrangement suffixes; presumably it builds the
   instruction text and register choice itself -- see the macro
   definition elsewhere in this file. */
2098 GEN_BINARY_TEST(fadd, 2d, 2d, 2d)
2099 GEN_BINARY_TEST(fadd, 4s, 4s, 4s)
2100 GEN_BINARY_TEST(fadd, 2s, 2s, 2s)
2101 GEN_BINARY_TEST(fsub, 2d, 2d, 2d)
2102 GEN_BINARY_TEST(fsub, 4s, 4s, 4s)
2103 GEN_BINARY_TEST(fsub, 2s, 2s, 2s)
2104 
/* fabd: scalar forms via GEN_THREEVEC_TEST, vector forms via
   GEN_BINARY_TEST. */
2105 GEN_THREEVEC_TEST(fabd_d_d_d,  "fabd d2, d11, d29", 2, 11, 29)
2106 GEN_THREEVEC_TEST(fabd_s_s_s,  "fabd s2, s11, s29", 2, 11, 29)
2107 GEN_BINARY_TEST(fabd, 2d, 2d, 2d)
2108 GEN_BINARY_TEST(fabd, 4s, 4s, 4s)
2109 GEN_BINARY_TEST(fabd, 2s, 2s, 2s)
2110 
/* faddp (pairwise add).  The first two lines are the scalar-destination
   forms that read one vector register (v23) and write d2/s2; the last
   three are the vector forms taking two source vectors (v23, v11) and
   writing v2.  The trailing numbers match the registers named in the
   instruction text. */
2111 GEN_TWOVEC_TEST(faddp_d_2d,     "faddp d2, v23.2d",    2, 23)
2112 GEN_TWOVEC_TEST(faddp_s_2s,     "faddp s2, v23.2s",    2, 23)
2113 GEN_THREEVEC_TEST(faddp_2d_2d_2d, "faddp v2.2d, v23.2d, v11.2d", 2, 23, 11)
2114 GEN_THREEVEC_TEST(faddp_4s_4s_4s, "faddp v2.4s, v23.4s, v11.4s", 2, 23, 11)
2115 GEN_THREEVEC_TEST(faddp_2s_2s_2s, "faddp v2.2s, v23.2s, v11.2s", 2, 23, 11)
2116 
/* fccmp / fccmpe test functions.  Each GEN_test_* name is expected to be
   an object-like macro defined earlier in this file whose expansion is
   a complete test_<NAME>() function definition, so naming it here at
   file scope emits that function.  The names encode operand size (D/S),
   the immediate flag value (0xF or 0x0) and the condition (EQ/NE). */
2117 GEN_test_FCCMP_D_D_0xF_EQ
2118 GEN_test_FCCMP_D_D_0xF_NE
2119 GEN_test_FCCMP_D_D_0x0_EQ
2120 GEN_test_FCCMP_D_D_0x0_NE
2121 GEN_test_FCCMP_S_S_0xF_EQ
2122 GEN_test_FCCMP_S_S_0xF_NE
2123 GEN_test_FCCMP_S_S_0x0_EQ
2124 GEN_test_FCCMP_S_S_0x0_NE
2125 GEN_test_FCCMPE_D_D_0xF_EQ
2126 GEN_test_FCCMPE_D_D_0xF_NE
2127 GEN_test_FCCMPE_D_D_0x0_EQ
2128 GEN_test_FCCMPE_D_D_0x0_NE
2129 GEN_test_FCCMPE_S_S_0xF_EQ
2130 GEN_test_FCCMPE_S_S_0xF_NE
2131 GEN_test_FCCMPE_S_S_0x0_EQ
2132 GEN_test_FCCMPE_S_S_0x0_NE
2133 
/* Register-register FP compares, scalar forms.  Each GEN_test_* name is
   expected to be a macro defined earlier in this file that expands to
   the matching test_<NAME>() function. */
2134 GEN_test_FCMEQ_D_D_D
2135 GEN_test_FCMEQ_S_S_S
2136 GEN_test_FCMGE_D_D_D
2137 GEN_test_FCMGE_S_S_S
2138 GEN_test_FCMGT_D_D_D
2139 GEN_test_FCMGT_S_S_S
2140 GEN_test_FACGT_D_D_D
2141 GEN_test_FACGT_S_S_S
2142 GEN_test_FACGE_D_D_D
2143 GEN_test_FACGE_S_S_S
2144 
/* Register-register FP compares, vector forms (2d/4s/2s), instantiated
   through GEN_THREEVEC_TEST with destination v2 and sources v23, v11. */
2145 GEN_THREEVEC_TEST(fcmeq_2d_2d_2d, "fcmeq v2.2d, v23.2d, v11.2d", 2, 23, 11)
2146 GEN_THREEVEC_TEST(fcmeq_4s_4s_4s, "fcmeq v2.4s, v23.4s, v11.4s", 2, 23, 11)
2147 GEN_THREEVEC_TEST(fcmeq_2s_2s_2s, "fcmeq v2.2s, v23.2s, v11.2s", 2, 23, 11)
2148 GEN_THREEVEC_TEST(fcmge_2d_2d_2d, "fcmge v2.2d, v23.2d, v11.2d", 2, 23, 11)
2149 GEN_THREEVEC_TEST(fcmge_4s_4s_4s, "fcmge v2.4s, v23.4s, v11.4s", 2, 23, 11)
2150 GEN_THREEVEC_TEST(fcmge_2s_2s_2s, "fcmge v2.2s, v23.2s, v11.2s", 2, 23, 11)
2151 GEN_THREEVEC_TEST(fcmgt_2d_2d_2d, "fcmgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
2152 GEN_THREEVEC_TEST(fcmgt_4s_4s_4s, "fcmgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
2153 GEN_THREEVEC_TEST(fcmgt_2s_2s_2s, "fcmgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
2154 GEN_THREEVEC_TEST(facge_2d_2d_2d, "facge v2.2d, v23.2d, v11.2d", 2, 23, 11)
2155 GEN_THREEVEC_TEST(facge_4s_4s_4s, "facge v2.4s, v23.4s, v11.4s", 2, 23, 11)
2156 GEN_THREEVEC_TEST(facge_2s_2s_2s, "facge v2.2s, v23.2s, v11.2s", 2, 23, 11)
2157 GEN_THREEVEC_TEST(facgt_2d_2d_2d, "facgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
2158 GEN_THREEVEC_TEST(facgt_4s_4s_4s, "facgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
2159 GEN_THREEVEC_TEST(facgt_2s_2s_2s, "facgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
2160 
/* FP compare-against-zero, scalar forms.  Each GEN_test_*_Z name is a
   macro defined earlier in this file; expanding it here at file scope
   emits the corresponding test_<NAME>() function. */
2161 GEN_test_FCMEQ_D_D_Z
2162 GEN_test_FCMEQ_S_S_Z
2163 GEN_test_FCMGE_D_D_Z
2164 GEN_test_FCMGE_S_S_Z
2165 GEN_test_FCMGT_D_D_Z
2166 GEN_test_FCMGT_S_S_Z
2167 GEN_test_FCMLE_D_D_Z
2168 GEN_test_FCMLE_S_S_Z
2169 GEN_test_FCMLT_D_D_Z
2170 GEN_test_FCMLT_S_S_Z
2171 
/* FP compare-against-zero, vector forms (2d/4s/2s), instantiated through
   GEN_TWOVEC_TEST with destination v2 and source v23. */
2172 GEN_TWOVEC_TEST(fcmeq_z_2d_2d, "fcmeq v2.2d, v23.2d, #0", 2, 23)
2173 GEN_TWOVEC_TEST(fcmeq_z_4s_4s, "fcmeq v2.4s, v23.4s, #0", 2, 23)
2174 GEN_TWOVEC_TEST(fcmeq_z_2s_2s, "fcmeq v2.2s, v23.2s, #0", 2, 23)
2175 GEN_TWOVEC_TEST(fcmge_z_2d_2d, "fcmge v2.2d, v23.2d, #0", 2, 23)
2176 GEN_TWOVEC_TEST(fcmge_z_4s_4s, "fcmge v2.4s, v23.4s, #0", 2, 23)
2177 GEN_TWOVEC_TEST(fcmge_z_2s_2s, "fcmge v2.2s, v23.2s, #0", 2, 23)
2178 GEN_TWOVEC_TEST(fcmgt_z_2d_2d, "fcmgt v2.2d, v23.2d, #0", 2, 23)
2179 GEN_TWOVEC_TEST(fcmgt_z_4s_4s, "fcmgt v2.4s, v23.4s, #0", 2, 23)
2180 GEN_TWOVEC_TEST(fcmgt_z_2s_2s, "fcmgt v2.2s, v23.2s, #0", 2, 23)
2181 GEN_TWOVEC_TEST(fcmle_z_2d_2d, "fcmle v2.2d, v23.2d, #0", 2, 23)
2182 GEN_TWOVEC_TEST(fcmle_z_4s_4s, "fcmle v2.4s, v23.4s, #0", 2, 23)
2183 GEN_TWOVEC_TEST(fcmle_z_2s_2s, "fcmle v2.2s, v23.2s, #0", 2, 23)
2184 GEN_TWOVEC_TEST(fcmlt_z_2d_2d, "fcmlt v2.2d, v23.2d, #0", 2, 23)
2185 GEN_TWOVEC_TEST(fcmlt_z_4s_4s, "fcmlt v2.4s, v23.4s, #0", 2, 23)
2186 GEN_TWOVEC_TEST(fcmlt_z_2s_2s, "fcmlt v2.2s, v23.2s, #0", 2, 23)
2187 
/* fcmp / fcmpe test functions: compare-with-zero forms first, then the
   register-register forms.  Each GEN_test_* name is a macro defined
   earlier in this file; expanding it here emits test_<NAME>(). */
2188 GEN_test_FCMP_D_Z
2189 GEN_test_FCMP_S_Z
2190 GEN_test_FCMPE_D_Z
2191 GEN_test_FCMPE_S_Z
2192 GEN_test_FCMP_D_D
2193 GEN_test_FCMP_S_S
2194 GEN_test_FCMPE_D_D
2195 GEN_test_FCMPE_S_S
2196 
/* fcsel test functions, for the EQ and NE conditions in D and S sizes. */
2197 GEN_test_FCSEL_D_D_D_EQ
2198 GEN_test_FCSEL_D_D_D_NE
2199 GEN_test_FCSEL_S_S_S_EQ
2200 GEN_test_FCSEL_S_S_S_NE
2201 
/* fdiv: scalar forms via GEN_THREEVEC_TEST (registers 2, 11, 29), vector
   forms via GEN_BINARY_TEST (mnemonic plus three lane arrangements). */
2202 GEN_THREEVEC_TEST(fdiv_d_d_d,  "fdiv d2, d11, d29", 2, 11, 29)
2203 GEN_THREEVEC_TEST(fdiv_s_s_s,  "fdiv s2, s11, s29", 2, 11, 29)
2204 GEN_BINARY_TEST(fdiv, 2d, 2d, 2d)
2205 GEN_BINARY_TEST(fdiv, 4s, 4s, 4s)
2206 GEN_BINARY_TEST(fdiv, 2s, 2s, 2s)
2207 
/* Scalar fused multiply-add family (fmadd, fnmadd, fmsub, fnmsub).
   These four-operand instructions go through GEN_FOURVEC_TEST, whose
   trailing numbers match the four registers named in the text
   (2, 11, 29, 3). */
2208 GEN_FOURVEC_TEST(fmadd_d_d_d_d,  "fmadd  d2, d11, d29, d3", 2, 11, 29, 3)
2209 GEN_FOURVEC_TEST(fmadd_s_s_s_s,  "fmadd  s2, s11, s29, s3", 2, 11, 29, 3)
2210 GEN_FOURVEC_TEST(fnmadd_d_d_d_d, "fnmadd d2, d11, d29, d3", 2, 11, 29, 3)
2211 GEN_FOURVEC_TEST(fnmadd_s_s_s_s, "fnmadd s2, s11, s29, s3", 2, 11, 29, 3)
2212 GEN_FOURVEC_TEST(fmsub_d_d_d_d,  "fmsub  d2, d11, d29, d3", 2, 11, 29, 3)
2213 GEN_FOURVEC_TEST(fmsub_s_s_s_s,  "fmsub  s2, s11, s29, s3", 2, 11, 29, 3)
2214 GEN_FOURVEC_TEST(fnmsub_d_d_d_d, "fnmsub d2, d11, d29, d3", 2, 11, 29, 3)
2215 GEN_FOURVEC_TEST(fnmsub_s_s_s_s, "fnmsub s2, s11, s29, s3", 2, 11, 29, 3)
2216 
/* fnmul, scalar forms only. */
2217 GEN_THREEVEC_TEST(fnmul_d_d_d, "fnmul d2, d11, d29", 2, 11, 29)
2218 GEN_THREEVEC_TEST(fnmul_s_s_s, "fnmul s2, s11, s29", 2, 11, 29)
2219 
/* fmax / fmin / fmaxnm / fminnm, scalar forms (d2/s2 from registers 11
   and 29). */
2220 GEN_THREEVEC_TEST(fmax_d_d_d,  "fmax d2, d11, d29", 2, 11, 29)
2221 GEN_THREEVEC_TEST(fmax_s_s_s,  "fmax s2, s11, s29", 2, 11, 29)
2222 GEN_THREEVEC_TEST(fmin_d_d_d,  "fmin d2, d11, d29", 2, 11, 29)
2223 GEN_THREEVEC_TEST(fmin_s_s_s,  "fmin s2, s11, s29", 2, 11, 29)
2224 GEN_THREEVEC_TEST(fmaxnm_d_d_d,  "fmaxnm d2, d11, d29", 2, 11, 29)
2225 GEN_THREEVEC_TEST(fmaxnm_s_s_s,  "fmaxnm s2, s11, s29", 2, 11, 29)
2226 GEN_THREEVEC_TEST(fminnm_d_d_d,  "fminnm d2, d11, d29", 2, 11, 29)
2227 GEN_THREEVEC_TEST(fminnm_s_s_s,  "fminnm s2, s11, s29", 2, 11, 29)
2228 
/* The same four mnemonics in vector 2d/4s/2s form (v2 from v23 and
   v11). */
2229 GEN_THREEVEC_TEST(fmax_2d_2d_2d, "fmax v2.2d, v23.2d, v11.2d", 2, 23, 11)
2230 GEN_THREEVEC_TEST(fmax_4s_4s_4s, "fmax v2.4s, v23.4s, v11.4s", 2, 23, 11)
2231 GEN_THREEVEC_TEST(fmax_2s_2s_2s, "fmax v2.2s, v23.2s, v11.2s", 2, 23, 11)
2232 GEN_THREEVEC_TEST(fmin_2d_2d_2d, "fmin v2.2d, v23.2d, v11.2d", 2, 23, 11)
2233 GEN_THREEVEC_TEST(fmin_4s_4s_4s, "fmin v2.4s, v23.4s, v11.4s", 2, 23, 11)
2234 GEN_THREEVEC_TEST(fmin_2s_2s_2s, "fmin v2.2s, v23.2s, v11.2s", 2, 23, 11)
2235 GEN_THREEVEC_TEST(fmaxnm_2d_2d_2d, "fmaxnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
2236 GEN_THREEVEC_TEST(fmaxnm_4s_4s_4s, "fmaxnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
2237 GEN_THREEVEC_TEST(fmaxnm_2s_2s_2s, "fmaxnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
2238 GEN_THREEVEC_TEST(fminnm_2d_2d_2d, "fminnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
2239 GEN_THREEVEC_TEST(fminnm_4s_4s_4s, "fminnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
2240 GEN_THREEVEC_TEST(fminnm_2s_2s_2s, "fminnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
2241 
/* fmaxnmp / fminnmp, scalar-destination pairwise forms (d2/s2 from one
   source vector v23). */
2242 GEN_TWOVEC_TEST(fmaxnmp_d_2d, "fmaxnmp d2, v23.2d", 2, 23)
2243 GEN_TWOVEC_TEST(fmaxnmp_s_2s, "fmaxnmp s2, v23.2s", 2, 23)
2244 GEN_TWOVEC_TEST(fminnmp_d_2d, "fminnmp d2, v23.2d", 2, 23)
2245 GEN_TWOVEC_TEST(fminnmp_s_2s, "fminnmp s2, v23.2s", 2, 23)
2246 
/* fmaxnmp / fminnmp, vector pairwise forms (v2 from v23 and v11). */
2247 GEN_THREEVEC_TEST(fmaxnmp_2d_2d_2d, "fmaxnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
2248 GEN_THREEVEC_TEST(fmaxnmp_4s_4s_4s, "fmaxnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
2249 GEN_THREEVEC_TEST(fmaxnmp_2s_2s_2s, "fmaxnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
2250 GEN_THREEVEC_TEST(fminnmp_2d_2d_2d, "fminnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
2251 GEN_THREEVEC_TEST(fminnmp_4s_4s_4s, "fminnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
2252 GEN_THREEVEC_TEST(fminnmp_2s_2s_2s, "fminnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
2253 
/* fmaxnmv / fminnmv: across-lanes reduction of a 4s vector into s2. */
2254 GEN_TWOVEC_TEST(fmaxnmv_s_4s, "fmaxnmv s2, v23.4s", 2, 23)
2255 GEN_TWOVEC_TEST(fminnmv_s_4s, "fminnmv s2, v23.4s", 2, 23)
2256 
/* fmaxp / fminp, scalar-destination pairwise forms. */
2257 GEN_TWOVEC_TEST(fmaxp_d_2d, "fmaxp d2, v23.2d", 2, 23)
2258 GEN_TWOVEC_TEST(fmaxp_s_2s, "fmaxp s2, v23.2s", 2, 23)
2259 GEN_TWOVEC_TEST(fminp_d_2d, "fminp d2, v23.2d", 2, 23)
2260 GEN_TWOVEC_TEST(fminp_s_2s, "fminp s2, v23.2s", 2, 23)
2261 
/* fmaxp / fminp, vector pairwise forms. */
2262 GEN_THREEVEC_TEST(fmaxp_2d_2d_2d, "fmaxp v2.2d, v23.2d, v11.2d", 2, 23, 11)
2263 GEN_THREEVEC_TEST(fmaxp_4s_4s_4s, "fmaxp v2.4s, v23.4s, v11.4s", 2, 23, 11)
2264 GEN_THREEVEC_TEST(fmaxp_2s_2s_2s, "fmaxp v2.2s, v23.2s, v11.2s", 2, 23, 11)
2265 GEN_THREEVEC_TEST(fminp_2d_2d_2d, "fminp v2.2d, v23.2d, v11.2d", 2, 23, 11)
2266 GEN_THREEVEC_TEST(fminp_4s_4s_4s, "fminp v2.4s, v23.4s, v11.4s", 2, 23, 11)
2267 GEN_THREEVEC_TEST(fminp_2s_2s_2s, "fminp v2.2s, v23.2s, v11.2s", 2, 23, 11)
2268 
/* fmaxv / fminv: across-lanes reduction of a 4s vector into s2. */
2269 GEN_TWOVEC_TEST(fmaxv_s_4s, "fmaxv s2, v23.4s", 2, 23)
2270 GEN_TWOVEC_TEST(fminv_s_4s, "fminv s2, v23.4s", 2, 23)
2271 
/* fmla / fmls, vector-by-vector forms (v2 accumulates from v23 and
   v11). */
2272 GEN_THREEVEC_TEST(fmla_2d_2d_2d, "fmla v2.2d, v23.2d, v11.2d", 2, 23, 11)
2273 GEN_THREEVEC_TEST(fmla_4s_4s_4s, "fmla v2.4s, v23.4s, v11.4s", 2, 23, 11)
2274 GEN_THREEVEC_TEST(fmla_2s_2s_2s, "fmla v2.2s, v23.2s, v11.2s", 2, 23, 11)
2275 GEN_THREEVEC_TEST(fmls_2d_2d_2d, "fmls v2.2d, v23.2d, v11.2d", 2, 23, 11)
2276 GEN_THREEVEC_TEST(fmls_4s_4s_4s, "fmls v2.4s, v23.4s, v11.4s", 2, 23, 11)
2277 GEN_THREEVEC_TEST(fmls_2s_2s_2s, "fmls v2.2s, v23.2s, v11.2s", 2, 23, 11)
2278 
/* fmla / fmls, scalar by-element forms.  The test-name suffix gives the
   element index used in the text: d[0] and d[1] for doubles, s[0] and
   s[3] for singles. */
2279 GEN_THREEVEC_TEST(fmla_d_d_d0, "fmla d2, d11, v29.d[0]", 2, 11, 29)
2280 GEN_THREEVEC_TEST(fmla_d_d_d1, "fmla d2, d11, v29.d[1]", 2, 11, 29)
2281 GEN_THREEVEC_TEST(fmla_s_s_s0, "fmla s2, s11, v29.s[0]", 2, 11, 29)
2282 GEN_THREEVEC_TEST(fmla_s_s_s3, "fmla s2, s11, v29.s[3]", 2, 11, 29)
2283 GEN_THREEVEC_TEST(fmls_d_d_d0, "fmls d2, d11, v29.d[0]", 2, 11, 29)
2284 GEN_THREEVEC_TEST(fmls_d_d_d1, "fmls d2, d11, v29.d[1]", 2, 11, 29)
2285 GEN_THREEVEC_TEST(fmls_s_s_s0, "fmls s2, s11, v29.s[0]", 2, 11, 29)
2286 GEN_THREEVEC_TEST(fmls_s_s_s3, "fmls s2, s11, v29.s[3]", 2, 11, 29)
2287 
/* fmla, vector by-element forms (same element indices as above). */
2288 GEN_THREEVEC_TEST(fmla_2d_2d_d0, "fmla v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
2289 GEN_THREEVEC_TEST(fmla_2d_2d_d1, "fmla v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
2290 GEN_THREEVEC_TEST(fmla_4s_4s_s0, "fmla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
2291 GEN_THREEVEC_TEST(fmla_4s_4s_s3, "fmla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
2292 GEN_THREEVEC_TEST(fmla_2s_2s_s0, "fmla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
2293 GEN_THREEVEC_TEST(fmla_2s_2s_s3, "fmla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
2294 
/* fmls, vector by-element forms. */
2295 GEN_THREEVEC_TEST(fmls_2d_2d_d0, "fmls v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
2296 GEN_THREEVEC_TEST(fmls_2d_2d_d1, "fmls v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
2297 GEN_THREEVEC_TEST(fmls_4s_4s_s0, "fmls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
2298 GEN_THREEVEC_TEST(fmls_4s_4s_s3, "fmls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
2299 GEN_THREEVEC_TEST(fmls_2s_2s_s0, "fmls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
2300 GEN_THREEVEC_TEST(fmls_2s_2s_s3, "fmls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
2301 
/* fmov, vector immediate forms (2d/4s/2s) with the immediates 0.125,
   -4.0 and 1.0.  Only v22 is named in the instruction text; the second
   register number (23) is passed to GEN_TWOVEC_TEST but not referenced
   by the instruction. */
2302 GEN_TWOVEC_TEST(fmov_2d_imm_01, "fmov v22.2d, #0.125", 22, 23)
2303 GEN_TWOVEC_TEST(fmov_2d_imm_02, "fmov v22.2d, #-4.0",  22, 23)
2304 GEN_TWOVEC_TEST(fmov_2d_imm_03, "fmov v22.2d, #1.0",   22, 23)
2305 GEN_TWOVEC_TEST(fmov_4s_imm_01, "fmov v22.4s, #0.125", 22, 23)
2306 GEN_TWOVEC_TEST(fmov_4s_imm_02, "fmov v22.4s, #-4.0",  22, 23)
2307 GEN_TWOVEC_TEST(fmov_4s_imm_03, "fmov v22.4s, #1.0",   22, 23)
2308 GEN_TWOVEC_TEST(fmov_2s_imm_01, "fmov v22.2s, #0.125", 22, 23)
2309 GEN_TWOVEC_TEST(fmov_2s_imm_02, "fmov v22.2s, #-4.0",  22, 23)
2310 GEN_TWOVEC_TEST(fmov_2s_imm_03, "fmov v22.2s, #1.0",   22, 23)
2311 
/* fmov, scalar register-to-register forms. */
2312 GEN_TWOVEC_TEST(fmov_d_d,  "fmov d22, d23",   22, 23)
2313 GEN_TWOVEC_TEST(fmov_s_s,  "fmov s22, s23",   22, 23)
2314 
/* fmov between integer and FP/SIMD registers, in both directions.  For
   GEN_ONEINT_ONEVEC_TEST the first number matches the integer register
   in the text (w15/x15) and the second the vector register (7). */
2315 GEN_ONEINT_ONEVEC_TEST(fmov_s_w,  "fmov s7,      w15", 15, 7)
2316 GEN_ONEINT_ONEVEC_TEST(fmov_d_x,  "fmov d7,      x15", 15, 7)
2317 GEN_ONEINT_ONEVEC_TEST(fmov_d1_x, "fmov v7.d[1], x15", 15, 7)
2318 GEN_ONEINT_ONEVEC_TEST(fmov_w_s,  "fmov w15,      s7", 15, 7)
2319 GEN_ONEINT_ONEVEC_TEST(fmov_x_d,  "fmov x15,      d7", 15, 7)
2320 GEN_ONEINT_ONEVEC_TEST(fmov_x_d1, "fmov x15, v7.d[1]", 15, 7)
2321 
/* fmov, scalar immediate forms.
   NOTE(review): fmov_s_imm_03 uses #-1.0 while every other *_imm_03
   test in this group of fmov tests uses #1.0.  This may be deliberate
   extra coverage or a typo; changing it would alter the test's output,
   so confirm before touching it. */
2322 /* overkill -- don't need two vecs, only one */
2323 GEN_TWOVEC_TEST(fmov_d_imm_01, "fmov d22, #0.125", 22, 23)
2324 GEN_TWOVEC_TEST(fmov_d_imm_02, "fmov d22, #-4.0",  22, 23)
2325 GEN_TWOVEC_TEST(fmov_d_imm_03, "fmov d22, #1.0",   22, 23)
2326 GEN_TWOVEC_TEST(fmov_s_imm_01, "fmov s22, #0.125", 22, 23)
2327 GEN_TWOVEC_TEST(fmov_s_imm_02, "fmov s22, #-4.0",  22, 23)
2328 GEN_TWOVEC_TEST(fmov_s_imm_03, "fmov s22, #-1.0",   22, 23)
2329 
/* fmul, scalar by-element forms (element d[0]/d[1] or s[0]/s[3] of
   v29). */
2330 GEN_THREEVEC_TEST(fmul_d_d_d0, "fmul d2, d11, v29.d[0]", 2, 11, 29)
2331 GEN_THREEVEC_TEST(fmul_d_d_d1, "fmul d2, d11, v29.d[1]", 2, 11, 29)
2332 GEN_THREEVEC_TEST(fmul_s_s_s0, "fmul s2, s11, v29.s[0]", 2, 11, 29)
2333 GEN_THREEVEC_TEST(fmul_s_s_s3, "fmul s2, s11, v29.s[3]", 2, 11, 29)
2334 
/* fmul, vector by-element forms. */
2335 GEN_THREEVEC_TEST(fmul_2d_2d_d0, "fmul v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
2336 GEN_THREEVEC_TEST(fmul_2d_2d_d1, "fmul v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
2337 GEN_THREEVEC_TEST(fmul_4s_4s_s0, "fmul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
2338 GEN_THREEVEC_TEST(fmul_4s_4s_s3, "fmul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
2339 GEN_THREEVEC_TEST(fmul_2s_2s_s0, "fmul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
2340 GEN_THREEVEC_TEST(fmul_2s_2s_s3, "fmul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
2341 
/* fmul, plain scalar and vector three-register forms. */
2342 GEN_THREEVEC_TEST(fmul_d_d_d,    "fmul d2, d11, d29", 2, 11, 29)
2343 GEN_THREEVEC_TEST(fmul_s_s_s,    "fmul s2, s11, s29", 2, 11, 29)
2344 GEN_THREEVEC_TEST(fmul_2d_2d_2d, "fmul v2.2d, v11.2d, v29.2d", 2, 11, 29)
2345 GEN_THREEVEC_TEST(fmul_4s_4s_4s, "fmul v2.4s, v11.4s, v29.4s", 2, 11, 29)
2346 GEN_THREEVEC_TEST(fmul_2s_2s_2s, "fmul v2.2s, v11.2s, v29.2s", 2, 11, 29)
2347 
/* fmulx, scalar and vector by-element forms. */
2348 GEN_THREEVEC_TEST(fmulx_d_d_d0, "fmulx d2, d11, v29.d[0]", 2, 11, 29)
2349 GEN_THREEVEC_TEST(fmulx_d_d_d1, "fmulx d2, d11, v29.d[1]", 2, 11, 29)
2350 GEN_THREEVEC_TEST(fmulx_s_s_s0, "fmulx s2, s11, v29.s[0]", 2, 11, 29)
2351 GEN_THREEVEC_TEST(fmulx_s_s_s3, "fmulx s2, s11, v29.s[3]", 2, 11, 29)
2352 GEN_THREEVEC_TEST(fmulx_2d_2d_d0, "fmulx v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
2353 GEN_THREEVEC_TEST(fmulx_2d_2d_d1, "fmulx v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
2354 GEN_THREEVEC_TEST(fmulx_4s_4s_s0, "fmulx v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
2355 GEN_THREEVEC_TEST(fmulx_4s_4s_s3, "fmulx v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
2356 GEN_THREEVEC_TEST(fmulx_2s_2s_s0, "fmulx v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
2357 GEN_THREEVEC_TEST(fmulx_2s_2s_s3, "fmulx v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
2358 
/* fmulx, plain scalar and vector three-register forms. */
2359 GEN_THREEVEC_TEST(fmulx_d_d_d,    "fmulx d2, d11, d29", 2, 11, 29)
2360 GEN_THREEVEC_TEST(fmulx_s_s_s,    "fmulx s2, s11, s29", 2, 11, 29)
2361 GEN_THREEVEC_TEST(fmulx_2d_2d_2d, "fmulx v2.2d, v11.2d, v29.2d", 2, 11, 29)
2362 GEN_THREEVEC_TEST(fmulx_4s_4s_4s, "fmulx v2.4s, v11.4s, v29.4s", 2, 11, 29)
2363 GEN_THREEVEC_TEST(fmulx_2s_2s_2s, "fmulx v2.2s, v11.2s, v29.2s", 2, 11, 29)
2364 
2365 GEN_TWOVEC_TEST(frecpe_d_d,   "frecpe d22, d23",       22, 23)
2366 GEN_TWOVEC_TEST(frecpe_s_s,   "frecpe s22, s23",       22, 23)
2367 GEN_TWOVEC_TEST(frecpe_2d_2d, "frecpe v22.2d, v23.2d", 22, 23)
2368 GEN_TWOVEC_TEST(frecpe_4s_4s, "frecpe v22.4s, v23.4s", 22, 23)
2369 GEN_TWOVEC_TEST(frecpe_2s_2s, "frecpe v22.2s, v23.2s", 22, 23)
2370 
/* FRECPS, three operands: scalar d and s forms, then vector .2d, .4s and
   .2s forms, all on registers 2, 11 and 29. */
2371 GEN_THREEVEC_TEST(frecps_d_d_d,    "frecps d2, d11, d29", 2, 11, 29)
2372 GEN_THREEVEC_TEST(frecps_s_s_s,    "frecps s2, s11, s29", 2, 11, 29)
2373 GEN_THREEVEC_TEST(frecps_2d_2d_2d, "frecps v2.2d, v11.2d, v29.2d", 2, 11, 29)
2374 GEN_THREEVEC_TEST(frecps_4s_4s_4s, "frecps v2.4s, v11.4s, v29.4s", 2, 11, 29)
2375 GEN_THREEVEC_TEST(frecps_2s_2s_2s, "frecps v2.2s, v11.2s, v29.2s", 2, 11, 29)
2376 
/* FRECPX: only scalar d and s forms are generated here. */
2377 GEN_TWOVEC_TEST(frecpx_d_d,   "frecpx d22, d23",       22, 23)
2378 GEN_TWOVEC_TEST(frecpx_s_s,   "frecpx s22, s23",       22, 23)
2379 
/* FRINT{A,I,M,N,P,X,Z}, scalar forms: one d-register and one s-register
   test per variant, all on registers 22 and 23. */
2380 GEN_TWOVEC_TEST(frinta_d_d,   "frinta d22, d23",       22, 23)
2381 GEN_TWOVEC_TEST(frinta_s_s,   "frinta s22, s23",       22, 23)
2382 GEN_TWOVEC_TEST(frinti_d_d,   "frinti d22, d23",       22, 23)
2383 GEN_TWOVEC_TEST(frinti_s_s,   "frinti s22, s23",       22, 23)
2384 GEN_TWOVEC_TEST(frintm_d_d,   "frintm d22, d23",       22, 23)
2385 GEN_TWOVEC_TEST(frintm_s_s,   "frintm s22, s23",       22, 23)
2386 GEN_TWOVEC_TEST(frintn_d_d,   "frintn d22, d23",       22, 23)
2387 GEN_TWOVEC_TEST(frintn_s_s,   "frintn s22, s23",       22, 23)
2388 GEN_TWOVEC_TEST(frintp_d_d,   "frintp d22, d23",       22, 23)
2389 GEN_TWOVEC_TEST(frintp_s_s,   "frintp s22, s23",       22, 23)
2390 GEN_TWOVEC_TEST(frintx_d_d,   "frintx d22, d23",       22, 23)
2391 GEN_TWOVEC_TEST(frintx_s_s,   "frintx s22, s23",       22, 23)
2392 GEN_TWOVEC_TEST(frintz_d_d,   "frintz d22, d23",       22, 23)
2393 GEN_TWOVEC_TEST(frintz_s_s,   "frintz s22, s23",       22, 23)
2394 
/* FRINT{A,I,M,N,P,X,Z}, vector forms: .2d, .4s and .2s arrangements for
   each variant, all on registers 2 and 11. */
2395 GEN_TWOVEC_TEST(frinta_2d_2d, "frinta v2.2d, v11.2d", 2, 11)
2396 GEN_TWOVEC_TEST(frinta_4s_4s, "frinta v2.4s, v11.4s", 2, 11)
2397 GEN_TWOVEC_TEST(frinta_2s_2s, "frinta v2.2s, v11.2s", 2, 11)
2398 GEN_TWOVEC_TEST(frinti_2d_2d, "frinti v2.2d, v11.2d", 2, 11)
2399 GEN_TWOVEC_TEST(frinti_4s_4s, "frinti v2.4s, v11.4s", 2, 11)
2400 GEN_TWOVEC_TEST(frinti_2s_2s, "frinti v2.2s, v11.2s", 2, 11)
2401 GEN_TWOVEC_TEST(frintm_2d_2d, "frintm v2.2d, v11.2d", 2, 11)
2402 GEN_TWOVEC_TEST(frintm_4s_4s, "frintm v2.4s, v11.4s", 2, 11)
2403 GEN_TWOVEC_TEST(frintm_2s_2s, "frintm v2.2s, v11.2s", 2, 11)
2404 GEN_TWOVEC_TEST(frintn_2d_2d, "frintn v2.2d, v11.2d", 2, 11)
2405 GEN_TWOVEC_TEST(frintn_4s_4s, "frintn v2.4s, v11.4s", 2, 11)
2406 GEN_TWOVEC_TEST(frintn_2s_2s, "frintn v2.2s, v11.2s", 2, 11)
2407 GEN_TWOVEC_TEST(frintp_2d_2d, "frintp v2.2d, v11.2d", 2, 11)
2408 GEN_TWOVEC_TEST(frintp_4s_4s, "frintp v2.4s, v11.4s", 2, 11)
2409 GEN_TWOVEC_TEST(frintp_2s_2s, "frintp v2.2s, v11.2s", 2, 11)
2410 GEN_TWOVEC_TEST(frintx_2d_2d, "frintx v2.2d, v11.2d", 2, 11)
2411 GEN_TWOVEC_TEST(frintx_4s_4s, "frintx v2.4s, v11.4s", 2, 11)
2412 GEN_TWOVEC_TEST(frintx_2s_2s, "frintx v2.2s, v11.2s", 2, 11)
2413 GEN_TWOVEC_TEST(frintz_2d_2d, "frintz v2.2d, v11.2d", 2, 11)
2414 GEN_TWOVEC_TEST(frintz_4s_4s, "frintz v2.4s, v11.4s", 2, 11)
2415 GEN_TWOVEC_TEST(frintz_2s_2s, "frintz v2.2s, v11.2s", 2, 11)
2416 
/* FRSQRTE, two operands: scalar d and s forms, then vector .2d, .4s and
   .2s forms, all on registers 22 and 23. */
2417 GEN_TWOVEC_TEST(frsqrte_d_d,   "frsqrte d22, d23",       22, 23)
2418 GEN_TWOVEC_TEST(frsqrte_s_s,   "frsqrte s22, s23",       22, 23)
2419 GEN_TWOVEC_TEST(frsqrte_2d_2d, "frsqrte v22.2d, v23.2d", 22, 23)
2420 GEN_TWOVEC_TEST(frsqrte_4s_4s, "frsqrte v22.4s, v23.4s", 22, 23)
2421 GEN_TWOVEC_TEST(frsqrte_2s_2s, "frsqrte v22.2s, v23.2s", 22, 23)
2422 
/* FRSQRTS, three operands: scalar d and s forms, then vector .2d, .4s and
   .2s forms, all on registers 2, 11 and 29. */
2423 GEN_THREEVEC_TEST(frsqrts_d_d_d,    "frsqrts d2, d11, d29", 2, 11, 29)
2424 GEN_THREEVEC_TEST(frsqrts_s_s_s,    "frsqrts s2, s11, s29", 2, 11, 29)
2425 GEN_THREEVEC_TEST(frsqrts_2d_2d_2d, "frsqrts v2.2d, v11.2d, v29.2d", 2, 11, 29)
2426 GEN_THREEVEC_TEST(frsqrts_4s_4s_4s, "frsqrts v2.4s, v11.4s, v29.4s", 2, 11, 29)
2427 GEN_THREEVEC_TEST(frsqrts_2s_2s_2s, "frsqrts v2.2s, v11.2s, v29.2s", 2, 11, 29)
2428 
2429 // ======================== CONV ========================
2430 
/* FCVT between scalar h, s and d registers: all six ordered pairs of
   distinct widths, using registers 7 and 16.  Test names are
   fcvt_<first operand width>_<second operand width>. */
2431 GEN_TWOVEC_TEST(fcvt_s_h, "fcvt s7, h16", 7, 16)
2432 GEN_TWOVEC_TEST(fcvt_d_h, "fcvt d7, h16", 7, 16)
2433 GEN_TWOVEC_TEST(fcvt_h_s, "fcvt h7, s16", 7, 16)
2434 GEN_TWOVEC_TEST(fcvt_d_s, "fcvt d7, s16", 7, 16)
2435 GEN_TWOVEC_TEST(fcvt_h_d, "fcvt h7, d16", 7, 16)
2436 GEN_TWOVEC_TEST(fcvt_s_d, "fcvt s7, d16", 7, 16)
2437 
/* FCVTL / FCVTL2 on registers 11 and 29.  The test names do not carry the
   "2" suffix; the FCVTL2 cases are distinguished by the arrangement of
   the second operand (8h, 4s) in the name. */
2438 GEN_TWOVEC_TEST(fcvtl_4s_4h, "fcvtl  v11.4s, v29.4h", 11, 29)
2439 GEN_TWOVEC_TEST(fcvtl_4s_8h, "fcvtl2 v11.4s, v29.8h", 11, 29)
2440 GEN_TWOVEC_TEST(fcvtl_2d_2s, "fcvtl  v11.2d, v29.2s", 11, 29)
2441 GEN_TWOVEC_TEST(fcvtl_2d_4s, "fcvtl2 v11.2d, v29.4s", 11, 29)
2442 
/* FCVTN / FCVTN2 on registers 22 and 23.  The test names do not carry the
   "2" suffix; the FCVTN2 cases are distinguished by the arrangement of
   the first operand (8h, 4s) in the name. */
2443 GEN_TWOVEC_TEST(fcvtn_4h_4s, "fcvtn  v22.4h, v23.4s", 22, 23)
2444 GEN_TWOVEC_TEST(fcvtn_8h_4s, "fcvtn2 v22.8h, v23.4s", 22, 23)
2445 GEN_TWOVEC_TEST(fcvtn_2s_2d, "fcvtn  v22.2s, v23.2d", 22, 23)
2446 GEN_TWOVEC_TEST(fcvtn_4s_2d, "fcvtn2 v22.4s, v23.2d", 22, 23)
2447 
/* FCVTAS / FCVTAU.  First the SIMD&FP-register forms (scalar d and s,
   vector .2d/.4s/.2s) on registers 10 and 21; the scalar unsigned cases
   swap the two registers.  Then the forms with a general-purpose first
   operand (w21 / x21) and an s10 / d10 second operand; for those the
   macro's first number is the general register and the second is the
   SIMD&FP register. */
2448 GEN_TWOVEC_TEST(fcvtas_d_d,   "fcvtas d10, d21",       10, 21)
2449 GEN_TWOVEC_TEST(fcvtau_d_d,   "fcvtau d21, d10",       21, 10)
2450 GEN_TWOVEC_TEST(fcvtas_s_s,   "fcvtas s10, s21",       10, 21)
2451 GEN_TWOVEC_TEST(fcvtau_s_s,   "fcvtau s21, s10",       21, 10)
2452 GEN_TWOVEC_TEST(fcvtas_2d_2d, "fcvtas v10.2d, v21.2d", 10, 21)
2453 GEN_TWOVEC_TEST(fcvtau_2d_2d, "fcvtau v10.2d, v21.2d", 10, 21)
2454 GEN_TWOVEC_TEST(fcvtas_4s_4s, "fcvtas v10.4s, v21.4s", 10, 21)
2455 GEN_TWOVEC_TEST(fcvtau_4s_4s, "fcvtau v10.4s, v21.4s", 10, 21)
2456 GEN_TWOVEC_TEST(fcvtas_2s_2s, "fcvtas v10.2s, v21.2s", 10, 21)
2457 GEN_TWOVEC_TEST(fcvtau_2s_2s, "fcvtau v10.2s, v21.2s", 10, 21)
2458 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_s, "fcvtas w21, s10", 21, 10)
2459 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_s, "fcvtau w21, s10", 21, 10)
2460 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_s, "fcvtas x21, s10", 21, 10)
2461 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_s, "fcvtau x21, s10", 21, 10)
2462 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_d, "fcvtas w21, d10", 21, 10)
2463 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_d, "fcvtau w21, d10", 21, 10)
2464 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_d, "fcvtas x21, d10", 21, 10)
2465 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_d, "fcvtau x21, d10", 21, 10)
2466 
/* FCVTMS / FCVTMU.  Same layout as the other FCVT{A,N,P,Z} groups:
   SIMD&FP-register forms on registers 10 and 21 (scalar unsigned cases
   swap them), then forms with a general-purpose first operand (w21 / x21)
   and an s10 / d10 second operand. */
2467 GEN_TWOVEC_TEST(fcvtms_d_d,   "fcvtms d10, d21",       10, 21)
2468 GEN_TWOVEC_TEST(fcvtmu_d_d,   "fcvtmu d21, d10",       21, 10)
2469 GEN_TWOVEC_TEST(fcvtms_s_s,   "fcvtms s10, s21",       10, 21)
2470 GEN_TWOVEC_TEST(fcvtmu_s_s,   "fcvtmu s21, s10",       21, 10)
2471 GEN_TWOVEC_TEST(fcvtms_2d_2d, "fcvtms v10.2d, v21.2d", 10, 21)
2472 GEN_TWOVEC_TEST(fcvtmu_2d_2d, "fcvtmu v10.2d, v21.2d", 10, 21)
2473 GEN_TWOVEC_TEST(fcvtms_4s_4s, "fcvtms v10.4s, v21.4s", 10, 21)
2474 GEN_TWOVEC_TEST(fcvtmu_4s_4s, "fcvtmu v10.4s, v21.4s", 10, 21)
2475 GEN_TWOVEC_TEST(fcvtms_2s_2s, "fcvtms v10.2s, v21.2s", 10, 21)
2476 GEN_TWOVEC_TEST(fcvtmu_2s_2s, "fcvtmu v10.2s, v21.2s", 10, 21)
2477 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_s, "fcvtms w21, s10", 21, 10)
2478 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_s, "fcvtmu w21, s10", 21, 10)
2479 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_s, "fcvtms x21, s10", 21, 10)
2480 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_s, "fcvtmu x21, s10", 21, 10)
2481 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_d, "fcvtms w21, d10", 21, 10)
2482 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_d, "fcvtmu w21, d10", 21, 10)
2483 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_d, "fcvtms x21, d10", 21, 10)
2484 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_d, "fcvtmu x21, d10", 21, 10)
2485 
/* FCVTNS / FCVTNU.  Same layout as the other FCVT{A,M,P,Z} groups:
   SIMD&FP-register forms on registers 10 and 21 (scalar unsigned cases
   swap them), then forms with a general-purpose first operand (w21 / x21)
   and an s10 / d10 second operand. */
2486 GEN_TWOVEC_TEST(fcvtns_d_d,   "fcvtns d10, d21",       10, 21)
2487 GEN_TWOVEC_TEST(fcvtnu_d_d,   "fcvtnu d21, d10",       21, 10)
2488 GEN_TWOVEC_TEST(fcvtns_s_s,   "fcvtns s10, s21",       10, 21)
2489 GEN_TWOVEC_TEST(fcvtnu_s_s,   "fcvtnu s21, s10",       21, 10)
2490 GEN_TWOVEC_TEST(fcvtns_2d_2d, "fcvtns v10.2d, v21.2d", 10, 21)
2491 GEN_TWOVEC_TEST(fcvtnu_2d_2d, "fcvtnu v10.2d, v21.2d", 10, 21)
2492 GEN_TWOVEC_TEST(fcvtns_4s_4s, "fcvtns v10.4s, v21.4s", 10, 21)
2493 GEN_TWOVEC_TEST(fcvtnu_4s_4s, "fcvtnu v10.4s, v21.4s", 10, 21)
2494 GEN_TWOVEC_TEST(fcvtns_2s_2s, "fcvtns v10.2s, v21.2s", 10, 21)
2495 GEN_TWOVEC_TEST(fcvtnu_2s_2s, "fcvtnu v10.2s, v21.2s", 10, 21)
2496 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_s, "fcvtns w21, s10", 21, 10)
2497 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_s, "fcvtnu w21, s10", 21, 10)
2498 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_s, "fcvtns x21, s10", 21, 10)
2499 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_s, "fcvtnu x21, s10", 21, 10)
2500 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_d, "fcvtns w21, d10", 21, 10)
2501 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_d, "fcvtnu w21, d10", 21, 10)
2502 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_d, "fcvtns x21, d10", 21, 10)
2503 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_d, "fcvtnu x21, d10", 21, 10)
2504 
/* FCVTPS / FCVTPU.  Same layout as the other FCVT{A,M,N,Z} groups:
   SIMD&FP-register forms on registers 10 and 21 (scalar unsigned cases
   swap them), then forms with a general-purpose first operand (w21 / x21)
   and an s10 / d10 second operand. */
2505 GEN_TWOVEC_TEST(fcvtps_d_d,   "fcvtps d10, d21",       10, 21)
2506 GEN_TWOVEC_TEST(fcvtpu_d_d,   "fcvtpu d21, d10",       21, 10)
2507 GEN_TWOVEC_TEST(fcvtps_s_s,   "fcvtps s10, s21",       10, 21)
2508 GEN_TWOVEC_TEST(fcvtpu_s_s,   "fcvtpu s21, s10",       21, 10)
2509 GEN_TWOVEC_TEST(fcvtps_2d_2d, "fcvtps v10.2d, v21.2d", 10, 21)
2510 GEN_TWOVEC_TEST(fcvtpu_2d_2d, "fcvtpu v10.2d, v21.2d", 10, 21)
2511 GEN_TWOVEC_TEST(fcvtps_4s_4s, "fcvtps v10.4s, v21.4s", 10, 21)
2512 GEN_TWOVEC_TEST(fcvtpu_4s_4s, "fcvtpu v10.4s, v21.4s", 10, 21)
2513 GEN_TWOVEC_TEST(fcvtps_2s_2s, "fcvtps v10.2s, v21.2s", 10, 21)
2514 GEN_TWOVEC_TEST(fcvtpu_2s_2s, "fcvtpu v10.2s, v21.2s", 10, 21)
2515 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_s, "fcvtps w21, s10", 21, 10)
2516 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_s, "fcvtpu w21, s10", 21, 10)
2517 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_s, "fcvtps x21, s10", 21, 10)
2518 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_s, "fcvtpu x21, s10", 21, 10)
2519 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_d, "fcvtps w21, d10", 21, 10)
2520 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_d, "fcvtpu w21, d10", 21, 10)
2521 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_d, "fcvtps x21, d10", 21, 10)
2522 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_d, "fcvtpu x21, d10", 21, 10)
2523 
/* FCVTZS / FCVTZU without a #fbits operand.  Same layout as the other
   FCVT{A,M,N,P} groups: SIMD&FP-register forms on registers 10 and 21
   (scalar unsigned cases swap them), then forms with a general-purpose
   first operand (w21 / x21) and an s10 / d10 second operand. */
2524 GEN_TWOVEC_TEST(fcvtzs_d_d,   "fcvtzs d10, d21",       10, 21)
2525 GEN_TWOVEC_TEST(fcvtzu_d_d,   "fcvtzu d21, d10",       21, 10)
2526 GEN_TWOVEC_TEST(fcvtzs_s_s,   "fcvtzs s10, s21",       10, 21)
2527 GEN_TWOVEC_TEST(fcvtzu_s_s,   "fcvtzu s21, s10",       21, 10)
2528 GEN_TWOVEC_TEST(fcvtzs_2d_2d, "fcvtzs v10.2d, v21.2d", 10, 21)
2529 GEN_TWOVEC_TEST(fcvtzu_2d_2d, "fcvtzu v10.2d, v21.2d", 10, 21)
2530 GEN_TWOVEC_TEST(fcvtzs_4s_4s, "fcvtzs v10.4s, v21.4s", 10, 21)
2531 GEN_TWOVEC_TEST(fcvtzu_4s_4s, "fcvtzu v10.4s, v21.4s", 10, 21)
2532 GEN_TWOVEC_TEST(fcvtzs_2s_2s, "fcvtzs v10.2s, v21.2s", 10, 21)
2533 GEN_TWOVEC_TEST(fcvtzu_2s_2s, "fcvtzu v10.2s, v21.2s", 10, 21)
2534 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s, "fcvtzs w21, s10", 21, 10)
2535 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s, "fcvtzu w21, s10", 21, 10)
2536 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s, "fcvtzs x21, s10", 21, 10)
2537 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s, "fcvtzu x21, s10", 21, 10)
2538 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d, "fcvtzs w21, d10", 21, 10)
2539 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d, "fcvtzu w21, d10", 21, 10)
2540 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d, "fcvtzs x21, d10", 21, 10)
2541 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d, "fcvtzu x21, d10", 21, 10)
2542 
/* FCVTZS / FCVTZU with an explicit #fbits operand.  Each operand shape is
   exercised at three #fbits values: 1, a middle value, and the largest
   value used for that shape (1/32/64 where a d or x register is the first
   operand, 1/16/32 where an s or w register is).  SIMD&FP-only forms come
   first (registers 10 and 21), followed by forms whose first operand is
   w21 / x21 and whose second is s10 / d10. */
2543 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits1,    "fcvtzs d10, d21, #1",   10, 21)
2544 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits32,   "fcvtzs d10, d21, #32",  10, 21)
2545 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits64,   "fcvtzs d10, d21, #64",  10, 21)
2546 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits1,    "fcvtzu d10, d21, #1",   10, 21)
2547 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits32,   "fcvtzu d10, d21, #32",  10, 21)
2548 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits64,   "fcvtzu d10, d21, #64",  10, 21)
2549 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits1,    "fcvtzs s10, s21, #1",   10, 21)
2550 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits16,   "fcvtzs s10, s21, #16",  10, 21)
2551 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits32,   "fcvtzs s10, s21, #32",  10, 21)
2552 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits1,    "fcvtzu s10, s21, #1",   10, 21)
2553 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits16,   "fcvtzu s10, s21, #16",  10, 21)
2554 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits32,   "fcvtzu s10, s21, #32",  10, 21)
2555 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits1,  "fcvtzs v10.2d, v21.2d, #1",  10, 21)
2556 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits32, "fcvtzs v10.2d, v21.2d, #32", 10, 21)
2557 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits64, "fcvtzs v10.2d, v21.2d, #64", 10, 21)
2558 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits1,  "fcvtzu v10.2d, v21.2d, #1",  10, 21)
2559 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits32, "fcvtzu v10.2d, v21.2d, #32", 10, 21)
2560 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits64, "fcvtzu v10.2d, v21.2d, #64", 10, 21)
2561 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits1,  "fcvtzs v10.4s, v21.4s, #1",  10, 21)
2562 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits16, "fcvtzs v10.4s, v21.4s, #16", 10, 21)
2563 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits32, "fcvtzs v10.4s, v21.4s, #32", 10, 21)
2564 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits1,  "fcvtzu v10.4s, v21.4s, #1",  10, 21)
2565 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits16, "fcvtzu v10.4s, v21.4s, #16", 10, 21)
2566 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits32, "fcvtzu v10.4s, v21.4s, #32", 10, 21)
2567 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits1,  "fcvtzs v10.2s, v21.2s, #1",  10, 21)
2568 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits16, "fcvtzs v10.2s, v21.2s, #16", 10, 21)
2569 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits32, "fcvtzs v10.2s, v21.2s, #32", 10, 21)
2570 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits1,  "fcvtzu v10.2s, v21.2s, #1",  10, 21)
2571 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits16, "fcvtzu v10.2s, v21.2s, #16", 10, 21)
2572 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits32, "fcvtzu v10.2s, v21.2s, #32", 10, 21)
2573 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits1,  "fcvtzs w21, s10, #1",  21, 10)
2574 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits16, "fcvtzs w21, s10, #16", 21, 10)
2575 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits32, "fcvtzs w21, s10, #32", 21, 10)
2576 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits1,  "fcvtzu w21, s10, #1",  21, 10)
2577 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits16, "fcvtzu w21, s10, #16", 21, 10)
2578 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits32, "fcvtzu w21, s10, #32", 21, 10)
2579 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits1,  "fcvtzs x21, s10, #1",  21, 10)
2580 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits32, "fcvtzs x21, s10, #32", 21, 10)
2581 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits64, "fcvtzs x21, s10, #64", 21, 10)
2582 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits1,  "fcvtzu x21, s10, #1",  21, 10)
2583 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits32, "fcvtzu x21, s10, #32", 21, 10)
2584 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits64, "fcvtzu x21, s10, #64", 21, 10)
2585 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits1,  "fcvtzs w21, d10, #1",  21, 10)
2586 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits16, "fcvtzs w21, d10, #16", 21, 10)
2587 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits32, "fcvtzs w21, d10, #32", 21, 10)
2588 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits1,  "fcvtzu w21, d10, #1",  21, 10)
2589 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits16, "fcvtzu w21, d10, #16", 21, 10)
2590 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits32, "fcvtzu w21, d10, #32", 21, 10)
2591 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits1,  "fcvtzs x21, d10, #1",  21, 10)
2592 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits32, "fcvtzs x21, d10, #32", 21, 10)
2593 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits64, "fcvtzs x21, d10, #64", 21, 10)
2594 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits1,  "fcvtzu x21, d10, #1",  21, 10)
2595 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits32, "fcvtzu x21, d10, #32", 21, 10)
2596 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits64, "fcvtzu x21, d10, #64", 21, 10)
2597 
/* FCVTXN / FCVTXN2 on registers 10 and 21: the scalar s <- d form and the
   two vector forms.  The FCVTXN2 test name has no "2" suffix; it is
   distinguished by the 4s arrangement in the name. */
2598 GEN_TWOVEC_TEST(fcvtxn_s_d,   "fcvtxn s10, d21", 10, 21)
2599 GEN_TWOVEC_TEST(fcvtxn_2s_2d, "fcvtxn  v10.2s, v21.2d", 10, 21)
2600 GEN_TWOVEC_TEST(fcvtxn_4s_2d, "fcvtxn2 v10.4s, v21.2d", 10, 21)
2601 
/* SCVTF / UCVTF between SIMD&FP registers with an explicit #fbits operand.
   Each shape is exercised at three #fbits values (1/32/64 for d and .2d,
   1/16/32 for s, .4s and .2s).  The scalar UCVTF cases swap registers 10
   and 21 relative to the SCVTF cases; the vector cases do not. */
2602 GEN_TWOVEC_TEST(scvtf_d_d_fbits1,    "scvtf d10, d21      , #1",  10, 21)
2603 GEN_TWOVEC_TEST(scvtf_d_d_fbits32,   "scvtf d10, d21      , #32", 10, 21)
2604 GEN_TWOVEC_TEST(scvtf_d_d_fbits64,   "scvtf d10, d21      , #64", 10, 21)
2605 GEN_TWOVEC_TEST(ucvtf_d_d_fbits1,    "ucvtf d21, d10      , #1",  21, 10)
2606 GEN_TWOVEC_TEST(ucvtf_d_d_fbits32,   "ucvtf d21, d10      , #32", 21, 10)
2607 GEN_TWOVEC_TEST(ucvtf_d_d_fbits64,   "ucvtf d21, d10      , #64", 21, 10)
2608 GEN_TWOVEC_TEST(scvtf_s_s_fbits1,    "scvtf s10, s21      , #1",  10, 21)
2609 GEN_TWOVEC_TEST(scvtf_s_s_fbits16,   "scvtf s10, s21      , #16", 10, 21)
2610 GEN_TWOVEC_TEST(scvtf_s_s_fbits32,   "scvtf s10, s21      , #32", 10, 21)
2611 GEN_TWOVEC_TEST(ucvtf_s_s_fbits1,    "ucvtf s21, s10      , #1",  21, 10)
2612 GEN_TWOVEC_TEST(ucvtf_s_s_fbits16,   "ucvtf s21, s10      , #16", 21, 10)
2613 GEN_TWOVEC_TEST(ucvtf_s_s_fbits32,   "ucvtf s21, s10      , #32", 21, 10)
2614 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits1,  "scvtf v10.2d, v21.2d, #1",  10, 21)
2615 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits32, "scvtf v10.2d, v21.2d, #32", 10, 21)
2616 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits64, "scvtf v10.2d, v21.2d, #64", 10, 21)
2617 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits1,  "ucvtf v10.2d, v21.2d, #1",  10, 21)
2618 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits32, "ucvtf v10.2d, v21.2d, #32", 10, 21)
2619 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits64, "ucvtf v10.2d, v21.2d, #64", 10, 21)
2620 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits1,  "scvtf v10.4s, v21.4s, #1",  10, 21)
2621 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits16, "scvtf v10.4s, v21.4s, #16", 10, 21)
2622 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits32, "scvtf v10.4s, v21.4s, #32", 10, 21)
2623 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits1,  "ucvtf v10.4s, v21.4s, #1",  10, 21)
2624 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits16, "ucvtf v10.4s, v21.4s, #16", 10, 21)
2625 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits32, "ucvtf v10.4s, v21.4s, #32", 10, 21)
2626 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits1,  "scvtf v10.2s, v21.2s, #1",  10, 21)
2627 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits16, "scvtf v10.2s, v21.2s, #16", 10, 21)
2628 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits32, "scvtf v10.2s, v21.2s, #32", 10, 21)
2629 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits1,  "ucvtf v10.2s, v21.2s, #1",  10, 21)
2630 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits16, "ucvtf v10.2s, v21.2s, #16", 10, 21)
2631 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits32, "ucvtf v10.2s, v21.2s, #32", 10, 21)
2632 
/* SCVTF / UCVTF between SIMD&FP registers, no #fbits operand: scalar d and
   s forms (the UCVTF cases swap registers 10 and 21), then vector .2d,
   .4s and .2s forms. */
2633 GEN_TWOVEC_TEST(scvtf_d_d,   "scvtf d10, d21",       10, 21)
2634 GEN_TWOVEC_TEST(ucvtf_d_d,   "ucvtf d21, d10",       21, 10)
2635 GEN_TWOVEC_TEST(scvtf_s_s,   "scvtf s10, s21",       10, 21)
2636 GEN_TWOVEC_TEST(ucvtf_s_s,   "ucvtf s21, s10",       21, 10)
2637 GEN_TWOVEC_TEST(scvtf_2d_2d, "scvtf v10.2d, v21.2d", 10, 21)
2638 GEN_TWOVEC_TEST(ucvtf_2d_2d, "ucvtf v10.2d, v21.2d", 10, 21)
2639 GEN_TWOVEC_TEST(scvtf_4s_4s, "scvtf v10.4s, v21.4s", 10, 21)
2640 GEN_TWOVEC_TEST(ucvtf_4s_4s, "ucvtf v10.4s, v21.4s", 10, 21)
2641 GEN_TWOVEC_TEST(scvtf_2s_2s, "scvtf v10.2s, v21.2s", 10, 21)
2642 GEN_TWOVEC_TEST(ucvtf_2s_2s, "ucvtf v10.2s, v21.2s", 10, 21)
2643 
/* SCVTF / UCVTF with a general-purpose second operand (w15 / x15), an
   s7 / d7 first operand, and an explicit #fbits (1/16/32 for w, 1/32/64
   for x).  Note the macro argument order: the first number (15) is the
   general register and the second (7) is the SIMD&FP register, which is
   the reverse of their order in the instruction text. */
2644 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits1,  "scvtf s7, w15, #1",  15, 7)
2645 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits16, "scvtf s7, w15, #16", 15, 7)
2646 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits32, "scvtf s7, w15, #32", 15, 7)
2647 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits1,  "scvtf d7, w15, #1",  15, 7)
2648 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits16, "scvtf d7, w15, #16", 15, 7)
2649 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits32, "scvtf d7, w15, #32", 15, 7)
2650 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits1,  "scvtf s7, x15, #1",  15, 7)
2651 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits32, "scvtf s7, x15, #32", 15, 7)
2652 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits64, "scvtf s7, x15, #64", 15, 7)
2653 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits1,  "scvtf d7, x15, #1",  15, 7)
2654 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits32, "scvtf d7, x15, #32", 15, 7)
2655 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits64, "scvtf d7, x15, #64", 15, 7)
2656 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits1,  "ucvtf s7, w15, #1",  15, 7)
2657 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits16, "ucvtf s7, w15, #16", 15, 7)
2658 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits32, "ucvtf s7, w15, #32", 15, 7)
2659 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits1,  "ucvtf d7, w15, #1",  15, 7)
2660 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits16, "ucvtf d7, w15, #16", 15, 7)
2661 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits32, "ucvtf d7, w15, #32", 15, 7)
2662 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits1,  "ucvtf s7, x15, #1",  15, 7)
2663 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits32, "ucvtf s7, x15, #32", 15, 7)
2664 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits64, "ucvtf s7, x15, #64", 15, 7)
2665 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits1,  "ucvtf d7, x15, #1",  15, 7)
2666 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits32, "ucvtf d7, x15, #32", 15, 7)
2667 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits64, "ucvtf d7, x15, #64", 15, 7)
2668 
/* SCVTF / UCVTF with a general-purpose second operand (w15 / x15) and an
   s7 / d7 first operand, no #fbits.  As with the other ONEINT_ONEVEC
   tests, the first number (15) is the general register and the second
   (7) is the SIMD&FP register. */
2669 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w, "scvtf s7, w15", 15, 7)
2670 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w, "scvtf d7, w15", 15, 7)
2671 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x, "scvtf s7, x15", 15, 7)
2672 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x, "scvtf d7, x15", 15, 7)
2673 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w, "ucvtf s7, w15", 15, 7)
2674 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w, "ucvtf d7, w15", 15, 7)
2675 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x, "ucvtf s7, x15", 15, 7)
2676 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x, "ucvtf d7, x15", 15, 7)
2677 
2678 // ======================== INT ========================
2679 
/* ABS and NEG, scalar d-register forms, on registers 22 and 23. */
2680 GEN_TWOVEC_TEST(abs_d_d,  "abs d22, d23",   22, 23)
2681 GEN_TWOVEC_TEST(neg_d_d,  "neg d22, d23",   22, 23)
2682 
/* ABS and NEG, vector forms, one test per arrangement (2d, 4s, 2s, 8h, 4h,
   16b, 8b).  GEN_UNARY_TEST is defined outside this chunk; it is given
   the mnemonic and two arrangement suffixes -- presumably destination and
   source (TODO confirm against the macro definition). */
2683 GEN_UNARY_TEST(abs, 2d, 2d)
2684 GEN_UNARY_TEST(abs, 4s, 4s)
2685 GEN_UNARY_TEST(abs, 2s, 2s)
2686 GEN_UNARY_TEST(abs, 8h, 8h)
2687 GEN_UNARY_TEST(abs, 4h, 4h)
2688 GEN_UNARY_TEST(abs, 16b, 16b)
2689 GEN_UNARY_TEST(abs, 8b, 8b)
2690 GEN_UNARY_TEST(neg, 2d, 2d)
2691 GEN_UNARY_TEST(neg, 4s, 4s)
2692 GEN_UNARY_TEST(neg, 2s, 2s)
2693 GEN_UNARY_TEST(neg, 8h, 8h)
2694 GEN_UNARY_TEST(neg, 4h, 4h)
2695 GEN_UNARY_TEST(neg, 16b, 16b)
2696 GEN_UNARY_TEST(neg, 8b,  8b)
2697 
/* ADD and SUB, scalar d-register forms, on registers 21, 22 and 23. */
2698 GEN_THREEVEC_TEST(add_d_d_d, "add d21, d22, d23", 21, 22, 23)
2699 GEN_THREEVEC_TEST(sub_d_d_d, "sub d21, d22, d23", 21, 22, 23)
2700 
/* ADD and SUB, vector forms, one test per arrangement (2d, 4s, 2s, 8h, 4h,
   16b, 8b).  GEN_BINARY_TEST is defined outside this chunk; it is given
   the mnemonic and three arrangement suffixes -- presumably destination,
   first source, second source (TODO confirm against the macro). */
2701 GEN_BINARY_TEST(add, 2d, 2d, 2d)
2702 GEN_BINARY_TEST(add, 4s, 4s, 4s)
2703 GEN_BINARY_TEST(add, 2s, 2s, 2s)
2704 GEN_BINARY_TEST(add, 8h, 8h, 8h)
2705 GEN_BINARY_TEST(add, 4h, 4h, 4h)
2706 GEN_BINARY_TEST(add, 16b, 16b, 16b)
2707 GEN_BINARY_TEST(add, 8b, 8b, 8b)
2708 GEN_BINARY_TEST(sub, 2d, 2d, 2d)
2709 GEN_BINARY_TEST(sub, 4s, 4s, 4s)
2710 GEN_BINARY_TEST(sub, 2s, 2s, 2s)
2711 GEN_BINARY_TEST(sub, 8h, 8h, 8h)
2712 GEN_BINARY_TEST(sub, 4h, 4h, 4h)
2713 GEN_BINARY_TEST(sub, 16b, 16b, 16b)
2714 GEN_BINARY_TEST(sub, 8b, 8b, 8b)
2715 
/* ADDHN, SUBHN, RADDHN, RSUBHN and their "2" variants.  In every case the
   first arrangement has narrower lanes than the other two (2d -> 2s/4s,
   4s -> 4h/8h, 8h -> 8b/16b); the "2" mnemonic is paired with the
   full-width first arrangement (4s, 8h, 16b). */
2716 GEN_BINARY_TEST(addhn,   2s, 2d, 2d)
2717 GEN_BINARY_TEST(addhn2,  4s, 2d, 2d)
2718 GEN_BINARY_TEST(addhn,   4h, 4s, 4s)
2719 GEN_BINARY_TEST(addhn2,  8h, 4s, 4s)
2720 GEN_BINARY_TEST(addhn,   8b, 8h, 8h)
2721 GEN_BINARY_TEST(addhn2,  16b, 8h, 8h)
2722 GEN_BINARY_TEST(subhn,   2s, 2d, 2d)
2723 GEN_BINARY_TEST(subhn2,  4s, 2d, 2d)
2724 GEN_BINARY_TEST(subhn,   4h, 4s, 4s)
2725 GEN_BINARY_TEST(subhn2,  8h, 4s, 4s)
2726 GEN_BINARY_TEST(subhn,   8b, 8h, 8h)
2727 GEN_BINARY_TEST(subhn2,  16b, 8h, 8h)
2728 GEN_BINARY_TEST(raddhn,  2s, 2d, 2d)
2729 GEN_BINARY_TEST(raddhn2, 4s, 2d, 2d)
2730 GEN_BINARY_TEST(raddhn,  4h, 4s, 4s)
2731 GEN_BINARY_TEST(raddhn2, 8h, 4s, 4s)
2732 GEN_BINARY_TEST(raddhn,  8b, 8h, 8h)
2733 GEN_BINARY_TEST(raddhn2, 16b, 8h, 8h)
2734 GEN_BINARY_TEST(rsubhn,  2s, 2d, 2d)
2735 GEN_BINARY_TEST(rsubhn2, 4s, 2d, 2d)
2736 GEN_BINARY_TEST(rsubhn,  4h, 4s, 4s)
2737 GEN_BINARY_TEST(rsubhn2, 8h, 4s, 4s)
2738 GEN_BINARY_TEST(rsubhn,  8b, 8h, 8h)
2739 GEN_BINARY_TEST(rsubhn2, 16b, 8h, 8h)
2740 
/* ADDP, scalar form: d22 from the two 64-bit lanes of v23. */
2741 GEN_TWOVEC_TEST(addp_d_2d,  "addp d22, v23.2d",   22, 23)
2742 
/* ADDP, vector forms, one test per arrangement (2d, 4s, 2s, 8h, 4h, 16b,
   8b). */
2743 GEN_BINARY_TEST(addp, 2d, 2d, 2d)
2744 GEN_BINARY_TEST(addp, 4s, 4s, 4s)
2745 GEN_BINARY_TEST(addp, 2s, 2s, 2s)
2746 GEN_BINARY_TEST(addp, 8h, 8h, 8h)
2747 GEN_BINARY_TEST(addp, 4h, 4h, 4h)
2748 GEN_BINARY_TEST(addp, 16b, 16b, 16b)
2749 GEN_BINARY_TEST(addp, 8b, 8b, 8b)
2750 
/* ADDV on registers 22 and 23: scalar first operand (s, h or b) with a
   vector second operand in the 4s, 8h, 4h, 16b and 8b arrangements.  No
   2s or 2d case is generated here. */
2751 GEN_TWOVEC_TEST(addv_s_4s,  "addv s22, v23.4s",  22, 23)
2752 GEN_TWOVEC_TEST(addv_h_8h,  "addv h22, v23.8h",  22, 23)
2753 GEN_TWOVEC_TEST(addv_h_4h,  "addv h22, v23.4h",  22, 23)
2754 GEN_TWOVEC_TEST(addv_b_16b, "addv b22, v23.16b", 22, 23)
2755 GEN_TWOVEC_TEST(addv_b_8b,  "addv b22, v23.8b",  22, 23)
2756 
/* AND, BIC, ORR and ORN, register forms, in the 16b and 8b arrangements
   only. */
2757 GEN_BINARY_TEST(and, 16b, 16b, 16b)
2758 GEN_BINARY_TEST(and, 8b, 8b, 8b)
2759 GEN_BINARY_TEST(bic, 16b, 16b, 16b)
2760 GEN_BINARY_TEST(bic, 8b, 8b, 8b)
2761 GEN_BINARY_TEST(orr, 16b, 16b, 16b)
2762 GEN_BINARY_TEST(orr, 8b, 8b, 8b)
2763 GEN_BINARY_TEST(orn, 16b, 16b, 16b)
2764 GEN_BINARY_TEST(orn, 8b, 8b, 8b)
2765 
/* ORR and BIC with an 8-bit immediate and an LSL amount, applied to v22 in
   the 8h/4h (LSL #0, #8) and 4s/2s (LSL #0, #8, #16, #24) arrangements. */
2766 /* overkill -- these instructions only name v22, so the two-register macro is more than needed; register 23 is passed but never appears in the instruction text */
2767 GEN_TWOVEC_TEST(orr_8h_0x5A_lsl0, "orr v22.8h, #0x5A, LSL #0", 22, 23)
2768 GEN_TWOVEC_TEST(orr_8h_0xA5_lsl8, "orr v22.8h, #0xA5, LSL #8", 22, 23)
2769 GEN_TWOVEC_TEST(orr_4h_0x5A_lsl0, "orr v22.4h, #0x5A, LSL #0", 22, 23)
2770 GEN_TWOVEC_TEST(orr_4h_0xA5_lsl8, "orr v22.4h, #0xA5, LSL #8", 22, 23)
2771 GEN_TWOVEC_TEST(orr_4s_0x5A_lsl0,  "orr v22.4s, #0x5A, LSL #0",  22, 23)
2772 GEN_TWOVEC_TEST(orr_4s_0x6B_lsl8,  "orr v22.4s, #0x6B, LSL #8",  22, 23)
2773 GEN_TWOVEC_TEST(orr_4s_0x49_lsl16, "orr v22.4s, #0x49, LSL #16", 22, 23)
2774 GEN_TWOVEC_TEST(orr_4s_0x3D_lsl24, "orr v22.4s, #0x3D, LSL #24", 22, 23)
2775 GEN_TWOVEC_TEST(orr_2s_0x5A_lsl0,  "orr v22.2s, #0x5A, LSL #0",  22, 23)
2776 GEN_TWOVEC_TEST(orr_2s_0x6B_lsl8,  "orr v22.2s, #0x6B, LSL #8",  22, 23)
2777 GEN_TWOVEC_TEST(orr_2s_0x49_lsl16, "orr v22.2s, #0x49, LSL #16", 22, 23)
2778 GEN_TWOVEC_TEST(orr_2s_0x3D_lsl24, "orr v22.2s, #0x3D, LSL #24", 22, 23)
2779 GEN_TWOVEC_TEST(bic_8h_0x5A_lsl0, "bic v22.8h, #0x5A, LSL #0", 22, 23)
2780 GEN_TWOVEC_TEST(bic_8h_0xA5_lsl8, "bic v22.8h, #0xA5, LSL #8", 22, 23)
2781 GEN_TWOVEC_TEST(bic_4h_0x5A_lsl0, "bic v22.4h, #0x5A, LSL #0", 22, 23)
2782 GEN_TWOVEC_TEST(bic_4h_0xA5_lsl8, "bic v22.4h, #0xA5, LSL #8", 22, 23)
2783 GEN_TWOVEC_TEST(bic_4s_0x5A_lsl0,  "bic v22.4s, #0x5A, LSL #0",  22, 23)
2784 GEN_TWOVEC_TEST(bic_4s_0x6B_lsl8,  "bic v22.4s, #0x6B, LSL #8",  22, 23)
2785 GEN_TWOVEC_TEST(bic_4s_0x49_lsl16, "bic v22.4s, #0x49, LSL #16", 22, 23)
2786 GEN_TWOVEC_TEST(bic_4s_0x3D_lsl24, "bic v22.4s, #0x3D, LSL #24", 22, 23)
2787 GEN_TWOVEC_TEST(bic_2s_0x5A_lsl0,  "bic v22.2s, #0x5A, LSL #0",  22, 23)
2788 GEN_TWOVEC_TEST(bic_2s_0x6B_lsl8,  "bic v22.2s, #0x6B, LSL #8",  22, 23)
2789 GEN_TWOVEC_TEST(bic_2s_0x49_lsl16, "bic v22.2s, #0x49, LSL #16", 22, 23)
2790 GEN_TWOVEC_TEST(bic_2s_0x3D_lsl24, "bic v22.2s, #0x3D, LSL #24", 22, 23)
2791 
/* BIF, BIT, BSL and EOR, in the 16b and 8b arrangements only. */
2792 GEN_BINARY_TEST(bif, 16b, 16b, 16b)
2793 GEN_BINARY_TEST(bif, 8b, 8b, 8b)
2794 GEN_BINARY_TEST(bit, 16b, 16b, 16b)
2795 GEN_BINARY_TEST(bit, 8b, 8b, 8b)
2796 GEN_BINARY_TEST(bsl, 16b, 16b, 16b)
2797 GEN_BINARY_TEST(bsl, 8b, 8b, 8b)
2798 GEN_BINARY_TEST(eor, 16b, 16b, 16b)
2799 GEN_BINARY_TEST(eor, 8b, 8b, 8b)
2800 
/* CLS and CLZ, vector forms, in the 4s, 2s, 8h, 4h, 16b and 8b
   arrangements.  No 2d case is generated here. */
2801 GEN_UNARY_TEST(cls, 4s, 4s)
2802 GEN_UNARY_TEST(cls, 2s, 2s)
2803 GEN_UNARY_TEST(cls, 8h, 8h)
2804 GEN_UNARY_TEST(cls, 4h, 4h)
2805 GEN_UNARY_TEST(cls, 16b, 16b)
2806 GEN_UNARY_TEST(cls, 8b, 8b)
2807 GEN_UNARY_TEST(clz, 4s, 4s)
2808 GEN_UNARY_TEST(clz, 2s, 2s)
2809 GEN_UNARY_TEST(clz, 8h, 8h)
2810 GEN_UNARY_TEST(clz, 4h, 4h)
2811 GEN_UNARY_TEST(clz, 16b, 16b)
2812 GEN_UNARY_TEST(clz, 8b, 8b)
2813 
/* Register-register integer compares (CMEQ, CMGE, CMGT, CMHI, CMHS, CMTST),
   scalar d-register forms, on registers 2, 11 and 29. */
2814 GEN_THREEVEC_TEST(cmeq_d_d_d,  "cmeq  d2, d11, d29", 2, 11, 29)
2815 GEN_THREEVEC_TEST(cmge_d_d_d,  "cmge  d2, d11, d29", 2, 11, 29)
2816 GEN_THREEVEC_TEST(cmgt_d_d_d,  "cmgt  d2, d11, d29", 2, 11, 29)
2817 GEN_THREEVEC_TEST(cmhi_d_d_d,  "cmhi  d2, d11, d29", 2, 11, 29)
2818 GEN_THREEVEC_TEST(cmhs_d_d_d,  "cmhs  d2, d11, d29", 2, 11, 29)
2819 GEN_THREEVEC_TEST(cmtst_d_d_d, "cmtst d2, d11, d29", 2, 11, 29)
2820 
/* Register-register integer compares (CMEQ, CMGE, CMGT, CMHI, CMHS, CMTST),
   vector forms: each mnemonic in all seven arrangements (2d, 4s, 2s, 8h,
   4h, 16b, 8b). */
2821 GEN_BINARY_TEST(cmeq, 2d, 2d, 2d)
2822 GEN_BINARY_TEST(cmeq, 4s, 4s, 4s)
2823 GEN_BINARY_TEST(cmeq, 2s, 2s, 2s)
2824 GEN_BINARY_TEST(cmeq, 8h, 8h, 8h)
2825 GEN_BINARY_TEST(cmeq, 4h, 4h, 4h)
2826 GEN_BINARY_TEST(cmeq, 16b, 16b, 16b)
2827 GEN_BINARY_TEST(cmeq, 8b, 8b, 8b)
2828 GEN_BINARY_TEST(cmge, 2d, 2d, 2d)
2829 GEN_BINARY_TEST(cmge, 4s, 4s, 4s)
2830 GEN_BINARY_TEST(cmge, 2s, 2s, 2s)
2831 GEN_BINARY_TEST(cmge, 8h, 8h, 8h)
2832 GEN_BINARY_TEST(cmge, 4h, 4h, 4h)
2833 GEN_BINARY_TEST(cmge, 16b, 16b, 16b)
2834 GEN_BINARY_TEST(cmge, 8b, 8b, 8b)
2835 GEN_BINARY_TEST(cmgt, 2d, 2d, 2d)
2836 GEN_BINARY_TEST(cmgt, 4s, 4s, 4s)
2837 GEN_BINARY_TEST(cmgt, 2s, 2s, 2s)
2838 GEN_BINARY_TEST(cmgt, 8h, 8h, 8h)
2839 GEN_BINARY_TEST(cmgt, 4h, 4h, 4h)
2840 GEN_BINARY_TEST(cmgt, 16b, 16b, 16b)
2841 GEN_BINARY_TEST(cmgt, 8b, 8b, 8b)
2842 GEN_BINARY_TEST(cmhi, 2d, 2d, 2d)
2843 GEN_BINARY_TEST(cmhi, 4s, 4s, 4s)
2844 GEN_BINARY_TEST(cmhi, 2s, 2s, 2s)
2845 GEN_BINARY_TEST(cmhi, 8h, 8h, 8h)
2846 GEN_BINARY_TEST(cmhi, 4h, 4h, 4h)
2847 GEN_BINARY_TEST(cmhi, 16b, 16b, 16b)
2848 GEN_BINARY_TEST(cmhi, 8b, 8b, 8b)
2849 GEN_BINARY_TEST(cmhs, 2d, 2d, 2d)
2850 GEN_BINARY_TEST(cmhs, 4s, 4s, 4s)
2851 GEN_BINARY_TEST(cmhs, 2s, 2s, 2s)
2852 GEN_BINARY_TEST(cmhs, 8h, 8h, 8h)
2853 GEN_BINARY_TEST(cmhs, 4h, 4h, 4h)
2854 GEN_BINARY_TEST(cmhs, 16b, 16b, 16b)
2855 GEN_BINARY_TEST(cmhs, 8b, 8b, 8b)
2856 GEN_BINARY_TEST(cmtst, 2d, 2d, 2d)
2857 GEN_BINARY_TEST(cmtst, 4s, 4s, 4s)
2858 GEN_BINARY_TEST(cmtst, 2s, 2s, 2s)
2859 GEN_BINARY_TEST(cmtst, 8h, 8h, 8h)
2860 GEN_BINARY_TEST(cmtst, 4h, 4h, 4h)
2861 GEN_BINARY_TEST(cmtst, 16b, 16b, 16b)
2862 GEN_BINARY_TEST(cmtst, 8b, 8b, 8b)
2863 
/* Compares against the immediate #0 (CMEQ, CMGE, CMGT, CMLE, CMLT), scalar
   d-register forms, on registers 2 and 11. */
2864 GEN_TWOVEC_TEST(cmeq_zero_d_d,  "cmeq  d2, d11, #0", 2, 11)
2865 GEN_TWOVEC_TEST(cmge_zero_d_d,  "cmge  d2, d11, #0", 2, 11)
2866 GEN_TWOVEC_TEST(cmgt_zero_d_d,  "cmgt  d2, d11, #0", 2, 11)
2867 GEN_TWOVEC_TEST(cmle_zero_d_d,  "cmle  d2, d11, #0", 2, 11)
2868 GEN_TWOVEC_TEST(cmlt_zero_d_d,  "cmlt  d2, d11, #0", 2, 11)
2869 
/* Compares against the immediate #0 (CMEQ, CMGE, CMGT, CMLE, CMLT), vector
   forms: each mnemonic in all seven arrangements (2d, 4s, 2s, 8h, 4h,
   16b, 8b), on registers 5 and 22. */
2870 GEN_TWOVEC_TEST(cmeq_zero_2d_2d,   "cmeq v5.2d,  v22.2d,  #0", 5, 22)
2871 GEN_TWOVEC_TEST(cmeq_zero_4s_4s,   "cmeq v5.4s,  v22.4s,  #0", 5, 22)
2872 GEN_TWOVEC_TEST(cmeq_zero_2s_2s,   "cmeq v5.2s,  v22.2s,  #0", 5, 22)
2873 GEN_TWOVEC_TEST(cmeq_zero_8h_8h,   "cmeq v5.8h,  v22.8h,  #0", 5, 22)
2874 GEN_TWOVEC_TEST(cmeq_zero_4h_4h,   "cmeq v5.4h,  v22.4h,  #0", 5, 22)
2875 GEN_TWOVEC_TEST(cmeq_zero_16b_16b, "cmeq v5.16b, v22.16b, #0", 5, 22)
2876 GEN_TWOVEC_TEST(cmeq_zero_8b_8b,   "cmeq v5.8b,  v22.8b,  #0", 5, 22)
2877 GEN_TWOVEC_TEST(cmge_zero_2d_2d,   "cmge v5.2d,  v22.2d,  #0", 5, 22)
2878 GEN_TWOVEC_TEST(cmge_zero_4s_4s,   "cmge v5.4s,  v22.4s,  #0", 5, 22)
2879 GEN_TWOVEC_TEST(cmge_zero_2s_2s,   "cmge v5.2s,  v22.2s,  #0", 5, 22)
2880 GEN_TWOVEC_TEST(cmge_zero_8h_8h,   "cmge v5.8h,  v22.8h,  #0", 5, 22)
2881 GEN_TWOVEC_TEST(cmge_zero_4h_4h,   "cmge v5.4h,  v22.4h,  #0", 5, 22)
2882 GEN_TWOVEC_TEST(cmge_zero_16b_16b, "cmge v5.16b, v22.16b, #0", 5, 22)
2883 GEN_TWOVEC_TEST(cmge_zero_8b_8b,   "cmge v5.8b,  v22.8b,  #0", 5, 22)
2884 GEN_TWOVEC_TEST(cmgt_zero_2d_2d,   "cmgt v5.2d,  v22.2d,  #0", 5, 22)
2885 GEN_TWOVEC_TEST(cmgt_zero_4s_4s,   "cmgt v5.4s,  v22.4s,  #0", 5, 22)
2886 GEN_TWOVEC_TEST(cmgt_zero_2s_2s,   "cmgt v5.2s,  v22.2s,  #0", 5, 22)
2887 GEN_TWOVEC_TEST(cmgt_zero_8h_8h,   "cmgt v5.8h,  v22.8h,  #0", 5, 22)
2888 GEN_TWOVEC_TEST(cmgt_zero_4h_4h,   "cmgt v5.4h,  v22.4h,  #0", 5, 22)
2889 GEN_TWOVEC_TEST(cmgt_zero_16b_16b, "cmgt v5.16b, v22.16b, #0", 5, 22)
2890 GEN_TWOVEC_TEST(cmgt_zero_8b_8b,   "cmgt v5.8b,  v22.8b,  #0", 5, 22)
2891 GEN_TWOVEC_TEST(cmle_zero_2d_2d,   "cmle v5.2d,  v22.2d,  #0", 5, 22)
2892 GEN_TWOVEC_TEST(cmle_zero_4s_4s,   "cmle v5.4s,  v22.4s,  #0", 5, 22)
2893 GEN_TWOVEC_TEST(cmle_zero_2s_2s,   "cmle v5.2s,  v22.2s,  #0", 5, 22)
2894 GEN_TWOVEC_TEST(cmle_zero_8h_8h,   "cmle v5.8h,  v22.8h,  #0", 5, 22)
2895 GEN_TWOVEC_TEST(cmle_zero_4h_4h,   "cmle v5.4h,  v22.4h,  #0", 5, 22)
2896 GEN_TWOVEC_TEST(cmle_zero_16b_16b, "cmle v5.16b, v22.16b, #0", 5, 22)
2897 GEN_TWOVEC_TEST(cmle_zero_8b_8b,   "cmle v5.8b,  v22.8b,  #0", 5, 22)
2898 GEN_TWOVEC_TEST(cmlt_zero_2d_2d,   "cmlt v5.2d,  v22.2d,  #0", 5, 22)
2899 GEN_TWOVEC_TEST(cmlt_zero_4s_4s,   "cmlt v5.4s,  v22.4s,  #0", 5, 22)
2900 GEN_TWOVEC_TEST(cmlt_zero_2s_2s,   "cmlt v5.2s,  v22.2s,  #0", 5, 22)
2901 GEN_TWOVEC_TEST(cmlt_zero_8h_8h,   "cmlt v5.8h,  v22.8h,  #0", 5, 22)
2902 GEN_TWOVEC_TEST(cmlt_zero_4h_4h,   "cmlt v5.4h,  v22.4h,  #0", 5, 22)
2903 GEN_TWOVEC_TEST(cmlt_zero_16b_16b, "cmlt v5.16b, v22.16b, #0", 5, 22)
2904 GEN_TWOVEC_TEST(cmlt_zero_8b_8b,   "cmlt v5.8b,  v22.8b,  #0", 5, 22)
2905 
/* CNT, in the 16b and 8b arrangements only. */
2906 GEN_UNARY_TEST(cnt, 16b, 16b)
2907 GEN_UNARY_TEST(cnt, 8b, 8b)
2908 
/* DUP (element) to a scalar register: copies one lane of v23 (d[0], d[1],
   s[0], s[3], h[0], h[6], b[0], b[13]) into register 22.
   The two byte cases write b22 so that the register named in the
   instruction matches the register number (22) handed to the macro, as in
   the d/s/h cases.  They previously wrote b0 and b13, i.e. registers the
   macro was not told about.
   NOTE(review): if the harness records the instruction text or register
   contents in a reference output, regenerate that reference for
   dup_b_b0 and dup_b_b13. */
2909 GEN_TWOVEC_TEST(dup_d_d0,  "dup d22, v23.d[0]", 22, 23)
2910 GEN_TWOVEC_TEST(dup_d_d1,  "dup d22, v23.d[1]", 22, 23)
2911 GEN_TWOVEC_TEST(dup_s_s0,  "dup s22, v23.s[0]", 22, 23)
2912 GEN_TWOVEC_TEST(dup_s_s3,  "dup s22, v23.s[3]", 22, 23)
2913 GEN_TWOVEC_TEST(dup_h_h0,  "dup h22, v23.h[0]", 22, 23)
2914 GEN_TWOVEC_TEST(dup_h_h6,  "dup h22, v23.h[6]", 22, 23)
2915 GEN_TWOVEC_TEST(dup_b_b0,  "dup b22, v23.b[0]",  22, 23)
2916 GEN_TWOVEC_TEST(dup_b_b13, "dup b22, v23.b[13]", 22, 23)
2917 
/* DUP (element) to a vector register: replicates one lane of v17 across
   v9, two lane indices per arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b).
   The trailing digit(s) of each test name give the source lane index. */
2918 GEN_TWOVEC_TEST(dup_2d_d0,  "dup v9.2d, v17.d[0]", 9, 17)
2919 GEN_TWOVEC_TEST(dup_2d_d1,  "dup v9.2d, v17.d[1]", 9, 17)
2920 GEN_TWOVEC_TEST(dup_4s_s0,  "dup v9.4s, v17.s[0]", 9, 17)
2921 GEN_TWOVEC_TEST(dup_4s_s3,  "dup v9.4s, v17.s[3]", 9, 17)
2922 GEN_TWOVEC_TEST(dup_2s_s0,  "dup v9.2s, v17.s[0]", 9, 17)
2923 GEN_TWOVEC_TEST(dup_2s_s2,  "dup v9.2s, v17.s[2]", 9, 17)
2924 GEN_TWOVEC_TEST(dup_8h_h0,  "dup v9.8h, v17.h[0]", 9, 17)
2925 GEN_TWOVEC_TEST(dup_8h_h6,  "dup v9.8h, v17.h[6]", 9, 17)
2926 GEN_TWOVEC_TEST(dup_4h_h1,  "dup v9.4h, v17.h[1]", 9, 17)
2927 GEN_TWOVEC_TEST(dup_4h_h5,  "dup v9.4h, v17.h[5]", 9, 17)
2928 GEN_TWOVEC_TEST(dup_16b_b2,  "dup v9.16b, v17.b[2]", 9, 17)
2929 GEN_TWOVEC_TEST(dup_16b_b12, "dup v9.16b, v17.b[12]", 9, 17)
2930 GEN_TWOVEC_TEST(dup_8b_b3,  "dup v9.8b, v17.b[3]", 9, 17)
2931 GEN_TWOVEC_TEST(dup_8b_b13, "dup v9.8b, v17.b[13]", 9, 17)
2932 
/* DUP (general register): each test is a two-instruction sequence that
   first moves v17.d[0] into x10 and then replicates x10 (2d case) or
   w10 (all other arrangements) across v9.
   NOTE(review): these sequences write x10; confirm that the asm clobber
   list in GEN_TWOVEC_TEST covers it. */
2933 GEN_TWOVEC_TEST(dup_2d_x,  "mov x10, v17.d[0];  dup v9.2d,  x10", 9, 17)
2934 GEN_TWOVEC_TEST(dup_4s_w,  "mov x10, v17.d[0];  dup v9.4s,  w10", 9, 17)
2935 GEN_TWOVEC_TEST(dup_2s_w,  "mov x10, v17.d[0];  dup v9.2s,  w10", 9, 17)
2936 GEN_TWOVEC_TEST(dup_8h_w,  "mov x10, v17.d[0];  dup v9.8h,  w10",  9, 17)
2937 GEN_TWOVEC_TEST(dup_4h_w,  "mov x10, v17.d[0];  dup v9.4h,  w10",  9, 17)
2938 GEN_TWOVEC_TEST(dup_16b_w, "mov x10, v17.d[0];  dup v9.16b, w10", 9, 17)
2939 GEN_TWOVEC_TEST(dup_8b_w,  "mov x10, v17.d[0];  dup v9.8b,  w10",  9, 17)
2940 
/* EXT with every byte offset listed: #0..#15 for the 16b form and
   #0..#7 for the 8b form.  Destination is v2, sources are v11 and v29.
   The test names carry the offset in hex (0x0..0xF) while the
   instruction text spells it in decimal. */
2941 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x0,
2942                   "ext  v2.16b, v11.16b, v29.16b, #0", 2, 11, 29)
2943 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x1,
2944                   "ext  v2.16b, v11.16b, v29.16b, #1", 2, 11, 29)
2945 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x2,
2946                   "ext  v2.16b, v11.16b, v29.16b, #2", 2, 11, 29)
2947 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x3,
2948                   "ext  v2.16b, v11.16b, v29.16b, #3", 2, 11, 29)
2949 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x4,
2950                   "ext  v2.16b, v11.16b, v29.16b, #4", 2, 11, 29)
2951 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x5,
2952                   "ext  v2.16b, v11.16b, v29.16b, #5", 2, 11, 29)
2953 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x6,
2954                   "ext  v2.16b, v11.16b, v29.16b, #6", 2, 11, 29)
2955 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x7,
2956                   "ext  v2.16b, v11.16b, v29.16b, #7", 2, 11, 29)
2957 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x8,
2958                   "ext  v2.16b, v11.16b, v29.16b, #8", 2, 11, 29)
2959 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x9,
2960                   "ext  v2.16b, v11.16b, v29.16b, #9", 2, 11, 29)
2961 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xA,
2962                   "ext  v2.16b, v11.16b, v29.16b, #10", 2, 11, 29)
2963 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xB,
2964                   "ext  v2.16b, v11.16b, v29.16b, #11", 2, 11, 29)
2965 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xC,
2966                   "ext  v2.16b, v11.16b, v29.16b, #12", 2, 11, 29)
2967 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xD,
2968                   "ext  v2.16b, v11.16b, v29.16b, #13", 2, 11, 29)
2969 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xE,
2970                   "ext  v2.16b, v11.16b, v29.16b, #14", 2, 11, 29)
2971 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xF,
2972                   "ext  v2.16b, v11.16b, v29.16b, #15", 2, 11, 29)
2973 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x0,
2974                   "ext  v2.8b, v11.8b, v29.8b, #0", 2, 11, 29)
2975 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x1,
2976                   "ext  v2.8b, v11.8b, v29.8b, #1", 2, 11, 29)
2977 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x2,
2978                   "ext  v2.8b, v11.8b, v29.8b, #2", 2, 11, 29)
2979 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x3,
2980                   "ext  v2.8b, v11.8b, v29.8b, #3", 2, 11, 29)
2981 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x4,
2982                   "ext  v2.8b, v11.8b, v29.8b, #4", 2, 11, 29)
2983 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x5,
2984                   "ext  v2.8b, v11.8b, v29.8b, #5", 2, 11, 29)
2985 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x6,
2986                   "ext  v2.8b, v11.8b, v29.8b, #6", 2, 11, 29)
2987 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x7,
2988                   "ext  v2.8b, v11.8b, v29.8b, #7", 2, 11, 29)
2989 
/* INS (element): copy one lane of v24 into one lane of v3.  Four
   destination/source lane-index pairs are tested for each of the d, s,
   h and b lane sizes; the test name is ins_<dst lane>_<src lane>. */
2990 GEN_TWOVEC_TEST(ins_d0_d0, "ins v3.d[0], v24.d[0]", 3, 24)
2991 GEN_TWOVEC_TEST(ins_d0_d1, "ins v3.d[0], v24.d[1]", 3, 24)
2992 GEN_TWOVEC_TEST(ins_d1_d0, "ins v3.d[1], v24.d[0]", 3, 24)
2993 GEN_TWOVEC_TEST(ins_d1_d1, "ins v3.d[1], v24.d[1]", 3, 24)
2994 GEN_TWOVEC_TEST(ins_s0_s2, "ins v3.s[0], v24.s[2]", 3, 24)
2995 GEN_TWOVEC_TEST(ins_s3_s0, "ins v3.s[3], v24.s[0]", 3, 24)
2996 GEN_TWOVEC_TEST(ins_s2_s1, "ins v3.s[2], v24.s[1]", 3, 24)
2997 GEN_TWOVEC_TEST(ins_s1_s3, "ins v3.s[1], v24.s[3]", 3, 24)
2998 GEN_TWOVEC_TEST(ins_h0_h6, "ins v3.h[0], v24.h[6]", 3, 24)
2999 GEN_TWOVEC_TEST(ins_h7_h0, "ins v3.h[7], v24.h[0]", 3, 24)
3000 GEN_TWOVEC_TEST(ins_h6_h1, "ins v3.h[6], v24.h[1]", 3, 24)
3001 GEN_TWOVEC_TEST(ins_h1_h7, "ins v3.h[1], v24.h[7]", 3, 24)
3002 GEN_TWOVEC_TEST(ins_b0_b14, "ins v3.b[0],  v24.b[14]", 3, 24)
3003 GEN_TWOVEC_TEST(ins_b15_b8, "ins v3.b[15], v24.b[8]",  3, 24)
3004 GEN_TWOVEC_TEST(ins_b13_b9, "ins v3.b[13], v24.b[9]",  3, 24)
3005 GEN_TWOVEC_TEST(ins_b5_b12, "ins v3.b[5],  v24.b[12]", 3, 24)
3006 
3007 // test_INS_general is a handwritten function
3008 
/* MLA, MLS and MUL (by element): v2 = op(v2, v11 * one lane of the
   third register).  The three registers named in each instruction
   string must be exactly the three register numbers passed to the
   macro.  For the 32-bit lane forms the element register is v29; the
   16-bit lane forms use v9 because of the Rm restriction noted below.
   The element register has to be v9 (not v2) in those cases: with v2
   the destination would double as the multiplier and register 9 would
   never be used by the instruction under test.
   NOTE(review): the instruction text of the 'h' cases differs from
   earlier revisions; if the macro prints it, expected-output files need
   regenerating. */
3009 GEN_THREEVEC_TEST(mla_4s_4s_s0, "mla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
3010 GEN_THREEVEC_TEST(mla_4s_4s_s3, "mla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
3011 GEN_THREEVEC_TEST(mla_2s_2s_s0, "mla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
3012 GEN_THREEVEC_TEST(mla_2s_2s_s3, "mla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
3013 // For the 'h' version of these, Rm can only be <= 15 (!), hence v9
3014 GEN_THREEVEC_TEST(mla_8h_8h_h1, "mla v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
3015 GEN_THREEVEC_TEST(mla_8h_8h_h5, "mla v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
3016 GEN_THREEVEC_TEST(mla_4h_4h_h2, "mla v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
3017 GEN_THREEVEC_TEST(mla_4h_4h_h7, "mla v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
3018 GEN_THREEVEC_TEST(mls_4s_4s_s0, "mls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
3019 GEN_THREEVEC_TEST(mls_4s_4s_s3, "mls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
3020 GEN_THREEVEC_TEST(mls_2s_2s_s0, "mls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
3021 GEN_THREEVEC_TEST(mls_2s_2s_s3, "mls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
3022 // For the 'h' version of these, Rm can only be <= 15 (!), hence v9
3023 GEN_THREEVEC_TEST(mls_8h_8h_h1, "mls v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
3024 GEN_THREEVEC_TEST(mls_8h_8h_h5, "mls v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
3025 GEN_THREEVEC_TEST(mls_4h_4h_h2, "mls v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
3026 GEN_THREEVEC_TEST(mls_4h_4h_h7, "mls v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
3027 GEN_THREEVEC_TEST(mul_4s_4s_s0, "mul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
3028 GEN_THREEVEC_TEST(mul_4s_4s_s3, "mul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
3029 GEN_THREEVEC_TEST(mul_2s_2s_s0, "mul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
3030 GEN_THREEVEC_TEST(mul_2s_2s_s3, "mul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
3031 // For the 'h' version of these, Rm can only be <= 15 (!), hence v9
3032 GEN_THREEVEC_TEST(mul_8h_8h_h1, "mul v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
3033 GEN_THREEVEC_TEST(mul_8h_8h_h5, "mul v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
3034 GEN_THREEVEC_TEST(mul_4h_4h_h2, "mul v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
3035 GEN_THREEVEC_TEST(mul_4h_4h_h7, "mul v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
3036 
/* Vector (non-indexed) MLA, MLS and MUL, each for the 4s, 2s, 8h, 4h,
   16b and 8b arrangements.  GEN_BINARY_TEST takes the mnemonic followed
   by three arrangement suffixes. */
3037 GEN_BINARY_TEST(mla, 4s, 4s, 4s)
3038 GEN_BINARY_TEST(mla, 2s, 2s, 2s)
3039 GEN_BINARY_TEST(mla, 8h, 8h, 8h)
3040 GEN_BINARY_TEST(mla, 4h, 4h, 4h)
3041 GEN_BINARY_TEST(mla, 16b, 16b, 16b)
3042 GEN_BINARY_TEST(mla, 8b, 8b, 8b)
3043 GEN_BINARY_TEST(mls, 4s, 4s, 4s)
3044 GEN_BINARY_TEST(mls, 2s, 2s, 2s)
3045 GEN_BINARY_TEST(mls, 8h, 8h, 8h)
3046 GEN_BINARY_TEST(mls, 4h, 4h, 4h)
3047 GEN_BINARY_TEST(mls, 16b, 16b, 16b)
3048 GEN_BINARY_TEST(mls, 8b, 8b, 8b)
3049 GEN_BINARY_TEST(mul, 4s, 4s, 4s)
3050 GEN_BINARY_TEST(mul, 2s, 2s, 2s)
3051 GEN_BINARY_TEST(mul, 8h, 8h, 8h)
3052 GEN_BINARY_TEST(mul, 4h, 4h, 4h)
3053 GEN_BINARY_TEST(mul, 16b, 16b, 16b)
3054 GEN_BINARY_TEST(mul, 8b, 8b, 8b)
3055 
/* MOVI with a byte immediate into the 16b and 8b arrangements. */
3056 /* overkill -- don't need two vecs, only one: the instruction only
        names v22; register 23 is passed solely because GEN_TWOVEC_TEST
        takes two register numbers. */
3057 GEN_TWOVEC_TEST(movi_16b_0x9C_lsl0, "movi v22.16b, #0x9C, LSL #0", 22, 23)
3058 GEN_TWOVEC_TEST(movi_8b_0x8B_lsl0,  "movi v22.8b,  #0x8B, LSL #0", 22, 23)
3059 
/* MOVI and MVNI on halfword lanes (8h and 4h), each with the immediate
   at LSL #0 and at LSL #8.  Only v22 appears in the instruction text. */
3060 GEN_TWOVEC_TEST(movi_8h_0x5A_lsl0,  "movi v22.8h,  #0x5A, LSL #0", 22, 23)
3061 GEN_TWOVEC_TEST(movi_8h_0xA5_lsl8,  "movi v22.8h,  #0xA5, LSL #8", 22, 23)
3062 GEN_TWOVEC_TEST(movi_4h_0x5A_lsl0,  "movi v22.4h,  #0x5A, LSL #0", 22, 23)
3063 GEN_TWOVEC_TEST(movi_4h_0xA5_lsl8,  "movi v22.4h,  #0xA5, LSL #8", 22, 23)
3064 GEN_TWOVEC_TEST(mvni_8h_0x5A_lsl0,  "mvni v22.8h,  #0x5A, LSL #0", 22, 23)
3065 GEN_TWOVEC_TEST(mvni_8h_0xA5_lsl8,  "mvni v22.8h,  #0xA5, LSL #8", 22, 23)
3066 GEN_TWOVEC_TEST(mvni_4h_0x5A_lsl0,  "mvni v22.4h,  #0x5A, LSL #0", 22, 23)
3067 GEN_TWOVEC_TEST(mvni_4h_0xA5_lsl8,  "mvni v22.4h,  #0xA5, LSL #8", 22, 23)
3068 
/* MOVI and MVNI on word lanes (4s and 2s), with one case for each of
   the shifts LSL #0, #8, #16 and #24.  Only v22 appears in the
   instruction text. */
3069 GEN_TWOVEC_TEST(movi_4s_0x5A_lsl0,  "movi v22.4s,  #0x5A, LSL #0",  22, 23)
3070 GEN_TWOVEC_TEST(movi_4s_0x6B_lsl8,  "movi v22.4s,  #0x6B, LSL #8",  22, 23)
3071 GEN_TWOVEC_TEST(movi_4s_0x49_lsl16, "movi v22.4s,  #0x49, LSL #16", 22, 23)
3072 GEN_TWOVEC_TEST(movi_4s_0x3D_lsl24, "movi v22.4s,  #0x3D, LSL #24", 22, 23)
3073 GEN_TWOVEC_TEST(movi_2s_0x5A_lsl0,  "movi v22.2s,  #0x5A, LSL #0",  22, 23)
3074 GEN_TWOVEC_TEST(movi_2s_0x6B_lsl8,  "movi v22.2s,  #0x6B, LSL #8",  22, 23)
3075 GEN_TWOVEC_TEST(movi_2s_0x49_lsl16, "movi v22.2s,  #0x49, LSL #16", 22, 23)
3076 GEN_TWOVEC_TEST(movi_2s_0x3D_lsl24, "movi v22.2s,  #0x3D, LSL #24", 22, 23)
3077 GEN_TWOVEC_TEST(mvni_4s_0x5A_lsl0,  "mvni v22.4s,  #0x5A, LSL #0",  22, 23)
3078 GEN_TWOVEC_TEST(mvni_4s_0x6B_lsl8,  "mvni v22.4s,  #0x6B, LSL #8",  22, 23)
3079 GEN_TWOVEC_TEST(mvni_4s_0x49_lsl16, "mvni v22.4s,  #0x49, LSL #16", 22, 23)
3080 GEN_TWOVEC_TEST(mvni_4s_0x3D_lsl24, "mvni v22.4s,  #0x3D, LSL #24", 22, 23)
3081 GEN_TWOVEC_TEST(mvni_2s_0x5A_lsl0,  "mvni v22.2s,  #0x5A, LSL #0",  22, 23)
3082 GEN_TWOVEC_TEST(mvni_2s_0x6B_lsl8,  "mvni v22.2s,  #0x6B, LSL #8",  22, 23)
3083 GEN_TWOVEC_TEST(mvni_2s_0x49_lsl16, "mvni v22.2s,  #0x49, LSL #16", 22, 23)
3084 GEN_TWOVEC_TEST(mvni_2s_0x3D_lsl24, "mvni v22.2s,  #0x3D, LSL #24", 22, 23)
3085 
/* MOVI and MVNI on word lanes (4s and 2s) using the MSL shift form,
   with shift amounts #8 and #16. */
3086 /* overkill -- don't need two vecs, only one: the instruction only
        names v22; register 23 is passed solely because GEN_TWOVEC_TEST
        takes two register numbers. */
3087 GEN_TWOVEC_TEST(movi_4s_0x6B_msl8,  "movi v22.4s,  #0x6B, MSL #8", 22, 23)
3088 GEN_TWOVEC_TEST(movi_4s_0x94_msl16, "movi v22.4s,  #0x94, MSL #16", 22, 23)
3089 GEN_TWOVEC_TEST(movi_2s_0x7A_msl8,  "movi v22.2s,  #0x7A, MSL #8", 22, 23)
3090 GEN_TWOVEC_TEST(movi_2s_0xA5_msl16, "movi v22.2s,  #0xA5, MSL #16", 22, 23)
3091 GEN_TWOVEC_TEST(mvni_4s_0x6B_msl8,  "mvni v22.4s,  #0x6B, MSL #8", 22, 23)
3092 GEN_TWOVEC_TEST(mvni_4s_0x94_msl16, "mvni v22.4s,  #0x94, MSL #16", 22, 23)
3093 GEN_TWOVEC_TEST(mvni_2s_0x7A_msl8,  "mvni v22.2s,  #0x7A, MSL #8", 22, 23)
3094 GEN_TWOVEC_TEST(mvni_2s_0xA5_msl16, "mvni v22.2s,  #0xA5, MSL #16", 22, 23)
3095 
/* 64-bit MOVI, scalar (d22) and vector (v22.2d) forms.  The 0xA5 / 0xB4
   in the test names is the immediate written as a mask with one bit per
   byte, most significant byte first (byte 0xFF -> 1, byte 0x00 -> 0):
   0xFF00FF0000FF00FF -> 10100101b = 0xA5,
   0xFF00FFFF00FF0000 -> 10110100b = 0xB4. */
3096 GEN_TWOVEC_TEST(movi_d_0xA5,  "movi d22,    #0xFF00FF0000FF00FF", 22, 23)
3097 GEN_TWOVEC_TEST(movi_2d_0xB4, "movi v22.2d, #0xFF00FFFF00FF0000", 22, 23)
3098 
/* NOT (bitwise invert) for the 16b and 8b arrangements. */
3099 GEN_UNARY_TEST(not, 16b, 16b)
3100 GEN_UNARY_TEST(not, 8b,  8b)
3101 
/* PMUL (polynomial multiply) for the 16b and 8b arrangements. */
3102 GEN_BINARY_TEST(pmul, 16b, 16b, 16b)
3103 GEN_BINARY_TEST(pmul, 8b, 8b, 8b)
3104 
/* PMULL / PMULL2 (polynomial multiply long): byte sources giving an 8h
   result, and 64-bit sources giving a single 128-bit (1q) result.  The
   "2" variants are paired with the full-width (16b / 2d) source
   arrangements. */
3105 GEN_BINARY_TEST(pmull,  8h, 8b,  8b)
3106 GEN_BINARY_TEST(pmull2, 8h, 16b, 16b)
3107 GEN_BINARY_TEST(pmull,  1q, 1d,  1d)
3108 GEN_BINARY_TEST(pmull2, 1q, 2d,  2d)
3109 
/* Bit and element reversal: RBIT (16b, 8b), REV16 (16b, 8b), REV32
   (16b, 8b, 8h, 4h) and REV64 (16b, 8b, 8h, 4h, 4s, 2s). */
3110 GEN_UNARY_TEST(rbit, 16b, 16b)
3111 GEN_UNARY_TEST(rbit, 8b, 8b)
3112 GEN_UNARY_TEST(rev16, 16b, 16b)
3113 GEN_UNARY_TEST(rev16, 8b, 8b)
3114 GEN_UNARY_TEST(rev32, 16b, 16b)
3115 GEN_UNARY_TEST(rev32, 8b, 8b)
3116 GEN_UNARY_TEST(rev32, 8h, 8h)
3117 GEN_UNARY_TEST(rev32, 4h, 4h)
3118 GEN_UNARY_TEST(rev64, 16b, 16b)
3119 GEN_UNARY_TEST(rev64, 8b, 8b)
3120 GEN_UNARY_TEST(rev64, 8h, 8h)
3121 GEN_UNARY_TEST(rev64, 4h, 4h)
3122 GEN_UNARY_TEST(rev64, 4s, 4s)
3123 GEN_UNARY_TEST(rev64, 2s, 2s)
3124 
/* SABA / UABA (signed / unsigned absolute difference and accumulate)
   for the 4s, 2s, 8h, 4h, 16b and 8b arrangements. */
3125 GEN_BINARY_TEST(saba, 4s, 4s, 4s)
3126 GEN_BINARY_TEST(saba, 2s, 2s, 2s)
3127 GEN_BINARY_TEST(saba, 8h, 8h, 8h)
3128 GEN_BINARY_TEST(saba, 4h, 4h, 4h)
3129 GEN_BINARY_TEST(saba, 16b, 16b, 16b)
3130 GEN_BINARY_TEST(saba, 8b, 8b, 8b)
3131 GEN_BINARY_TEST(uaba, 4s, 4s, 4s)
3132 GEN_BINARY_TEST(uaba, 2s, 2s, 2s)
3133 GEN_BINARY_TEST(uaba, 8h, 8h, 8h)
3134 GEN_BINARY_TEST(uaba, 4h, 4h, 4h)
3135 GEN_BINARY_TEST(uaba, 16b, 16b, 16b)
3136 GEN_BINARY_TEST(uaba, 8b, 8b, 8b)
3137 
/* SABAL / UABAL and their "2" variants (absolute difference and
   accumulate, long): v2 is the wide destination, v11 and v29 the narrow
   sources.  The plain form is paired with the 64-bit source
   arrangements (2s, 4h, 8b) and the "2" form with the 128-bit ones
   (4s, 8h, 16b). */
3138 GEN_THREEVEC_TEST(sabal_2d_2s_2s,  "sabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3139 GEN_THREEVEC_TEST(sabal2_2d_4s_4s, "sabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3140 GEN_THREEVEC_TEST(sabal_4s_4h_4h,  "sabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3141 GEN_THREEVEC_TEST(sabal2_4s_8h_8h, "sabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3142 GEN_THREEVEC_TEST(sabal_8h_8b_8b,  "sabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3143 GEN_THREEVEC_TEST(sabal2_8h_16b_16b,
3144                                    "sabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3145 GEN_THREEVEC_TEST(uabal_2d_2s_2s,  "uabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3146 GEN_THREEVEC_TEST(uabal2_2d_4s_4s, "uabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3147 GEN_THREEVEC_TEST(uabal_4s_4h_4h,  "uabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3148 GEN_THREEVEC_TEST(uabal2_4s_8h_8h, "uabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3149 GEN_THREEVEC_TEST(uabal_8h_8b_8b,  "uabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3150 GEN_THREEVEC_TEST(uabal2_8h_16b_16b,
3151                                    "uabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3152 
/* SABD / UABD (signed / unsigned absolute difference) for the 4s, 2s,
   8h, 4h, 16b and 8b arrangements; destination v2, sources v11, v29. */
3153 GEN_THREEVEC_TEST(sabd_4s_4s_4s,    "sabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
3154 GEN_THREEVEC_TEST(sabd_2s_2s_2s,    "sabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
3155 GEN_THREEVEC_TEST(sabd_8h_8h_8h,    "sabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
3156 GEN_THREEVEC_TEST(sabd_4h_4h_4h,    "sabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
3157 GEN_THREEVEC_TEST(sabd_16b_16b_16b, "sabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
3158 GEN_THREEVEC_TEST(sabd_8b_8b_8b,    "sabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
3159 GEN_THREEVEC_TEST(uabd_4s_4s_4s,    "uabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
3160 GEN_THREEVEC_TEST(uabd_2s_2s_2s,    "uabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
3161 GEN_THREEVEC_TEST(uabd_8h_8h_8h,    "uabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
3162 GEN_THREEVEC_TEST(uabd_4h_4h_4h,    "uabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
3163 GEN_THREEVEC_TEST(uabd_16b_16b_16b, "uabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
3164 GEN_THREEVEC_TEST(uabd_8b_8b_8b,    "uabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
3165 
/* SABDL / UABDL and their "2" variants (absolute difference, long):
   wide destination v2, narrow sources v11 and v29, for every
   destination width (2d, 4s, 8h). */
3166 GEN_THREEVEC_TEST(sabdl_2d_2s_2s,  "sabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3167 GEN_THREEVEC_TEST(sabdl2_2d_4s_4s, "sabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3168 GEN_THREEVEC_TEST(sabdl_4s_4h_4h,  "sabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3169 GEN_THREEVEC_TEST(sabdl2_4s_8h_8h, "sabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3170 GEN_THREEVEC_TEST(sabdl_8h_8b_8b,  "sabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3171 GEN_THREEVEC_TEST(sabdl2_8h_16b_16b,
3172                                    "sabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3173 GEN_THREEVEC_TEST(uabdl_2d_2s_2s,  "uabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3174 GEN_THREEVEC_TEST(uabdl2_2d_4s_4s, "uabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3175 GEN_THREEVEC_TEST(uabdl_4s_4h_4h,  "uabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3176 GEN_THREEVEC_TEST(uabdl2_4s_8h_8h, "uabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3177 GEN_THREEVEC_TEST(uabdl_8h_8b_8b,  "uabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3178 GEN_THREEVEC_TEST(uabdl2_8h_16b_16b,
3179                                    "uabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3180 
/* SADALP / UADALP (add and accumulate long pairwise): v19 is the
   source and v3 the destination with lanes twice as wide and half as
   many, for every source arrangement from 8b up to 4s. */
3181 GEN_TWOVEC_TEST(sadalp_4h_8b,  "sadalp v3.4h, v19.8b",  3, 19)
3182 GEN_TWOVEC_TEST(sadalp_8h_16b, "sadalp v3.8h, v19.16b", 3, 19)
3183 GEN_TWOVEC_TEST(sadalp_2s_4h,  "sadalp v3.2s, v19.4h",  3, 19)
3184 GEN_TWOVEC_TEST(sadalp_4s_8h,  "sadalp v3.4s, v19.8h",  3, 19)
3185 GEN_TWOVEC_TEST(sadalp_1d_2s,  "sadalp v3.1d, v19.2s",  3, 19)
3186 GEN_TWOVEC_TEST(sadalp_2d_4s,  "sadalp v3.2d, v19.4s",  3, 19)
3187 GEN_TWOVEC_TEST(uadalp_4h_8b,  "uadalp v3.4h, v19.8b",  3, 19)
3188 GEN_TWOVEC_TEST(uadalp_8h_16b, "uadalp v3.8h, v19.16b", 3, 19)
3189 GEN_TWOVEC_TEST(uadalp_2s_4h,  "uadalp v3.2s, v19.4h",  3, 19)
3190 GEN_TWOVEC_TEST(uadalp_4s_8h,  "uadalp v3.4s, v19.8h",  3, 19)
3191 GEN_TWOVEC_TEST(uadalp_1d_2s,  "uadalp v3.1d, v19.2s",  3, 19)
3192 GEN_TWOVEC_TEST(uadalp_2d_4s,  "uadalp v3.2d, v19.4s",  3, 19)
3193 
/* Long add / subtract: SADDL, UADDL, SSUBL, USUBL and their "2"
   variants.  Wide destination v2, narrow sources v11 and v29, for every
   destination width (2d, 4s, 8h). */
3194 GEN_THREEVEC_TEST(saddl_2d_2s_2s,  "saddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3195 GEN_THREEVEC_TEST(saddl2_2d_4s_4s, "saddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3196 GEN_THREEVEC_TEST(saddl_4s_4h_4h,  "saddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3197 GEN_THREEVEC_TEST(saddl2_4s_8h_8h, "saddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3198 GEN_THREEVEC_TEST(saddl_8h_8b_8b,  "saddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3199 GEN_THREEVEC_TEST(saddl2_8h_16b_16b,
3200                                    "saddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3201 GEN_THREEVEC_TEST(uaddl_2d_2s_2s,  "uaddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3202 GEN_THREEVEC_TEST(uaddl2_2d_4s_4s, "uaddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3203 GEN_THREEVEC_TEST(uaddl_4s_4h_4h,  "uaddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3204 GEN_THREEVEC_TEST(uaddl2_4s_8h_8h, "uaddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3205 GEN_THREEVEC_TEST(uaddl_8h_8b_8b,  "uaddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3206 GEN_THREEVEC_TEST(uaddl2_8h_16b_16b,
3207                                    "uaddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3208 GEN_THREEVEC_TEST(ssubl_2d_2s_2s,  "ssubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3209 GEN_THREEVEC_TEST(ssubl2_2d_4s_4s, "ssubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3210 GEN_THREEVEC_TEST(ssubl_4s_4h_4h,  "ssubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3211 GEN_THREEVEC_TEST(ssubl2_4s_8h_8h, "ssubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3212 GEN_THREEVEC_TEST(ssubl_8h_8b_8b,  "ssubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3213 GEN_THREEVEC_TEST(ssubl2_8h_16b_16b,
3214                                    "ssubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3215 GEN_THREEVEC_TEST(usubl_2d_2s_2s,  "usubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3216 GEN_THREEVEC_TEST(usubl2_2d_4s_4s, "usubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3217 GEN_THREEVEC_TEST(usubl_4s_4h_4h,  "usubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3218 GEN_THREEVEC_TEST(usubl2_4s_8h_8h, "usubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3219 GEN_THREEVEC_TEST(usubl_8h_8b_8b,  "usubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3220 GEN_THREEVEC_TEST(usubl2_8h_16b_16b,
3221                                    "usubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3222 
/* SADDLP / UADDLP (add long pairwise): source v19, destination v3 with
   lanes twice as wide and half as many, for every source arrangement
   from 8b up to 4s. */
3223 GEN_TWOVEC_TEST(saddlp_4h_8b,  "saddlp v3.4h, v19.8b",  3, 19)
3224 GEN_TWOVEC_TEST(saddlp_8h_16b, "saddlp v3.8h, v19.16b", 3, 19)
3225 GEN_TWOVEC_TEST(saddlp_2s_4h,  "saddlp v3.2s, v19.4h",  3, 19)
3226 GEN_TWOVEC_TEST(saddlp_4s_8h,  "saddlp v3.4s, v19.8h",  3, 19)
3227 GEN_TWOVEC_TEST(saddlp_1d_2s,  "saddlp v3.1d, v19.2s",  3, 19)
3228 GEN_TWOVEC_TEST(saddlp_2d_4s,  "saddlp v3.2d, v19.4s",  3, 19)
3229 GEN_TWOVEC_TEST(uaddlp_4h_8b,  "uaddlp v3.4h, v19.8b",  3, 19)
3230 GEN_TWOVEC_TEST(uaddlp_8h_16b, "uaddlp v3.8h, v19.16b", 3, 19)
3231 GEN_TWOVEC_TEST(uaddlp_2s_4h,  "uaddlp v3.2s, v19.4h",  3, 19)
3232 GEN_TWOVEC_TEST(uaddlp_4s_8h,  "uaddlp v3.4s, v19.8h",  3, 19)
3233 GEN_TWOVEC_TEST(uaddlp_1d_2s,  "uaddlp v3.1d, v19.2s",  3, 19)
3234 GEN_TWOVEC_TEST(uaddlp_2d_4s,  "uaddlp v3.2d, v19.4s",  3, 19)
3235 
/* SADDLV / UADDLV (add long across vector): the source is a whole
   vector arrangement of v19 and the destination is a scalar of
   register 3 one size wider than the source lanes (h from bytes, s
   from halfwords, d from words). */
3236 GEN_TWOVEC_TEST(saddlv_h_16b, "saddlv h3, v19.16b",  3, 19)
3237 GEN_TWOVEC_TEST(saddlv_h_8b,  "saddlv h3, v19.8b",   3, 19)
3238 GEN_TWOVEC_TEST(saddlv_s_8h,  "saddlv s3, v19.8h",   3, 19)
3239 GEN_TWOVEC_TEST(saddlv_s_4h,  "saddlv s3, v19.4h",   3, 19)
3240 GEN_TWOVEC_TEST(saddlv_d_4s,  "saddlv d3, v19.4s",   3, 19)
3241 GEN_TWOVEC_TEST(uaddlv_h_16b, "uaddlv h3, v19.16b",  3, 19)
3242 GEN_TWOVEC_TEST(uaddlv_h_8b,  "uaddlv h3, v19.8b",   3, 19)
3243 GEN_TWOVEC_TEST(uaddlv_s_8h,  "uaddlv s3, v19.8h",   3, 19)
3244 GEN_TWOVEC_TEST(uaddlv_s_4h,  "uaddlv s3, v19.4h",   3, 19)
3245 GEN_TWOVEC_TEST(uaddlv_d_4s,  "uaddlv d3, v19.4s",   3, 19)
3246 
/* Wide add / subtract: SADDW, UADDW, SSUBW, USUBW and their "2"
   variants.  v5 (destination) and v13 (first source) share the wide
   arrangement; v31 is the narrow second source. */
3247 GEN_THREEVEC_TEST(saddw2_8h_8h_16b, "saddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
3248 GEN_THREEVEC_TEST(saddw_8h_8h_8b,   "saddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
3249 GEN_THREEVEC_TEST(saddw2_4s_4s_8h,  "saddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
3250 GEN_THREEVEC_TEST(saddw_4s_4s_4h,   "saddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
3251 GEN_THREEVEC_TEST(saddw2_2d_2d_4s,  "saddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
3252 GEN_THREEVEC_TEST(saddw_2d_2d_2s,   "saddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
3253 GEN_THREEVEC_TEST(uaddw2_8h_8h_16b, "uaddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
3254 GEN_THREEVEC_TEST(uaddw_8h_8h_8b,   "uaddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
3255 GEN_THREEVEC_TEST(uaddw2_4s_4s_8h,  "uaddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
3256 GEN_THREEVEC_TEST(uaddw_4s_4s_4h,   "uaddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
3257 GEN_THREEVEC_TEST(uaddw2_2d_2d_4s,  "uaddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
3258 GEN_THREEVEC_TEST(uaddw_2d_2d_2s,   "uaddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
3259 GEN_THREEVEC_TEST(ssubw2_8h_8h_16b, "ssubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
3260 GEN_THREEVEC_TEST(ssubw_8h_8h_8b,   "ssubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
3261 GEN_THREEVEC_TEST(ssubw2_4s_4s_8h,  "ssubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
3262 GEN_THREEVEC_TEST(ssubw_4s_4s_4h,   "ssubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
3263 GEN_THREEVEC_TEST(ssubw2_2d_2d_4s,  "ssubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
3264 GEN_THREEVEC_TEST(ssubw_2d_2d_2s,   "ssubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
3265 GEN_THREEVEC_TEST(usubw2_8h_8h_16b, "usubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
3266 GEN_THREEVEC_TEST(usubw_8h_8h_8b,   "usubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
3267 GEN_THREEVEC_TEST(usubw2_4s_4s_8h,  "usubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
3268 GEN_THREEVEC_TEST(usubw_4s_4s_4h,   "usubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
3269 GEN_THREEVEC_TEST(usubw2_2d_2d_4s,  "usubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
3270 GEN_THREEVEC_TEST(usubw_2d_2d_2s,   "usubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
3271 
/* Halving add / subtract: SHADD, UHADD, SHSUB, UHSUB for the 4s, 2s,
   8h, 4h, 16b and 8b arrangements; destination v2, sources v11, v29. */
3272 GEN_THREEVEC_TEST(shadd_4s_4s_4s,   "shadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
3273 GEN_THREEVEC_TEST(shadd_2s_2s_2s,   "shadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
3274 GEN_THREEVEC_TEST(shadd_8h_8h_8h,   "shadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
3275 GEN_THREEVEC_TEST(shadd_4h_4h_4h,   "shadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
3276 GEN_THREEVEC_TEST(shadd_16b_16b_16b,"shadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
3277 GEN_THREEVEC_TEST(shadd_8b_8b_8b,   "shadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
3278 GEN_THREEVEC_TEST(uhadd_4s_4s_4s,   "uhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
3279 GEN_THREEVEC_TEST(uhadd_2s_2s_2s,   "uhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
3280 GEN_THREEVEC_TEST(uhadd_8h_8h_8h,   "uhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
3281 GEN_THREEVEC_TEST(uhadd_4h_4h_4h,   "uhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
3282 GEN_THREEVEC_TEST(uhadd_16b_16b_16b,"uhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
3283 GEN_THREEVEC_TEST(uhadd_8b_8b_8b,   "uhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
3284 GEN_THREEVEC_TEST(shsub_4s_4s_4s,   "shsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
3285 GEN_THREEVEC_TEST(shsub_2s_2s_2s,   "shsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
3286 GEN_THREEVEC_TEST(shsub_8h_8h_8h,   "shsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
3287 GEN_THREEVEC_TEST(shsub_4h_4h_4h,   "shsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
3288 GEN_THREEVEC_TEST(shsub_16b_16b_16b,"shsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
3289 GEN_THREEVEC_TEST(shsub_8b_8b_8b,   "shsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
3290 GEN_THREEVEC_TEST(uhsub_4s_4s_4s,   "uhsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
3291 GEN_THREEVEC_TEST(uhsub_2s_2s_2s,   "uhsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
3292 GEN_THREEVEC_TEST(uhsub_8h_8h_8h,   "uhsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
3293 GEN_THREEVEC_TEST(uhsub_4h_4h_4h,   "uhsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
3294 GEN_THREEVEC_TEST(uhsub_16b_16b_16b,"uhsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
3295 GEN_THREEVEC_TEST(uhsub_8b_8b_8b,   "uhsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
3296 
/* SHLL / SHLL2 (shift left long): source v24, wide destination v3.  In
   every case the shift amount equals the source lane width in bits
   (#8 for bytes, #16 for halfwords, #32 for words). */
3297 GEN_TWOVEC_TEST(shll_8h_8b_8,   "shll  v3.8h, v24.8b,  #8", 3, 24)
3298 GEN_TWOVEC_TEST(shll2_8h_16b_8, "shll2 v3.8h, v24.16b, #8", 3, 24)
3299 GEN_TWOVEC_TEST(shll_4s_4h_16,  "shll  v3.4s, v24.4h, #16", 3, 24)
3300 GEN_TWOVEC_TEST(shll2_4s_8h_16, "shll2 v3.4s, v24.8h, #16", 3, 24)
3301 GEN_TWOVEC_TEST(shll_2d_2s_32,  "shll  v3.2d, v24.2s, #32", 3, 24)
3302 GEN_TWOVEC_TEST(shll2_2d_4s_32, "shll2 v3.2d, v24.4s, #32", 3, 24)
3303 
/* SHRN / RSHRN and their "2" variants (shift right narrow, without and
   with rounding): wide source v29, narrow destination v4.  Each
   arrangement is tested with shift #1 and with a shift equal to the
   destination lane width in bits (#32, #16 or #8). */
3304 GEN_TWOVEC_TEST(shrn_2s_2d_1,   "shrn  v4.2s,  v29.2d, #1",  4, 29)
3305 GEN_TWOVEC_TEST(shrn_2s_2d_32,  "shrn  v4.2s,  v29.2d, #32", 4, 29)
3306 GEN_TWOVEC_TEST(shrn2_4s_2d_1,  "shrn2 v4.4s,  v29.2d, #1",  4, 29)
3307 GEN_TWOVEC_TEST(shrn2_4s_2d_32, "shrn2 v4.4s,  v29.2d, #32", 4, 29)
3308 GEN_TWOVEC_TEST(shrn_4h_4s_1,   "shrn  v4.4h,  v29.4s, #1",  4, 29)
3309 GEN_TWOVEC_TEST(shrn_4h_4s_16,  "shrn  v4.4h,  v29.4s, #16", 4, 29)
3310 GEN_TWOVEC_TEST(shrn2_8h_4s_1,  "shrn2 v4.8h,  v29.4s, #1",  4, 29)
3311 GEN_TWOVEC_TEST(shrn2_8h_4s_16, "shrn2 v4.8h,  v29.4s, #16", 4, 29)
3312 GEN_TWOVEC_TEST(shrn_8b_8h_1,   "shrn  v4.8b,  v29.8h, #1",  4, 29)
3313 GEN_TWOVEC_TEST(shrn_8b_8h_8,   "shrn  v4.8b,  v29.8h, #8",  4, 29)
3314 GEN_TWOVEC_TEST(shrn2_16b_8h_1, "shrn2 v4.16b, v29.8h, #1",  4, 29)
3315 GEN_TWOVEC_TEST(shrn2_16b_8h_8, "shrn2 v4.16b, v29.8h, #8",  4, 29)
3316 GEN_TWOVEC_TEST(rshrn_2s_2d_1,   "rshrn  v4.2s,  v29.2d, #1",  4, 29)
3317 GEN_TWOVEC_TEST(rshrn_2s_2d_32,  "rshrn  v4.2s,  v29.2d, #32", 4, 29)
3318 GEN_TWOVEC_TEST(rshrn2_4s_2d_1,  "rshrn2 v4.4s,  v29.2d, #1",  4, 29)
3319 GEN_TWOVEC_TEST(rshrn2_4s_2d_32, "rshrn2 v4.4s,  v29.2d, #32", 4, 29)
3320 GEN_TWOVEC_TEST(rshrn_4h_4s_1,   "rshrn  v4.4h,  v29.4s, #1",  4, 29)
3321 GEN_TWOVEC_TEST(rshrn_4h_4s_16,  "rshrn  v4.4h,  v29.4s, #16", 4, 29)
3322 GEN_TWOVEC_TEST(rshrn2_8h_4s_1,  "rshrn2 v4.8h,  v29.4s, #1",  4, 29)
3323 GEN_TWOVEC_TEST(rshrn2_8h_4s_16, "rshrn2 v4.8h,  v29.4s, #16", 4, 29)
3324 GEN_TWOVEC_TEST(rshrn_8b_8h_1,   "rshrn  v4.8b,  v29.8h, #1",  4, 29)
3325 GEN_TWOVEC_TEST(rshrn_8b_8h_8,   "rshrn  v4.8b,  v29.8h, #8",  4, 29)
3326 GEN_TWOVEC_TEST(rshrn2_16b_8h_1, "rshrn2 v4.16b, v29.8h, #1",  4, 29)
3327 GEN_TWOVEC_TEST(rshrn2_16b_8h_8, "rshrn2 v4.16b, v29.8h, #8",  4, 29)
3328 
/* Scalar SLI / SRI (shift left / right and insert) on d registers:
   destination d5, source d28.  SLI is tested with shifts #0, #32 and
   #63; SRI with #1, #33 and #64. */
3329 GEN_TWOVEC_TEST(sli_d_d_0,  "sli d5, d28, #0",  5, 28)
3330 GEN_TWOVEC_TEST(sli_d_d_32, "sli d5, d28, #32", 5, 28)
3331 GEN_TWOVEC_TEST(sli_d_d_63, "sli d5, d28, #63", 5, 28)
3332 GEN_TWOVEC_TEST(sri_d_d_1,  "sri d5, d28, #1",  5, 28)
3333 GEN_TWOVEC_TEST(sri_d_d_33, "sri d5, d28, #33", 5, 28)
3334 GEN_TWOVEC_TEST(sri_d_d_64, "sri d5, d28, #64", 5, 28)
3335 
/* Vector SLI / SRI: destination v6, source v27, for the 2d, 4s, 2s, 8h,
   4h, 16b and 8b arrangements.  Three shift amounts are used per
   arrangement: for SLI #0, a mid-range value and (lane width - 1); for
   SRI #1, a mid-range value and the full lane width. */
3336 GEN_TWOVEC_TEST(sli_2d_2d_0,   "sli v6.2d,  v27.2d, #0",  6, 27)
3337 GEN_TWOVEC_TEST(sli_2d_2d_32,  "sli v6.2d,  v27.2d, #32", 6, 27)
3338 GEN_TWOVEC_TEST(sli_2d_2d_63,  "sli v6.2d,  v27.2d, #63", 6, 27)
3339 GEN_TWOVEC_TEST(sli_4s_4s_0,   "sli v6.4s,  v27.4s, #0",  6, 27)
3340 GEN_TWOVEC_TEST(sli_4s_4s_16,  "sli v6.4s,  v27.4s, #16", 6, 27)
3341 GEN_TWOVEC_TEST(sli_4s_4s_31,  "sli v6.4s,  v27.4s, #31", 6, 27)
3342 GEN_TWOVEC_TEST(sli_2s_2s_0,   "sli v6.2s,  v27.2s, #0",  6, 27)
3343 GEN_TWOVEC_TEST(sli_2s_2s_16,  "sli v6.2s,  v27.2s, #16", 6, 27)
3344 GEN_TWOVEC_TEST(sli_2s_2s_31,  "sli v6.2s,  v27.2s, #31", 6, 27)
3345 GEN_TWOVEC_TEST(sli_8h_8h_0,   "sli v6.8h,  v27.8h, #0",  6, 27)
3346 GEN_TWOVEC_TEST(sli_8h_8h_8,   "sli v6.8h,  v27.8h, #8",  6, 27)
3347 GEN_TWOVEC_TEST(sli_8h_8h_15,  "sli v6.8h,  v27.8h, #15", 6, 27)
3348 GEN_TWOVEC_TEST(sli_4h_4h_0,   "sli v6.4h,  v27.4h, #0",  6, 27)
3349 GEN_TWOVEC_TEST(sli_4h_4h_8,   "sli v6.4h,  v27.4h, #8",  6, 27)
3350 GEN_TWOVEC_TEST(sli_4h_4h_15,  "sli v6.4h,  v27.4h, #15", 6, 27)
3351 GEN_TWOVEC_TEST(sli_16b_16b_0, "sli v6.16b, v27.16b, #0", 6, 27)
3352 GEN_TWOVEC_TEST(sli_16b_16b_3, "sli v6.16b, v27.16b, #3", 6, 27)
3353 GEN_TWOVEC_TEST(sli_16b_16b_7, "sli v6.16b, v27.16b, #7", 6, 27)
3354 GEN_TWOVEC_TEST(sli_8b_8b_0,   "sli v6.8b,  v27.8b, #0",  6, 27)
3355 GEN_TWOVEC_TEST(sli_8b_8b_3,   "sli v6.8b,  v27.8b, #3",  6, 27)
3356 GEN_TWOVEC_TEST(sli_8b_8b_7,   "sli v6.8b,  v27.8b, #7",  6, 27)
3357 GEN_TWOVEC_TEST(sri_2d_2d_1,   "sri v6.2d,  v27.2d,  #1",  6, 27)
3358 GEN_TWOVEC_TEST(sri_2d_2d_33,  "sri v6.2d,  v27.2d,  #33", 6, 27)
3359 GEN_TWOVEC_TEST(sri_2d_2d_64,  "sri v6.2d,  v27.2d,  #64", 6, 27)
3360 GEN_TWOVEC_TEST(sri_4s_4s_1,   "sri v6.4s,  v27.4s,  #1",  6, 27)
3361 GEN_TWOVEC_TEST(sri_4s_4s_17,  "sri v6.4s,  v27.4s,  #17", 6, 27)
3362 GEN_TWOVEC_TEST(sri_4s_4s_32,  "sri v6.4s,  v27.4s,  #32", 6, 27)
3363 GEN_TWOVEC_TEST(sri_2s_2s_1,   "sri v6.2s,  v27.2s,  #1",  6, 27)
3364 GEN_TWOVEC_TEST(sri_2s_2s_17,  "sri v6.2s,  v27.2s,  #17", 6, 27)
3365 GEN_TWOVEC_TEST(sri_2s_2s_32,  "sri v6.2s,  v27.2s,  #32", 6, 27)
3366 GEN_TWOVEC_TEST(sri_8h_8h_1,   "sri v6.8h,  v27.8h,  #1",  6, 27)
3367 GEN_TWOVEC_TEST(sri_8h_8h_8,   "sri v6.8h,  v27.8h,  #8",  6, 27)
3368 GEN_TWOVEC_TEST(sri_8h_8h_16,  "sri v6.8h,  v27.8h,  #16", 6, 27)
3369 GEN_TWOVEC_TEST(sri_4h_4h_1,   "sri v6.4h,  v27.4h,  #1",  6, 27)
3370 GEN_TWOVEC_TEST(sri_4h_4h_8,   "sri v6.4h,  v27.4h,  #8",  6, 27)
3371 GEN_TWOVEC_TEST(sri_4h_4h_16,  "sri v6.4h,  v27.4h,  #16", 6, 27)
3372 GEN_TWOVEC_TEST(sri_16b_16b_1, "sri v6.16b, v27.16b, #1", 6, 27)
3373 GEN_TWOVEC_TEST(sri_16b_16b_4, "sri v6.16b, v27.16b, #4", 6, 27)
3374 GEN_TWOVEC_TEST(sri_16b_16b_8, "sri v6.16b, v27.16b, #8", 6, 27)
3375 GEN_TWOVEC_TEST(sri_8b_8b_1,   "sri v6.8b,  v27.8b,  #1",  6, 27)
3376 GEN_TWOVEC_TEST(sri_8b_8b_4,   "sri v6.8b,  v27.8b,  #4",  6, 27)
3377 GEN_TWOVEC_TEST(sri_8b_8b_8,   "sri v6.8b,  v27.8b,  #8",  6, 27)
3378 
/* Lane-wise integer maximum / minimum: SMAX, UMAX, SMIN, UMIN, each for
   the 4s, 2s, 8h, 4h, 16b and 8b arrangements. */
3379 GEN_BINARY_TEST(smax, 4s, 4s, 4s)
3380 GEN_BINARY_TEST(smax, 2s, 2s, 2s)
3381 GEN_BINARY_TEST(smax, 8h, 8h, 8h)
3382 GEN_BINARY_TEST(smax, 4h, 4h, 4h)
3383 GEN_BINARY_TEST(smax, 16b, 16b, 16b)
3384 GEN_BINARY_TEST(smax, 8b, 8b, 8b)
3385 GEN_BINARY_TEST(umax, 4s, 4s, 4s)
3386 GEN_BINARY_TEST(umax, 2s, 2s, 2s)
3387 GEN_BINARY_TEST(umax, 8h, 8h, 8h)
3388 GEN_BINARY_TEST(umax, 4h, 4h, 4h)
3389 GEN_BINARY_TEST(umax, 16b, 16b, 16b)
3390 GEN_BINARY_TEST(umax, 8b, 8b, 8b)
3391 GEN_BINARY_TEST(smin, 4s, 4s, 4s)
3392 GEN_BINARY_TEST(smin, 2s, 2s, 2s)
3393 GEN_BINARY_TEST(smin, 8h, 8h, 8h)
3394 GEN_BINARY_TEST(smin, 4h, 4h, 4h)
3395 GEN_BINARY_TEST(smin, 16b, 16b, 16b)
3396 GEN_BINARY_TEST(smin, 8b, 8b, 8b)
3397 GEN_BINARY_TEST(umin, 4s, 4s, 4s)
3398 GEN_BINARY_TEST(umin, 2s, 2s, 2s)
3399 GEN_BINARY_TEST(umin, 8h, 8h, 8h)
3400 GEN_BINARY_TEST(umin, 4h, 4h, 4h)
3401 GEN_BINARY_TEST(umin, 16b, 16b, 16b)
3402 GEN_BINARY_TEST(umin, 8b, 8b, 8b)
3403 
/* Pairwise integer maximum / minimum: SMAXP, UMAXP, SMINP, UMINP, each
   for the 4s, 2s, 8h, 4h, 16b and 8b arrangements. */
3404 GEN_BINARY_TEST(smaxp, 4s, 4s, 4s)
3405 GEN_BINARY_TEST(smaxp, 2s, 2s, 2s)
3406 GEN_BINARY_TEST(smaxp, 8h, 8h, 8h)
3407 GEN_BINARY_TEST(smaxp, 4h, 4h, 4h)
3408 GEN_BINARY_TEST(smaxp, 16b, 16b, 16b)
3409 GEN_BINARY_TEST(smaxp, 8b, 8b, 8b)
3410 GEN_BINARY_TEST(umaxp, 4s, 4s, 4s)
3411 GEN_BINARY_TEST(umaxp, 2s, 2s, 2s)
3412 GEN_BINARY_TEST(umaxp, 8h, 8h, 8h)
3413 GEN_BINARY_TEST(umaxp, 4h, 4h, 4h)
3414 GEN_BINARY_TEST(umaxp, 16b, 16b, 16b)
3415 GEN_BINARY_TEST(umaxp, 8b, 8b, 8b)
3416 GEN_BINARY_TEST(sminp, 4s, 4s, 4s)
3417 GEN_BINARY_TEST(sminp, 2s, 2s, 2s)
3418 GEN_BINARY_TEST(sminp, 8h, 8h, 8h)
3419 GEN_BINARY_TEST(sminp, 4h, 4h, 4h)
3420 GEN_BINARY_TEST(sminp, 16b, 16b, 16b)
3421 GEN_BINARY_TEST(sminp, 8b, 8b, 8b)
3422 GEN_BINARY_TEST(uminp, 4s, 4s, 4s)
3423 GEN_BINARY_TEST(uminp, 2s, 2s, 2s)
3424 GEN_BINARY_TEST(uminp, 8h, 8h, 8h)
3425 GEN_BINARY_TEST(uminp, 4h, 4h, 4h)
3426 GEN_BINARY_TEST(uminp, 16b, 16b, 16b)
3427 GEN_BINARY_TEST(uminp, 8b, 8b, 8b)
3428 
3429 // test_SMAXV is a handwritten function
3430 // test_UMAXV is a handwritten function
3431 // test_SMINV is a handwritten function
3432 // test_UMINV is a handwritten function
3433 
// By-element ("v3.T[i]") forms of smlal, umlal, smlsl, umlsl, smull and
// umull, plus their "2" variants.  For each mnemonic the .s-lane forms use
// indices s[0]/s[3] (base) and s[1]/s[2] ("2" variant); the .h-lane forms
// use h[0]/h[7] (base) and h[1]/h[4] ("2" variant).  The trailing integers
// repeat the register numbers that appear in the instruction string
// (v29, v20, v3).
// The *_h4 cases must select lane h[4], as their names state; using h[1]
// there would only duplicate the corresponding *_h1 case.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section.  If the
// instruction string is part of the test's printed output, any recorded
// expected output for the *_h4 cases needs regenerating -- confirm.
3434 GEN_THREEVEC_TEST(smlal_2d_2s_s0,  "smlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
3435 GEN_THREEVEC_TEST(smlal_2d_2s_s3,  "smlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
3436 GEN_THREEVEC_TEST(smlal2_2d_4s_s1, "smlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
3437 GEN_THREEVEC_TEST(smlal2_2d_4s_s2, "smlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
3438 GEN_THREEVEC_TEST(smlal_4s_4h_h0,  "smlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
3439 GEN_THREEVEC_TEST(smlal_4s_4h_h7,  "smlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
3440 GEN_THREEVEC_TEST(smlal2_4s_8h_h1, "smlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
3441 GEN_THREEVEC_TEST(smlal2_4s_8h_h4, "smlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
3442 GEN_THREEVEC_TEST(umlal_2d_2s_s0,  "umlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
3443 GEN_THREEVEC_TEST(umlal_2d_2s_s3,  "umlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
3444 GEN_THREEVEC_TEST(umlal2_2d_4s_s1, "umlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
3445 GEN_THREEVEC_TEST(umlal2_2d_4s_s2, "umlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
3446 GEN_THREEVEC_TEST(umlal_4s_4h_h0,  "umlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
3447 GEN_THREEVEC_TEST(umlal_4s_4h_h7,  "umlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
3448 GEN_THREEVEC_TEST(umlal2_4s_8h_h1, "umlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
3449 GEN_THREEVEC_TEST(umlal2_4s_8h_h4, "umlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
3450 GEN_THREEVEC_TEST(smlsl_2d_2s_s0,  "smlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
3451 GEN_THREEVEC_TEST(smlsl_2d_2s_s3,  "smlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
3452 GEN_THREEVEC_TEST(smlsl2_2d_4s_s1, "smlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
3453 GEN_THREEVEC_TEST(smlsl2_2d_4s_s2, "smlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
3454 GEN_THREEVEC_TEST(smlsl_4s_4h_h0,  "smlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
3455 GEN_THREEVEC_TEST(smlsl_4s_4h_h7,  "smlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
3456 GEN_THREEVEC_TEST(smlsl2_4s_8h_h1, "smlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
3457 GEN_THREEVEC_TEST(smlsl2_4s_8h_h4, "smlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
3458 GEN_THREEVEC_TEST(umlsl_2d_2s_s0,  "umlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
3459 GEN_THREEVEC_TEST(umlsl_2d_2s_s3,  "umlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
3460 GEN_THREEVEC_TEST(umlsl2_2d_4s_s1, "umlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
3461 GEN_THREEVEC_TEST(umlsl2_2d_4s_s2, "umlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
3462 GEN_THREEVEC_TEST(umlsl_4s_4h_h0,  "umlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
3463 GEN_THREEVEC_TEST(umlsl_4s_4h_h7,  "umlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
3464 GEN_THREEVEC_TEST(umlsl2_4s_8h_h1, "umlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
3465 GEN_THREEVEC_TEST(umlsl2_4s_8h_h4, "umlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
3466 GEN_THREEVEC_TEST(smull_2d_2s_s0,  "smull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
3467 GEN_THREEVEC_TEST(smull_2d_2s_s3,  "smull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
3468 GEN_THREEVEC_TEST(smull2_2d_4s_s1, "smull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
3469 GEN_THREEVEC_TEST(smull2_2d_4s_s2, "smull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
3470 GEN_THREEVEC_TEST(smull_4s_4h_h0,  "smull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
3471 GEN_THREEVEC_TEST(smull_4s_4h_h7,  "smull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
3472 GEN_THREEVEC_TEST(smull2_4s_8h_h1, "smull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
3473 GEN_THREEVEC_TEST(smull2_4s_8h_h4, "smull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
3474 GEN_THREEVEC_TEST(umull_2d_2s_s0,  "umull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
3475 GEN_THREEVEC_TEST(umull_2d_2s_s3,  "umull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
3476 GEN_THREEVEC_TEST(umull2_2d_4s_s1, "umull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
3477 GEN_THREEVEC_TEST(umull2_2d_4s_s2, "umull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
3478 GEN_THREEVEC_TEST(umull_4s_4h_h0,  "umull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
3479 GEN_THREEVEC_TEST(umull_4s_4h_h7,  "umull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
3480 GEN_THREEVEC_TEST(umull2_4s_8h_h1, "umull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
3481 GEN_THREEVEC_TEST(umull2_4s_8h_h4, "umull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
3482 
// Vector-by-vector forms of smlal, umlal, smlsl, umlsl, smull and umull,
// plus their "2" variants.  The destination arrangement has wider lanes
// than the sources: 2d from 2s/4s, 4s from 4h/8h, 8h from 8b/16b.  The
// trailing integers repeat the register numbers used in the instruction
// string (v2, v11, v29).
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3483 GEN_THREEVEC_TEST(smlal_2d_2s_2s,  "smlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3484 GEN_THREEVEC_TEST(smlal2_2d_4s_4s, "smlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3485 GEN_THREEVEC_TEST(smlal_4s_4h_4h,  "smlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3486 GEN_THREEVEC_TEST(smlal2_4s_8h_8h, "smlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3487 GEN_THREEVEC_TEST(smlal_8h_8b_8b,  "smlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3488 GEN_THREEVEC_TEST(smlal2_8h_16b_16b,
3489                                    "smlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3490 GEN_THREEVEC_TEST(umlal_2d_2s_2s,  "umlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3491 GEN_THREEVEC_TEST(umlal2_2d_4s_4s, "umlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3492 GEN_THREEVEC_TEST(umlal_4s_4h_4h,  "umlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3493 GEN_THREEVEC_TEST(umlal2_4s_8h_8h, "umlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3494 GEN_THREEVEC_TEST(umlal_8h_8b_8b,  "umlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3495 GEN_THREEVEC_TEST(umlal2_8h_16b_16b,
3496                                    "umlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3497 GEN_THREEVEC_TEST(smlsl_2d_2s_2s,  "smlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3498 GEN_THREEVEC_TEST(smlsl2_2d_4s_4s, "smlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3499 GEN_THREEVEC_TEST(smlsl_4s_4h_4h,  "smlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3500 GEN_THREEVEC_TEST(smlsl2_4s_8h_8h, "smlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3501 GEN_THREEVEC_TEST(smlsl_8h_8b_8b,  "smlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3502 GEN_THREEVEC_TEST(smlsl2_8h_16b_16b,
3503                                    "smlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3504 GEN_THREEVEC_TEST(umlsl_2d_2s_2s,  "umlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3505 GEN_THREEVEC_TEST(umlsl2_2d_4s_4s, "umlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3506 GEN_THREEVEC_TEST(umlsl_4s_4h_4h,  "umlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3507 GEN_THREEVEC_TEST(umlsl2_4s_8h_8h, "umlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3508 GEN_THREEVEC_TEST(umlsl_8h_8b_8b,  "umlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3509 GEN_THREEVEC_TEST(umlsl2_8h_16b_16b,
3510                                    "umlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3511 GEN_THREEVEC_TEST(smull_2d_2s_2s,  "smull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3512 GEN_THREEVEC_TEST(smull2_2d_4s_4s, "smull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3513 GEN_THREEVEC_TEST(smull_4s_4h_4h,  "smull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3514 GEN_THREEVEC_TEST(smull2_4s_8h_8h, "smull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3515 GEN_THREEVEC_TEST(smull_8h_8b_8b,  "smull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3516 GEN_THREEVEC_TEST(smull2_8h_16b_16b,
3517                                    "smull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3518 GEN_THREEVEC_TEST(umull_2d_2s_2s,  "umull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
3519 GEN_THREEVEC_TEST(umull2_2d_4s_4s, "umull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
3520 GEN_THREEVEC_TEST(umull_4s_4h_4h,  "umull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
3521 GEN_THREEVEC_TEST(umull2_4s_8h_8h, "umull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
3522 GEN_THREEVEC_TEST(umull_8h_8b_8b,  "umull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
3523 GEN_THREEVEC_TEST(umull2_8h_16b_16b,
3524                                    "umull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
3525 
// umov and smov from one lane of v10 into general register 9 (as x9 or
// w9).  Each element size is exercised with its lowest and highest lane
// index: d[0]/d[1], s[0]/s[3], h[0]/h[7], b[0]/b[15].  The trailing
// integers repeat the register numbers used in the instruction string.
// NOTE(review): GEN_ONEINT_ONEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3526 GEN_ONEINT_ONEVEC_TEST(umov_x_d0,  "umov x9, v10.d[0]", 9, 10)
3527 GEN_ONEINT_ONEVEC_TEST(umov_x_d1,  "umov x9, v10.d[1]", 9, 10)
3528 GEN_ONEINT_ONEVEC_TEST(umov_w_s0,  "umov w9, v10.s[0]", 9, 10)
3529 GEN_ONEINT_ONEVEC_TEST(umov_w_s3,  "umov w9, v10.s[3]", 9, 10)
3530 GEN_ONEINT_ONEVEC_TEST(umov_w_h0,  "umov w9, v10.h[0]", 9, 10)
3531 GEN_ONEINT_ONEVEC_TEST(umov_w_h7,  "umov w9, v10.h[7]", 9, 10)
3532 GEN_ONEINT_ONEVEC_TEST(umov_w_b0,  "umov w9, v10.b[0]", 9, 10)
3533 GEN_ONEINT_ONEVEC_TEST(umov_w_b15, "umov w9, v10.b[15]", 9, 10)
3534 GEN_ONEINT_ONEVEC_TEST(smov_x_s0,  "smov x9, v10.s[0]", 9, 10)
3535 GEN_ONEINT_ONEVEC_TEST(smov_x_s3,  "smov x9, v10.s[3]", 9, 10)
3536 GEN_ONEINT_ONEVEC_TEST(smov_x_h0,  "smov x9, v10.h[0]", 9, 10)
3537 GEN_ONEINT_ONEVEC_TEST(smov_x_h7,  "smov x9, v10.h[7]", 9, 10)
3538 GEN_ONEINT_ONEVEC_TEST(smov_w_h0,  "smov w9, v10.h[0]", 9, 10)
3539 GEN_ONEINT_ONEVEC_TEST(smov_w_h7,  "smov w9, v10.h[7]", 9, 10)
3540 GEN_ONEINT_ONEVEC_TEST(smov_x_b0,  "smov x9, v10.b[0]", 9, 10)
3541 GEN_ONEINT_ONEVEC_TEST(smov_x_b15, "smov x9, v10.b[15]", 9, 10)
3542 GEN_ONEINT_ONEVEC_TEST(smov_w_b0,  "smov w9, v10.b[0]", 9, 10)
3543 GEN_ONEINT_ONEVEC_TEST(smov_w_b15, "smov w9, v10.b[15]", 9, 10)
3544 
// Scalar sqabs and sqneg on d, s, h and b registers (destination 7,
// source 30).  The trailing integers repeat the register numbers used in
// the instruction string.
// NOTE(review): GEN_TWOVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3545 GEN_TWOVEC_TEST(sqabs_d_d, "sqabs d7, d30", 7, 30)
3546 GEN_TWOVEC_TEST(sqabs_s_s, "sqabs s7, s30", 7, 30)
3547 GEN_TWOVEC_TEST(sqabs_h_h, "sqabs h7, h30", 7, 30)
3548 GEN_TWOVEC_TEST(sqabs_b_b, "sqabs b7, b30", 7, 30)
3549 GEN_TWOVEC_TEST(sqneg_d_d, "sqneg d7, d30", 7, 30)
3550 GEN_TWOVEC_TEST(sqneg_s_s, "sqneg s7, s30", 7, 30)
3551 GEN_TWOVEC_TEST(sqneg_h_h, "sqneg h7, h30", 7, 30)
3552 GEN_TWOVEC_TEST(sqneg_b_b, "sqneg b7, b30", 7, 30)
3553 
// Vector sqabs and sqneg for the arrangements 2d, 4s, 2s, 8h, 4h, 16b and
// 8b, with the same arrangement passed for both operand slots.
// NOTE(review): GEN_UNARY_TEST is defined outside this section; presumably
// each invocation emits one test function -- confirm against the macro.
3554 GEN_UNARY_TEST(sqabs, 2d, 2d)
3555 GEN_UNARY_TEST(sqabs, 4s, 4s)
3556 GEN_UNARY_TEST(sqabs, 2s, 2s)
3557 GEN_UNARY_TEST(sqabs, 8h, 8h)
3558 GEN_UNARY_TEST(sqabs, 4h, 4h)
3559 GEN_UNARY_TEST(sqabs, 16b, 16b)
3560 GEN_UNARY_TEST(sqabs, 8b, 8b)
3561 GEN_UNARY_TEST(sqneg, 2d, 2d)
3562 GEN_UNARY_TEST(sqneg, 4s, 4s)
3563 GEN_UNARY_TEST(sqneg, 2s, 2s)
3564 GEN_UNARY_TEST(sqneg, 8h, 8h)
3565 GEN_UNARY_TEST(sqneg, 4h, 4h)
3566 GEN_UNARY_TEST(sqneg, 16b, 16b)
3567 GEN_UNARY_TEST(sqneg, 8b, 8b)
3568 
// Scalar sqadd, uqadd, sqsub and uqsub on d, s, h and b registers, using
// registers 1, 2 and 4.  The trailing integers repeat the register numbers
// used in the instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3569 GEN_THREEVEC_TEST(sqadd_d_d_d, "sqadd d1, d2, d4", 1, 2, 4)
3570 GEN_THREEVEC_TEST(sqadd_s_s_s, "sqadd s1, s2, s4", 1, 2, 4)
3571 GEN_THREEVEC_TEST(sqadd_h_h_h, "sqadd h1, h2, h4", 1, 2, 4)
3572 GEN_THREEVEC_TEST(sqadd_b_b_b, "sqadd b1, b2, b4", 1, 2, 4)
3573 GEN_THREEVEC_TEST(uqadd_d_d_d, "uqadd d1, d2, d4", 1, 2, 4)
3574 GEN_THREEVEC_TEST(uqadd_s_s_s, "uqadd s1, s2, s4", 1, 2, 4)
3575 GEN_THREEVEC_TEST(uqadd_h_h_h, "uqadd h1, h2, h4", 1, 2, 4)
3576 GEN_THREEVEC_TEST(uqadd_b_b_b, "uqadd b1, b2, b4", 1, 2, 4)
3577 GEN_THREEVEC_TEST(sqsub_d_d_d, "sqsub d1, d2, d4", 1, 2, 4)
3578 GEN_THREEVEC_TEST(sqsub_s_s_s, "sqsub s1, s2, s4", 1, 2, 4)
3579 GEN_THREEVEC_TEST(sqsub_h_h_h, "sqsub h1, h2, h4", 1, 2, 4)
3580 GEN_THREEVEC_TEST(sqsub_b_b_b, "sqsub b1, b2, b4", 1, 2, 4)
3581 GEN_THREEVEC_TEST(uqsub_d_d_d, "uqsub d1, d2, d4", 1, 2, 4)
3582 GEN_THREEVEC_TEST(uqsub_s_s_s, "uqsub s1, s2, s4", 1, 2, 4)
3583 GEN_THREEVEC_TEST(uqsub_h_h_h, "uqsub h1, h2, h4", 1, 2, 4)
3584 GEN_THREEVEC_TEST(uqsub_b_b_b, "uqsub b1, b2, b4", 1, 2, 4)
3585 
// Vector sqadd, uqadd, sqsub and uqsub for the arrangements 2d, 4s, 2s,
// 8h, 4h, 16b and 8b, using v1, v2 and v4.  The trailing integers repeat
// the register numbers used in the instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3586 GEN_THREEVEC_TEST(sqadd_2d_2d_2d,    "sqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3587 GEN_THREEVEC_TEST(sqadd_4s_4s_4s,    "sqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3588 GEN_THREEVEC_TEST(sqadd_2s_2s_2s,    "sqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3589 GEN_THREEVEC_TEST(sqadd_8h_8h_8h,    "sqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3590 GEN_THREEVEC_TEST(sqadd_4h_4h_4h,    "sqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3591 GEN_THREEVEC_TEST(sqadd_16b_16b_16b, "sqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
3592 GEN_THREEVEC_TEST(sqadd_8b_8b_8b,    "sqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3593 GEN_THREEVEC_TEST(uqadd_2d_2d_2d,    "uqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3594 GEN_THREEVEC_TEST(uqadd_4s_4s_4s,    "uqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3595 GEN_THREEVEC_TEST(uqadd_2s_2s_2s,    "uqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3596 GEN_THREEVEC_TEST(uqadd_8h_8h_8h,    "uqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3597 GEN_THREEVEC_TEST(uqadd_4h_4h_4h,    "uqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3598 GEN_THREEVEC_TEST(uqadd_16b_16b_16b, "uqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
3599 GEN_THREEVEC_TEST(uqadd_8b_8b_8b,    "uqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3600 GEN_THREEVEC_TEST(sqsub_2d_2d_2d,    "sqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3601 GEN_THREEVEC_TEST(sqsub_4s_4s_4s,    "sqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3602 GEN_THREEVEC_TEST(sqsub_2s_2s_2s,    "sqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3603 GEN_THREEVEC_TEST(sqsub_8h_8h_8h,    "sqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3604 GEN_THREEVEC_TEST(sqsub_4h_4h_4h,    "sqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3605 GEN_THREEVEC_TEST(sqsub_16b_16b_16b, "sqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
3606 GEN_THREEVEC_TEST(sqsub_8b_8b_8b,    "sqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3607 GEN_THREEVEC_TEST(uqsub_2d_2d_2d,    "uqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3608 GEN_THREEVEC_TEST(uqsub_4s_4s_4s,    "uqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3609 GEN_THREEVEC_TEST(uqsub_2s_2s_2s,    "uqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3610 GEN_THREEVEC_TEST(uqsub_8h_8h_8h,    "uqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3611 GEN_THREEVEC_TEST(uqsub_4h_4h_4h,    "uqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3612 GEN_THREEVEC_TEST(uqsub_16b_16b_16b, "uqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
3613 GEN_THREEVEC_TEST(uqsub_8b_8b_8b,    "uqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3614 
// Scalar by-element forms of sqdmlal, sqdmlsl and sqdmull.  The d<-s forms
// take the element from v29 (s[0] and s[3]); the s<-h forms take it from
// v13 (h[1] and h[5]).  The trailing integers repeat the register numbers
// used in the instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3615 GEN_THREEVEC_TEST(sqdmlal_d_s_s0, "sqdmlal d31, s30, v29.s[0]", 31,30,29)
3616 GEN_THREEVEC_TEST(sqdmlal_d_s_s3, "sqdmlal d31, s30, v29.s[3]", 31,30,29)
3617 GEN_THREEVEC_TEST(sqdmlal_s_h_h1, "sqdmlal s31, h30, v13.h[1]", 31,30,13)
3618 GEN_THREEVEC_TEST(sqdmlal_s_h_h5, "sqdmlal s31, h30, v13.h[5]", 31,30,13)
3619 GEN_THREEVEC_TEST(sqdmlsl_d_s_s0, "sqdmlsl d31, s30, v29.s[0]", 31,30,29)
3620 GEN_THREEVEC_TEST(sqdmlsl_d_s_s3, "sqdmlsl d31, s30, v29.s[3]", 31,30,29)
3621 GEN_THREEVEC_TEST(sqdmlsl_s_h_h1, "sqdmlsl s31, h30, v13.h[1]", 31,30,13)
3622 GEN_THREEVEC_TEST(sqdmlsl_s_h_h5, "sqdmlsl s31, h30, v13.h[5]", 31,30,13)
3623 GEN_THREEVEC_TEST(sqdmull_d_s_s0, "sqdmull d31, s30, v29.s[0]", 31,30,29)
3624 GEN_THREEVEC_TEST(sqdmull_d_s_s3, "sqdmull d31, s30, v29.s[3]", 31,30,29)
3625 GEN_THREEVEC_TEST(sqdmull_s_h_h1, "sqdmull s31, h30, v13.h[1]", 31,30,13)
3626 GEN_THREEVEC_TEST(sqdmull_s_h_h5, "sqdmull s31, h30, v13.h[5]", 31,30,13)
3627 
// Vector by-element ("v3.T[i]") forms of sqdmlal, sqdmlsl and sqdmull, plus
// their "2" variants.  The .s-lane forms use indices s[0]/s[3] (base) and
// s[1]/s[2] ("2" variant); the .h-lane forms use h[0]/h[7] (base) and
// h[1]/h[4] ("2" variant).  The trailing integers repeat the register
// numbers that appear in the instruction string (v29, v20, v3).
// The *_h4 cases must select lane h[4], as their names state; using h[1]
// there would only duplicate the corresponding *_h1 case.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section.  If the
// instruction string is part of the test's printed output, any recorded
// expected output for the *_h4 cases needs regenerating -- confirm.
3628 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s0, "sqdmlal  v29.2d, v20.2s, v3.s[0]",29,20,3)
3629 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s3, "sqdmlal  v29.2d, v20.2s, v3.s[3]",29,20,3)
3630 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s1,"sqdmlal2 v29.2d, v20.4s, v3.s[1]",29,20,3)
3631 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s2,"sqdmlal2 v29.2d, v20.4s, v3.s[2]",29,20,3)
3632 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h0, "sqdmlal  v29.4s, v20.4h, v3.h[0]",29,20,3)
3633 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h7, "sqdmlal  v29.4s, v20.4h, v3.h[7]",29,20,3)
3634 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h1,"sqdmlal2 v29.4s, v20.8h, v3.h[1]",29,20,3)
3635 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h4,"sqdmlal2 v29.4s, v20.8h, v3.h[4]",29,20,3)
3636 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s0, "sqdmlsl  v29.2d, v20.2s, v3.s[0]",29,20,3)
3637 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s3, "sqdmlsl  v29.2d, v20.2s, v3.s[3]",29,20,3)
3638 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s1,"sqdmlsl2 v29.2d, v20.4s, v3.s[1]",29,20,3)
3639 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s2,"sqdmlsl2 v29.2d, v20.4s, v3.s[2]",29,20,3)
3640 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h0, "sqdmlsl  v29.4s, v20.4h, v3.h[0]",29,20,3)
3641 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h7, "sqdmlsl  v29.4s, v20.4h, v3.h[7]",29,20,3)
3642 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h1,"sqdmlsl2 v29.4s, v20.8h, v3.h[1]",29,20,3)
3643 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h4,"sqdmlsl2 v29.4s, v20.8h, v3.h[4]",29,20,3)
3644 GEN_THREEVEC_TEST(sqdmull_2d_2s_s0, "sqdmull  v29.2d, v20.2s, v3.s[0]",29,20,3)
3645 GEN_THREEVEC_TEST(sqdmull_2d_2s_s3, "sqdmull  v29.2d, v20.2s, v3.s[3]",29,20,3)
3646 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s1,"sqdmull2 v29.2d, v20.4s, v3.s[1]",29,20,3)
3647 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s2,"sqdmull2 v29.2d, v20.4s, v3.s[2]",29,20,3)
3648 GEN_THREEVEC_TEST(sqdmull_4s_4h_h0, "sqdmull  v29.4s, v20.4h, v3.h[0]",29,20,3)
3649 GEN_THREEVEC_TEST(sqdmull_4s_4h_h7, "sqdmull  v29.4s, v20.4h, v3.h[7]",29,20,3)
3650 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h1,"sqdmull2 v29.4s, v20.8h, v3.h[1]",29,20,3)
3651 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h4,"sqdmull2 v29.4s, v20.8h, v3.h[4]",29,20,3)
3652 
// Scalar sqdmlal, sqdmlsl and sqdmull in the d<-s,s and s<-h,h forms, using
// registers 0, 8 and 16.  The trailing integers repeat the register
// numbers used in the instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3653 GEN_THREEVEC_TEST(sqdmlal_d_s_s, "sqdmlal d0, s8, s16", 0, 8, 16)
3654 GEN_THREEVEC_TEST(sqdmlal_s_h_h, "sqdmlal s0, h8, h16", 0, 8, 16)
3655 GEN_THREEVEC_TEST(sqdmlsl_d_s_s, "sqdmlsl d0, s8, s16", 0, 8, 16)
3656 GEN_THREEVEC_TEST(sqdmlsl_s_h_h, "sqdmlsl s0, h8, h16", 0, 8, 16)
3657 GEN_THREEVEC_TEST(sqdmull_d_s_s, "sqdmull d0, s8, s16", 0, 8, 16)
3658 GEN_THREEVEC_TEST(sqdmull_s_h_h, "sqdmull s0, h8, h16", 0, 8, 16)
3659 
// Vector-by-vector sqdmlal, sqdmlsl and sqdmull, plus their "2" variants:
// 2d from 2s/4s sources and 4s from 4h/8h sources, using v2, v11 and v29.
// The trailing integers repeat the register numbers used in the
// instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3660 GEN_THREEVEC_TEST(sqdmlal_2d_2s_2s,  "sqdmlal  v2.2d, v11.2s, v29.2s", 2,11,29)
3661 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_4s, "sqdmlal2 v2.2d, v11.4s, v29.4s", 2,11,29)
3662 GEN_THREEVEC_TEST(sqdmlal_4s_4h_4h,  "sqdmlal  v2.4s, v11.4h, v29.4h", 2,11,29)
3663 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_8h, "sqdmlal2 v2.4s, v11.8h, v29.8h", 2,11,29)
3664 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_2s,  "sqdmlsl  v2.2d, v11.2s, v29.2s", 2,11,29)
3665 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_4s, "sqdmlsl2 v2.2d, v11.4s, v29.4s", 2,11,29)
3666 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_4h,  "sqdmlsl  v2.4s, v11.4h, v29.4h", 2,11,29)
3667 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_8h, "sqdmlsl2 v2.4s, v11.8h, v29.8h", 2,11,29)
3668 GEN_THREEVEC_TEST(sqdmull_2d_2s_2s,  "sqdmull  v2.2d, v11.2s, v29.2s", 2,11,29)
3669 GEN_THREEVEC_TEST(sqdmull2_2d_4s_4s, "sqdmull2 v2.2d, v11.4s, v29.4s", 2,11,29)
3670 GEN_THREEVEC_TEST(sqdmull_4s_4h_4h,  "sqdmull  v2.4s, v11.4h, v29.4h", 2,11,29)
3671 GEN_THREEVEC_TEST(sqdmull2_4s_8h_8h, "sqdmull2 v2.4s, v11.8h, v29.8h", 2,11,29)
3672 
// Scalar by-element sqdmulh and sqrdmulh: s-register forms with v2.s[1] and
// v2.s[3], h-register forms with v2.h[2] and v2.h[7].  The trailing
// integers repeat the register numbers used in the instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3673 GEN_THREEVEC_TEST(sqdmulh_s_s_s1, "sqdmulh s0, s1, v2.s[1]", 0,1,2)
3674 GEN_THREEVEC_TEST(sqdmulh_s_s_s3, "sqdmulh s0, s1, v2.s[3]", 0,1,2)
3675 GEN_THREEVEC_TEST(sqdmulh_h_h_h2, "sqdmulh h0, h1, v2.h[2]", 0,1,2)
3676 GEN_THREEVEC_TEST(sqdmulh_h_h_h7, "sqdmulh h0, h1, v2.h[7]", 0,1,2)
3677 GEN_THREEVEC_TEST(sqrdmulh_s_s_s1, "sqrdmulh s0, s1, v2.s[1]", 0,1,2)
3678 GEN_THREEVEC_TEST(sqrdmulh_s_s_s3, "sqrdmulh s0, s1, v2.s[3]", 0,1,2)
3679 GEN_THREEVEC_TEST(sqrdmulh_h_h_h2, "sqrdmulh h0, h1, v2.h[2]", 0,1,2)
3680 GEN_THREEVEC_TEST(sqrdmulh_h_h_h7, "sqrdmulh h0, h1, v2.h[7]", 0,1,2)
3681 
// Vector by-element sqdmulh and sqrdmulh: 4s and 2s arrangements with
// v2.s[1] and v2.s[3], 8h and 4h arrangements with v2.h[2] and v2.h[7].
// The trailing integers repeat the register numbers used in the
// instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3682 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s1, "sqdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
3683 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s3, "sqdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
3684 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s1, "sqdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
3685 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s3, "sqdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
3686 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h2, "sqdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
3687 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h7, "sqdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
3688 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h2, "sqdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
3689 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h7, "sqdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
3690 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s1, "sqrdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
3691 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s3, "sqrdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
3692 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s1, "sqrdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
3693 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s3, "sqrdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
3694 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h2, "sqrdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
3695 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h7, "sqrdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
3696 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h2, "sqrdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
3697 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h7, "sqrdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
3698 
// Scalar sqdmulh and sqrdmulh on s and h registers, using registers 1, 2
// and 4.  The trailing integers repeat the register numbers used in the
// instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3699 GEN_THREEVEC_TEST(sqdmulh_s_s_s,  "sqdmulh  s1, s2, s4", 1, 2, 4)
3700 GEN_THREEVEC_TEST(sqdmulh_h_h_h,  "sqdmulh  h1, h2, h4", 1, 2, 4)
3701 GEN_THREEVEC_TEST(sqrdmulh_s_s_s, "sqrdmulh s1, s2, s4", 1, 2, 4)
3702 GEN_THREEVEC_TEST(sqrdmulh_h_h_h, "sqrdmulh h1, h2, h4", 1, 2, 4)
3703 
// Vector sqdmulh and sqrdmulh for the arrangements 4s, 2s, 8h and 4h, using
// v1, v2 and v4.  The trailing integers repeat the register numbers used
// in the instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3704 GEN_THREEVEC_TEST(sqdmulh_4s_4s_4s, "sqdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3705 GEN_THREEVEC_TEST(sqdmulh_2s_2s_2s, "sqdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3706 GEN_THREEVEC_TEST(sqdmulh_8h_8h_8h, "sqdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3707 GEN_THREEVEC_TEST(sqdmulh_4h_4h_4h, "sqdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3708 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_4s, "sqrdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3709 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_2s, "sqrdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3710 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_8h, "sqrdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3711 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_4h, "sqrdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3712 
// Scalar three-register forms of sqshl, uqshl, sqrshl and uqrshl on d, s, h
// and b registers, using registers 1, 2 and 4.  The trailing integers
// repeat the register numbers used in the instruction string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3713 GEN_THREEVEC_TEST(sqshl_d_d_d, "sqshl d1, d2, d4", 1, 2, 4)
3714 GEN_THREEVEC_TEST(sqshl_s_s_s, "sqshl s1, s2, s4", 1, 2, 4)
3715 GEN_THREEVEC_TEST(sqshl_h_h_h, "sqshl h1, h2, h4", 1, 2, 4)
3716 GEN_THREEVEC_TEST(sqshl_b_b_b, "sqshl b1, b2, b4", 1, 2, 4)
3717 GEN_THREEVEC_TEST(uqshl_d_d_d, "uqshl d1, d2, d4", 1, 2, 4)
3718 GEN_THREEVEC_TEST(uqshl_s_s_s, "uqshl s1, s2, s4", 1, 2, 4)
3719 GEN_THREEVEC_TEST(uqshl_h_h_h, "uqshl h1, h2, h4", 1, 2, 4)
3720 GEN_THREEVEC_TEST(uqshl_b_b_b, "uqshl b1, b2, b4", 1, 2, 4)
3721 GEN_THREEVEC_TEST(sqrshl_d_d_d, "sqrshl d1, d2, d4", 1, 2, 4)
3722 GEN_THREEVEC_TEST(sqrshl_s_s_s, "sqrshl s1, s2, s4", 1, 2, 4)
3723 GEN_THREEVEC_TEST(sqrshl_h_h_h, "sqrshl h1, h2, h4", 1, 2, 4)
3724 GEN_THREEVEC_TEST(sqrshl_b_b_b, "sqrshl b1, b2, b4", 1, 2, 4)
3725 GEN_THREEVEC_TEST(uqrshl_d_d_d, "uqrshl d1, d2, d4", 1, 2, 4)
3726 GEN_THREEVEC_TEST(uqrshl_s_s_s, "uqrshl s1, s2, s4", 1, 2, 4)
3727 GEN_THREEVEC_TEST(uqrshl_h_h_h, "uqrshl h1, h2, h4", 1, 2, 4)
3728 GEN_THREEVEC_TEST(uqrshl_b_b_b, "uqrshl b1, b2, b4", 1, 2, 4)
3729 
// Vector three-register forms of sqshl, uqshl, sqrshl and uqrshl for the
// arrangements 2d, 4s, 2s, 8h, 4h, 16b and 8b, using v1, v2 and v4.  The
// trailing integers repeat the register numbers used in the instruction
// string.
// NOTE(review): GEN_THREEVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3730 GEN_THREEVEC_TEST(sqshl_2d_2d_2d,    "sqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3731 GEN_THREEVEC_TEST(sqshl_4s_4s_4s,    "sqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3732 GEN_THREEVEC_TEST(sqshl_2s_2s_2s,    "sqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3733 GEN_THREEVEC_TEST(sqshl_8h_8h_8h,    "sqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3734 GEN_THREEVEC_TEST(sqshl_4h_4h_4h,    "sqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3735 GEN_THREEVEC_TEST(sqshl_16b_16b_16b, "sqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
3736 GEN_THREEVEC_TEST(sqshl_8b_8b_8b,    "sqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3737 GEN_THREEVEC_TEST(uqshl_2d_2d_2d,    "uqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3738 GEN_THREEVEC_TEST(uqshl_4s_4s_4s,    "uqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3739 GEN_THREEVEC_TEST(uqshl_2s_2s_2s,    "uqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3740 GEN_THREEVEC_TEST(uqshl_8h_8h_8h,    "uqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3741 GEN_THREEVEC_TEST(uqshl_4h_4h_4h,    "uqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3742 GEN_THREEVEC_TEST(uqshl_16b_16b_16b, "uqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
3743 GEN_THREEVEC_TEST(uqshl_8b_8b_8b,    "uqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3744 GEN_THREEVEC_TEST(sqrshl_2d_2d_2d,    "sqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3745 GEN_THREEVEC_TEST(sqrshl_4s_4s_4s,    "sqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3746 GEN_THREEVEC_TEST(sqrshl_2s_2s_2s,    "sqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3747 GEN_THREEVEC_TEST(sqrshl_8h_8h_8h,    "sqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3748 GEN_THREEVEC_TEST(sqrshl_4h_4h_4h,    "sqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3749 GEN_THREEVEC_TEST(sqrshl_16b_16b_16b, "sqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
3750 GEN_THREEVEC_TEST(sqrshl_8b_8b_8b,    "sqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3751 GEN_THREEVEC_TEST(uqrshl_2d_2d_2d,    "uqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
3752 GEN_THREEVEC_TEST(uqrshl_4s_4s_4s,    "uqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
3753 GEN_THREEVEC_TEST(uqrshl_2s_2s_2s,    "uqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
3754 GEN_THREEVEC_TEST(uqrshl_8h_8h_8h,    "uqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
3755 GEN_THREEVEC_TEST(uqrshl_4h_4h_4h,    "uqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
3756 GEN_THREEVEC_TEST(uqrshl_16b_16b_16b, "uqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
3757 GEN_THREEVEC_TEST(uqrshl_8b_8b_8b,    "uqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
3758 
// Scalar shift-right-by-immediate forms whose destination register is
// narrower than the source: sqrshrn, uqrshrn, sqshrn, uqshrn, sqrshrun and
// sqshrun.  Each mnemonic is exercised as s<-d with #1/#17/#32, h<-s with
// #1/#9/#16 and b<-h with #1/#4/#8, using registers 2 (destination) and 5
// (source).  The trailing integers repeat those register numbers.
// NOTE(review): GEN_TWOVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3759 GEN_TWOVEC_TEST(sqrshrn_s_d_1,  "sqrshrn s2, d5, #1",  2, 5)
3760 GEN_TWOVEC_TEST(sqrshrn_s_d_17, "sqrshrn s2, d5, #17", 2, 5)
3761 GEN_TWOVEC_TEST(sqrshrn_s_d_32, "sqrshrn s2, d5, #32", 2, 5)
3762 GEN_TWOVEC_TEST(sqrshrn_h_s_1,  "sqrshrn h2, s5, #1",  2, 5)
3763 GEN_TWOVEC_TEST(sqrshrn_h_s_9,  "sqrshrn h2, s5, #9",  2, 5)
3764 GEN_TWOVEC_TEST(sqrshrn_h_s_16, "sqrshrn h2, s5, #16", 2, 5)
3765 GEN_TWOVEC_TEST(sqrshrn_b_h_1,  "sqrshrn b2, h5, #1",  2, 5)
3766 GEN_TWOVEC_TEST(sqrshrn_b_h_4,  "sqrshrn b2, h5, #4",  2, 5)
3767 GEN_TWOVEC_TEST(sqrshrn_b_h_8,  "sqrshrn b2, h5, #8",  2, 5)
3768 GEN_TWOVEC_TEST(uqrshrn_s_d_1,  "uqrshrn s2, d5, #1",  2, 5)
3769 GEN_TWOVEC_TEST(uqrshrn_s_d_17, "uqrshrn s2, d5, #17", 2, 5)
3770 GEN_TWOVEC_TEST(uqrshrn_s_d_32, "uqrshrn s2, d5, #32", 2, 5)
3771 GEN_TWOVEC_TEST(uqrshrn_h_s_1,  "uqrshrn h2, s5, #1",  2, 5)
3772 GEN_TWOVEC_TEST(uqrshrn_h_s_9,  "uqrshrn h2, s5, #9",  2, 5)
3773 GEN_TWOVEC_TEST(uqrshrn_h_s_16, "uqrshrn h2, s5, #16", 2, 5)
3774 GEN_TWOVEC_TEST(uqrshrn_b_h_1,  "uqrshrn b2, h5, #1",  2, 5)
3775 GEN_TWOVEC_TEST(uqrshrn_b_h_4,  "uqrshrn b2, h5, #4",  2, 5)
3776 GEN_TWOVEC_TEST(uqrshrn_b_h_8,  "uqrshrn b2, h5, #8",  2, 5)
3777 GEN_TWOVEC_TEST(sqshrn_s_d_1,  "sqshrn s2, d5, #1",  2, 5)
3778 GEN_TWOVEC_TEST(sqshrn_s_d_17, "sqshrn s2, d5, #17", 2, 5)
3779 GEN_TWOVEC_TEST(sqshrn_s_d_32, "sqshrn s2, d5, #32", 2, 5)
3780 GEN_TWOVEC_TEST(sqshrn_h_s_1,  "sqshrn h2, s5, #1",  2, 5)
3781 GEN_TWOVEC_TEST(sqshrn_h_s_9,  "sqshrn h2, s5, #9",  2, 5)
3782 GEN_TWOVEC_TEST(sqshrn_h_s_16, "sqshrn h2, s5, #16", 2, 5)
3783 GEN_TWOVEC_TEST(sqshrn_b_h_1,  "sqshrn b2, h5, #1",  2, 5)
3784 GEN_TWOVEC_TEST(sqshrn_b_h_4,  "sqshrn b2, h5, #4",  2, 5)
3785 GEN_TWOVEC_TEST(sqshrn_b_h_8,  "sqshrn b2, h5, #8",  2, 5)
3786 GEN_TWOVEC_TEST(uqshrn_s_d_1,  "uqshrn s2, d5, #1",  2, 5)
3787 GEN_TWOVEC_TEST(uqshrn_s_d_17, "uqshrn s2, d5, #17", 2, 5)
3788 GEN_TWOVEC_TEST(uqshrn_s_d_32, "uqshrn s2, d5, #32", 2, 5)
3789 GEN_TWOVEC_TEST(uqshrn_h_s_1,  "uqshrn h2, s5, #1",  2, 5)
3790 GEN_TWOVEC_TEST(uqshrn_h_s_9,  "uqshrn h2, s5, #9",  2, 5)
3791 GEN_TWOVEC_TEST(uqshrn_h_s_16, "uqshrn h2, s5, #16", 2, 5)
3792 GEN_TWOVEC_TEST(uqshrn_b_h_1,  "uqshrn b2, h5, #1",  2, 5)
3793 GEN_TWOVEC_TEST(uqshrn_b_h_4,  "uqshrn b2, h5, #4",  2, 5)
3794 GEN_TWOVEC_TEST(uqshrn_b_h_8,  "uqshrn b2, h5, #8",  2, 5)
3795 GEN_TWOVEC_TEST(sqrshrun_s_d_1,  "sqrshrun s2, d5, #1",  2, 5)
3796 GEN_TWOVEC_TEST(sqrshrun_s_d_17, "sqrshrun s2, d5, #17", 2, 5)
3797 GEN_TWOVEC_TEST(sqrshrun_s_d_32, "sqrshrun s2, d5, #32", 2, 5)
3798 GEN_TWOVEC_TEST(sqrshrun_h_s_1,  "sqrshrun h2, s5, #1",  2, 5)
3799 GEN_TWOVEC_TEST(sqrshrun_h_s_9,  "sqrshrun h2, s5, #9",  2, 5)
3800 GEN_TWOVEC_TEST(sqrshrun_h_s_16, "sqrshrun h2, s5, #16", 2, 5)
3801 GEN_TWOVEC_TEST(sqrshrun_b_h_1,  "sqrshrun b2, h5, #1",  2, 5)
3802 GEN_TWOVEC_TEST(sqrshrun_b_h_4,  "sqrshrun b2, h5, #4",  2, 5)
3803 GEN_TWOVEC_TEST(sqrshrun_b_h_8,  "sqrshrun b2, h5, #8",  2, 5)
3804 GEN_TWOVEC_TEST(sqshrun_s_d_1,  "sqshrun s2, d5, #1",  2, 5)
3805 GEN_TWOVEC_TEST(sqshrun_s_d_17, "sqshrun s2, d5, #17", 2, 5)
3806 GEN_TWOVEC_TEST(sqshrun_s_d_32, "sqshrun s2, d5, #32", 2, 5)
3807 GEN_TWOVEC_TEST(sqshrun_h_s_1,  "sqshrun h2, s5, #1",  2, 5)
3808 GEN_TWOVEC_TEST(sqshrun_h_s_9,  "sqshrun h2, s5, #9",  2, 5)
3809 GEN_TWOVEC_TEST(sqshrun_h_s_16, "sqshrun h2, s5, #16", 2, 5)
3810 GEN_TWOVEC_TEST(sqshrun_b_h_1,  "sqshrun b2, h5, #1",  2, 5)
3811 GEN_TWOVEC_TEST(sqshrun_b_h_4,  "sqshrun b2, h5, #4",  2, 5)
3812 GEN_TWOVEC_TEST(sqshrun_b_h_8,  "sqshrun b2, h5, #8",  2, 5)
3813 
// Vector shift-right-by-immediate forms whose destination lanes are
// narrower than the source lanes, plus their "2" variants: sqrshrn,
// uqrshrn, sqshrn, uqshrn, sqrshrun and the first sqshrun cases.  Each
// mnemonic is exercised as 2s/4s<-2d with #1/#17/#32, 4h/8h<-4s with
// #1/#9/#16 and 8b/16b<-8h with #1/#4/#8, using v4 (destination) and v29
// (source).  The trailing integers repeat those register numbers.
// NOTE(review): GEN_TWOVEC_TEST is defined outside this section;
// presumably each invocation emits one test function -- confirm.
3814 GEN_TWOVEC_TEST(sqrshrn_2s_2d_1,   "sqrshrn  v4.2s,  v29.2d, #1",  4, 29)
3815 GEN_TWOVEC_TEST(sqrshrn_2s_2d_17,  "sqrshrn  v4.2s,  v29.2d, #17", 4, 29)
3816 GEN_TWOVEC_TEST(sqrshrn_2s_2d_32,  "sqrshrn  v4.2s,  v29.2d, #32", 4, 29)
3817 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_1,  "sqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
3818 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_17, "sqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
3819 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_32, "sqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
3820 GEN_TWOVEC_TEST(sqrshrn_4h_4s_1,   "sqrshrn  v4.4h,  v29.4s, #1",  4, 29)
3821 GEN_TWOVEC_TEST(sqrshrn_4h_4s_9,   "sqrshrn  v4.4h,  v29.4s, #9",  4, 29)
3822 GEN_TWOVEC_TEST(sqrshrn_4h_4s_16,  "sqrshrn  v4.4h,  v29.4s, #16", 4, 29)
3823 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_1,  "sqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
3824 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_9,  "sqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
3825 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_16, "sqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
3826 GEN_TWOVEC_TEST(sqrshrn_8b_8h_1,   "sqrshrn  v4.8b,  v29.8h, #1",  4, 29)
3827 GEN_TWOVEC_TEST(sqrshrn_8b_8h_4,   "sqrshrn  v4.8b,  v29.8h, #4",  4, 29)
3828 GEN_TWOVEC_TEST(sqrshrn_8b_8h_8,   "sqrshrn  v4.8b,  v29.8h, #8",  4, 29)
3829 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_1, "sqrshrn2 v4.16b, v29.8h, #1",  4, 29)
3830 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_4, "sqrshrn2 v4.16b, v29.8h, #4",  4, 29)
3831 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_8, "sqrshrn2 v4.16b, v29.8h, #8",  4, 29)
3832 GEN_TWOVEC_TEST(uqrshrn_2s_2d_1,   "uqrshrn  v4.2s,  v29.2d, #1",  4, 29)
3833 GEN_TWOVEC_TEST(uqrshrn_2s_2d_17,  "uqrshrn  v4.2s,  v29.2d, #17", 4, 29)
3834 GEN_TWOVEC_TEST(uqrshrn_2s_2d_32,  "uqrshrn  v4.2s,  v29.2d, #32", 4, 29)
3835 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_1,  "uqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
3836 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_17, "uqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
3837 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_32, "uqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
3838 GEN_TWOVEC_TEST(uqrshrn_4h_4s_1,   "uqrshrn  v4.4h,  v29.4s, #1",  4, 29)
3839 GEN_TWOVEC_TEST(uqrshrn_4h_4s_9,   "uqrshrn  v4.4h,  v29.4s, #9",  4, 29)
3840 GEN_TWOVEC_TEST(uqrshrn_4h_4s_16,  "uqrshrn  v4.4h,  v29.4s, #16", 4, 29)
3841 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_1,  "uqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
3842 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_9,  "uqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
3843 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_16, "uqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
3844 GEN_TWOVEC_TEST(uqrshrn_8b_8h_1,   "uqrshrn  v4.8b,  v29.8h, #1",  4, 29)
3845 GEN_TWOVEC_TEST(uqrshrn_8b_8h_4,   "uqrshrn  v4.8b,  v29.8h, #4",  4, 29)
3846 GEN_TWOVEC_TEST(uqrshrn_8b_8h_8,   "uqrshrn  v4.8b,  v29.8h, #8",  4, 29)
3847 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_1, "uqrshrn2 v4.16b, v29.8h, #1",  4, 29)
3848 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_4, "uqrshrn2 v4.16b, v29.8h, #4",  4, 29)
3849 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_8, "uqrshrn2 v4.16b, v29.8h, #8",  4, 29)
3850 GEN_TWOVEC_TEST(sqshrn_2s_2d_1,   "sqshrn  v4.2s,  v29.2d, #1",  4, 29)
3851 GEN_TWOVEC_TEST(sqshrn_2s_2d_17,  "sqshrn  v4.2s,  v29.2d, #17", 4, 29)
3852 GEN_TWOVEC_TEST(sqshrn_2s_2d_32,  "sqshrn  v4.2s,  v29.2d, #32", 4, 29)
3853 GEN_TWOVEC_TEST(sqshrn2_4s_2d_1,  "sqshrn2 v4.4s,  v29.2d, #1",  4, 29)
3854 GEN_TWOVEC_TEST(sqshrn2_4s_2d_17, "sqshrn2 v4.4s,  v29.2d, #17", 4, 29)
3855 GEN_TWOVEC_TEST(sqshrn2_4s_2d_32, "sqshrn2 v4.4s,  v29.2d, #32", 4, 29)
3856 GEN_TWOVEC_TEST(sqshrn_4h_4s_1,   "sqshrn  v4.4h,  v29.4s, #1",  4, 29)
3857 GEN_TWOVEC_TEST(sqshrn_4h_4s_9,   "sqshrn  v4.4h,  v29.4s, #9",  4, 29)
3858 GEN_TWOVEC_TEST(sqshrn_4h_4s_16,  "sqshrn  v4.4h,  v29.4s, #16", 4, 29)
3859 GEN_TWOVEC_TEST(sqshrn2_8h_4s_1,  "sqshrn2 v4.8h,  v29.4s, #1",  4, 29)
3860 GEN_TWOVEC_TEST(sqshrn2_8h_4s_9,  "sqshrn2 v4.8h,  v29.4s, #9",  4, 29)
3861 GEN_TWOVEC_TEST(sqshrn2_8h_4s_16, "sqshrn2 v4.8h,  v29.4s, #16", 4, 29)
3862 GEN_TWOVEC_TEST(sqshrn_8b_8h_1,   "sqshrn  v4.8b,  v29.8h, #1",  4, 29)
3863 GEN_TWOVEC_TEST(sqshrn_8b_8h_4,   "sqshrn  v4.8b,  v29.8h, #4",  4, 29)
3864 GEN_TWOVEC_TEST(sqshrn_8b_8h_8,   "sqshrn  v4.8b,  v29.8h, #8",  4, 29)
3865 GEN_TWOVEC_TEST(sqshrn2_16b_8h_1, "sqshrn2 v4.16b, v29.8h, #1",  4, 29)
3866 GEN_TWOVEC_TEST(sqshrn2_16b_8h_4, "sqshrn2 v4.16b, v29.8h, #4",  4, 29)
3867 GEN_TWOVEC_TEST(sqshrn2_16b_8h_8, "sqshrn2 v4.16b, v29.8h, #8",  4, 29)
3868 GEN_TWOVEC_TEST(uqshrn_2s_2d_1,   "uqshrn  v4.2s,  v29.2d, #1",  4, 29)
3869 GEN_TWOVEC_TEST(uqshrn_2s_2d_17,  "uqshrn  v4.2s,  v29.2d, #17", 4, 29)
3870 GEN_TWOVEC_TEST(uqshrn_2s_2d_32,  "uqshrn  v4.2s,  v29.2d, #32", 4, 29)
3871 GEN_TWOVEC_TEST(uqshrn2_4s_2d_1,  "uqshrn2 v4.4s,  v29.2d, #1",  4, 29)
3872 GEN_TWOVEC_TEST(uqshrn2_4s_2d_17, "uqshrn2 v4.4s,  v29.2d, #17", 4, 29)
3873 GEN_TWOVEC_TEST(uqshrn2_4s_2d_32, "uqshrn2 v4.4s,  v29.2d, #32", 4, 29)
3874 GEN_TWOVEC_TEST(uqshrn_4h_4s_1,   "uqshrn  v4.4h,  v29.4s, #1",  4, 29)
3875 GEN_TWOVEC_TEST(uqshrn_4h_4s_9,   "uqshrn  v4.4h,  v29.4s, #9",  4, 29)
3876 GEN_TWOVEC_TEST(uqshrn_4h_4s_16,  "uqshrn  v4.4h,  v29.4s, #16", 4, 29)
3877 GEN_TWOVEC_TEST(uqshrn2_8h_4s_1,  "uqshrn2 v4.8h,  v29.4s, #1",  4, 29)
3878 GEN_TWOVEC_TEST(uqshrn2_8h_4s_9,  "uqshrn2 v4.8h,  v29.4s, #9",  4, 29)
3879 GEN_TWOVEC_TEST(uqshrn2_8h_4s_16, "uqshrn2 v4.8h,  v29.4s, #16", 4, 29)
3880 GEN_TWOVEC_TEST(uqshrn_8b_8h_1,   "uqshrn  v4.8b,  v29.8h, #1",  4, 29)
3881 GEN_TWOVEC_TEST(uqshrn_8b_8h_4,   "uqshrn  v4.8b,  v29.8h, #4",  4, 29)
3882 GEN_TWOVEC_TEST(uqshrn_8b_8h_8,   "uqshrn  v4.8b,  v29.8h, #8",  4, 29)
3883 GEN_TWOVEC_TEST(uqshrn2_16b_8h_1, "uqshrn2 v4.16b, v29.8h, #1",  4, 29)
3884 GEN_TWOVEC_TEST(uqshrn2_16b_8h_4, "uqshrn2 v4.16b, v29.8h, #4",  4, 29)
3885 GEN_TWOVEC_TEST(uqshrn2_16b_8h_8, "uqshrn2 v4.16b, v29.8h, #8",  4, 29)
3886 GEN_TWOVEC_TEST(sqrshrun_2s_2d_1,   "sqrshrun  v4.2s,  v29.2d, #1",  4, 29)
3887 GEN_TWOVEC_TEST(sqrshrun_2s_2d_17,  "sqrshrun  v4.2s,  v29.2d, #17", 4, 29)
3888 GEN_TWOVEC_TEST(sqrshrun_2s_2d_32,  "sqrshrun  v4.2s,  v29.2d, #32", 4, 29)
3889 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_1,  "sqrshrun2 v4.4s,  v29.2d, #1",  4, 29)
3890 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_17, "sqrshrun2 v4.4s,  v29.2d, #17", 4, 29)
3891 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_32, "sqrshrun2 v4.4s,  v29.2d, #32", 4, 29)
3892 GEN_TWOVEC_TEST(sqrshrun_4h_4s_1,   "sqrshrun  v4.4h,  v29.4s, #1",  4, 29)
3893 GEN_TWOVEC_TEST(sqrshrun_4h_4s_9,   "sqrshrun  v4.4h,  v29.4s, #9",  4, 29)
3894 GEN_TWOVEC_TEST(sqrshrun_4h_4s_16,  "sqrshrun  v4.4h,  v29.4s, #16", 4, 29)
3895 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_1,  "sqrshrun2 v4.8h,  v29.4s, #1",  4, 29)
3896 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_9,  "sqrshrun2 v4.8h,  v29.4s, #9",  4, 29)
3897 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_16, "sqrshrun2 v4.8h,  v29.4s, #16", 4, 29)
3898 GEN_TWOVEC_TEST(sqrshrun_8b_8h_1,   "sqrshrun  v4.8b,  v29.8h, #1",  4, 29)
3899 GEN_TWOVEC_TEST(sqrshrun_8b_8h_4,   "sqrshrun  v4.8b,  v29.8h, #4",  4, 29)
3900 GEN_TWOVEC_TEST(sqrshrun_8b_8h_8,   "sqrshrun  v4.8b,  v29.8h, #8",  4, 29)
3901 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_1, "sqrshrun2 v4.16b, v29.8h, #1",  4, 29)
3902 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_4, "sqrshrun2 v4.16b, v29.8h, #4",  4, 29)
3903 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_8, "sqrshrun2 v4.16b, v29.8h, #8",  4, 29)
3904 GEN_TWOVEC_TEST(sqshrun_2s_2d_1,   "sqshrun  v4.2s,  v29.2d, #1",  4, 29)
3905 GEN_TWOVEC_TEST(sqshrun_2s_2d_17,  "sqshrun  v4.2s,  v29.2d, #17", 4, 29)
3906 GEN_TWOVEC_TEST(sqshrun_2s_2d_32,  "sqshrun  v4.2s,  v29.2d, #32", 4, 29)
3907 GEN_TWOVEC_TEST(sqshrun2_4s_2d_1,  "sqshrun2 v4.4s,  v29.2d, #1",  4, 29)
3908 GEN_TWOVEC_TEST(sqshrun2_4s_2d_17, "sqshrun2 v4.4s,  v29.2d, #17", 4, 29)
3909 GEN_TWOVEC_TEST(sqshrun2_4s_2d_32, "sqshrun2 v4.4s,  v29.2d, #32", 4, 29)
3910 GEN_TWOVEC_TEST(sqshrun_4h_4s_1,   "sqshrun  v4.4h,  v29.4s, #1",  4, 29)
3911 GEN_TWOVEC_TEST(sqshrun_4h_4s_9,   "sqshrun  v4.4h,  v29.4s, #9",  4, 29)
3912 GEN_TWOVEC_TEST(sqshrun_4h_4s_16,  "sqshrun  v4.4h,  v29.4s, #16", 4, 29)
3913 GEN_TWOVEC_TEST(sqshrun2_8h_4s_1,  "sqshrun2 v4.8h,  v29.4s, #1",  4, 29)
3914 GEN_TWOVEC_TEST(sqshrun2_8h_4s_9,  "sqshrun2 v4.8h,  v29.4s, #9",  4, 29)
3915 GEN_TWOVEC_TEST(sqshrun2_8h_4s_16, "sqshrun2 v4.8h,  v29.4s, #16", 4, 29)
3916 GEN_TWOVEC_TEST(sqshrun_8b_8h_1,   "sqshrun  v4.8b,  v29.8h, #1",  4, 29)
3917 GEN_TWOVEC_TEST(sqshrun_8b_8h_4,   "sqshrun  v4.8b,  v29.8h, #4",  4, 29)
3918 GEN_TWOVEC_TEST(sqshrun_8b_8h_8,   "sqshrun  v4.8b,  v29.8h, #8",  4, 29)
3919 GEN_TWOVEC_TEST(sqshrun2_16b_8h_1, "sqshrun2 v4.16b, v29.8h, #1",  4, 29)
3920 GEN_TWOVEC_TEST(sqshrun2_16b_8h_4, "sqshrun2 v4.16b, v29.8h, #4",  4, 29)
3921 GEN_TWOVEC_TEST(sqshrun2_16b_8h_8, "sqshrun2 v4.16b, v29.8h, #8",  4, 29)
3922 
/* Scalar sqshl / uqshl / sqshlu shift-left-by-immediate tests on
   register 5 (first operand) and register 28 (second operand), in the
   d, s, h and b register views.  For d/s/h the immediates are 0, a
   midpoint, and width-1 (63, 31, 15).  For b, sqshl and uqshl use
   0, 1, 4, 6, 7 while sqshlu enumerates every value 0..7. */
3923 GEN_TWOVEC_TEST(sqshl_d_d_0,  "sqshl d5, d28, #0",  5, 28)
3924 GEN_TWOVEC_TEST(sqshl_d_d_32, "sqshl d5, d28, #32", 5, 28)
3925 GEN_TWOVEC_TEST(sqshl_d_d_63, "sqshl d5, d28, #63", 5, 28)
3926 GEN_TWOVEC_TEST(sqshl_s_s_0,  "sqshl s5, s28, #0",  5, 28)
3927 GEN_TWOVEC_TEST(sqshl_s_s_16, "sqshl s5, s28, #16", 5, 28)
3928 GEN_TWOVEC_TEST(sqshl_s_s_31, "sqshl s5, s28, #31", 5, 28)
3929 GEN_TWOVEC_TEST(sqshl_h_h_0,  "sqshl h5, h28, #0",  5, 28)
3930 GEN_TWOVEC_TEST(sqshl_h_h_8,  "sqshl h5, h28, #8",  5, 28)
3931 GEN_TWOVEC_TEST(sqshl_h_h_15, "sqshl h5, h28, #15", 5, 28)
3932 GEN_TWOVEC_TEST(sqshl_b_b_0,  "sqshl b5, b28, #0",  5, 28)
3933 GEN_TWOVEC_TEST(sqshl_b_b_1,  "sqshl b5, b28, #1",  5, 28)
3934 GEN_TWOVEC_TEST(sqshl_b_b_4,  "sqshl b5, b28, #4",  5, 28)
3935 GEN_TWOVEC_TEST(sqshl_b_b_6,  "sqshl b5, b28, #6",  5, 28)
3936 GEN_TWOVEC_TEST(sqshl_b_b_7,  "sqshl b5, b28, #7",  5, 28)
3937 GEN_TWOVEC_TEST(uqshl_d_d_0,  "uqshl d5, d28, #0",  5, 28)
3938 GEN_TWOVEC_TEST(uqshl_d_d_32, "uqshl d5, d28, #32", 5, 28)
3939 GEN_TWOVEC_TEST(uqshl_d_d_63, "uqshl d5, d28, #63", 5, 28)
3940 GEN_TWOVEC_TEST(uqshl_s_s_0,  "uqshl s5, s28, #0",  5, 28)
3941 GEN_TWOVEC_TEST(uqshl_s_s_16, "uqshl s5, s28, #16", 5, 28)
3942 GEN_TWOVEC_TEST(uqshl_s_s_31, "uqshl s5, s28, #31", 5, 28)
3943 GEN_TWOVEC_TEST(uqshl_h_h_0,  "uqshl h5, h28, #0",  5, 28)
3944 GEN_TWOVEC_TEST(uqshl_h_h_8,  "uqshl h5, h28, #8",  5, 28)
3945 GEN_TWOVEC_TEST(uqshl_h_h_15, "uqshl h5, h28, #15", 5, 28)
3946 GEN_TWOVEC_TEST(uqshl_b_b_0,  "uqshl b5, b28, #0",  5, 28)
3947 GEN_TWOVEC_TEST(uqshl_b_b_1,  "uqshl b5, b28, #1",  5, 28)
3948 GEN_TWOVEC_TEST(uqshl_b_b_4,  "uqshl b5, b28, #4",  5, 28)
3949 GEN_TWOVEC_TEST(uqshl_b_b_6,  "uqshl b5, b28, #6",  5, 28)
3950 GEN_TWOVEC_TEST(uqshl_b_b_7,  "uqshl b5, b28, #7",  5, 28)
3951 GEN_TWOVEC_TEST(sqshlu_d_d_0,  "sqshlu d5, d28, #0",  5, 28)
3952 GEN_TWOVEC_TEST(sqshlu_d_d_32, "sqshlu d5, d28, #32", 5, 28)
3953 GEN_TWOVEC_TEST(sqshlu_d_d_63, "sqshlu d5, d28, #63", 5, 28)
3954 GEN_TWOVEC_TEST(sqshlu_s_s_0,  "sqshlu s5, s28, #0",  5, 28)
3955 GEN_TWOVEC_TEST(sqshlu_s_s_16, "sqshlu s5, s28, #16", 5, 28)
3956 GEN_TWOVEC_TEST(sqshlu_s_s_31, "sqshlu s5, s28, #31", 5, 28)
3957 GEN_TWOVEC_TEST(sqshlu_h_h_0,  "sqshlu h5, h28, #0",  5, 28)
3958 GEN_TWOVEC_TEST(sqshlu_h_h_8,  "sqshlu h5, h28, #8",  5, 28)
3959 GEN_TWOVEC_TEST(sqshlu_h_h_15, "sqshlu h5, h28, #15", 5, 28)
3960 GEN_TWOVEC_TEST(sqshlu_b_b_0,  "sqshlu b5, b28, #0",  5, 28)
3961 GEN_TWOVEC_TEST(sqshlu_b_b_1,  "sqshlu b5, b28, #1",  5, 28)
3962 GEN_TWOVEC_TEST(sqshlu_b_b_2,  "sqshlu b5, b28, #2",  5, 28)
3963 GEN_TWOVEC_TEST(sqshlu_b_b_3,  "sqshlu b5, b28, #3",  5, 28)
3964 GEN_TWOVEC_TEST(sqshlu_b_b_4,  "sqshlu b5, b28, #4",  5, 28)
3965 GEN_TWOVEC_TEST(sqshlu_b_b_5,  "sqshlu b5, b28, #5",  5, 28)
3966 GEN_TWOVEC_TEST(sqshlu_b_b_6,  "sqshlu b5, b28, #6",  5, 28)
3967 GEN_TWOVEC_TEST(sqshlu_b_b_7,  "sqshlu b5, b28, #7",  5, 28)
3968 
/* Vector sqshl / uqshl / sqshlu shift-left-by-immediate tests using v6
   (first operand) and v27 (second operand).  Each mnemonic is tested
   over the arrangements 2d, 4s, 2s, 8h, 4h, 16b and 8b with three
   immediates: 0, a midpoint (32/16/8/3) and width-1 (63/31/15/7). */
3969 GEN_TWOVEC_TEST(sqshl_2d_2d_0,   "sqshl v6.2d,  v27.2d, #0",  6, 27)
3970 GEN_TWOVEC_TEST(sqshl_2d_2d_32,  "sqshl v6.2d,  v27.2d, #32", 6, 27)
3971 GEN_TWOVEC_TEST(sqshl_2d_2d_63,  "sqshl v6.2d,  v27.2d, #63", 6, 27)
3972 GEN_TWOVEC_TEST(sqshl_4s_4s_0,   "sqshl v6.4s,  v27.4s, #0",  6, 27)
3973 GEN_TWOVEC_TEST(sqshl_4s_4s_16,  "sqshl v6.4s,  v27.4s, #16", 6, 27)
3974 GEN_TWOVEC_TEST(sqshl_4s_4s_31,  "sqshl v6.4s,  v27.4s, #31", 6, 27)
3975 GEN_TWOVEC_TEST(sqshl_2s_2s_0,   "sqshl v6.2s,  v27.2s, #0",  6, 27)
3976 GEN_TWOVEC_TEST(sqshl_2s_2s_16,  "sqshl v6.2s,  v27.2s, #16", 6, 27)
3977 GEN_TWOVEC_TEST(sqshl_2s_2s_31,  "sqshl v6.2s,  v27.2s, #31", 6, 27)
3978 GEN_TWOVEC_TEST(sqshl_8h_8h_0,   "sqshl v6.8h,  v27.8h, #0",  6, 27)
3979 GEN_TWOVEC_TEST(sqshl_8h_8h_8,   "sqshl v6.8h,  v27.8h, #8",  6, 27)
3980 GEN_TWOVEC_TEST(sqshl_8h_8h_15,  "sqshl v6.8h,  v27.8h, #15", 6, 27)
3981 GEN_TWOVEC_TEST(sqshl_4h_4h_0,   "sqshl v6.4h,  v27.4h, #0",  6, 27)
3982 GEN_TWOVEC_TEST(sqshl_4h_4h_8,   "sqshl v6.4h,  v27.4h, #8",  6, 27)
3983 GEN_TWOVEC_TEST(sqshl_4h_4h_15,  "sqshl v6.4h,  v27.4h, #15", 6, 27)
3984 GEN_TWOVEC_TEST(sqshl_16b_16b_0, "sqshl v6.16b, v27.16b, #0", 6, 27)
3985 GEN_TWOVEC_TEST(sqshl_16b_16b_3, "sqshl v6.16b, v27.16b, #3", 6, 27)
3986 GEN_TWOVEC_TEST(sqshl_16b_16b_7, "sqshl v6.16b, v27.16b, #7", 6, 27)
3987 GEN_TWOVEC_TEST(sqshl_8b_8b_0,   "sqshl v6.8b,  v27.8b, #0",  6, 27)
3988 GEN_TWOVEC_TEST(sqshl_8b_8b_3,   "sqshl v6.8b,  v27.8b, #3",  6, 27)
3989 GEN_TWOVEC_TEST(sqshl_8b_8b_7,   "sqshl v6.8b,  v27.8b, #7",  6, 27)
3990 GEN_TWOVEC_TEST(uqshl_2d_2d_0,   "uqshl v6.2d,  v27.2d, #0",  6, 27)
3991 GEN_TWOVEC_TEST(uqshl_2d_2d_32,  "uqshl v6.2d,  v27.2d, #32", 6, 27)
3992 GEN_TWOVEC_TEST(uqshl_2d_2d_63,  "uqshl v6.2d,  v27.2d, #63", 6, 27)
3993 GEN_TWOVEC_TEST(uqshl_4s_4s_0,   "uqshl v6.4s,  v27.4s, #0",  6, 27)
3994 GEN_TWOVEC_TEST(uqshl_4s_4s_16,  "uqshl v6.4s,  v27.4s, #16", 6, 27)
3995 GEN_TWOVEC_TEST(uqshl_4s_4s_31,  "uqshl v6.4s,  v27.4s, #31", 6, 27)
3996 GEN_TWOVEC_TEST(uqshl_2s_2s_0,   "uqshl v6.2s,  v27.2s, #0",  6, 27)
3997 GEN_TWOVEC_TEST(uqshl_2s_2s_16,  "uqshl v6.2s,  v27.2s, #16", 6, 27)
3998 GEN_TWOVEC_TEST(uqshl_2s_2s_31,  "uqshl v6.2s,  v27.2s, #31", 6, 27)
3999 GEN_TWOVEC_TEST(uqshl_8h_8h_0,   "uqshl v6.8h,  v27.8h, #0",  6, 27)
4000 GEN_TWOVEC_TEST(uqshl_8h_8h_8,   "uqshl v6.8h,  v27.8h, #8",  6, 27)
4001 GEN_TWOVEC_TEST(uqshl_8h_8h_15,  "uqshl v6.8h,  v27.8h, #15", 6, 27)
4002 GEN_TWOVEC_TEST(uqshl_4h_4h_0,   "uqshl v6.4h,  v27.4h, #0",  6, 27)
4003 GEN_TWOVEC_TEST(uqshl_4h_4h_8,   "uqshl v6.4h,  v27.4h, #8",  6, 27)
4004 GEN_TWOVEC_TEST(uqshl_4h_4h_15,  "uqshl v6.4h,  v27.4h, #15", 6, 27)
4005 GEN_TWOVEC_TEST(uqshl_16b_16b_0, "uqshl v6.16b, v27.16b, #0", 6, 27)
4006 GEN_TWOVEC_TEST(uqshl_16b_16b_3, "uqshl v6.16b, v27.16b, #3", 6, 27)
4007 GEN_TWOVEC_TEST(uqshl_16b_16b_7, "uqshl v6.16b, v27.16b, #7", 6, 27)
4008 GEN_TWOVEC_TEST(uqshl_8b_8b_0,   "uqshl v6.8b,  v27.8b, #0",  6, 27)
4009 GEN_TWOVEC_TEST(uqshl_8b_8b_3,   "uqshl v6.8b,  v27.8b, #3",  6, 27)
4010 GEN_TWOVEC_TEST(uqshl_8b_8b_7,   "uqshl v6.8b,  v27.8b, #7",  6, 27)
4011 GEN_TWOVEC_TEST(sqshlu_2d_2d_0,   "sqshlu v6.2d,  v27.2d, #0",  6, 27)
4012 GEN_TWOVEC_TEST(sqshlu_2d_2d_32,  "sqshlu v6.2d,  v27.2d, #32", 6, 27)
4013 GEN_TWOVEC_TEST(sqshlu_2d_2d_63,  "sqshlu v6.2d,  v27.2d, #63", 6, 27)
4014 GEN_TWOVEC_TEST(sqshlu_4s_4s_0,   "sqshlu v6.4s,  v27.4s, #0",  6, 27)
4015 GEN_TWOVEC_TEST(sqshlu_4s_4s_16,  "sqshlu v6.4s,  v27.4s, #16", 6, 27)
4016 GEN_TWOVEC_TEST(sqshlu_4s_4s_31,  "sqshlu v6.4s,  v27.4s, #31", 6, 27)
4017 GEN_TWOVEC_TEST(sqshlu_2s_2s_0,   "sqshlu v6.2s,  v27.2s, #0",  6, 27)
4018 GEN_TWOVEC_TEST(sqshlu_2s_2s_16,  "sqshlu v6.2s,  v27.2s, #16", 6, 27)
4019 GEN_TWOVEC_TEST(sqshlu_2s_2s_31,  "sqshlu v6.2s,  v27.2s, #31", 6, 27)
4020 GEN_TWOVEC_TEST(sqshlu_8h_8h_0,   "sqshlu v6.8h,  v27.8h, #0",  6, 27)
4021 GEN_TWOVEC_TEST(sqshlu_8h_8h_8,   "sqshlu v6.8h,  v27.8h, #8",  6, 27)
4022 GEN_TWOVEC_TEST(sqshlu_8h_8h_15,  "sqshlu v6.8h,  v27.8h, #15", 6, 27)
4023 GEN_TWOVEC_TEST(sqshlu_4h_4h_0,   "sqshlu v6.4h,  v27.4h, #0",  6, 27)
4024 GEN_TWOVEC_TEST(sqshlu_4h_4h_8,   "sqshlu v6.4h,  v27.4h, #8",  6, 27)
4025 GEN_TWOVEC_TEST(sqshlu_4h_4h_15,  "sqshlu v6.4h,  v27.4h, #15", 6, 27)
4026 GEN_TWOVEC_TEST(sqshlu_16b_16b_0, "sqshlu v6.16b, v27.16b, #0", 6, 27)
4027 GEN_TWOVEC_TEST(sqshlu_16b_16b_3, "sqshlu v6.16b, v27.16b, #3", 6, 27)
4028 GEN_TWOVEC_TEST(sqshlu_16b_16b_7, "sqshlu v6.16b, v27.16b, #7", 6, 27)
4029 GEN_TWOVEC_TEST(sqshlu_8b_8b_0,   "sqshlu v6.8b,  v27.8b, #0",  6, 27)
4030 GEN_TWOVEC_TEST(sqshlu_8b_8b_3,   "sqshlu v6.8b,  v27.8b, #3",  6, 27)
4031 GEN_TWOVEC_TEST(sqshlu_8b_8b_7,   "sqshlu v6.8b,  v27.8b, #7",  6, 27)
4032 
/* Scalar sqxtn / uqxtn / sqxtun tests.  Each instruction's first
   operand is a view of register 31 and its second a wider view of
   register 0; the three width pairs are d->s, s->h and h->b. */
4033 GEN_TWOVEC_TEST(sqxtn_s_d,  "sqxtn s31,  d0", 31, 0)
4034 GEN_TWOVEC_TEST(sqxtn_h_s,  "sqxtn h31,  s0", 31, 0)
4035 GEN_TWOVEC_TEST(sqxtn_b_h,  "sqxtn b31,  h0", 31, 0)
4036 GEN_TWOVEC_TEST(uqxtn_s_d,  "uqxtn s31,  d0", 31, 0)
4037 GEN_TWOVEC_TEST(uqxtn_h_s,  "uqxtn h31,  s0", 31, 0)
4038 GEN_TWOVEC_TEST(uqxtn_b_h,  "uqxtn b31,  h0", 31, 0)
4039 GEN_TWOVEC_TEST(sqxtun_s_d, "sqxtun s31, d0", 31, 0)
4040 GEN_TWOVEC_TEST(sqxtun_h_s, "sqxtun h31, s0", 31, 0)
4041 GEN_TWOVEC_TEST(sqxtun_b_h, "sqxtun b31, h0", 31, 0)
4042 
/* Vector sqxtn / uqxtn / sqxtun tests, plus the "2" variants, generated
   via GEN_UNARY_TEST.  The arguments are a mnemonic followed by two
   arrangement specifiers; presumably these are the destination and
   source arrangements in that order (the macro is defined outside this
   section -- confirm there).  Registers are not named here, so they
   must be chosen by the macro itself. */
4043 GEN_UNARY_TEST(sqxtn,   2s, 2d)
4044 GEN_UNARY_TEST(sqxtn2,  4s, 2d)
4045 GEN_UNARY_TEST(sqxtn,   4h, 4s)
4046 GEN_UNARY_TEST(sqxtn2,  8h, 4s)
4047 GEN_UNARY_TEST(sqxtn,   8b, 8h)
4048 GEN_UNARY_TEST(sqxtn2, 16b, 8h)
4049 GEN_UNARY_TEST(uqxtn,   2s, 2d)
4050 GEN_UNARY_TEST(uqxtn2,  4s, 2d)
4051 GEN_UNARY_TEST(uqxtn,   4h, 4s)
4052 GEN_UNARY_TEST(uqxtn2,  8h, 4s)
4053 GEN_UNARY_TEST(uqxtn,   8b, 8h)
4054 GEN_UNARY_TEST(uqxtn2, 16b, 8h)
4055 GEN_UNARY_TEST(sqxtun,   2s, 2d)
4056 GEN_UNARY_TEST(sqxtun2,  4s, 2d)
4057 GEN_UNARY_TEST(sqxtun,   4h, 4s)
4058 GEN_UNARY_TEST(sqxtun2,  8h, 4s)
4059 GEN_UNARY_TEST(sqxtun,   8b, 8h)
4060 GEN_UNARY_TEST(sqxtun2, 16b, 8h)
4061 
/* Three-operand srhadd / urhadd tests on v2, v11 and v29 (matching the
   trailing "2, 11, 29" macro arguments).  Arrangements covered are 4s,
   2s, 8h, 4h, 16b and 8b; there is no 2d case in this list. */
4062 GEN_THREEVEC_TEST(srhadd_4s_4s_4s,"srhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
4063 GEN_THREEVEC_TEST(srhadd_2s_2s_2s,"srhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
4064 GEN_THREEVEC_TEST(srhadd_8h_8h_8h,"srhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
4065 GEN_THREEVEC_TEST(srhadd_4h_4h_4h,"srhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
4066 GEN_THREEVEC_TEST(srhadd_16b_16b_16b,
4067                                   "srhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
4068 GEN_THREEVEC_TEST(srhadd_8b_8b_8b,"srhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
4069 GEN_THREEVEC_TEST(urhadd_4s_4s_4s,"urhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
4070 GEN_THREEVEC_TEST(urhadd_2s_2s_2s,"urhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
4071 GEN_THREEVEC_TEST(urhadd_8h_8h_8h,"urhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
4072 GEN_THREEVEC_TEST(urhadd_4h_4h_4h,"urhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
4073 GEN_THREEVEC_TEST(urhadd_16b_16b_16b,
4074                                   "urhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
4075 GEN_THREEVEC_TEST(urhadd_8b_8b_8b,"urhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
4076 
/* Scalar (d-register) sshl / ushl tests with the shift amount taken
   from a register operand: d29, d28, d27. */
4077 GEN_THREEVEC_TEST(sshl_d_d_d, "sshl d29, d28, d27", 29, 28, 27)
4078 GEN_THREEVEC_TEST(ushl_d_d_d, "ushl d29, d28, d27", 29, 28, 27)
4079 
/* Vector sshl / ushl (register-operand shift) tests on v29, v28, v27,
   one case per arrangement: 2d, 4s, 2s, 8h, 4h, 16b, 8b. */
4080 GEN_THREEVEC_TEST(sshl_2d_2d_2d,    "sshl v29.2d, v28.2d, v27.2d", 29,28,27)
4081 GEN_THREEVEC_TEST(sshl_4s_4s_4s,    "sshl v29.4s, v28.4s, v27.4s", 29,28,27)
4082 GEN_THREEVEC_TEST(sshl_2s_2s_2s,    "sshl v29.2s, v28.2s, v27.2s", 29,28,27)
4083 GEN_THREEVEC_TEST(sshl_8h_8h_8h,    "sshl v29.8h, v28.8h, v27.8h", 29,28,27)
4084 GEN_THREEVEC_TEST(sshl_4h_4h_4h,    "sshl v29.4h, v28.4h, v27.4h", 29,28,27)
4085 GEN_THREEVEC_TEST(sshl_16b_16b_16b, "sshl v29.16b, v28.16b, v27.16b", 29,28,27)
4086 GEN_THREEVEC_TEST(sshl_8b_8b_8b,    "sshl v29.8b, v28.8b, v27.8b", 29,28,27)
4087 GEN_THREEVEC_TEST(ushl_2d_2d_2d,    "ushl v29.2d, v28.2d, v27.2d", 29,28,27)
4088 GEN_THREEVEC_TEST(ushl_4s_4s_4s,    "ushl v29.4s, v28.4s, v27.4s", 29,28,27)
4089 GEN_THREEVEC_TEST(ushl_2s_2s_2s,    "ushl v29.2s, v28.2s, v27.2s", 29,28,27)
4090 GEN_THREEVEC_TEST(ushl_8h_8h_8h,    "ushl v29.8h, v28.8h, v27.8h", 29,28,27)
4091 GEN_THREEVEC_TEST(ushl_4h_4h_4h,    "ushl v29.4h, v28.4h, v27.4h", 29,28,27)
4092 GEN_THREEVEC_TEST(ushl_16b_16b_16b, "ushl v29.16b, v28.16b, v27.16b", 29,28,27)
4093 GEN_THREEVEC_TEST(ushl_8b_8b_8b,    "ushl v29.8b, v28.8b, v27.8b", 29,28,27)
4094 
/* Scalar (d-register) shift-by-immediate tests on d5, d28.  shl uses
   the immediates 0, 32, 63; sshr and ushr use 1, 32, 64. */
4095 GEN_TWOVEC_TEST(shl_d_d_0,  "shl d5, d28, #0",  5, 28)
4096 GEN_TWOVEC_TEST(shl_d_d_32, "shl d5, d28, #32", 5, 28)
4097 GEN_TWOVEC_TEST(shl_d_d_63, "shl d5, d28, #63", 5, 28)
4098 GEN_TWOVEC_TEST(sshr_d_d_1,  "sshr d5, d28, #1",  5, 28)
4099 GEN_TWOVEC_TEST(sshr_d_d_32, "sshr d5, d28, #32", 5, 28)
4100 GEN_TWOVEC_TEST(sshr_d_d_64, "sshr d5, d28, #64", 5, 28)
4101 GEN_TWOVEC_TEST(ushr_d_d_1,  "ushr d5, d28, #1",  5, 28)
4102 GEN_TWOVEC_TEST(ushr_d_d_32, "ushr d5, d28, #32", 5, 28)
4103 GEN_TWOVEC_TEST(ushr_d_d_64, "ushr d5, d28, #64", 5, 28)
4104 
/* Vector shl / sshr / ushr tests generated via GEN_SHIFT_TEST.  The
   arguments are a mnemonic, two arrangement specifiers (identical
   here) and a number that is presumably the shift immediate -- the
   macro is defined outside this section, confirm there.
   shl:        0, 13 and width-1 (63/31/15); byte arrangements use only
               0 and 7.
   sshr, ushr: 1, 13 and the full element width (64/32/16); byte
               arrangements use only 1 and 8. */
4105 GEN_SHIFT_TEST(shl,  2d, 2d, 0)
4106 GEN_SHIFT_TEST(shl,  2d, 2d, 13)
4107 GEN_SHIFT_TEST(shl,  2d, 2d, 63)
4108 GEN_SHIFT_TEST(shl,  4s, 4s, 0)
4109 GEN_SHIFT_TEST(shl,  4s, 4s, 13)
4110 GEN_SHIFT_TEST(shl,  4s, 4s, 31)
4111 GEN_SHIFT_TEST(shl,  2s, 2s, 0)
4112 GEN_SHIFT_TEST(shl,  2s, 2s, 13)
4113 GEN_SHIFT_TEST(shl,  2s, 2s, 31)
4114 GEN_SHIFT_TEST(shl,  8h, 8h, 0)
4115 GEN_SHIFT_TEST(shl,  8h, 8h, 13)
4116 GEN_SHIFT_TEST(shl,  8h, 8h, 15)
4117 GEN_SHIFT_TEST(shl,  4h, 4h, 0)
4118 GEN_SHIFT_TEST(shl,  4h, 4h, 13)
4119 GEN_SHIFT_TEST(shl,  4h, 4h, 15)
4120 GEN_SHIFT_TEST(shl,  16b, 16b, 0)
4121 GEN_SHIFT_TEST(shl,  16b, 16b, 7)
4122 GEN_SHIFT_TEST(shl,  8b, 8b, 0)
4123 GEN_SHIFT_TEST(shl,  8b, 8b, 7)
4124 GEN_SHIFT_TEST(sshr, 2d, 2d, 1)
4125 GEN_SHIFT_TEST(sshr, 2d, 2d, 13)
4126 GEN_SHIFT_TEST(sshr, 2d, 2d, 64)
4127 GEN_SHIFT_TEST(sshr, 4s, 4s, 1)
4128 GEN_SHIFT_TEST(sshr, 4s, 4s, 13)
4129 GEN_SHIFT_TEST(sshr, 4s, 4s, 32)
4130 GEN_SHIFT_TEST(sshr, 2s, 2s, 1)
4131 GEN_SHIFT_TEST(sshr, 2s, 2s, 13)
4132 GEN_SHIFT_TEST(sshr, 2s, 2s, 32)
4133 GEN_SHIFT_TEST(sshr, 8h, 8h, 1)
4134 GEN_SHIFT_TEST(sshr, 8h, 8h, 13)
4135 GEN_SHIFT_TEST(sshr, 8h, 8h, 16)
4136 GEN_SHIFT_TEST(sshr, 4h, 4h, 1)
4137 GEN_SHIFT_TEST(sshr, 4h, 4h, 13)
4138 GEN_SHIFT_TEST(sshr, 4h, 4h, 16)
4139 GEN_SHIFT_TEST(sshr, 16b, 16b, 1)
4140 GEN_SHIFT_TEST(sshr, 16b, 16b, 8)
4141 GEN_SHIFT_TEST(sshr, 8b, 8b, 1)
4142 GEN_SHIFT_TEST(sshr, 8b, 8b, 8)
4143 GEN_SHIFT_TEST(ushr, 2d, 2d, 1)
4144 GEN_SHIFT_TEST(ushr, 2d, 2d, 13)
4145 GEN_SHIFT_TEST(ushr, 2d, 2d, 64)
4146 GEN_SHIFT_TEST(ushr, 4s, 4s, 1)
4147 GEN_SHIFT_TEST(ushr, 4s, 4s, 13)
4148 GEN_SHIFT_TEST(ushr, 4s, 4s, 32)
4149 GEN_SHIFT_TEST(ushr, 2s, 2s, 1)
4150 GEN_SHIFT_TEST(ushr, 2s, 2s, 13)
4151 GEN_SHIFT_TEST(ushr, 2s, 2s, 32)
4152 GEN_SHIFT_TEST(ushr, 8h, 8h, 1)
4153 GEN_SHIFT_TEST(ushr, 8h, 8h, 13)
4154 GEN_SHIFT_TEST(ushr, 8h, 8h, 16)
4155 GEN_SHIFT_TEST(ushr, 4h, 4h, 1)
4156 GEN_SHIFT_TEST(ushr, 4h, 4h, 13)
4157 GEN_SHIFT_TEST(ushr, 4h, 4h, 16)
4158 GEN_SHIFT_TEST(ushr, 16b, 16b, 1)
4159 GEN_SHIFT_TEST(ushr, 16b, 16b, 8)
4160 GEN_SHIFT_TEST(ushr, 8b, 8b, 1)
4161 GEN_SHIFT_TEST(ushr, 8b, 8b, 8)
4162 
/* Scalar (d-register) ssra / usra tests on d5, d28 with shift
   immediates 1, 32 and 64. */
4163 GEN_TWOVEC_TEST(ssra_d_d_1,  "ssra d5, d28, #1",  5, 28)
4164 GEN_TWOVEC_TEST(ssra_d_d_32, "ssra d5, d28, #32", 5, 28)
4165 GEN_TWOVEC_TEST(ssra_d_d_64, "ssra d5, d28, #64", 5, 28)
4166 GEN_TWOVEC_TEST(usra_d_d_1,  "usra d5, d28, #1",  5, 28)
4167 GEN_TWOVEC_TEST(usra_d_d_32, "usra d5, d28, #32", 5, 28)
4168 GEN_TWOVEC_TEST(usra_d_d_64, "usra d5, d28, #64", 5, 28)
4169 
/* Vector ssra / usra tests on v6, v27 over the arrangements 2d, 4s,
   2s, 8h, 4h, 16b and 8b.  Each arrangement gets three immediates:
   1, a midpoint (32/16/8/3) and the full element width (64/32/16/8). */
4170 GEN_TWOVEC_TEST(ssra_2d_2d_1,   "ssra v6.2d,  v27.2d, #1",  6, 27)
4171 GEN_TWOVEC_TEST(ssra_2d_2d_32,  "ssra v6.2d,  v27.2d, #32", 6, 27)
4172 GEN_TWOVEC_TEST(ssra_2d_2d_64,  "ssra v6.2d,  v27.2d, #64", 6, 27)
4173 GEN_TWOVEC_TEST(ssra_4s_4s_1,   "ssra v6.4s,  v27.4s, #1",  6, 27)
4174 GEN_TWOVEC_TEST(ssra_4s_4s_16,  "ssra v6.4s,  v27.4s, #16", 6, 27)
4175 GEN_TWOVEC_TEST(ssra_4s_4s_32,  "ssra v6.4s,  v27.4s, #32", 6, 27)
4176 GEN_TWOVEC_TEST(ssra_2s_2s_1,   "ssra v6.2s,  v27.2s, #1",  6, 27)
4177 GEN_TWOVEC_TEST(ssra_2s_2s_16,  "ssra v6.2s,  v27.2s, #16", 6, 27)
4178 GEN_TWOVEC_TEST(ssra_2s_2s_32,  "ssra v6.2s,  v27.2s, #32", 6, 27)
4179 GEN_TWOVEC_TEST(ssra_8h_8h_1,   "ssra v6.8h,  v27.8h, #1",  6, 27)
4180 GEN_TWOVEC_TEST(ssra_8h_8h_8,   "ssra v6.8h,  v27.8h, #8",  6, 27)
4181 GEN_TWOVEC_TEST(ssra_8h_8h_16,  "ssra v6.8h,  v27.8h, #16", 6, 27)
4182 GEN_TWOVEC_TEST(ssra_4h_4h_1,   "ssra v6.4h,  v27.4h, #1",  6, 27)
4183 GEN_TWOVEC_TEST(ssra_4h_4h_8,   "ssra v6.4h,  v27.4h, #8",  6, 27)
4184 GEN_TWOVEC_TEST(ssra_4h_4h_16,  "ssra v6.4h,  v27.4h, #16", 6, 27)
4185 GEN_TWOVEC_TEST(ssra_16b_16b_1, "ssra v6.16b, v27.16b, #1", 6, 27)
4186 GEN_TWOVEC_TEST(ssra_16b_16b_3, "ssra v6.16b, v27.16b, #3", 6, 27)
4187 GEN_TWOVEC_TEST(ssra_16b_16b_8, "ssra v6.16b, v27.16b, #8", 6, 27)
4188 GEN_TWOVEC_TEST(ssra_8b_8b_1,   "ssra v6.8b,  v27.8b, #1",  6, 27)
4189 GEN_TWOVEC_TEST(ssra_8b_8b_3,   "ssra v6.8b,  v27.8b, #3",  6, 27)
4190 GEN_TWOVEC_TEST(ssra_8b_8b_8,   "ssra v6.8b,  v27.8b, #8",  6, 27)
4191 GEN_TWOVEC_TEST(usra_2d_2d_1,   "usra v6.2d,  v27.2d, #1",  6, 27)
4192 GEN_TWOVEC_TEST(usra_2d_2d_32,  "usra v6.2d,  v27.2d, #32", 6, 27)
4193 GEN_TWOVEC_TEST(usra_2d_2d_64,  "usra v6.2d,  v27.2d, #64", 6, 27)
4194 GEN_TWOVEC_TEST(usra_4s_4s_1,   "usra v6.4s,  v27.4s, #1",  6, 27)
4195 GEN_TWOVEC_TEST(usra_4s_4s_16,  "usra v6.4s,  v27.4s, #16", 6, 27)
4196 GEN_TWOVEC_TEST(usra_4s_4s_32,  "usra v6.4s,  v27.4s, #32", 6, 27)
4197 GEN_TWOVEC_TEST(usra_2s_2s_1,   "usra v6.2s,  v27.2s, #1",  6, 27)
4198 GEN_TWOVEC_TEST(usra_2s_2s_16,  "usra v6.2s,  v27.2s, #16", 6, 27)
4199 GEN_TWOVEC_TEST(usra_2s_2s_32,  "usra v6.2s,  v27.2s, #32", 6, 27)
4200 GEN_TWOVEC_TEST(usra_8h_8h_1,   "usra v6.8h,  v27.8h, #1",  6, 27)
4201 GEN_TWOVEC_TEST(usra_8h_8h_8,   "usra v6.8h,  v27.8h, #8",  6, 27)
4202 GEN_TWOVEC_TEST(usra_8h_8h_16,  "usra v6.8h,  v27.8h, #16", 6, 27)
4203 GEN_TWOVEC_TEST(usra_4h_4h_1,   "usra v6.4h,  v27.4h, #1",  6, 27)
4204 GEN_TWOVEC_TEST(usra_4h_4h_8,   "usra v6.4h,  v27.4h, #8",  6, 27)
4205 GEN_TWOVEC_TEST(usra_4h_4h_16,  "usra v6.4h,  v27.4h, #16", 6, 27)
4206 GEN_TWOVEC_TEST(usra_16b_16b_1, "usra v6.16b, v27.16b, #1", 6, 27)
4207 GEN_TWOVEC_TEST(usra_16b_16b_3, "usra v6.16b, v27.16b, #3", 6, 27)
4208 GEN_TWOVEC_TEST(usra_16b_16b_8, "usra v6.16b, v27.16b, #8", 6, 27)
4209 GEN_TWOVEC_TEST(usra_8b_8b_1,   "usra v6.8b,  v27.8b, #1",  6, 27)
4210 GEN_TWOVEC_TEST(usra_8b_8b_3,   "usra v6.8b,  v27.8b, #3",  6, 27)
4211 GEN_TWOVEC_TEST(usra_8b_8b_8,   "usra v6.8b,  v27.8b, #8",  6, 27)
4212 
/* Scalar (d-register) srshl / urshl tests with a register shift
   operand: d29, d28, d27. */
4213 GEN_THREEVEC_TEST(srshl_d_d_d, "srshl d29, d28, d27", 29, 28, 27)
4214 GEN_THREEVEC_TEST(urshl_d_d_d, "urshl d29, d28, d27", 29, 28, 27)
4215 
/* Vector srshl / urshl (register-operand shift) tests on v29, v28,
   v27, one case per arrangement: 2d, 4s, 2s, 8h, 4h, 16b, 8b. */
4216 GEN_THREEVEC_TEST(srshl_2d_2d_2d,   "srshl v29.2d, v28.2d, v27.2d", 29,28,27)
4217 GEN_THREEVEC_TEST(srshl_4s_4s_4s,   "srshl v29.4s, v28.4s, v27.4s", 29,28,27)
4218 GEN_THREEVEC_TEST(srshl_2s_2s_2s,   "srshl v29.2s, v28.2s, v27.2s", 29,28,27)
4219 GEN_THREEVEC_TEST(srshl_8h_8h_8h,   "srshl v29.8h, v28.8h, v27.8h", 29,28,27)
4220 GEN_THREEVEC_TEST(srshl_4h_4h_4h,   "srshl v29.4h, v28.4h, v27.4h", 29,28,27)
4221 GEN_THREEVEC_TEST(srshl_16b_16b_16b,"srshl v29.16b, v28.16b, v27.16b", 29,28,27)
4222 GEN_THREEVEC_TEST(srshl_8b_8b_8b,   "srshl v29.8b, v28.8b, v27.8b", 29,28,27)
4223 GEN_THREEVEC_TEST(urshl_2d_2d_2d,   "urshl v29.2d, v28.2d, v27.2d", 29,28,27)
4224 GEN_THREEVEC_TEST(urshl_4s_4s_4s,   "urshl v29.4s, v28.4s, v27.4s", 29,28,27)
4225 GEN_THREEVEC_TEST(urshl_2s_2s_2s,   "urshl v29.2s, v28.2s, v27.2s", 29,28,27)
4226 GEN_THREEVEC_TEST(urshl_8h_8h_8h,   "urshl v29.8h, v28.8h, v27.8h", 29,28,27)
4227 GEN_THREEVEC_TEST(urshl_4h_4h_4h,   "urshl v29.4h, v28.4h, v27.4h", 29,28,27)
4228 GEN_THREEVEC_TEST(urshl_16b_16b_16b,"urshl v29.16b, v28.16b, v27.16b", 29,28,27)
4229 GEN_THREEVEC_TEST(urshl_8b_8b_8b,   "urshl v29.8b, v28.8b, v27.8b", 29,28,27)
4230 
/* Scalar (d-register) srshr / urshr tests on d5, d28 with shift
   immediates 1, 32 and 64. */
4231 GEN_TWOVEC_TEST(srshr_d_d_1,  "srshr d5, d28, #1",  5, 28)
4232 GEN_TWOVEC_TEST(srshr_d_d_32, "srshr d5, d28, #32", 5, 28)
4233 GEN_TWOVEC_TEST(srshr_d_d_64, "srshr d5, d28, #64", 5, 28)
4234 GEN_TWOVEC_TEST(urshr_d_d_1,  "urshr d5, d28, #1",  5, 28)
4235 GEN_TWOVEC_TEST(urshr_d_d_32, "urshr d5, d28, #32", 5, 28)
4236 GEN_TWOVEC_TEST(urshr_d_d_64, "urshr d5, d28, #64", 5, 28)
4237 
/* Vector srshr / urshr tests on v6, v27 over the arrangements 2d, 4s,
   2s, 8h, 4h, 16b and 8b.  Each arrangement gets three immediates:
   1, a midpoint (32/16/8/3) and the full element width (64/32/16/8). */
4238 GEN_TWOVEC_TEST(srshr_2d_2d_1,   "srshr v6.2d,  v27.2d, #1",  6, 27)
4239 GEN_TWOVEC_TEST(srshr_2d_2d_32,  "srshr v6.2d,  v27.2d, #32", 6, 27)
4240 GEN_TWOVEC_TEST(srshr_2d_2d_64,  "srshr v6.2d,  v27.2d, #64", 6, 27)
4241 GEN_TWOVEC_TEST(srshr_4s_4s_1,   "srshr v6.4s,  v27.4s, #1",  6, 27)
4242 GEN_TWOVEC_TEST(srshr_4s_4s_16,  "srshr v6.4s,  v27.4s, #16", 6, 27)
4243 GEN_TWOVEC_TEST(srshr_4s_4s_32,  "srshr v6.4s,  v27.4s, #32", 6, 27)
4244 GEN_TWOVEC_TEST(srshr_2s_2s_1,   "srshr v6.2s,  v27.2s, #1",  6, 27)
4245 GEN_TWOVEC_TEST(srshr_2s_2s_16,  "srshr v6.2s,  v27.2s, #16", 6, 27)
4246 GEN_TWOVEC_TEST(srshr_2s_2s_32,  "srshr v6.2s,  v27.2s, #32", 6, 27)
4247 GEN_TWOVEC_TEST(srshr_8h_8h_1,   "srshr v6.8h,  v27.8h, #1",  6, 27)
4248 GEN_TWOVEC_TEST(srshr_8h_8h_8,   "srshr v6.8h,  v27.8h, #8",  6, 27)
4249 GEN_TWOVEC_TEST(srshr_8h_8h_16,  "srshr v6.8h,  v27.8h, #16", 6, 27)
4250 GEN_TWOVEC_TEST(srshr_4h_4h_1,   "srshr v6.4h,  v27.4h, #1",  6, 27)
4251 GEN_TWOVEC_TEST(srshr_4h_4h_8,   "srshr v6.4h,  v27.4h, #8",  6, 27)
4252 GEN_TWOVEC_TEST(srshr_4h_4h_16,  "srshr v6.4h,  v27.4h, #16", 6, 27)
4253 GEN_TWOVEC_TEST(srshr_16b_16b_1, "srshr v6.16b, v27.16b, #1", 6, 27)
4254 GEN_TWOVEC_TEST(srshr_16b_16b_3, "srshr v6.16b, v27.16b, #3", 6, 27)
4255 GEN_TWOVEC_TEST(srshr_16b_16b_8, "srshr v6.16b, v27.16b, #8", 6, 27)
4256 GEN_TWOVEC_TEST(srshr_8b_8b_1,   "srshr v6.8b,  v27.8b, #1",  6, 27)
4257 GEN_TWOVEC_TEST(srshr_8b_8b_3,   "srshr v6.8b,  v27.8b, #3",  6, 27)
4258 GEN_TWOVEC_TEST(srshr_8b_8b_8,   "srshr v6.8b,  v27.8b, #8",  6, 27)
4259 GEN_TWOVEC_TEST(urshr_2d_2d_1,   "urshr v6.2d,  v27.2d, #1",  6, 27)
4260 GEN_TWOVEC_TEST(urshr_2d_2d_32,  "urshr v6.2d,  v27.2d, #32", 6, 27)
4261 GEN_TWOVEC_TEST(urshr_2d_2d_64,  "urshr v6.2d,  v27.2d, #64", 6, 27)
4262 GEN_TWOVEC_TEST(urshr_4s_4s_1,   "urshr v6.4s,  v27.4s, #1",  6, 27)
4263 GEN_TWOVEC_TEST(urshr_4s_4s_16,  "urshr v6.4s,  v27.4s, #16", 6, 27)
4264 GEN_TWOVEC_TEST(urshr_4s_4s_32,  "urshr v6.4s,  v27.4s, #32", 6, 27)
4265 GEN_TWOVEC_TEST(urshr_2s_2s_1,   "urshr v6.2s,  v27.2s, #1",  6, 27)
4266 GEN_TWOVEC_TEST(urshr_2s_2s_16,  "urshr v6.2s,  v27.2s, #16", 6, 27)
4267 GEN_TWOVEC_TEST(urshr_2s_2s_32,  "urshr v6.2s,  v27.2s, #32", 6, 27)
4268 GEN_TWOVEC_TEST(urshr_8h_8h_1,   "urshr v6.8h,  v27.8h, #1",  6, 27)
4269 GEN_TWOVEC_TEST(urshr_8h_8h_8,   "urshr v6.8h,  v27.8h, #8",  6, 27)
4270 GEN_TWOVEC_TEST(urshr_8h_8h_16,  "urshr v6.8h,  v27.8h, #16", 6, 27)
4271 GEN_TWOVEC_TEST(urshr_4h_4h_1,   "urshr v6.4h,  v27.4h, #1",  6, 27)
4272 GEN_TWOVEC_TEST(urshr_4h_4h_8,   "urshr v6.4h,  v27.4h, #8",  6, 27)
4273 GEN_TWOVEC_TEST(urshr_4h_4h_16,  "urshr v6.4h,  v27.4h, #16", 6, 27)
4274 GEN_TWOVEC_TEST(urshr_16b_16b_1, "urshr v6.16b, v27.16b, #1", 6, 27)
4275 GEN_TWOVEC_TEST(urshr_16b_16b_3, "urshr v6.16b, v27.16b, #3", 6, 27)
4276 GEN_TWOVEC_TEST(urshr_16b_16b_8, "urshr v6.16b, v27.16b, #8", 6, 27)
4277 GEN_TWOVEC_TEST(urshr_8b_8b_1,   "urshr v6.8b,  v27.8b, #1",  6, 27)
4278 GEN_TWOVEC_TEST(urshr_8b_8b_3,   "urshr v6.8b,  v27.8b, #3",  6, 27)
4279 GEN_TWOVEC_TEST(urshr_8b_8b_8,   "urshr v6.8b,  v27.8b, #8",  6, 27)
4280 
/* Scalar (d-register) srsra / ursra tests on d5, d28 with shift
   immediates 1, 32 and 64. */
4281 GEN_TWOVEC_TEST(srsra_d_d_1,  "srsra d5, d28, #1",  5, 28)
4282 GEN_TWOVEC_TEST(srsra_d_d_32, "srsra d5, d28, #32", 5, 28)
4283 GEN_TWOVEC_TEST(srsra_d_d_64, "srsra d5, d28, #64", 5, 28)
4284 GEN_TWOVEC_TEST(ursra_d_d_1,  "ursra d5, d28, #1",  5, 28)
4285 GEN_TWOVEC_TEST(ursra_d_d_32, "ursra d5, d28, #32", 5, 28)
4286 GEN_TWOVEC_TEST(ursra_d_d_64, "ursra d5, d28, #64", 5, 28)
4287 
/* Vector srsra / ursra tests on v6, v27 over the arrangements 2d, 4s,
   2s, 8h, 4h, 16b and 8b.  Each arrangement gets three immediates:
   1, a midpoint (32/16/8/3) and the full element width (64/32/16/8). */
4288 GEN_TWOVEC_TEST(srsra_2d_2d_1,   "srsra v6.2d,  v27.2d, #1",  6, 27)
4289 GEN_TWOVEC_TEST(srsra_2d_2d_32,  "srsra v6.2d,  v27.2d, #32", 6, 27)
4290 GEN_TWOVEC_TEST(srsra_2d_2d_64,  "srsra v6.2d,  v27.2d, #64", 6, 27)
4291 GEN_TWOVEC_TEST(srsra_4s_4s_1,   "srsra v6.4s,  v27.4s, #1",  6, 27)
4292 GEN_TWOVEC_TEST(srsra_4s_4s_16,  "srsra v6.4s,  v27.4s, #16", 6, 27)
4293 GEN_TWOVEC_TEST(srsra_4s_4s_32,  "srsra v6.4s,  v27.4s, #32", 6, 27)
4294 GEN_TWOVEC_TEST(srsra_2s_2s_1,   "srsra v6.2s,  v27.2s, #1",  6, 27)
4295 GEN_TWOVEC_TEST(srsra_2s_2s_16,  "srsra v6.2s,  v27.2s, #16", 6, 27)
4296 GEN_TWOVEC_TEST(srsra_2s_2s_32,  "srsra v6.2s,  v27.2s, #32", 6, 27)
4297 GEN_TWOVEC_TEST(srsra_8h_8h_1,   "srsra v6.8h,  v27.8h, #1",  6, 27)
4298 GEN_TWOVEC_TEST(srsra_8h_8h_8,   "srsra v6.8h,  v27.8h, #8",  6, 27)
4299 GEN_TWOVEC_TEST(srsra_8h_8h_16,  "srsra v6.8h,  v27.8h, #16", 6, 27)
4300 GEN_TWOVEC_TEST(srsra_4h_4h_1,   "srsra v6.4h,  v27.4h, #1",  6, 27)
4301 GEN_TWOVEC_TEST(srsra_4h_4h_8,   "srsra v6.4h,  v27.4h, #8",  6, 27)
4302 GEN_TWOVEC_TEST(srsra_4h_4h_16,  "srsra v6.4h,  v27.4h, #16", 6, 27)
4303 GEN_TWOVEC_TEST(srsra_16b_16b_1, "srsra v6.16b, v27.16b, #1", 6, 27)
4304 GEN_TWOVEC_TEST(srsra_16b_16b_3, "srsra v6.16b, v27.16b, #3", 6, 27)
4305 GEN_TWOVEC_TEST(srsra_16b_16b_8, "srsra v6.16b, v27.16b, #8", 6, 27)
4306 GEN_TWOVEC_TEST(srsra_8b_8b_1,   "srsra v6.8b,  v27.8b, #1",  6, 27)
4307 GEN_TWOVEC_TEST(srsra_8b_8b_3,   "srsra v6.8b,  v27.8b, #3",  6, 27)
4308 GEN_TWOVEC_TEST(srsra_8b_8b_8,   "srsra v6.8b,  v27.8b, #8",  6, 27)
4309 GEN_TWOVEC_TEST(ursra_2d_2d_1,   "ursra v6.2d,  v27.2d, #1",  6, 27)
4310 GEN_TWOVEC_TEST(ursra_2d_2d_32,  "ursra v6.2d,  v27.2d, #32", 6, 27)
4311 GEN_TWOVEC_TEST(ursra_2d_2d_64,  "ursra v6.2d,  v27.2d, #64", 6, 27)
4312 GEN_TWOVEC_TEST(ursra_4s_4s_1,   "ursra v6.4s,  v27.4s, #1",  6, 27)
4313 GEN_TWOVEC_TEST(ursra_4s_4s_16,  "ursra v6.4s,  v27.4s, #16", 6, 27)
4314 GEN_TWOVEC_TEST(ursra_4s_4s_32,  "ursra v6.4s,  v27.4s, #32", 6, 27)
4315 GEN_TWOVEC_TEST(ursra_2s_2s_1,   "ursra v6.2s,  v27.2s, #1",  6, 27)
4316 GEN_TWOVEC_TEST(ursra_2s_2s_16,  "ursra v6.2s,  v27.2s, #16", 6, 27)
4317 GEN_TWOVEC_TEST(ursra_2s_2s_32,  "ursra v6.2s,  v27.2s, #32", 6, 27)
4318 GEN_TWOVEC_TEST(ursra_8h_8h_1,   "ursra v6.8h,  v27.8h, #1",  6, 27)
4319 GEN_TWOVEC_TEST(ursra_8h_8h_8,   "ursra v6.8h,  v27.8h, #8",  6, 27)
4320 GEN_TWOVEC_TEST(ursra_8h_8h_16,  "ursra v6.8h,  v27.8h, #16", 6, 27)
4321 GEN_TWOVEC_TEST(ursra_4h_4h_1,   "ursra v6.4h,  v27.4h, #1",  6, 27)
4322 GEN_TWOVEC_TEST(ursra_4h_4h_8,   "ursra v6.4h,  v27.4h, #8",  6, 27)
4323 GEN_TWOVEC_TEST(ursra_4h_4h_16,  "ursra v6.4h,  v27.4h, #16", 6, 27)
4324 GEN_TWOVEC_TEST(ursra_16b_16b_1, "ursra v6.16b, v27.16b, #1", 6, 27)
4325 GEN_TWOVEC_TEST(ursra_16b_16b_3, "ursra v6.16b, v27.16b, #3", 6, 27)
4326 GEN_TWOVEC_TEST(ursra_16b_16b_8, "ursra v6.16b, v27.16b, #8", 6, 27)
4327 GEN_TWOVEC_TEST(ursra_8b_8b_1,   "ursra v6.8b,  v27.8b, #1",  6, 27)
4328 GEN_TWOVEC_TEST(ursra_8b_8b_3,   "ursra v6.8b,  v27.8b, #3",  6, 27)
4329 GEN_TWOVEC_TEST(ursra_8b_8b_8,   "ursra v6.8b,  v27.8b, #8",  6, 27)
4330 
/* sshll / ushll and sshll2 / ushll2 tests generated via GEN_SHIFT_TEST.
   The arguments are a mnemonic, two arrangement specifiers (the first
   has the wider elements) and a number that is presumably the shift
   immediate -- the macro is defined outside this section, confirm
   there.  Immediates per pair: 2d with 2s/4s use 0, 15, 31; 4s with
   4h/8h use 0, 7, 15; 8h with 8b/16b use 0, 3, 7. */
4331 GEN_SHIFT_TEST(sshll,  2d, 2s,  0)
4332 GEN_SHIFT_TEST(sshll,  2d, 2s,  15)
4333 GEN_SHIFT_TEST(sshll,  2d, 2s,  31)
4334 GEN_SHIFT_TEST(sshll2, 2d, 4s,  0)
4335 GEN_SHIFT_TEST(sshll2, 2d, 4s,  15)
4336 GEN_SHIFT_TEST(sshll2, 2d, 4s,  31)
4337 GEN_SHIFT_TEST(sshll,  4s, 4h,  0)
4338 GEN_SHIFT_TEST(sshll,  4s, 4h,  7)
4339 GEN_SHIFT_TEST(sshll,  4s, 4h,  15)
4340 GEN_SHIFT_TEST(sshll2, 4s, 8h,  0)
4341 GEN_SHIFT_TEST(sshll2, 4s, 8h,  7)
4342 GEN_SHIFT_TEST(sshll2, 4s, 8h,  15)
4343 GEN_SHIFT_TEST(sshll,  8h, 8b,  0)
4344 GEN_SHIFT_TEST(sshll,  8h, 8b,  3)
4345 GEN_SHIFT_TEST(sshll,  8h, 8b,  7)
4346 GEN_SHIFT_TEST(sshll2, 8h, 16b, 0)
4347 GEN_SHIFT_TEST(sshll2, 8h, 16b, 3)
4348 GEN_SHIFT_TEST(sshll2, 8h, 16b, 7)
4349 GEN_SHIFT_TEST(ushll,  2d, 2s, 0)
4350 GEN_SHIFT_TEST(ushll,  2d, 2s, 15)
4351 GEN_SHIFT_TEST(ushll,  2d, 2s, 31)
4352 GEN_SHIFT_TEST(ushll2, 2d, 4s, 0)
4353 GEN_SHIFT_TEST(ushll2, 2d, 4s, 15)
4354 GEN_SHIFT_TEST(ushll2, 2d, 4s, 31)
4355 GEN_SHIFT_TEST(ushll,  4s, 4h,  0)
4356 GEN_SHIFT_TEST(ushll,  4s, 4h,  7)
4357 GEN_SHIFT_TEST(ushll,  4s, 4h,  15)
4358 GEN_SHIFT_TEST(ushll2, 4s, 8h,  0)
4359 GEN_SHIFT_TEST(ushll2, 4s, 8h,  7)
4360 GEN_SHIFT_TEST(ushll2, 4s, 8h,  15)
4361 GEN_SHIFT_TEST(ushll,  8h, 8b,  0)
4362 GEN_SHIFT_TEST(ushll,  8h, 8b,  3)
4363 GEN_SHIFT_TEST(ushll,  8h, 8b,  7)
4364 GEN_SHIFT_TEST(ushll2, 8h, 16b, 0)
4365 GEN_SHIFT_TEST(ushll2, 8h, 16b, 3)
4366 GEN_SHIFT_TEST(ushll2, 8h, 16b, 7)
4367 
/* Scalar suqadd / usqadd tests on registers 22 and 23, one case per
   register view: d, s, h and b. */
4368 GEN_TWOVEC_TEST(suqadd_d_d,  "suqadd d22, d23",   22, 23)
4369 GEN_TWOVEC_TEST(suqadd_s_s,  "suqadd s22, s23",   22, 23)
4370 GEN_TWOVEC_TEST(suqadd_h_h,  "suqadd h22, h23",   22, 23)
4371 GEN_TWOVEC_TEST(suqadd_b_b,  "suqadd b22, b23",   22, 23)
4372 GEN_TWOVEC_TEST(usqadd_d_d,  "usqadd d22, d23",   22, 23)
4373 GEN_TWOVEC_TEST(usqadd_s_s,  "usqadd s22, s23",   22, 23)
4374 GEN_TWOVEC_TEST(usqadd_h_h,  "usqadd h22, h23",   22, 23)
4375 GEN_TWOVEC_TEST(usqadd_b_b,  "usqadd b22, b23",   22, 23)
4376 
/* Vector suqadd / usqadd tests on v6, v27, one case per arrangement:
   2d, 4s, 2s, 8h, 4h, 16b, 8b. */
4377 GEN_TWOVEC_TEST(suqadd_2d_2d,   "suqadd v6.2d,  v27.2d",  6, 27)
4378 GEN_TWOVEC_TEST(suqadd_4s_4s,   "suqadd v6.4s,  v27.4s",  6, 27)
4379 GEN_TWOVEC_TEST(suqadd_2s_2s,   "suqadd v6.2s,  v27.2s",  6, 27)
4380 GEN_TWOVEC_TEST(suqadd_8h_8h,   "suqadd v6.8h,  v27.8h",  6, 27)
4381 GEN_TWOVEC_TEST(suqadd_4h_4h,   "suqadd v6.4h,  v27.4h",  6, 27)
4382 GEN_TWOVEC_TEST(suqadd_16b_16b, "suqadd v6.16b, v27.16b", 6, 27)
4383 GEN_TWOVEC_TEST(suqadd_8b_8b,   "suqadd v6.8b,  v27.8b",  6, 27)
4384 GEN_TWOVEC_TEST(usqadd_2d_2d,   "usqadd v6.2d,  v27.2d",  6, 27)
4385 GEN_TWOVEC_TEST(usqadd_4s_4s,   "usqadd v6.4s,  v27.4s",  6, 27)
4386 GEN_TWOVEC_TEST(usqadd_2s_2s,   "usqadd v6.2s,  v27.2s",  6, 27)
4387 GEN_TWOVEC_TEST(usqadd_8h_8h,   "usqadd v6.8h,  v27.8h",  6, 27)
4388 GEN_TWOVEC_TEST(usqadd_4h_4h,   "usqadd v6.4h,  v27.4h",  6, 27)
4389 GEN_TWOVEC_TEST(usqadd_16b_16b, "usqadd v6.16b, v27.16b", 6, 27)
4390 GEN_TWOVEC_TEST(usqadd_8b_8b,   "usqadd v6.8b,  v27.8b",  6, 27)
4391 
/* tbl tests with 1 to 4 table registers, in .16b and .8b result forms.
   The macro is given registers 21 (tbl result), 15 (first table
   register) and 23 (index vector).  For the multi-register forms the
   extra table registers v16, v17 and v18 are filled in by eor
   instructions placed ahead of the tbl in the same instruction string,
   so the table is always the consecutive run v15, v16, ...
   NOTE(review): v16-v18 are written here but are not among the
   register numbers passed to the macro; confirm the macro's asm
   clobber list accounts for them. */
4392 // Uses v15 as the first table entry
4393 GEN_THREEVEC_TEST(
4394    tbl_16b_1reg, "tbl v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
4395 // and v15 ^ v21 as the second table entry
4396 GEN_THREEVEC_TEST(
4397    tbl_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
4398                  "tbl v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
4399 // and v15 ^ v23 as the third table entry
4400 GEN_THREEVEC_TEST(
4401    tbl_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
4402                  "eor v17.16b, v15.16b, v23.16b ; "
4403                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
4404                  21, 15, 23)
4405 // and v21 ^ v23 as the fourth table entry
4406 GEN_THREEVEC_TEST(
4407    tbl_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
4408                  "eor v17.16b, v15.16b, v23.16b ; "
4409                  "eor v18.16b, v21.16b, v23.16b ; "
4410                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
4411                  21, 15, 23)
4412 // Same register scheme for tbl .8b, tbx .16b, tbx .8b
4413 GEN_THREEVEC_TEST(
4414    tbl_8b_1reg, "tbl v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
4415 GEN_THREEVEC_TEST(
4416    tbl_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
4417                 "tbl v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
4418 GEN_THREEVEC_TEST(
4419    tbl_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
4420                 "eor v17.16b, v15.16b, v23.16b ; "
4421                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
4422                 21, 15, 23)
4423 GEN_THREEVEC_TEST(
4424    tbl_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
4425                 "eor v17.16b, v15.16b, v23.16b ; "
4426                 "eor v18.16b, v21.16b, v23.16b ; "
4427                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
4428                 21, 15, 23)
4429 
// tbx (table lookup extension) with 1 to 4 table registers.
// v15 is the first table entry; v16 = v15 ^ v21, v17 = v15 ^ v23 and
// v18 = v21 ^ v23 are computed by the eor instructions placed ahead of
// the tbx in each instruction string, so the table list is always the
// consecutive registers v15..v18.  v21 is the destination and v23 the
// index vector.
GEN_THREEVEC_TEST(
   tbx_16b_1reg, "tbx v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
GEN_THREEVEC_TEST(
   tbx_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
                 "tbx v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
GEN_THREEVEC_TEST(
   tbx_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
                 "eor v17.16b, v15.16b, v23.16b ; "
                 "tbx v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
                 21, 15, 23)
GEN_THREEVEC_TEST(
   tbx_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
                 "eor v17.16b, v15.16b, v23.16b ; "
                 "eor v18.16b, v21.16b, v23.16b ; "
                 "tbx v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
                 21, 15, 23)
// Same register scheme for tbx .8b
// (the table registers stay .16b; only the destination and index
// operands switch to .8b)
GEN_THREEVEC_TEST(
   tbx_8b_1reg, "tbx v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
GEN_THREEVEC_TEST(
   tbx_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
                "tbx v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
GEN_THREEVEC_TEST(
   tbx_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
                "eor v17.16b, v15.16b, v23.16b ; "
                "tbx v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
                21, 15, 23)
GEN_THREEVEC_TEST(
   tbx_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
                "eor v17.16b, v15.16b, v23.16b ; "
                "eor v18.16b, v21.16b, v23.16b ; "
                "tbx v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
                21, 15, 23)
4463 
// trn1 / trn2: one test per arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b),
// all of the form "v1 <- op(v2, v4)".  The trailing numbers repeat the
// register numbers used in the instruction string.
GEN_THREEVEC_TEST(trn1_2d_2d_2d,    "trn1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
GEN_THREEVEC_TEST(trn1_4s_4s_4s,    "trn1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
GEN_THREEVEC_TEST(trn1_2s_2s_2s,    "trn1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
GEN_THREEVEC_TEST(trn1_8h_8h_8h,    "trn1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
GEN_THREEVEC_TEST(trn1_4h_4h_4h,    "trn1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
GEN_THREEVEC_TEST(trn1_16b_16b_16b, "trn1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
GEN_THREEVEC_TEST(trn1_8b_8b_8b,    "trn1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
GEN_THREEVEC_TEST(trn2_2d_2d_2d,    "trn2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
GEN_THREEVEC_TEST(trn2_4s_4s_4s,    "trn2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
GEN_THREEVEC_TEST(trn2_2s_2s_2s,    "trn2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
GEN_THREEVEC_TEST(trn2_8h_8h_8h,    "trn2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
GEN_THREEVEC_TEST(trn2_4h_4h_4h,    "trn2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
GEN_THREEVEC_TEST(trn2_16b_16b_16b, "trn2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
GEN_THREEVEC_TEST(trn2_8b_8b_8b,    "trn2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
4478 
// urecpe / ursqrte: only the 4s and 2s arrangements are instantiated.
GEN_TWOVEC_TEST(urecpe_4s_4s,   "urecpe v6.4s,  v27.4s",  6, 27)
GEN_TWOVEC_TEST(urecpe_2s_2s,   "urecpe v6.2s,  v27.2s",  6, 27)
GEN_TWOVEC_TEST(ursqrte_4s_4s,   "ursqrte v6.4s,  v27.4s",  6, 27)
GEN_TWOVEC_TEST(ursqrte_2s_2s,   "ursqrte v6.2s,  v27.2s",  6, 27)
4483 
// uzp1 / uzp2 / zip1 / zip2: one test per arrangement
// (2d, 4s, 2s, 8h, 4h, 16b, 8b), all of the form "v1 <- op(v2, v4)".
GEN_THREEVEC_TEST(uzp1_2d_2d_2d,    "uzp1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
GEN_THREEVEC_TEST(uzp1_4s_4s_4s,    "uzp1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
GEN_THREEVEC_TEST(uzp1_2s_2s_2s,    "uzp1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
GEN_THREEVEC_TEST(uzp1_8h_8h_8h,    "uzp1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
GEN_THREEVEC_TEST(uzp1_4h_4h_4h,    "uzp1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
GEN_THREEVEC_TEST(uzp1_16b_16b_16b, "uzp1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
GEN_THREEVEC_TEST(uzp1_8b_8b_8b,    "uzp1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
GEN_THREEVEC_TEST(uzp2_2d_2d_2d,    "uzp2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
GEN_THREEVEC_TEST(uzp2_4s_4s_4s,    "uzp2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
GEN_THREEVEC_TEST(uzp2_2s_2s_2s,    "uzp2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
GEN_THREEVEC_TEST(uzp2_8h_8h_8h,    "uzp2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
GEN_THREEVEC_TEST(uzp2_4h_4h_4h,    "uzp2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
GEN_THREEVEC_TEST(uzp2_16b_16b_16b, "uzp2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
GEN_THREEVEC_TEST(uzp2_8b_8b_8b,    "uzp2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
GEN_THREEVEC_TEST(zip1_2d_2d_2d,    "zip1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
GEN_THREEVEC_TEST(zip1_4s_4s_4s,    "zip1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
GEN_THREEVEC_TEST(zip1_2s_2s_2s,    "zip1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
GEN_THREEVEC_TEST(zip1_8h_8h_8h,    "zip1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
GEN_THREEVEC_TEST(zip1_4h_4h_4h,    "zip1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
GEN_THREEVEC_TEST(zip1_16b_16b_16b, "zip1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
GEN_THREEVEC_TEST(zip1_8b_8b_8b,    "zip1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
GEN_THREEVEC_TEST(zip2_2d_2d_2d,    "zip2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
GEN_THREEVEC_TEST(zip2_4s_4s_4s,    "zip2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
GEN_THREEVEC_TEST(zip2_2s_2s_2s,    "zip2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
GEN_THREEVEC_TEST(zip2_8h_8h_8h,    "zip2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
GEN_THREEVEC_TEST(zip2_4h_4h_4h,    "zip2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
GEN_THREEVEC_TEST(zip2_16b_16b_16b, "zip2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
GEN_THREEVEC_TEST(zip2_8b_8b_8b,    "zip2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
4512 
// xtn / xtn2 for each lane-size pair.  The second macro argument is the
// arrangement with the narrower lanes and the third the one with the
// wider lanes -- presumably destination then source, but the
// GEN_UNARY_TEST definition is not in this part of the file (TODO confirm).
GEN_UNARY_TEST(xtn,  2s, 2d)
GEN_UNARY_TEST(xtn2, 4s, 2d)
GEN_UNARY_TEST(xtn,  4h, 4s)
GEN_UNARY_TEST(xtn2, 8h, 4s)
GEN_UNARY_TEST(xtn,  8b, 8h)
GEN_UNARY_TEST(xtn2, 16b, 8h)
4519 
4520 // ======================== MEM ========================
4521 
4522 // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
4523 
4524 // ======================== CRYPTO ========================
4525 
// AES: two-operand tests on v6 and v27, all on the 16b arrangement.
GEN_TWOVEC_TEST(aesd_16b_16b,    "aesd v6.16b,  v27.16b",  6, 27)
GEN_TWOVEC_TEST(aese_16b_16b,    "aese v6.16b,  v27.16b",  6, 27)
GEN_TWOVEC_TEST(aesimc_16b_16b,  "aesimc v6.16b,  v27.16b",  6, 27)
GEN_TWOVEC_TEST(aesmc_16b_16b,   "aesmc v6.16b,  v27.16b",  6, 27)

// SHA1: three-operand tests use registers 29, 28, 27 (written as q/s/v
// views in the instruction strings); two-operand tests use 6 and 27.
GEN_THREEVEC_TEST(sha1c_q_s_4s,     "sha1c q29, s28, v27.4s", 29,28,27)
GEN_TWOVEC_TEST(sha1h_s_s,          "sha1h s6,  s27",  6, 27)
GEN_THREEVEC_TEST(sha1m_q_s_4s,     "sha1m q29, s28, v27.4s", 29,28,27)
GEN_THREEVEC_TEST(sha1p_q_s_4s,     "sha1p q29, s28, v27.4s", 29,28,27)
GEN_THREEVEC_TEST(sha1su0_4s_4s_4s, "sha1su0 v29.4s, v28.4s, v27.4s", 29,28,27)
GEN_TWOVEC_TEST(sha1su1_4s_4s,      "sha1su1 v6.4s,  v27.4s",  6, 27)

// SHA256: same register choices as the SHA1 tests above.
GEN_THREEVEC_TEST(sha256h2_q_q_4s,  "sha256h2 q29, q28, v27.4s", 29,28,27)
GEN_THREEVEC_TEST(sha256h_q_q_4s,   "sha256h q29, q28, v27.4s", 29,28,27)
GEN_TWOVEC_TEST(sha256su0_4s_4s,    "sha256su0 v6.4s,  v27.4s",  6, 27)
GEN_THREEVEC_TEST(sha256su1_4s_4s_4s, "sha256su1 v29.4s, v28.4s, v27.4s",
                                      29,28,27)
4543 
4544 
4545 /* ---------------------------------------------------------------- */
4546 /* -- main()                                                     -- */
4547 /* ---------------------------------------------------------------- */
4548 
main(void)4549 int main ( void )
4550 {
4551    assert(sizeof(V128) == 16);
4552 
4553    // ======================== FP ========================
4554 
4555    // fabs      d,s
4556    // fabs      2d,4s,2s
4557    if (1) test_fabs_d_d(TyDF);
4558    if (1) test_fabs_s_s(TySF);
4559    if (1) test_fabs_2d_2d(TyDF);
4560    if (1) test_fabs_4s_4s(TySF);
4561    if (1) test_fabs_2s_2s(TyDF);
4562 
4563    // fneg      d,s
4564    // fneg      2d,4s,2s
4565    if (1) test_fneg_d_d(TyDF);
4566    if (1) test_fneg_s_s(TySF);
4567    if (1) test_fneg_2d_2d(TySF);
4568    if (1) test_fneg_4s_4s(TyDF);
4569    if (1) test_fneg_2s_2s(TySF);
4570 
4571    // fsqrt     d,s
4572    // fsqrt     2d,4s,2s
4573    if (1) test_fsqrt_d_d(TyDF);
4574    if (1) test_fsqrt_s_s(TySF);
4575    if (1) test_fsqrt_2d_2d(TySF);
4576    if (1) test_fsqrt_4s_4s(TyDF);
4577    if (1) test_fsqrt_2s_2s(TySF);
4578 
4579    // fadd      d,s
4580    // fsub      d,s
4581    if (1) test_fadd_d_d_d(TyDF);
4582    if (1) test_fadd_s_s_s(TySF);
4583    if (1) test_fsub_d_d_d(TyDF);
4584    if (1) test_fsub_s_s_s(TySF);
4585 
4586    // fadd      2d,4s,2s
4587    // fsub      2d,4s,2s
4588    if (1) test_fadd_2d_2d_2d(TyDF);
4589    if (1) test_fadd_4s_4s_4s(TySF);
4590    if (1) test_fadd_2s_2s_2s(TySF);
4591    if (1) test_fsub_2d_2d_2d(TyDF);
4592    if (1) test_fsub_4s_4s_4s(TySF);
4593    if (1) test_fsub_2s_2s_2s(TySF);
4594 
4595    // fabd      d,s
4596    // fabd      2d,4s,2s
4597    if (1) test_fabd_d_d_d(TyDF);
4598    if (1) test_fabd_s_s_s(TySF);
4599    if (1) test_fabd_2d_2d_2d(TyDF);
4600    if (1) test_fabd_4s_4s_4s(TySF);
4601    if (1) test_fabd_2s_2s_2s(TySF);
4602 
4603    // faddp     d,s (floating add pair)
4604    // faddp     2d,4s,2s
4605    if (1) test_faddp_d_2d(TyDF);
4606    if (1) test_faddp_s_2s(TySF);
4607    if (1) test_faddp_2d_2d_2d(TySF);
4608    if (1) test_faddp_4s_4s_4s(TyDF);
4609    if (1) test_faddp_2s_2s_2s(TySF);
4610 
4611    // fccmp     d,s (floating point conditional quiet compare)
4612    // fccmpe    d,s (floating point conditional signaling compare)
4613    if (1) DO50( test_FCCMP_D_D_0xF_EQ() );
4614    if (1) DO50( test_FCCMP_D_D_0xF_NE() );
4615    if (1) DO50( test_FCCMP_D_D_0x0_EQ() );
4616    if (1) DO50( test_FCCMP_D_D_0x0_NE() );
4617    if (1) DO50( test_FCCMP_S_S_0xF_EQ() );
4618    if (1) DO50( test_FCCMP_S_S_0xF_NE() );
4619    if (1) DO50( test_FCCMP_S_S_0x0_EQ() );
4620    if (1) DO50( test_FCCMP_S_S_0x0_NE() );
4621    if (1) DO50( test_FCCMPE_D_D_0xF_EQ() );
4622    if (1) DO50( test_FCCMPE_D_D_0xF_NE() );
4623    if (1) DO50( test_FCCMPE_D_D_0x0_EQ() );
4624    if (1) DO50( test_FCCMPE_D_D_0x0_NE() );
4625    if (1) DO50( test_FCCMPE_S_S_0xF_EQ() );
4626    if (1) DO50( test_FCCMPE_S_S_0xF_NE() );
4627    if (1) DO50( test_FCCMPE_S_S_0x0_EQ() );
4628    if (1) DO50( test_FCCMPE_S_S_0x0_NE() );
4629 
4630    // fcmeq     d,s
4631    // fcmge     d,s
4632    // fcmgt     d,s
   // facgt     d,s  (floating abs compare GT)
4634    // facge     d,s  (floating abs compare GE)
4635    if (1) DO50( test_FCMEQ_D_D_D() );
4636    if (1) DO50( test_FCMEQ_S_S_S() );
4637    if (1) DO50( test_FCMGE_D_D_D() );
4638    if (1) DO50( test_FCMGE_S_S_S() );
4639    if (1) DO50( test_FCMGT_D_D_D() );
4640    if (1) DO50( test_FCMGT_S_S_S() );
4641    if (1) DO50( test_FACGT_D_D_D() );
4642    if (1) DO50( test_FACGT_S_S_S() );
4643    if (1) DO50( test_FACGE_D_D_D() );
4644    if (1) DO50( test_FACGE_S_S_S() );
4645 
4646    // fcmeq     2d,4s,2s
4647    // fcmge     2d,4s,2s
4648    // fcmgt     2d,4s,2s
4649    // facge     2d,4s,2s
4650    // facgt     2d,4s,2s
4651    if (1) test_fcmeq_2d_2d_2d(TyDF);
4652    if (1) test_fcmeq_4s_4s_4s(TySF);
4653    if (1) test_fcmeq_2s_2s_2s(TySF);
4654    if (1) test_fcmge_2d_2d_2d(TyDF);
4655    if (1) test_fcmge_4s_4s_4s(TySF);
4656    if (1) test_fcmge_2s_2s_2s(TySF);
4657    if (1) test_fcmgt_2d_2d_2d(TyDF);
4658    if (1) test_fcmgt_4s_4s_4s(TySF);
4659    if (1) test_fcmgt_2s_2s_2s(TySF);
4660    if (1) test_facge_2d_2d_2d(TyDF);
4661    if (1) test_facge_4s_4s_4s(TySF);
4662    if (1) test_facge_2s_2s_2s(TySF);
4663    if (1) test_facgt_2d_2d_2d(TyDF);
4664    if (1) test_facgt_4s_4s_4s(TySF);
4665    if (1) test_facgt_2s_2s_2s(TySF);
4666 
4667    // fcmeq_z   d,s
4668    // fcmge_z   d,s
4669    // fcmgt_z   d,s
4670    // fcmle_z   d,s
4671    // fcmlt_z   d,s
4672    if (1) DO50( test_FCMEQ_D_D_Z() );
4673    if (1) DO50( test_FCMEQ_S_S_Z() );
4674    if (1) DO50( test_FCMGE_D_D_Z() );
4675    if (1) DO50( test_FCMGE_S_S_Z() );
4676    if (1) DO50( test_FCMGT_D_D_Z() );
4677    if (1) DO50( test_FCMGT_S_S_Z() );
4678    if (1) DO50( test_FCMLE_D_D_Z() );
4679    if (1) DO50( test_FCMLE_S_S_Z() );
4680    if (1) DO50( test_FCMLT_D_D_Z() );
4681    if (1) DO50( test_FCMLT_S_S_Z() );
4682 
4683    // fcmeq_z   2d,4s,2s
4684    // fcmge_z   2d,4s,2s
4685    // fcmgt_z   2d,4s,2s
4686    // fcmle_z   2d,4s,2s
4687    // fcmlt_z   2d,4s,2s
4688    if (1) test_fcmeq_z_2d_2d(TyDF);
4689    if (1) test_fcmeq_z_4s_4s(TySF);
4690    if (1) test_fcmeq_z_2s_2s(TySF);
4691    if (1) test_fcmge_z_2d_2d(TyDF);
4692    if (1) test_fcmge_z_4s_4s(TySF);
4693    if (1) test_fcmge_z_2s_2s(TySF);
4694    if (1) test_fcmgt_z_2d_2d(TyDF);
4695    if (1) test_fcmgt_z_4s_4s(TySF);
4696    if (1) test_fcmgt_z_2s_2s(TySF);
4697    if (1) test_fcmle_z_2d_2d(TyDF);
4698    if (1) test_fcmle_z_4s_4s(TySF);
4699    if (1) test_fcmle_z_2s_2s(TySF);
4700    if (1) test_fcmlt_z_2d_2d(TyDF);
4701    if (1) test_fcmlt_z_4s_4s(TySF);
4702    if (1) test_fcmlt_z_2s_2s(TySF);
4703 
4704    // fcmp_z    d,s
4705    // fcmpe_z   d,s
4706    // fcmp      d,s (floating point quiet, set flags)
4707    // fcmpe     d,s (floating point signaling, set flags)
4708    if (1) DO50( test_FCMP_D_Z() );
4709    if (1) DO50( test_FCMP_S_Z() );
4710    if (1) DO50( test_FCMPE_D_Z() );
4711    if (1) DO50( test_FCMPE_S_Z() );
4712    if (1) DO50( test_FCMP_D_D() );
4713    if (1) DO50( test_FCMP_S_S() );
4714    if (1) DO50( test_FCMPE_D_D() );
4715    if (1) DO50( test_FCMPE_S_S() );
4716 
4717    // fcsel     d,s (fp cond select)
4718    if (1) DO50( test_FCSEL_D_D_D_EQ() );
4719    if (1) DO50( test_FCSEL_D_D_D_NE() );
4720    if (1) DO50( test_FCSEL_S_S_S_EQ() );
4721    if (1) DO50( test_FCSEL_S_S_S_NE() );
4722 
4723    // fdiv      d,s
4724    // fdiv      2d,4s,2s
4725    if (1) test_fdiv_d_d_d(TyDF);
4726    if (1) test_fdiv_s_s_s(TySF);
4727    if (1) test_fdiv_2d_2d_2d(TyDF);
4728    if (1) test_fdiv_4s_4s_4s(TySF);
4729    if (1) test_fdiv_2s_2s_2s(TySF);
4730 
4731    // fmadd     d,s
4732    // fnmadd    d,s
4733    // fmsub     d,s
4734    // fnmsub    d,s
4735    if (1) test_fmadd_d_d_d_d(TyDF);
4736    if (1) test_fmadd_s_s_s_s(TySF);
4737    if (1) test_fnmadd_d_d_d_d(TyDF);
4738    if (1) test_fnmadd_s_s_s_s(TySF);
4739    if (1) test_fmsub_d_d_d_d(TyDF);
4740    if (1) test_fmsub_s_s_s_s(TySF);
4741    if (1) test_fnmsub_d_d_d_d(TyDF);
4742    if (1) test_fnmsub_s_s_s_s(TySF);
4743 
4744    // fnmul     d,s
4745    if (1) test_fnmul_d_d_d(TyDF);
4746    if (1) test_fnmul_s_s_s(TySF);
4747 
4748    // fmax      d,s
4749    // fmin      d,s
4750    // fmaxnm    d,s ("max number")
4751    // fminnm    d,s
4752    if (1) test_fmax_d_d_d(TyDF);
4753    if (1) test_fmax_s_s_s(TySF);
4754    if (1) test_fmin_d_d_d(TyDF);
4755    if (1) test_fmin_s_s_s(TySF);
4756    if (1) test_fmaxnm_d_d_d(TyDF);
4757    if (1) test_fmaxnm_s_s_s(TySF);
4758    if (1) test_fminnm_d_d_d(TyDF);
4759    if (1) test_fminnm_s_s_s(TySF);
4760 
4761    // fmax      2d,4s,2s
4762    // fmin      2d,4s,2s
4763    // fmaxnm    2d,4s,2s
4764    // fminnm    2d,4s,2s
4765    if (1) test_fmax_2d_2d_2d(TyDF);
4766    if (1) test_fmax_4s_4s_4s(TySF);
4767    if (1) test_fmax_2s_2s_2s(TySF);
4768    if (1) test_fmin_2d_2d_2d(TyDF);
4769    if (1) test_fmin_4s_4s_4s(TySF);
4770    if (1) test_fmin_2s_2s_2s(TySF);
4771    if (1) test_fmaxnm_2d_2d_2d(TyDF);
4772    if (1) test_fmaxnm_4s_4s_4s(TySF);
4773    if (1) test_fmaxnm_2s_2s_2s(TySF);
4774    if (1) test_fminnm_2d_2d_2d(TyDF);
4775    if (1) test_fminnm_4s_4s_4s(TySF);
4776    if (1) test_fminnm_2s_2s_2s(TySF);
4777 
4778    // fmaxnmp   d_2d,s_2s ("max number pairwise")
4779    // fminnmp   d_2d,s_2s
4780    if (1) test_fmaxnmp_d_2d(TyDF);
4781    if (1) test_fmaxnmp_s_2s(TySF);
4782    if (1) test_fminnmp_d_2d(TyDF);
4783    if (1) test_fminnmp_s_2s(TySF);
4784 
4785    // fmaxnmp   2d,4s,2s
4786    // fminnmp   2d,4s,2s
4787    if (1) test_fmaxnmp_2d_2d_2d(TyDF);
4788    if (1) test_fmaxnmp_4s_4s_4s(TySF);
4789    if (1) test_fmaxnmp_2s_2s_2s(TySF);
4790    if (1) test_fminnmp_2d_2d_2d(TyDF);
4791    if (1) test_fminnmp_4s_4s_4s(TySF);
4792    if (1) test_fminnmp_2s_2s_2s(TySF);
4793 
4794    // fmaxnmv   s_4s (maxnum across vector)
4795    // fminnmv   s_4s
4796    if (1) test_fmaxnmv_s_4s(TySF);
4797    if (1) test_fminnmv_s_4s(TySF);
4798 
4799    // fmaxp     d_2d,s_2s (max of a pair)
   // fminp     d_2d,s_2s (min of a pair)
4801    if (1) test_fmaxp_d_2d(TyDF);
4802    if (1) test_fmaxp_s_2s(TySF);
4803    if (1) test_fminp_d_2d(TyDF);
4804    if (1) test_fminp_s_2s(TySF);
4805 
4806    // fmaxp     2d,4s,2s  (max pairwise)
4807    // fminp     2d,4s,2s
4808    if (1) test_fmaxp_2d_2d_2d(TyDF);
4809    if (1) test_fmaxp_4s_4s_4s(TySF);
4810    if (1) test_fmaxp_2s_2s_2s(TySF);
4811    if (1) test_fminp_2d_2d_2d(TyDF);
4812    if (1) test_fminp_4s_4s_4s(TySF);
4813    if (1) test_fminp_2s_2s_2s(TySF);
4814 
4815    // fmaxv     s_4s (max across vector)
4816    // fminv     s_4s
4817    if (1) test_fmaxv_s_4s(TySF);
4818    if (1) test_fminv_s_4s(TySF);
4819 
4820    // fmla      2d,4s,2s
4821    // fmls      2d,4s,2s
4822    if (1) test_fmla_2d_2d_2d(TyDF);
4823    if (1) test_fmla_4s_4s_4s(TySF);
4824    if (1) test_fmla_2s_2s_2s(TySF);
4825    if (1) test_fmls_2d_2d_2d(TyDF);
4826    if (1) test_fmls_4s_4s_4s(TySF);
4827    if (1) test_fmls_2s_2s_2s(TySF);
4828 
4829    // fmla      d_d_d[],s_s_s[] (by element)
4830    // fmls      d_d_d[],s_s_s[] (by element)
4831    if (1) test_fmla_d_d_d0(TyDF);
4832    if (1) test_fmla_d_d_d1(TyDF);
4833    if (1) test_fmla_s_s_s0(TySF);
4834    if (1) test_fmla_s_s_s3(TySF);
4835    if (1) test_fmls_d_d_d0(TyDF);
4836    if (1) test_fmls_d_d_d1(TyDF);
4837    if (1) test_fmls_s_s_s0(TySF);
4838    if (1) test_fmls_s_s_s3(TySF);
4839 
4840    // fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
4841    // fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
4842    if (1) test_fmla_2d_2d_d0(TyDF);
4843    if (1) test_fmla_2d_2d_d1(TyDF);
4844    if (1) test_fmla_4s_4s_s0(TySF);
4845    if (1) test_fmla_4s_4s_s3(TySF);
4846    if (1) test_fmla_2s_2s_s0(TySF);
4847    if (1) test_fmla_2s_2s_s3(TySF);
4848    if (1) test_fmls_2d_2d_d0(TyDF);
4849    if (1) test_fmls_2d_2d_d1(TyDF);
4850    if (1) test_fmls_4s_4s_s0(TySF);
4851    if (1) test_fmls_4s_4s_s3(TySF);
4852    if (1) test_fmls_2s_2s_s0(TySF);
4853    if (1) test_fmls_2s_2s_s3(TySF);
4854 
4855    // fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
4856    if (1) test_fmov_2d_imm_01(TyD);
4857    if (1) test_fmov_2d_imm_02(TyD);
4858    if (1) test_fmov_2d_imm_03(TyD);
4859    if (1) test_fmov_4s_imm_01(TyS);
4860    if (1) test_fmov_4s_imm_02(TyS);
4861    if (1) test_fmov_4s_imm_03(TyS);
4862    if (1) test_fmov_2s_imm_01(TyS);
4863    if (1) test_fmov_2s_imm_02(TyS);
4864    if (1) test_fmov_2s_imm_03(TyS);
4865 
4866    // fmov      d_d,s_s
4867    if (1) test_fmov_d_d(TyDF);
4868    if (1) test_fmov_s_s(TySF);
4869 
4870    // fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
4871    if (1) test_fmov_s_w(TyS);
4872    if (1) test_fmov_d_x(TyD);
4873    if (1) test_fmov_d1_x(TyD);
4874    if (1) test_fmov_w_s(TyS);
4875    if (1) test_fmov_x_d(TyD);
4876    if (1) test_fmov_x_d1(TyD);
4877 
4878    // fmov      d,s #imm
4879    if (1) test_fmov_d_imm_01(TyNONE);
4880    if (1) test_fmov_d_imm_02(TyNONE);
4881    if (1) test_fmov_d_imm_03(TyNONE);
4882    if (1) test_fmov_s_imm_01(TyNONE);
4883    if (1) test_fmov_s_imm_02(TyNONE);
4884    if (1) test_fmov_s_imm_03(TyNONE);
4885 
4886    // fmul      d_d_d[],s_s_s[]
4887    if (1) test_fmul_d_d_d0(TyDF);
4888    if (1) test_fmul_d_d_d1(TyDF);
4889    if (1) test_fmul_s_s_s0(TySF);
4890    if (1) test_fmul_s_s_s3(TySF);
4891 
4892    // fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
4893    if (1) test_fmul_2d_2d_d0(TyDF);
4894    if (1) test_fmul_2d_2d_d1(TyDF);
4895    if (1) test_fmul_4s_4s_s0(TySF);
4896    if (1) test_fmul_4s_4s_s3(TySF);
4897    if (1) test_fmul_2s_2s_s0(TySF);
4898    if (1) test_fmul_2s_2s_s3(TySF);
4899 
4900    // fmul      d,s
4901    // fmul      2d,4s,2s
4902    if (1) test_fmul_d_d_d(TyDF);
4903    if (1) test_fmul_s_s_s(TySF);
4904    if (1) test_fmul_2d_2d_2d(TyDF);
4905    if (1) test_fmul_4s_4s_4s(TySF);
4906    if (1) test_fmul_2s_2s_2s(TySF);
4907 
4908    // fmulx     d_d_d[],s_s_s[]
4909    // fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
4910    if (1) test_fmulx_d_d_d0(TyDF);
4911    if (1) test_fmulx_d_d_d1(TyDF);
4912    if (1) test_fmulx_s_s_s0(TySF);
4913    if (1) test_fmulx_s_s_s3(TySF);
4914    if (1) test_fmulx_2d_2d_d0(TyDF);
4915    if (1) test_fmulx_2d_2d_d1(TyDF);
4916    if (1) test_fmulx_4s_4s_s0(TySF);
4917    if (1) test_fmulx_4s_4s_s3(TySF);
4918    if (1) test_fmulx_2s_2s_s0(TySF);
4919    if (1) test_fmulx_2s_2s_s3(TySF);
4920 
4921    // fmulx     d,s
4922    // fmulx     2d,4s,2s
4923    if (1) test_fmulx_d_d_d(TyDF);
4924    if (1) test_fmulx_s_s_s(TySF);
4925    if (1) test_fmulx_2d_2d_2d(TyDF);
4926    if (1) test_fmulx_4s_4s_4s(TySF);
4927    if (1) test_fmulx_2s_2s_2s(TySF);
4928 
4929    // frecpe    d,s (recip estimate)
4930    // frecpe    2d,4s,2s
4931    if (1) test_frecpe_d_d(TyDF);
4932    if (1) test_frecpe_s_s(TySF);
4933    if (1) test_frecpe_2d_2d(TyDF);
4934    if (1) test_frecpe_4s_4s(TySF);
4935    if (1) test_frecpe_2s_2s(TySF);
4936 
4937    // frecps    d,s (recip step)
4938    // frecps    2d,4s,2s
4939    if (1) test_frecps_d_d_d(TyDF);
4940    if (1) test_frecps_s_s_s(TySF);
4941    if (1) test_frecps_2d_2d_2d(TyDF);
4942    if (1) test_frecps_4s_4s_4s(TySF);
4943    if (1) test_frecps_2s_2s_2s(TySF);
4944 
4945    // frecpx    d,s (recip exponent)
4946    if (1) test_frecpx_d_d(TyDF);
4947    if (1) test_frecpx_s_s(TySF);
4948 
4949    // frinta    d,s
4950    // frinti    d,s
4951    // frintm    d,s
4952    // frintn    d,s
4953    // frintp    d,s
4954    // frintx    d,s
4955    // frintz    d,s
4956    if (1) test_frinta_d_d(TyDF);
4957    if (1) test_frinta_s_s(TySF);
4958    if (1) test_frinti_d_d(TyDF);
4959    if (1) test_frinti_s_s(TySF);
4960    if (1) test_frintm_d_d(TyDF);
4961    if (1) test_frintm_s_s(TySF);
4962    if (1) test_frintn_d_d(TyDF);
4963    if (1) test_frintn_s_s(TySF);
4964    if (1) test_frintp_d_d(TyDF);
4965    if (1) test_frintp_s_s(TySF);
4966    if (1) test_frintx_d_d(TyDF);
4967    if (1) test_frintx_s_s(TySF);
4968    if (1) test_frintz_d_d(TyDF);
4969    if (1) test_frintz_s_s(TySF);
4970 
4971    // frinta    2d,4s,2s (round to integral, nearest away)
4972    // frinti    2d,4s,2s (round to integral, per FPCR)
4973    // frintm    2d,4s,2s (round to integral, minus inf)
4974    // frintn    2d,4s,2s (round to integral, nearest, to even)
4975    // frintp    2d,4s,2s (round to integral, plus inf)
4976    // frintx    2d,4s,2s (round to integral exact, per FPCR)
4977    // frintz    2d,4s,2s (round to integral, zero)
4978    if (1) test_frinta_2d_2d(TyDF);
4979    if (1) test_frinta_4s_4s(TySF);
4980    if (1) test_frinta_2s_2s(TySF);
4981    if (1) test_frinti_2d_2d(TyDF);
4982    if (1) test_frinti_4s_4s(TySF);
4983    if (1) test_frinti_2s_2s(TySF);
4984    if (1) test_frintm_2d_2d(TyDF);
4985    if (1) test_frintm_4s_4s(TySF);
4986    if (1) test_frintm_2s_2s(TySF);
4987    if (1) test_frintn_2d_2d(TyDF);
4988    if (1) test_frintn_4s_4s(TySF);
4989    if (1) test_frintn_2s_2s(TySF);
4990    if (1) test_frintp_2d_2d(TyDF);
4991    if (1) test_frintp_4s_4s(TySF);
4992    if (1) test_frintp_2s_2s(TySF);
4993    if (1) test_frintx_2d_2d(TyDF);
4994    if (1) test_frintx_4s_4s(TySF);
4995    if (1) test_frintx_2s_2s(TySF);
4996    if (1) test_frintz_2d_2d(TyDF);
4997    if (1) test_frintz_4s_4s(TySF);
4998    if (1) test_frintz_2s_2s(TySF);
4999 
5000    // frsqrte   d,s (est)
5001    // frsqrte   2d,4s,2s
5002    if (1) test_frsqrte_d_d(TyDF);
5003    if (1) test_frsqrte_s_s(TySF);
5004    if (1) test_frsqrte_2d_2d(TyDF);
5005    if (1) test_frsqrte_4s_4s(TySF);
5006    if (1) test_frsqrte_2s_2s(TySF);
5007 
5008    // frsqrts   d,s (step)
5009    // frsqrts   2d,4s,2s
5010    if (1) test_frsqrts_d_d_d(TyDF);
5011    if (1) test_frsqrts_s_s_s(TySF);
5012    if (1) test_frsqrts_2d_2d_2d(TyDF);
5013    if (1) test_frsqrts_4s_4s_4s(TySF);
5014    if (1) test_frsqrts_2s_2s_2s(TySF);
5015 
5016    // ======================== CONV ========================
5017 
5018    // fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
5019    if (1) test_fcvt_s_h(TyHF);
5020    if (1) test_fcvt_d_h(TyHF);
5021    if (1) test_fcvt_h_s(TySF);
5022    if (1) test_fcvt_d_s(TySF);
5023    if (1) test_fcvt_h_d(TyDF);
5024    if (1) test_fcvt_s_d(TyDF);
5025 
5026    // fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
5027    if (1) test_fcvtl_4s_4h(TyHF);
5028    if (1) test_fcvtl_4s_8h(TyHF);
5029    if (1) test_fcvtl_2d_2s(TySF);
5030    if (1) test_fcvtl_2d_4s(TySF);
5031 
5032    // fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
5033    if (1) test_fcvtn_4h_4s(TySF);
5034    if (1) test_fcvtn_8h_4s(TySF);
5035    if (1) test_fcvtn_2s_2d(TyDF);
5036    if (1) test_fcvtn_4s_2d(TyDF);
5037 
5038    // fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
5039    // fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
5040    // fcvtas    2d,4s,2s
5041    // fcvtau    2d,4s,2s
5042    // fcvtas    w_s,x_s,w_d,x_d
5043    // fcvtau    w_s,x_s,w_d,x_d
5044    if (1) test_fcvtas_d_d(TyDF);
5045    if (1) test_fcvtau_d_d(TyDF);
5046    if (1) test_fcvtas_s_s(TySF);
5047    if (1) test_fcvtau_s_s(TySF);
5048    if (1) test_fcvtas_2d_2d(TyDF);
5049    if (1) test_fcvtau_2d_2d(TyDF);
5050    if (1) test_fcvtas_4s_4s(TySF);
5051    if (1) test_fcvtau_4s_4s(TySF);
5052    if (1) test_fcvtas_2s_2s(TySF);
5053    if (1) test_fcvtau_2s_2s(TySF);
5054    if (1) test_fcvtas_w_s(TySF);
5055    if (1) test_fcvtau_w_s(TySF);
5056    if (1) test_fcvtas_x_s(TySF);
5057    if (1) test_fcvtau_x_s(TySF);
5058    if (1) test_fcvtas_w_d(TyDF);
5059    if (1) test_fcvtau_w_d(TyDF);
5060    if (1) test_fcvtas_x_d(TyDF);
5061    if (1) test_fcvtau_x_d(TyDF);
5062 
5063    // fcvtms    d,s  (fcvt to signed int,   minus inf)
5064    // fcvtmu    d,s  (fcvt to unsigned int, minus inf)
5065    // fcvtms    2d,4s,2s
5066    // fcvtmu    2d,4s,2s
5067    // fcvtms    w_s,x_s,w_d,x_d
5068    // fcvtmu    w_s,x_s,w_d,x_d
5069    if (1) test_fcvtms_d_d(TyDF);
5070    if (1) test_fcvtmu_d_d(TyDF);
5071    if (1) test_fcvtms_s_s(TySF);
5072    if (1) test_fcvtmu_s_s(TySF);
5073    if (1) test_fcvtms_2d_2d(TyDF);
5074    if (1) test_fcvtmu_2d_2d(TyDF);
5075    if (1) test_fcvtms_4s_4s(TySF);
5076    if (1) test_fcvtmu_4s_4s(TySF);
5077    if (1) test_fcvtms_2s_2s(TySF);
5078    if (1) test_fcvtmu_2s_2s(TySF);
5079    if (1) test_fcvtms_w_s(TySF);
5080    if (1) test_fcvtmu_w_s(TySF);
5081    if (1) test_fcvtms_x_s(TySF);
5082    if (1) test_fcvtmu_x_s(TySF);
5083    if (1) test_fcvtms_w_d(TyDF);
5084    if (1) test_fcvtmu_w_d(TyDF);
5085    if (1) test_fcvtms_x_d(TyDF);
5086    if (1) test_fcvtmu_x_d(TyDF);
5087 
5088    // fcvtns    d,s  (fcvt to signed int,   nearest)
5089    // fcvtnu    d,s  (fcvt to unsigned int, nearest)
5090    // fcvtns    2d,4s,2s
5091    // fcvtnu    2d,4s,2s
5092    // fcvtns    w_s,x_s,w_d,x_d
5093    // fcvtnu    w_s,x_s,w_d,x_d
5094    if (1) test_fcvtns_d_d(TyDF);
5095    if (1) test_fcvtnu_d_d(TyDF);
5096    if (1) test_fcvtns_s_s(TySF);
5097    if (1) test_fcvtnu_s_s(TySF);
5098    if (1) test_fcvtns_2d_2d(TyDF);
5099    if (1) test_fcvtnu_2d_2d(TyDF);
5100    if (1) test_fcvtns_4s_4s(TySF);
5101    if (1) test_fcvtnu_4s_4s(TySF);
5102    if (1) test_fcvtns_2s_2s(TySF);
5103    if (1) test_fcvtnu_2s_2s(TySF);
5104    if (1) test_fcvtns_w_s(TySF);
5105    if (1) test_fcvtnu_w_s(TySF);
5106    if (1) test_fcvtns_x_s(TySF);
5107    if (1) test_fcvtnu_x_s(TySF);
5108    if (1) test_fcvtns_w_d(TyDF);
5109    if (1) test_fcvtnu_w_d(TyDF);
5110    if (1) test_fcvtns_x_d(TyDF);
5111    if (1) test_fcvtnu_x_d(TyDF);
5112 
5113    // fcvtps    d,s  (fcvt to signed int,   plus inf)
5114    // fcvtpu    d,s  (fcvt to unsigned int, plus inf)
5115    // fcvtps    2d,4s,2s
5116    // fcvtpu    2d,4s,2s
5117    // fcvtps    w_s,x_s,w_d,x_d
5118    // fcvtpu    w_s,x_s,w_d,x_d
5119    if (1) test_fcvtps_d_d(TyDF);
5120    if (1) test_fcvtpu_d_d(TyDF);
5121    if (1) test_fcvtps_s_s(TySF);
5122    if (1) test_fcvtpu_s_s(TySF);
5123    if (1) test_fcvtps_2d_2d(TyDF);
5124    if (1) test_fcvtpu_2d_2d(TyDF);
5125    if (1) test_fcvtps_4s_4s(TySF);
5126    if (1) test_fcvtpu_4s_4s(TySF);
5127    if (1) test_fcvtps_2s_2s(TySF);
5128    if (1) test_fcvtpu_2s_2s(TySF);
5129    if (1) test_fcvtps_w_s(TySF);
5130    if (1) test_fcvtpu_w_s(TySF);
5131    if (1) test_fcvtps_x_s(TySF);
5132    if (1) test_fcvtpu_x_s(TySF);
5133    if (1) test_fcvtps_w_d(TyDF);
5134    if (1) test_fcvtpu_w_d(TyDF);
5135    if (1) test_fcvtps_x_d(TyDF);
5136    if (1) test_fcvtpu_x_d(TyDF);
5137 
5138    // fcvtzs    d,s (fcvt to signed integer,   to zero)
5139    // fcvtzu    d,s (fcvt to unsigned integer, to zero)
5140    // fcvtzs    2d,4s,2s
5141    // fcvtzu    2d,4s,2s
5142    // fcvtzs    w_s,x_s,w_d,x_d
5143    // fcvtzu    w_s,x_s,w_d,x_d
5144    if (1) test_fcvtzs_d_d(TyDF);
5145    if (1) test_fcvtzu_d_d(TyDF);
5146    if (1) test_fcvtzs_s_s(TySF);
5147    if (1) test_fcvtzu_s_s(TySF);
5148    if (1) test_fcvtzs_2d_2d(TyDF);
5149    if (1) test_fcvtzu_2d_2d(TyDF);
5150    if (1) test_fcvtzs_4s_4s(TySF);
5151    if (1) test_fcvtzu_4s_4s(TySF);
5152    if (1) test_fcvtzs_2s_2s(TySF);
5153    if (1) test_fcvtzu_2s_2s(TySF);
5154    if (1) test_fcvtzs_w_s(TySF);
5155    if (1) test_fcvtzu_w_s(TySF);
5156    if (1) test_fcvtzs_x_s(TySF);
5157    if (1) test_fcvtzu_x_s(TySF);
5158    if (1) test_fcvtzs_w_d(TyDF);
5159    if (1) test_fcvtzu_w_d(TyDF);
5160    if (1) test_fcvtzs_x_d(TyDF);
5161    if (1) test_fcvtzu_x_d(TyDF);
5162 
5163    // fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
5164    // fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
5165    // fcvtzs    2d,4s,2s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
5166    // fcvtzu    2d,4s,2s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
5167    // fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
5168    // fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
5169    if (1) test_fcvtzs_d_d_fbits1(TyDF);
5170    if (1) test_fcvtzs_d_d_fbits32(TyDF);
5171    if (1) test_fcvtzs_d_d_fbits64(TyDF);
5172    if (1) test_fcvtzu_d_d_fbits1(TyDF);
5173    if (1) test_fcvtzu_d_d_fbits32(TyDF);
5174    if (1) test_fcvtzu_d_d_fbits64(TyDF);
5175    if (1) test_fcvtzs_s_s_fbits1(TySF);
5176    if (1) test_fcvtzs_s_s_fbits16(TySF);
5177    if (1) test_fcvtzs_s_s_fbits32(TySF);
5178    if (1) test_fcvtzu_s_s_fbits1(TySF);
5179    if (1) test_fcvtzu_s_s_fbits16(TySF);
5180    if (1) test_fcvtzu_s_s_fbits32(TySF);
5181    if (1) test_fcvtzs_2d_2d_fbits1(TyDF);
5182    if (1) test_fcvtzs_2d_2d_fbits32(TyDF);
5183    if (1) test_fcvtzs_2d_2d_fbits64(TyDF);
5184    if (1) test_fcvtzu_2d_2d_fbits1(TyDF);
5185    if (1) test_fcvtzu_2d_2d_fbits32(TyDF);
5186    if (1) test_fcvtzu_2d_2d_fbits64(TyDF);
5187    if (1) test_fcvtzs_4s_4s_fbits1(TySF);
5188    if (1) test_fcvtzs_4s_4s_fbits16(TySF);
5189    if (1) test_fcvtzs_4s_4s_fbits32(TySF);
5190    if (1) test_fcvtzu_4s_4s_fbits1(TySF);
5191    if (1) test_fcvtzu_4s_4s_fbits16(TySF);
5192    if (1) test_fcvtzu_4s_4s_fbits32(TySF);
5193    if (1) test_fcvtzs_2s_2s_fbits1(TySF);
5194    if (1) test_fcvtzs_2s_2s_fbits16(TySF);
5195    if (1) test_fcvtzs_2s_2s_fbits32(TySF);
5196    if (1) test_fcvtzu_2s_2s_fbits1(TySF);
5197    if (1) test_fcvtzu_2s_2s_fbits16(TySF);
5198    if (1) test_fcvtzu_2s_2s_fbits32(TySF);
5199    if (1) test_fcvtzs_w_s_fbits1(TySF);
5200    if (1) test_fcvtzs_w_s_fbits16(TySF);
5201    if (1) test_fcvtzs_w_s_fbits32(TySF);
5202    if (1) test_fcvtzu_w_s_fbits1(TySF);
5203    if (1) test_fcvtzu_w_s_fbits16(TySF);
5204    if (1) test_fcvtzu_w_s_fbits32(TySF);
5205    if (1) test_fcvtzs_x_s_fbits1(TySF);
5206    if (1) test_fcvtzs_x_s_fbits32(TySF);
5207    if (1) test_fcvtzs_x_s_fbits64(TySF);
5208    if (1) test_fcvtzu_x_s_fbits1(TySF);
5209    if (1) test_fcvtzu_x_s_fbits32(TySF);
5210    if (1) test_fcvtzu_x_s_fbits64(TySF);
5211    if (1) test_fcvtzs_w_d_fbits1(TyDF);
5212    if (1) test_fcvtzs_w_d_fbits16(TyDF);
5213    if (1) test_fcvtzs_w_d_fbits32(TyDF);
5214    if (1) test_fcvtzu_w_d_fbits1(TyDF);
5215    if (1) test_fcvtzu_w_d_fbits16(TyDF);
5216    if (1) test_fcvtzu_w_d_fbits32(TyDF);
5217    if (1) test_fcvtzs_x_d_fbits1(TyDF);
5218    if (1) test_fcvtzs_x_d_fbits32(TyDF);
5219    if (1) test_fcvtzs_x_d_fbits64(TyDF);
5220    if (1) test_fcvtzu_x_d_fbits1(TyDF);
5221    if (1) test_fcvtzu_x_d_fbits32(TyDF);
5222    if (1) test_fcvtzu_x_d_fbits64(TyDF);
5223 
5224    // fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
5225    // fcvtxn    2s_2d,4s_2d
5226    if (1) test_fcvtxn_s_d(TyDF);
5227    if (1) test_fcvtxn_2s_2d(TyDF);
5228    if (1) test_fcvtxn_4s_2d(TyDF);
5229 
5230    // scvtf     d,s        _#fbits
5231    // ucvtf     d,s        _#fbits
5232    // scvtf     2d,4s,2s   _#fbits
5233    // ucvtf     2d,4s,2s   _#fbits
5234    if (1) test_scvtf_d_d_fbits1(TyD);
5235    if (1) test_scvtf_d_d_fbits32(TyD);
5236    if (1) test_scvtf_d_d_fbits64(TyD);
5237    if (1) test_ucvtf_d_d_fbits1(TyD);
5238    if (1) test_ucvtf_d_d_fbits32(TyD);
5239    if (1) test_ucvtf_d_d_fbits64(TyD);
5240    if (1) test_scvtf_s_s_fbits1(TyS);
5241    if (1) test_scvtf_s_s_fbits16(TyS);
5242    if (1) test_scvtf_s_s_fbits32(TyS);
5243    if (1) test_ucvtf_s_s_fbits1(TyS);
5244    if (1) test_ucvtf_s_s_fbits16(TyS);
5245    if (1) test_ucvtf_s_s_fbits32(TyS);
5246    if (1) test_scvtf_2d_2d_fbits1(TyD);
5247    if (1) test_scvtf_2d_2d_fbits32(TyD);
5248    if (1) test_scvtf_2d_2d_fbits64(TyD);
5249    if (1) test_ucvtf_2d_2d_fbits1(TyD);
5250    if (1) test_ucvtf_2d_2d_fbits32(TyD);
5251    if (1) test_ucvtf_2d_2d_fbits64(TyD);
5252    if (1) test_scvtf_4s_4s_fbits1(TyS);
5253    if (1) test_scvtf_4s_4s_fbits16(TyS);
5254    if (1) test_scvtf_4s_4s_fbits32(TyS);
5255    if (1) test_ucvtf_4s_4s_fbits1(TyS);
5256    if (1) test_ucvtf_4s_4s_fbits16(TyS);
5257    if (1) test_ucvtf_4s_4s_fbits32(TyS);
5258    if (1) test_scvtf_2s_2s_fbits1(TyS);
5259    if (1) test_scvtf_2s_2s_fbits16(TyS);
5260    if (1) test_scvtf_2s_2s_fbits32(TyS);
5261    if (1) test_ucvtf_2s_2s_fbits1(TyS);
5262    if (1) test_ucvtf_2s_2s_fbits16(TyS);
5263    if (1) test_ucvtf_2s_2s_fbits32(TyS);
5264 
5265    // scvtf     d,s
5266    // ucvtf     d,s
5267    // scvtf     2d,4s,2s
5268    // ucvtf     2d,4s,2s
5269    if (1) test_scvtf_d_d(TyD);
5270    if (1) test_ucvtf_d_d(TyD);
5271    if (1) test_scvtf_s_s(TyS);
5272    if (1) test_ucvtf_s_s(TyS);
5273    if (1) test_scvtf_2d_2d(TyD);
5274    if (1) test_ucvtf_2d_2d(TyD);
5275    if (1) test_scvtf_4s_4s(TyS);
5276    if (1) test_ucvtf_4s_4s(TyS);
5277    if (1) test_scvtf_2s_2s(TyS);
5278    if (1) test_ucvtf_2s_2s(TyS);
5279 
5280    // scvtf     s_w, d_w, s_x, d_x,   _#fbits
5281    // ucvtf     s_w, d_w, s_x, d_x,   _#fbits
5282    if (1) test_scvtf_s_w_fbits1(TyS);
5283    if (1) test_scvtf_s_w_fbits16(TyS);
5284    if (1) test_scvtf_s_w_fbits32(TyS);
5285    if (1) test_scvtf_d_w_fbits1(TyS);
5286    if (1) test_scvtf_d_w_fbits16(TyS);
5287    if (1) test_scvtf_d_w_fbits32(TyS);
5288    if (1) test_scvtf_s_x_fbits1(TyD);
5289    if (1) test_scvtf_s_x_fbits32(TyD);
5290    if (1) test_scvtf_s_x_fbits64(TyD);
5291    if (1) test_scvtf_d_x_fbits1(TyD);
5292    if (1) test_scvtf_d_x_fbits32(TyD);
5293    if (1) test_scvtf_d_x_fbits64(TyD);
5294    if (1) test_ucvtf_s_w_fbits1(TyS);
5295    if (1) test_ucvtf_s_w_fbits16(TyS);
5296    if (1) test_ucvtf_s_w_fbits32(TyS);
5297    if (1) test_ucvtf_d_w_fbits1(TyS);
5298    if (1) test_ucvtf_d_w_fbits16(TyS);
5299    if (1) test_ucvtf_d_w_fbits32(TyS);
5300    if (1) test_ucvtf_s_x_fbits1(TyD);
5301    if (1) test_ucvtf_s_x_fbits32(TyD);
5302    if (1) test_ucvtf_s_x_fbits64(TyD);
5303    if (1) test_ucvtf_d_x_fbits1(TyD);
5304    if (1) test_ucvtf_d_x_fbits32(TyD);
5305    if (1) test_ucvtf_d_x_fbits64(TyD);
5306 
5307    // scvtf     s_w, d_w, s_x, d_x
5308    // ucvtf     s_w, d_w, s_x, d_x
5309    if (1) test_scvtf_s_w(TyS);
5310    if (1) test_scvtf_d_w(TyS);
5311    if (1) test_scvtf_s_x(TyD);
5312    if (1) test_scvtf_d_x(TyD);
5313    if (1) test_ucvtf_s_w(TyS);
5314    if (1) test_ucvtf_d_w(TyS);
5315    if (1) test_ucvtf_s_x(TyD);
5316    if (1) test_ucvtf_d_x(TyD);
5317 
5318    // ======================== INT ========================
5319 
5320    // abs       d
5321    // neg       d
5322    if (1) test_abs_d_d(TyD);
5323    if (1) test_neg_d_d(TyD);
5324 
5325    // abs       2d,4s,2s,8h,4h,16b,8b
5326    // neg       2d,4s,2s,8h,4h,16b,8b
5327    if (1) test_abs_2d_2d(TyD);
5328    if (1) test_abs_4s_4s(TyS);
5329    if (1) test_abs_2s_2s(TyS);
5330    if (1) test_abs_8h_8h(TyH);
5331    if (1) test_abs_4h_4h(TyH);
5332    if (1) test_abs_16b_16b(TyB);
5333    if (1) test_abs_8b_8b(TyB);
5334    if (1) test_neg_2d_2d(TyD);
5335    if (1) test_neg_4s_4s(TyS);
5336    if (1) test_neg_2s_2s(TyS);
5337    if (1) test_neg_8h_8h(TyH);
5338    if (1) test_neg_4h_4h(TyH);
5339    if (1) test_neg_16b_16b(TyB);
5340    if (1) test_neg_8b_8b(TyB);
5341 
5342    // add       d
5343    // sub       d
5344    if (1) test_add_d_d_d(TyD);
5345    if (1) test_sub_d_d_d(TyD);
5346 
5347    // add       2d,4s,2s,8h,4h,16b,8b
5348    // sub       2d,4s,2s,8h,4h,16b,8b
5349    if (1) test_add_2d_2d_2d(TyD);
5350    if (1) test_add_4s_4s_4s(TyS);
5351    if (1) test_add_2s_2s_2s(TyS);
5352    if (1) test_add_8h_8h_8h(TyH);
5353    if (1) test_add_4h_4h_4h(TyH);
5354    if (1) test_add_16b_16b_16b(TyB);
5355    if (1) test_add_8b_8b_8b(TyB);
5356    if (1) test_sub_2d_2d_2d(TyD);
5357    if (1) test_sub_4s_4s_4s(TyS);
5358    if (1) test_sub_2s_2s_2s(TyS);
5359    if (1) test_sub_8h_8h_8h(TyH);
5360    if (1) test_sub_4h_4h_4h(TyH);
5361    if (1) test_sub_16b_16b_16b(TyB);
5362    if (1) test_sub_8b_8b_8b(TyB);
5363 
5364    // addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
5365    // subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
5366    // raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
5367    // rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
5368    if (1) test_addhn_2s_2d_2d(TyD);
5369    if (1) test_addhn2_4s_2d_2d(TyD);
5370    if (1) test_addhn_4h_4s_4s(TyS);
5371    if (1) test_addhn2_8h_4s_4s(TyS);
5372    if (1) test_addhn_8b_8h_8h(TyH);
5373    if (1) test_addhn2_16b_8h_8h(TyH);
5374    if (1) test_subhn_2s_2d_2d(TyD);
5375    if (1) test_subhn2_4s_2d_2d(TyD);
5376    if (1) test_subhn_4h_4s_4s(TyS);
5377    if (1) test_subhn2_8h_4s_4s(TyS);
5378    if (1) test_subhn_8b_8h_8h(TyH);
5379    if (1) test_subhn2_16b_8h_8h(TyH);
5380    if (1) test_raddhn_2s_2d_2d(TyD);
5381    if (1) test_raddhn2_4s_2d_2d(TyD);
5382    if (1) test_raddhn_4h_4s_4s(TyS);
5383    if (1) test_raddhn2_8h_4s_4s(TyS);
5384    if (1) test_raddhn_8b_8h_8h(TyH);
5385    if (1) test_raddhn2_16b_8h_8h(TyH);
5386    if (1) test_rsubhn_2s_2d_2d(TyD);
5387    if (1) test_rsubhn2_4s_2d_2d(TyD);
5388    if (1) test_rsubhn_4h_4s_4s(TyS);
5389    if (1) test_rsubhn2_8h_4s_4s(TyS);
5390    if (1) test_rsubhn_8b_8h_8h(TyH);
5391    if (1) test_rsubhn2_16b_8h_8h(TyH);
5392 
5393    // addp     d (add pairs, across)
5394    if (1) test_addp_d_2d(TyD);
5395 
5396    // addp     2d,4s,2s,8h,4h,16b,8b
5397    if (1) test_addp_2d_2d_2d(TyD);
5398    if (1) test_addp_4s_4s_4s(TyS);
5399    if (1) test_addp_2s_2s_2s(TyS);
5400    if (1) test_addp_8h_8h_8h(TyH);
5401    if (1) test_addp_4h_4h_4h(TyH);
5402    if (1) test_addp_16b_16b_16b(TyB);
5403    if (1) test_addp_8b_8b_8b(TyB);
5404 
   // addv     4s,8h,4h,16b,8b (reduce across vector)
5406    if (1) test_addv_s_4s(TyS);
5407    if (1) test_addv_h_8h(TyH);
5408    if (1) test_addv_h_4h(TyH);
5409    if (1) test_addv_b_16b(TyB);
5410    if (1) test_addv_b_8b(TyB);
5411 
5412    // and      16b,8b
5413    // bic      16b,8b
5414    // orn      16b,8b
5415    // orr      16b,8b
5416    if (1) test_and_16b_16b_16b(TyB);
5417    if (1) test_and_8b_8b_8b(TyB);
5418    if (1) test_bic_16b_16b_16b(TyB);
5419    if (1) test_bic_8b_8b_8b(TyB);
5420    if (1) test_orr_16b_16b_16b(TyB);
5421    if (1) test_orr_8b_8b_8b(TyB);
5422    if (1) test_orn_16b_16b_16b(TyB);
5423    if (1) test_orn_8b_8b_8b(TyB);
5424 
5425    // orr      8h,4h   #imm8, LSL #0 or 8
5426    // orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
5427    // bic      8h,4h   #imm8, LSL #0 or 8
5428    // bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
5429    // movi and mvni are very similar, a superset of these.
5430    // Cases are below.
5431    if (1) test_orr_8h_0x5A_lsl0(TyH);
5432    if (1) test_orr_8h_0xA5_lsl8(TyH);
5433    if (1) test_orr_4h_0x5A_lsl0(TyH);
5434    if (1) test_orr_4h_0xA5_lsl8(TyH);
5435    if (1) test_orr_4s_0x5A_lsl0(TyS);
5436    if (1) test_orr_4s_0x6B_lsl8(TyS);
5437    if (1) test_orr_4s_0x49_lsl16(TyS);
5438    if (1) test_orr_4s_0x3D_lsl24(TyS);
5439    if (1) test_orr_2s_0x5A_lsl0(TyS);
5440    if (1) test_orr_2s_0x6B_lsl8(TyS);
5441    if (1) test_orr_2s_0x49_lsl16(TyS);
5442    if (1) test_orr_2s_0x3D_lsl24(TyS);
5443    if (1) test_bic_8h_0x5A_lsl0(TyH);
5444    if (1) test_bic_8h_0xA5_lsl8(TyH);
5445    if (1) test_bic_4h_0x5A_lsl0(TyH);
5446    if (1) test_bic_4h_0xA5_lsl8(TyH);
5447    if (1) test_bic_4s_0x5A_lsl0(TyS);
5448    if (1) test_bic_4s_0x6B_lsl8(TyS);
5449    if (1) test_bic_4s_0x49_lsl16(TyS);
5450    if (1) test_bic_4s_0x3D_lsl24(TyS);
5451    if (1) test_bic_2s_0x5A_lsl0(TyS);
5452    if (1) test_bic_2s_0x6B_lsl8(TyS);
5453    if (1) test_bic_2s_0x49_lsl16(TyS);
5454    if (1) test_bic_2s_0x3D_lsl24(TyS);
5455 
5456    // bif      16b,8b (vector) (bit insert if false)
5457    // bit      16b,8b (vector) (bit insert if true)
5458    // bsl      16b,8b (vector) (bit select)
5459    // eor      16b,8b (vector)
5460    if (1) test_bif_16b_16b_16b(TyB);
5461    if (1) test_bif_8b_8b_8b(TyB);
5462    if (1) test_bit_16b_16b_16b(TyB);
5463    if (1) test_bit_8b_8b_8b(TyB);
5464    if (1) test_bsl_16b_16b_16b(TyB);
5465    if (1) test_bsl_8b_8b_8b(TyB);
5466    if (1) test_eor_16b_16b_16b(TyB);
5467    if (1) test_eor_8b_8b_8b(TyB);
5468 
5469    // cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
5470    // clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
5471    if (1) test_cls_4s_4s(TyS);
5472    if (1) test_cls_2s_2s(TyS);
5473    if (1) test_cls_8h_8h(TyH);
5474    if (1) test_cls_4h_4h(TyH);
5475    if (1) test_cls_16b_16b(TyB);
5476    if (1) test_cls_8b_8b(TyB);
5477    if (1) test_clz_4s_4s(TyS);
5478    if (1) test_clz_2s_2s(TyS);
5479    if (1) test_clz_8h_8h(TyH);
5480    if (1) test_clz_4h_4h(TyH);
5481    if (1) test_clz_16b_16b(TyB);
5482    if (1) test_clz_8b_8b(TyB);
5483 
5484    // cmeq     d
5485    // cmge     d
5486    // cmgt     d
5487    // cmhi     d
5488    // cmhs     d
5489    // cmtst    d
5490    if (1) test_cmeq_d_d_d(TyD);
5491    if (1) test_cmge_d_d_d(TyD);
5492    if (1) test_cmgt_d_d_d(TyD);
5493    if (1) test_cmhi_d_d_d(TyD);
5494    if (1) test_cmhs_d_d_d(TyD);
5495    if (1) test_cmtst_d_d_d(TyD);
5496 
5497    // cmeq     2d,4s,2s,8h,4h,16b,8b
5498    // cmge     2d,4s,2s,8h,4h,16b,8b
5499    // cmgt     2d,4s,2s,8h,4h,16b,8b
5500    // cmhi     2d,4s,2s,8h,4h,16b,8b
5501    // cmhs     2d,4s,2s,8h,4h,16b,8b
5502    // cmtst    2d,4s,2s,8h,4h,16b,8b
5503    if (1) test_cmeq_2d_2d_2d(TyD);
5504    if (1) test_cmeq_4s_4s_4s(TyS);
5505    if (1) test_cmeq_2s_2s_2s(TyS);
5506    if (1) test_cmeq_8h_8h_8h(TyH);
5507    if (1) test_cmeq_4h_4h_4h(TyH);
5508    if (1) test_cmeq_16b_16b_16b(TyB);
5509    if (1) test_cmeq_8b_8b_8b(TyB);
5510    if (1) test_cmge_2d_2d_2d(TyD);
5511    if (1) test_cmge_4s_4s_4s(TyS);
5512    if (1) test_cmge_2s_2s_2s(TyS);
5513    if (1) test_cmge_8h_8h_8h(TyH);
5514    if (1) test_cmge_4h_4h_4h(TyH);
5515    if (1) test_cmge_16b_16b_16b(TyB);
5516    if (1) test_cmge_8b_8b_8b(TyB);
5517    if (1) test_cmgt_2d_2d_2d(TyD);
5518    if (1) test_cmgt_4s_4s_4s(TyS);
5519    if (1) test_cmgt_2s_2s_2s(TyS);
5520    if (1) test_cmgt_8h_8h_8h(TyH);
5521    if (1) test_cmgt_4h_4h_4h(TyH);
5522    if (1) test_cmgt_16b_16b_16b(TyB);
5523    if (1) test_cmgt_8b_8b_8b(TyB);
5524    if (1) test_cmhi_2d_2d_2d(TyD);
5525    if (1) test_cmhi_4s_4s_4s(TyS);
5526    if (1) test_cmhi_2s_2s_2s(TyS);
5527    if (1) test_cmhi_8h_8h_8h(TyH);
5528    if (1) test_cmhi_4h_4h_4h(TyH);
5529    if (1) test_cmhi_16b_16b_16b(TyB);
5530    if (1) test_cmhi_8b_8b_8b(TyB);
5531    if (1) test_cmhs_2d_2d_2d(TyD);
5532    if (1) test_cmhs_4s_4s_4s(TyS);
5533    if (1) test_cmhs_2s_2s_2s(TyS);
5534    if (1) test_cmhs_8h_8h_8h(TyH);
5535    if (1) test_cmhs_4h_4h_4h(TyH);
5536    if (1) test_cmhs_16b_16b_16b(TyB);
5537    if (1) test_cmhs_8b_8b_8b(TyB);
5538    if (1) test_cmtst_2d_2d_2d(TyD);
5539    if (1) test_cmtst_4s_4s_4s(TyS);
5540    if (1) test_cmtst_2s_2s_2s(TyS);
5541    if (1) test_cmtst_8h_8h_8h(TyH);
5542    if (1) test_cmtst_4h_4h_4h(TyH);
5543    if (1) test_cmtst_16b_16b_16b(TyB);
5544    if (1) test_cmtst_8b_8b_8b(TyB);
5545 
5546    // cmeq_z   d
5547    // cmge_z   d
5548    // cmgt_z   d
5549    // cmle_z   d
5550    // cmlt_z   d
5551    if (1) test_cmeq_zero_d_d(TyD);
5552    if (1) test_cmge_zero_d_d(TyD);
5553    if (1) test_cmgt_zero_d_d(TyD);
5554    if (1) test_cmle_zero_d_d(TyD);
5555    if (1) test_cmlt_zero_d_d(TyD);
5556 
5557    // cmeq_z   2d,4s,2s,8h,4h,16b,8b
5558    // cmge_z   2d,4s,2s,8h,4h,16b,8b
5559    // cmgt_z   2d,4s,2s,8h,4h,16b,8b
5560    // cmle_z   2d,4s,2s,8h,4h,16b,8b
5561    // cmlt_z   2d,4s,2s,8h,4h,16b,8b
5562    if (1) test_cmeq_zero_2d_2d(TyD);
5563    if (1) test_cmeq_zero_4s_4s(TyS);
5564    if (1) test_cmeq_zero_2s_2s(TyS);
5565    if (1) test_cmeq_zero_8h_8h(TyH);
5566    if (1) test_cmeq_zero_4h_4h(TyH);
5567    if (1) test_cmeq_zero_16b_16b(TyB);
5568    if (1) test_cmeq_zero_8b_8b(TyB);
5569    if (1) test_cmge_zero_2d_2d(TyD);
5570    if (1) test_cmge_zero_4s_4s(TyS);
5571    if (1) test_cmge_zero_2s_2s(TyS);
5572    if (1) test_cmge_zero_8h_8h(TyH);
5573    if (1) test_cmge_zero_4h_4h(TyH);
5574    if (1) test_cmge_zero_16b_16b(TyB);
5575    if (1) test_cmge_zero_8b_8b(TyB);
5576    if (1) test_cmgt_zero_2d_2d(TyD);
5577    if (1) test_cmgt_zero_4s_4s(TyS);
5578    if (1) test_cmgt_zero_2s_2s(TyS);
5579    if (1) test_cmgt_zero_8h_8h(TyH);
5580    if (1) test_cmgt_zero_4h_4h(TyH);
5581    if (1) test_cmgt_zero_16b_16b(TyB);
5582    if (1) test_cmgt_zero_8b_8b(TyB);
5583    if (1) test_cmle_zero_2d_2d(TyD);
5584    if (1) test_cmle_zero_4s_4s(TyS);
5585    if (1) test_cmle_zero_2s_2s(TyS);
5586    if (1) test_cmle_zero_8h_8h(TyH);
5587    if (1) test_cmle_zero_4h_4h(TyH);
5588    if (1) test_cmle_zero_16b_16b(TyB);
5589    if (1) test_cmle_zero_8b_8b(TyB);
5590    if (1) test_cmlt_zero_2d_2d(TyD);
5591    if (1) test_cmlt_zero_4s_4s(TyS);
5592    if (1) test_cmlt_zero_2s_2s(TyS);
5593    if (1) test_cmlt_zero_8h_8h(TyH);
5594    if (1) test_cmlt_zero_4h_4h(TyH);
5595    if (1) test_cmlt_zero_16b_16b(TyB);
5596    if (1) test_cmlt_zero_8b_8b(TyB);
5597 
5598    // cnt      16b,8b (population count per byte)
5599    if (1) test_cnt_16b_16b(TyB);
5600    if (1) test_cnt_8b_8b(TyB);
5601 
5602    // dup      d,s,h,b (vec elem to scalar)
5603    if (1) test_dup_d_d0(TyD);
5604    if (1) test_dup_d_d1(TyD);
5605    if (1) test_dup_s_s0(TyS);
5606    if (1) test_dup_s_s3(TyS);
5607    if (1) test_dup_h_h0(TyH);
5608    if (1) test_dup_h_h6(TyH);
5609    if (1) test_dup_b_b0(TyB);
5610    if (1) test_dup_b_b13(TyB);
5611 
5612    // dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
5613    if (1) test_dup_2d_d0(TyD);
5614    if (1) test_dup_2d_d1(TyD);
5615    if (1) test_dup_4s_s0(TyS);
5616    if (1) test_dup_4s_s3(TyS);
5617    if (1) test_dup_2s_s0(TyS);
5618    if (1) test_dup_2s_s2(TyS);
5619    if (1) test_dup_8h_h0(TyH);
5620    if (1) test_dup_8h_h6(TyH);
5621    if (1) test_dup_4h_h1(TyH);
5622    if (1) test_dup_4h_h5(TyH);
5623    if (1) test_dup_16b_b2(TyB);
5624    if (1) test_dup_16b_b12(TyB);
5625    if (1) test_dup_8b_b3(TyB);
5626    if (1) test_dup_8b_b13(TyB);
5627 
5628    // dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
5629    if (1) test_dup_2d_x(TyD);
5630    if (1) test_dup_4s_w(TyS);
5631    if (1) test_dup_2s_w(TyS);
5632    if (1) test_dup_8h_w(TyH);
5633    if (1) test_dup_4h_w(TyH);
5634    if (1) test_dup_16b_w(TyB);
5635    if (1) test_dup_8b_w(TyB);
5636 
5637    // ext      16b,8b,#imm4 (concat 2 vectors, then slice)
5638    if (1) test_ext_16b_16b_16b_0x0(TyB);
5639    if (1) test_ext_16b_16b_16b_0x1(TyB);
5640    if (1) test_ext_16b_16b_16b_0x2(TyB);
5641    if (1) test_ext_16b_16b_16b_0x3(TyB);
5642    if (1) test_ext_16b_16b_16b_0x4(TyB);
5643    if (1) test_ext_16b_16b_16b_0x5(TyB);
5644    if (1) test_ext_16b_16b_16b_0x6(TyB);
5645    if (1) test_ext_16b_16b_16b_0x7(TyB);
5646    if (1) test_ext_16b_16b_16b_0x8(TyB);
5647    if (1) test_ext_16b_16b_16b_0x9(TyB);
5648    if (1) test_ext_16b_16b_16b_0xA(TyB);
5649    if (1) test_ext_16b_16b_16b_0xB(TyB);
5650    if (1) test_ext_16b_16b_16b_0xC(TyB);
5651    if (1) test_ext_16b_16b_16b_0xD(TyB);
5652    if (1) test_ext_16b_16b_16b_0xE(TyB);
5653    if (1) test_ext_16b_16b_16b_0xF(TyB);
5654    if (1) test_ext_8b_8b_8b_0x0(TyB);
5655    if (1) test_ext_8b_8b_8b_0x1(TyB);
5656    if (1) test_ext_8b_8b_8b_0x2(TyB);
5657    if (1) test_ext_8b_8b_8b_0x3(TyB);
5658    if (1) test_ext_8b_8b_8b_0x4(TyB);
5659    if (1) test_ext_8b_8b_8b_0x5(TyB);
5660    if (1) test_ext_8b_8b_8b_0x6(TyB);
5661    if (1) test_ext_8b_8b_8b_0x7(TyB);
5662 
5663    // ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
5664    if (1) test_ins_d0_d0(TyD);
5665    if (1) test_ins_d0_d1(TyD);
5666    if (1) test_ins_d1_d0(TyD);
5667    if (1) test_ins_d1_d1(TyD);
5668    if (1) test_ins_s0_s2(TyS);
5669    if (1) test_ins_s3_s0(TyS);
5670    if (1) test_ins_s2_s1(TyS);
5671    if (1) test_ins_s1_s3(TyS);
5672    if (1) test_ins_h0_h6(TyH);
5673    if (1) test_ins_h7_h0(TyH);
5674    if (1) test_ins_h6_h1(TyH);
5675    if (1) test_ins_h1_h7(TyH);
5676    if (1) test_ins_b0_b14(TyB);
5677    if (1) test_ins_b15_b8(TyB);
5678    if (1) test_ins_b13_b9(TyB);
5679    if (1) test_ins_b5_b12(TyB);
5680 
5681    // ins      d[]_x, s[]_w, h[]_w, b[]_w
5682    if (1) test_INS_general();
5683 
5684    // mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
5685    // mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
5686    // mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
5687    if (1) test_mla_4s_4s_s0(TyS);
5688    if (1) test_mla_4s_4s_s3(TyS);
5689    if (1) test_mla_2s_2s_s0(TyS);
5690    if (1) test_mla_2s_2s_s3(TyS);
5691    if (1) test_mla_8h_8h_h1(TyH);
5692    if (1) test_mla_8h_8h_h5(TyH);
5693    if (1) test_mla_4h_4h_h2(TyH);
5694    if (1) test_mla_4h_4h_h7(TyH);
5695    if (1) test_mls_4s_4s_s0(TyS);
5696    if (1) test_mls_4s_4s_s3(TyS);
5697    if (1) test_mls_2s_2s_s0(TyS);
5698    if (1) test_mls_2s_2s_s3(TyS);
5699    if (1) test_mls_8h_8h_h1(TyH);
5700    if (1) test_mls_8h_8h_h5(TyH);
5701    if (1) test_mls_4h_4h_h2(TyH);
5702    if (1) test_mls_4h_4h_h7(TyH);
5703    if (1) test_mul_4s_4s_s0(TyS);
5704    if (1) test_mul_4s_4s_s3(TyS);
5705    if (1) test_mul_2s_2s_s0(TyS);
5706    if (1) test_mul_2s_2s_s3(TyS);
5707    if (1) test_mul_8h_8h_h1(TyH);
5708    if (1) test_mul_8h_8h_h5(TyH);
5709    if (1) test_mul_4h_4h_h2(TyH);
5710    if (1) test_mul_4h_4h_h7(TyH);
5711 
5712    // mla   4s,2s,8h,4h,16b,8b
5713    // mls   4s,2s,8h,4h,16b,8b
5714    // mul   4s,2s,8h,4h,16b,8b
5715    if (1) test_mla_4s_4s_4s(TyS);
5716    if (1) test_mla_2s_2s_2s(TyS);
5717    if (1) test_mla_8h_8h_8h(TyH);
5718    if (1) test_mla_4h_4h_4h(TyH);
5719    if (1) test_mla_16b_16b_16b(TyB);
5720    if (1) test_mla_8b_8b_8b(TyB);
5721    if (1) test_mls_4s_4s_4s(TyS);
5722    if (1) test_mls_2s_2s_2s(TyS);
5723    if (1) test_mls_8h_8h_8h(TyH);
5724    if (1) test_mls_4h_4h_4h(TyH);
5725    if (1) test_mls_16b_16b_16b(TyB);
5726    if (1) test_mls_8b_8b_8b(TyB);
5727    if (1) test_mul_4s_4s_4s(TyS);
5728    if (1) test_mul_2s_2s_2s(TyS);
5729    if (1) test_mul_8h_8h_8h(TyH);
5730    if (1) test_mul_4h_4h_4h(TyH);
5731    if (1) test_mul_16b_16b_16b(TyB);
5732    if (1) test_mul_8b_8b_8b(TyB);
5733 
5734    // Some of these movi and mvni cases are similar to orr and bic
5735    // cases with immediates.  Maybe they should be moved together.
5736    // movi  16b,8b   #imm8, LSL #0
5737    if (1) test_movi_16b_0x9C_lsl0(TyB);
5738    if (1) test_movi_8b_0x8B_lsl0(TyB);
5739 
5740    // movi  8h,4h    #imm8, LSL #0 or 8
5741    // mvni  8h,4h    #imm8, LSL #0 or 8
5742    if (1) test_movi_8h_0x5A_lsl0(TyH);
5743    if (1) test_movi_8h_0xA5_lsl8(TyH);
5744    if (1) test_movi_4h_0x5A_lsl0(TyH);
5745    if (1) test_movi_4h_0xA5_lsl8(TyH);
5746    if (1) test_mvni_8h_0x5A_lsl0(TyH);
5747    if (1) test_mvni_8h_0xA5_lsl8(TyH);
5748    if (1) test_mvni_4h_0x5A_lsl0(TyH);
5749    if (1) test_mvni_4h_0xA5_lsl8(TyH);
5750 
5751    // movi  4s,2s    #imm8, LSL #0, 8, 16, 24
5752    // mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
5753    if (1) test_movi_4s_0x5A_lsl0(TyS);
5754    if (1) test_movi_4s_0x6B_lsl8(TyS);
5755    if (1) test_movi_4s_0x49_lsl16(TyS);
5756    if (1) test_movi_4s_0x3D_lsl24(TyS);
5757    if (1) test_movi_2s_0x5A_lsl0(TyS);
5758    if (1) test_movi_2s_0x6B_lsl8(TyS);
5759    if (1) test_movi_2s_0x49_lsl16(TyS);
5760    if (1) test_movi_2s_0x3D_lsl24(TyS);
5761    if (1) test_mvni_4s_0x5A_lsl0(TyS);
5762    if (1) test_mvni_4s_0x6B_lsl8(TyS);
5763    if (1) test_mvni_4s_0x49_lsl16(TyS);
5764    if (1) test_mvni_4s_0x3D_lsl24(TyS);
5765    if (1) test_mvni_2s_0x5A_lsl0(TyS);
5766    if (1) test_mvni_2s_0x6B_lsl8(TyS);
5767    if (1) test_mvni_2s_0x49_lsl16(TyS);
5768    if (1) test_mvni_2s_0x3D_lsl24(TyS);
5769 
5770    // movi  4s,2s    #imm8, MSL #8 or 16
5771    // mvni  4s,2s    #imm8, MSL #8 or 16
5772    if (1) test_movi_4s_0x6B_msl8(TyS);
5773    if (1) test_movi_4s_0x94_msl16(TyS);
5774    if (1) test_movi_2s_0x7A_msl8(TyS);
5775    if (1) test_movi_2s_0xA5_msl16(TyS);
5776    if (1) test_mvni_4s_0x6B_msl8(TyS);
5777    if (1) test_mvni_4s_0x94_msl16(TyS);
5778    if (1) test_mvni_2s_0x7A_msl8(TyS);
5779    if (1) test_mvni_2s_0xA5_msl16(TyS);
5780 
5781    // movi  d,       #imm64
5782    // movi  2d,      #imm64
5783    if (1) test_movi_d_0xA5(TyD);
5784    if (1) test_movi_2d_0xB4(TyD);
5785 
5786    // not   16b,8b
5787    if (1) test_not_16b_16b(TyB);
5788    if (1) test_not_8b_8b(TyB);
5789 
5790    // pmul  16b,8b
5791    if (1) test_pmul_16b_16b_16b(TyB);
5792    if (1) test_pmul_8b_8b_8b(TyB);
5793 
5794    // pmull{2}  8h_8b_8b,8h_16b_16b
5795    // pmull{2} 1q_1d_1d,1q_2d_2d is in the crypto section below
5796    if (1) test_pmull_8h_8b_8b(TyB);
5797    if (1) test_pmull2_8h_16b_16b(TyB);
5798 
5799    // rbit    16b,8b
5800    // rev16   16b,8b
5801    // rev32   16b,8b,8h,4h
5802    // rev64   16b,8b,8h,4h,4s,2s
5803    if (1) test_rbit_16b_16b(TyB);
5804    if (1) test_rbit_8b_8b(TyB);
5805    if (1) test_rev16_16b_16b(TyB);
5806    if (1) test_rev16_8b_8b(TyB);
5807    if (1) test_rev32_16b_16b(TyB);
5808    if (1) test_rev32_8b_8b(TyB);
5809    if (1) test_rev32_8h_8h(TyH);
5810    if (1) test_rev32_4h_4h(TyH);
5811    if (1) test_rev64_16b_16b(TyB);
5812    if (1) test_rev64_8b_8b(TyB);
5813    if (1) test_rev64_8h_8h(TyH);
5814    if (1) test_rev64_4h_4h(TyH);
5815    if (1) test_rev64_4s_4s(TyS);
5816    if (1) test_rev64_2s_2s(TyS);
5817 
5818    // saba      16b,8b,8h,4h,4s,2s
5819    // uaba      16b,8b,8h,4h,4s,2s
5820    if (1) test_saba_4s_4s_4s(TyS);
5821    if (1) test_saba_2s_2s_2s(TyS);
5822    if (1) test_saba_8h_8h_8h(TyH);
5823    if (1) test_saba_4h_4h_4h(TyH);
5824    if (1) test_saba_16b_16b_16b(TyB);
5825    if (1) test_saba_8b_8b_8b(TyB);
5826    if (1) test_uaba_4s_4s_4s(TyS);
5827    if (1) test_uaba_2s_2s_2s(TyS);
5828    if (1) test_uaba_8h_8h_8h(TyH);
5829    if (1) test_uaba_4h_4h_4h(TyH);
5830    if (1) test_uaba_16b_16b_16b(TyB);
5831    if (1) test_uaba_8b_8b_8b(TyB);
5832 
5833    // sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5834    // uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5835    if (1) test_sabal_2d_2s_2s(TyS);
5836    if (1) test_sabal2_2d_4s_4s(TyS);
5837    if (1) test_sabal_4s_4h_4h(TyH);
5838    if (1) test_sabal2_4s_8h_8h(TyH);
5839    if (1) test_sabal_8h_8b_8b(TyB);
5840    if (1) test_sabal2_8h_16b_16b(TyB);
5841    if (1) test_uabal_2d_2s_2s(TyS);
5842    if (1) test_uabal2_2d_4s_4s(TyS);
5843    if (1) test_uabal_4s_4h_4h(TyH);
5844    if (1) test_uabal2_4s_8h_8h(TyH);
5845    if (1) test_uabal_8h_8b_8b(TyB);
5846    if (1) test_uabal2_8h_16b_16b(TyB);
5847 
5848    // sabd      16b,8b,8h,4h,4s,2s
5849    // uabd      16b,8b,8h,4h,4s,2s
5850    if (1) test_sabd_4s_4s_4s(TyS);
5851    if (1) test_sabd_2s_2s_2s(TyS);
5852    if (1) test_sabd_8h_8h_8h(TyH);
5853    if (1) test_sabd_4h_4h_4h(TyH);
5854    if (1) test_sabd_16b_16b_16b(TyB);
5855    if (1) test_sabd_8b_8b_8b(TyB);
5856    if (1) test_uabd_4s_4s_4s(TyS);
5857    if (1) test_uabd_2s_2s_2s(TyS);
5858    if (1) test_uabd_8h_8h_8h(TyH);
5859    if (1) test_uabd_4h_4h_4h(TyH);
5860    if (1) test_uabd_16b_16b_16b(TyB);
5861    if (1) test_uabd_8b_8b_8b(TyB);
5862 
5863    // sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5864    // uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5865    if (1) test_sabdl_2d_2s_2s(TyS);
5866    if (1) test_sabdl2_2d_4s_4s(TyS);
5867    if (1) test_sabdl_4s_4h_4h(TyH);
5868    if (1) test_sabdl2_4s_8h_8h(TyH);
5869    if (1) test_sabdl_8h_8b_8b(TyB);
5870    if (1) test_sabdl2_8h_16b_16b(TyB);
5871    if (1) test_uabdl_2d_2s_2s(TyS);
5872    if (1) test_uabdl2_2d_4s_4s(TyS);
5873    if (1) test_uabdl_4s_4h_4h(TyH);
5874    if (1) test_uabdl2_4s_8h_8h(TyH);
5875    if (1) test_uabdl_8h_8b_8b(TyB);
5876    if (1) test_uabdl2_8h_16b_16b(TyB);
5877 
5878    // sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
5879    // uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
5880    if (1) test_sadalp_1d_2s(TyS);
5881    if (1) test_sadalp_2d_4s(TyS);
5882    if (1) test_sadalp_2s_4h(TyH);
5883    if (1) test_sadalp_4s_8h(TyH);
5884    if (1) test_sadalp_4h_8b(TyB);
5885    if (1) test_sadalp_8h_16b(TyB);
5886    if (1) test_uadalp_1d_2s(TyS);
5887    if (1) test_uadalp_2d_4s(TyS);
5888    if (1) test_uadalp_2s_4h(TyH);
5889    if (1) test_uadalp_4s_8h(TyH);
5890    if (1) test_uadalp_4h_8b(TyB);
5891    if (1) test_uadalp_8h_16b(TyB);
5892 
5893    // saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5894    // uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5895    // ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5896    // usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
5897    if (1) test_saddl_2d_2s_2s(TyS);
5898    if (1) test_saddl2_2d_4s_4s(TyS);
5899    if (1) test_saddl_4s_4h_4h(TyH);
5900    if (1) test_saddl2_4s_8h_8h(TyH);
5901    if (1) test_saddl_8h_8b_8b(TyB);
5902    if (1) test_saddl2_8h_16b_16b(TyB);
5903    if (1) test_uaddl_2d_2s_2s(TyS);
5904    if (1) test_uaddl2_2d_4s_4s(TyS);
5905    if (1) test_uaddl_4s_4h_4h(TyH);
5906    if (1) test_uaddl2_4s_8h_8h(TyH);
5907    if (1) test_uaddl_8h_8b_8b(TyB);
5908    if (1) test_uaddl2_8h_16b_16b(TyB);
5909    if (1) test_ssubl_2d_2s_2s(TyS);
5910    if (1) test_ssubl2_2d_4s_4s(TyS);
5911    if (1) test_ssubl_4s_4h_4h(TyH);
5912    if (1) test_ssubl2_4s_8h_8h(TyH);
5913    if (1) test_ssubl_8h_8b_8b(TyB);
5914    if (1) test_ssubl2_8h_16b_16b(TyB);
5915    if (1) test_usubl_2d_2s_2s(TyS);
5916    if (1) test_usubl2_2d_4s_4s(TyS);
5917    if (1) test_usubl_4s_4h_4h(TyH);
5918    if (1) test_usubl2_4s_8h_8h(TyH);
5919    if (1) test_usubl_8h_8b_8b(TyB);
5920    if (1) test_usubl2_8h_16b_16b(TyB);
5921 
5922    // saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
5923    // uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
5924    if (1) test_saddlp_1d_2s(TyS);
5925    if (1) test_saddlp_2d_4s(TyS);
5926    if (1) test_saddlp_2s_4h(TyH);
5927    if (1) test_saddlp_4s_8h(TyH);
5928    if (1) test_saddlp_4h_8b(TyB);
5929    if (1) test_saddlp_8h_16b(TyB);
5930    if (1) test_uaddlp_1d_2s(TyS);
5931    if (1) test_uaddlp_2d_4s(TyS);
5932    if (1) test_uaddlp_2s_4h(TyH);
5933    if (1) test_uaddlp_4s_8h(TyH);
5934    if (1) test_uaddlp_4h_8b(TyB);
5935    if (1) test_uaddlp_8h_16b(TyB);
5936 
5937    // saddlv    h_16b/8b, s_8h/4h, d_4s
5938    // uaddlv    h_16b/8b, s_8h/4h, d_4s
5939    if (1) test_saddlv_h_16b(TyB);
5940    if (1) test_saddlv_h_8b(TyB);
5941    if (1) test_saddlv_s_8h(TyH);
5942    if (1) test_saddlv_s_4h(TyH);
5943    if (1) test_saddlv_d_4s(TyH);
5944    if (1) test_uaddlv_h_16b(TyB);
5945    if (1) test_uaddlv_h_8b(TyB);
5946    if (1) test_uaddlv_s_8h(TyH);
5947    if (1) test_uaddlv_s_4h(TyH);
5948    if (1) test_uaddlv_d_4s(TyH);
5949 
5950    // saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
5951    // uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
5952    // ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
5953    // usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
5954    if (1) test_saddw2_8h_8h_16b(TyB);
5955    if (1) test_saddw_8h_8h_8b(TyB);
5956    if (1) test_saddw2_4s_4s_8h(TyH);
5957    if (1) test_saddw_4s_4s_4h(TyH);
5958    if (1) test_saddw2_2d_2d_4s(TyS);
5959    if (1) test_saddw_2d_2d_2s(TyS);
5960    if (1) test_uaddw2_8h_8h_16b(TyB);
5961    if (1) test_uaddw_8h_8h_8b(TyB);
5962    if (1) test_uaddw2_4s_4s_8h(TyH);
5963    if (1) test_uaddw_4s_4s_4h(TyH);
5964    if (1) test_uaddw2_2d_2d_4s(TyS);
5965    if (1) test_uaddw_2d_2d_2s(TyS);
5966    if (1) test_ssubw2_8h_8h_16b(TyB);
5967    if (1) test_ssubw_8h_8h_8b(TyB);
5968    if (1) test_ssubw2_4s_4s_8h(TyH);
5969    if (1) test_ssubw_4s_4s_4h(TyH);
5970    if (1) test_ssubw2_2d_2d_4s(TyS);
5971    if (1) test_ssubw_2d_2d_2s(TyS);
5972    if (1) test_usubw2_8h_8h_16b(TyB);
5973    if (1) test_usubw_8h_8h_8b(TyB);
5974    if (1) test_usubw2_4s_4s_8h(TyH);
5975    if (1) test_usubw_4s_4s_4h(TyH);
5976    if (1) test_usubw2_2d_2d_4s(TyS);
5977    if (1) test_usubw_2d_2d_2s(TyS);
5978 
5979    // shadd        16b,8b,8h,4h,4s,2s
5980    // uhadd        16b,8b,8h,4h,4s,2s
5981    // shsub        16b,8b,8h,4h,4s,2s
5982    // uhsub        16b,8b,8h,4h,4s,2s
5983    if (1) test_shadd_4s_4s_4s(TyS);
5984    if (1) test_shadd_2s_2s_2s(TyS);
5985    if (1) test_shadd_8h_8h_8h(TyH);
5986    if (1) test_shadd_4h_4h_4h(TyH);
5987    if (1) test_shadd_16b_16b_16b(TyB);
5988    if (1) test_shadd_8b_8b_8b(TyB);
5989    if (1) test_uhadd_4s_4s_4s(TyS);
5990    if (1) test_uhadd_2s_2s_2s(TyS);
5991    if (1) test_uhadd_8h_8h_8h(TyH);
5992    if (1) test_uhadd_4h_4h_4h(TyH);
5993    if (1) test_uhadd_16b_16b_16b(TyB);
5994    if (1) test_uhadd_8b_8b_8b(TyB);
5995    if (1) test_shsub_4s_4s_4s(TyS);
5996    if (1) test_shsub_2s_2s_2s(TyS);
5997    if (1) test_shsub_8h_8h_8h(TyH);
5998    if (1) test_shsub_4h_4h_4h(TyH);
5999    if (1) test_shsub_16b_16b_16b(TyB);
6000    if (1) test_shsub_8b_8b_8b(TyB);
6001    if (1) test_uhsub_4s_4s_4s(TyS);
6002    if (1) test_uhsub_2s_2s_2s(TyS);
6003    if (1) test_uhsub_8h_8h_8h(TyH);
6004    if (1) test_uhsub_4h_4h_4h(TyH);
6005    if (1) test_uhsub_16b_16b_16b(TyB);
6006    if (1) test_uhsub_8b_8b_8b(TyB);
6007 
6008    // shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
6009    if (1) test_shll_8h_8b_8(TyB);
6010    if (1) test_shll2_8h_16b_8(TyB);
6011    if (1) test_shll_4s_4h_16(TyH);
6012    if (1) test_shll2_4s_8h_16(TyH);
6013    if (1) test_shll_2d_2s_32(TyS);
6014    if (1) test_shll2_2d_4s_32(TyS);
6015 
   // shrn{2}      2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
   // rshrn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
6018    if (1) test_shrn_2s_2d_1(TyD);
6019    if (1) test_shrn_2s_2d_32(TyD);
6020    if (1) test_shrn2_4s_2d_1(TyD);
6021    if (1) test_shrn2_4s_2d_32(TyD);
6022    if (1) test_shrn_4h_4s_1(TyS);
6023    if (1) test_shrn_4h_4s_16(TyS);
6024    if (1) test_shrn2_8h_4s_1(TyS);
6025    if (1) test_shrn2_8h_4s_16(TyS);
6026    if (1) test_shrn_8b_8h_1(TyH);
6027    if (1) test_shrn_8b_8h_8(TyH);
6028    if (1) test_shrn2_16b_8h_1(TyH);
6029    if (1) test_shrn2_16b_8h_8(TyH);
6030    if (1) test_rshrn_2s_2d_1(TyD);
6031    if (1) test_rshrn_2s_2d_32(TyD);
6032    if (1) test_rshrn2_4s_2d_1(TyD);
6033    if (1) test_rshrn2_4s_2d_32(TyD);
6034    if (1) test_rshrn_4h_4s_1(TyS);
6035    if (1) test_rshrn_4h_4s_16(TyS);
6036    if (1) test_rshrn2_8h_4s_1(TyS);
6037    if (1) test_rshrn2_8h_4s_16(TyS);
6038    if (1) test_rshrn_8b_8h_1(TyH);
6039    if (1) test_rshrn_8b_8h_8(TyH);
6040    if (1) test_rshrn2_16b_8h_1(TyH);
6041    if (1) test_rshrn2_16b_8h_8(TyH);
6042 
6043    // sli          d_#imm
6044    // sri          d_#imm
6045    if (1) test_sli_d_d_0(TyD);
6046    if (1) test_sli_d_d_32(TyD);
6047    if (1) test_sli_d_d_63(TyD);
6048    if (1) test_sri_d_d_1(TyD);
6049    if (1) test_sri_d_d_33(TyD);
6050    if (1) test_sri_d_d_64(TyD);
6051 
6052    // sli          2d,4s,2s,8h,4h,16b,8b  _#imm
6053    // sri          2d,4s,2s,8h,4h,16b,8b  _#imm
6054    if (1) test_sli_2d_2d_0(TyD);
6055    if (1) test_sli_2d_2d_32(TyD);
6056    if (1) test_sli_2d_2d_63(TyD);
6057    if (1) test_sli_4s_4s_0(TyS);
6058    if (1) test_sli_4s_4s_16(TyS);
6059    if (1) test_sli_4s_4s_31(TyS);
6060    if (1) test_sli_2s_2s_0(TyS);
6061    if (1) test_sli_2s_2s_16(TyS);
6062    if (1) test_sli_2s_2s_31(TyS);
6063    if (1) test_sli_8h_8h_0(TyH);
6064    if (1) test_sli_8h_8h_8(TyH);
6065    if (1) test_sli_8h_8h_15(TyH);
6066    if (1) test_sli_4h_4h_0(TyH);
6067    if (1) test_sli_4h_4h_8(TyH);
6068    if (1) test_sli_4h_4h_15(TyH);
6069    if (1) test_sli_16b_16b_0(TyB);
6070    if (1) test_sli_16b_16b_3(TyB);
6071    if (1) test_sli_16b_16b_7(TyB);
6072    if (1) test_sli_8b_8b_0(TyB);
6073    if (1) test_sli_8b_8b_3(TyB);
6074    if (1) test_sli_8b_8b_7(TyB);
6075    if (1) test_sri_2d_2d_1(TyD);
6076    if (1) test_sri_2d_2d_33(TyD);
6077    if (1) test_sri_2d_2d_64(TyD);
6078    if (1) test_sri_4s_4s_1(TyS);
6079    if (1) test_sri_4s_4s_17(TyS);
6080    if (1) test_sri_4s_4s_32(TyS);
6081    if (1) test_sri_2s_2s_1(TyS);
6082    if (1) test_sri_2s_2s_17(TyS);
6083    if (1) test_sri_2s_2s_32(TyS);
6084    if (1) test_sri_8h_8h_1(TyH);
6085    if (1) test_sri_8h_8h_8(TyH);
6086    if (1) test_sri_8h_8h_16(TyH);
6087    if (1) test_sri_4h_4h_1(TyH);
6088    if (1) test_sri_4h_4h_8(TyH);
6089    if (1) test_sri_4h_4h_16(TyH);
6090    if (1) test_sri_16b_16b_1(TyB);
6091    if (1) test_sri_16b_16b_4(TyB);
6092    if (1) test_sri_16b_16b_8(TyB);
6093    if (1) test_sri_8b_8b_1(TyB);
6094    if (1) test_sri_8b_8b_4(TyB);
6095    if (1) test_sri_8b_8b_8(TyB);
6096 
6097    // smax         4s,2s,8h,4h,16b,8b
6098    // umax         4s,2s,8h,4h,16b,8b
6099    // smin         4s,2s,8h,4h,16b,8b
6100    // umin         4s,2s,8h,4h,16b,8b
6101    if (1) test_smax_4s_4s_4s(TyS);
6102    if (1) test_smax_2s_2s_2s(TyS);
6103    if (1) test_smax_8h_8h_8h(TyH);
6104    if (1) test_smax_4h_4h_4h(TyH);
6105    if (1) test_smax_16b_16b_16b(TyB);
6106    if (1) test_smax_8b_8b_8b(TyB);
6107    if (1) test_umax_4s_4s_4s(TyS);
6108    if (1) test_umax_2s_2s_2s(TyS);
6109    if (1) test_umax_8h_8h_8h(TyH);
6110    if (1) test_umax_4h_4h_4h(TyH);
6111    if (1) test_umax_16b_16b_16b(TyB);
6112    if (1) test_umax_8b_8b_8b(TyB);
6113    if (1) test_smin_4s_4s_4s(TyS);
6114    if (1) test_smin_2s_2s_2s(TyS);
6115    if (1) test_smin_8h_8h_8h(TyH);
6116    if (1) test_smin_4h_4h_4h(TyH);
6117    if (1) test_smin_16b_16b_16b(TyB);
6118    if (1) test_smin_8b_8b_8b(TyB);
6119    if (1) test_umin_4s_4s_4s(TyS);
6120    if (1) test_umin_2s_2s_2s(TyS);
6121    if (1) test_umin_8h_8h_8h(TyH);
6122    if (1) test_umin_4h_4h_4h(TyH);
6123    if (1) test_umin_16b_16b_16b(TyB);
6124    if (1) test_umin_8b_8b_8b(TyB);
6125 
6126    // smaxp        4s,2s,8h,4h,16b,8b
6127    // umaxp        4s,2s,8h,4h,16b,8b
6128    // sminp        4s,2s,8h,4h,16b,8b
6129    // uminp        4s,2s,8h,4h,16b,8b
6130    if (1) test_smaxp_4s_4s_4s(TyS);
6131    if (1) test_smaxp_2s_2s_2s(TyS);
6132    if (1) test_smaxp_8h_8h_8h(TyH);
6133    if (1) test_smaxp_4h_4h_4h(TyH);
6134    if (1) test_smaxp_16b_16b_16b(TyB);
6135    if (1) test_smaxp_8b_8b_8b(TyB);
6136    if (1) test_umaxp_4s_4s_4s(TyS);
6137    if (1) test_umaxp_2s_2s_2s(TyS);
6138    if (1) test_umaxp_8h_8h_8h(TyH);
6139    if (1) test_umaxp_4h_4h_4h(TyH);
6140    if (1) test_umaxp_16b_16b_16b(TyB);
6141    if (1) test_umaxp_8b_8b_8b(TyB);
6142    if (1) test_sminp_4s_4s_4s(TyS);
6143    if (1) test_sminp_2s_2s_2s(TyS);
6144    if (1) test_sminp_8h_8h_8h(TyH);
6145    if (1) test_sminp_4h_4h_4h(TyH);
6146    if (1) test_sminp_16b_16b_16b(TyB);
6147    if (1) test_sminp_8b_8b_8b(TyB);
6148    if (1) test_uminp_4s_4s_4s(TyS);
6149    if (1) test_uminp_2s_2s_2s(TyS);
6150    if (1) test_uminp_8h_8h_8h(TyH);
6151    if (1) test_uminp_4h_4h_4h(TyH);
6152    if (1) test_uminp_16b_16b_16b(TyB);
6153    if (1) test_uminp_8b_8b_8b(TyB);
6154 
6155    // smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
6156    // umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
6157    // sminv        s_4s,h_8h,h_4h,b_16b,b_8b
6158    // uminv        s_4s,h_8h,h_4h,b_16b,b_8b
6159    if (1) test_SMAXV();
6160    if (1) test_UMAXV();
6161    if (1) test_SMINV();
6162    if (1) test_UMINV();
6163 
6164    // smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
6165    // umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
6166    // smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
6167    // umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
6168    // smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
6169    // umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
6170    if (1) test_smlal_2d_2s_s0(TyS);
6171    if (1) test_smlal_2d_2s_s3(TyS);
6172    if (1) test_smlal2_2d_4s_s1(TyS);
6173    if (1) test_smlal2_2d_4s_s2(TyS);
6174    if (1) test_smlal_4s_4h_h0(TyH);
6175    if (1) test_smlal_4s_4h_h7(TyH);
6176    if (1) test_smlal2_4s_8h_h1(TyH);
6177    if (1) test_smlal2_4s_8h_h4(TyH);
6178    if (1) test_umlal_2d_2s_s0(TyS);
6179    if (1) test_umlal_2d_2s_s3(TyS);
6180    if (1) test_umlal2_2d_4s_s1(TyS);
6181    if (1) test_umlal2_2d_4s_s2(TyS);
6182    if (1) test_umlal_4s_4h_h0(TyH);
6183    if (1) test_umlal_4s_4h_h7(TyH);
6184    if (1) test_umlal2_4s_8h_h1(TyH);
6185    if (1) test_umlal2_4s_8h_h4(TyH);
6186    if (1) test_smlsl_2d_2s_s0(TyS);
6187    if (1) test_smlsl_2d_2s_s3(TyS);
6188    if (1) test_smlsl2_2d_4s_s1(TyS);
6189    if (1) test_smlsl2_2d_4s_s2(TyS);
6190    if (1) test_smlsl_4s_4h_h0(TyH);
6191    if (1) test_smlsl_4s_4h_h7(TyH);
6192    if (1) test_smlsl2_4s_8h_h1(TyH);
6193    if (1) test_smlsl2_4s_8h_h4(TyH);
6194    if (1) test_umlsl_2d_2s_s0(TyS);
6195    if (1) test_umlsl_2d_2s_s3(TyS);
6196    if (1) test_umlsl2_2d_4s_s1(TyS);
6197    if (1) test_umlsl2_2d_4s_s2(TyS);
6198    if (1) test_umlsl_4s_4h_h0(TyH);
6199    if (1) test_umlsl_4s_4h_h7(TyH);
6200    if (1) test_umlsl2_4s_8h_h1(TyH);
6201    if (1) test_umlsl2_4s_8h_h4(TyH);
6202    if (1) test_smull_2d_2s_s0(TyS);
6203    if (1) test_smull_2d_2s_s3(TyS);
6204    if (1) test_smull2_2d_4s_s1(TyS);
6205    if (1) test_smull2_2d_4s_s2(TyS);
6206    if (1) test_smull_4s_4h_h0(TyH);
6207    if (1) test_smull_4s_4h_h7(TyH);
6208    if (1) test_smull2_4s_8h_h1(TyH);
6209    if (1) test_smull2_4s_8h_h4(TyH);
6210    if (1) test_umull_2d_2s_s0(TyS);
6211    if (1) test_umull_2d_2s_s3(TyS);
6212    if (1) test_umull2_2d_4s_s1(TyS);
6213    if (1) test_umull2_2d_4s_s2(TyS);
6214    if (1) test_umull_4s_4h_h0(TyH);
6215    if (1) test_umull_4s_4h_h7(TyH);
6216    if (1) test_umull2_4s_8h_h1(TyH);
6217    if (1) test_umull2_4s_8h_h4(TyH);
6218 
6219    // smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
6220    // umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
6221    // smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
6222    // umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
6223    // smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
6224    // umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
6225    if (1) test_smlal_2d_2s_2s(TyS);
6226    if (1) test_smlal2_2d_4s_4s(TyS);
6227    if (1) test_smlal_4s_4h_4h(TyH);
6228    if (1) test_smlal2_4s_8h_8h(TyH);
6229    if (1) test_smlal_8h_8b_8b(TyB);
6230    if (1) test_smlal2_8h_16b_16b(TyB);
6231    if (1) test_umlal_2d_2s_2s(TyS);
6232    if (1) test_umlal2_2d_4s_4s(TyS);
6233    if (1) test_umlal_4s_4h_4h(TyH);
6234    if (1) test_umlal2_4s_8h_8h(TyH);
6235    if (1) test_umlal_8h_8b_8b(TyB);
6236    if (1) test_umlal2_8h_16b_16b(TyB);
6237    if (1) test_smlsl_2d_2s_2s(TyS);
6238    if (1) test_smlsl2_2d_4s_4s(TyS);
6239    if (1) test_smlsl_4s_4h_4h(TyH);
6240    if (1) test_smlsl2_4s_8h_8h(TyH);
6241    if (1) test_smlsl_8h_8b_8b(TyB);
6242    if (1) test_smlsl2_8h_16b_16b(TyB);
6243    if (1) test_umlsl_2d_2s_2s(TyS);
6244    if (1) test_umlsl2_2d_4s_4s(TyS);
6245    if (1) test_umlsl_4s_4h_4h(TyH);
6246    if (1) test_umlsl2_4s_8h_8h(TyH);
6247    if (1) test_umlsl_8h_8b_8b(TyB);
6248    if (1) test_umlsl2_8h_16b_16b(TyB);
6249    if (1) test_smull_2d_2s_2s(TyS);
6250    if (1) test_smull2_2d_4s_4s(TyS);
6251    if (1) test_smull_4s_4h_4h(TyH);
6252    if (1) test_smull2_4s_8h_8h(TyH);
6253    if (1) test_smull_8h_8b_8b(TyB);
6254    if (1) test_smull2_8h_16b_16b(TyB);
6255    if (1) test_umull_2d_2s_2s(TyS);
6256    if (1) test_umull2_2d_4s_4s(TyS);
6257    if (1) test_umull_4s_4h_4h(TyH);
6258    if (1) test_umull2_4s_8h_8h(TyH);
6259    if (1) test_umull_8h_8b_8b(TyB);
6260    if (1) test_umull2_8h_16b_16b(TyB);
6261 
6262    // smov         w_b[], w_h[], x_b[], x_h[], x_s[]
6263    // umov         w_b[], w_h[],               w_s[], x_d[]
6264    if (1) test_umov_x_d0(TyD);
6265    if (1) test_umov_x_d1(TyD);
6266    if (1) test_umov_w_s0(TyS);
6267    if (1) test_umov_w_s3(TyS);
6268    if (1) test_umov_w_h0(TyH);
6269    if (1) test_umov_w_h7(TyH);
6270    if (1) test_umov_w_b0(TyB);
6271    if (1) test_umov_w_b15(TyB);
6272    if (1) test_smov_x_s0(TyS);
6273    if (1) test_smov_x_s3(TyS);
6274    if (1) test_smov_x_h0(TyH);
6275    if (1) test_smov_x_h7(TyH);
6276    if (1) test_smov_w_h0(TyH);
6277    if (1) test_smov_w_h7(TyH);
6278    if (1) test_smov_x_b0(TyB);
6279    if (1) test_smov_x_b15(TyB);
6280    if (1) test_smov_w_b0(TyB);
6281    if (1) test_smov_w_b15(TyB);
6282 
6283    // sqabs        d,s,h,b
6284    // sqneg        d,s,h,b
6285    if (1) test_sqabs_d_d(TyD);
6286    if (1) test_sqabs_s_s(TyS);
6287    if (1) test_sqabs_h_h(TyH);
6288    if (1) test_sqabs_b_b(TyB);
6289    if (1) test_sqneg_d_d(TyD);
6290    if (1) test_sqneg_s_s(TyS);
6291    if (1) test_sqneg_h_h(TyH);
6292    if (1) test_sqneg_b_b(TyB);
6293 
6294    // sqabs        2d,4s,2s,8h,4h,16b,8b
6295    // sqneg        2d,4s,2s,8h,4h,16b,8b
6296    if (1) test_sqabs_2d_2d(TyD);
6297    if (1) test_sqabs_4s_4s(TyS);
6298    if (1) test_sqabs_2s_2s(TyS);
6299    if (1) test_sqabs_8h_8h(TyH);
6300    if (1) test_sqabs_4h_4h(TyH);
6301    if (1) test_sqabs_16b_16b(TyB);
6302    if (1) test_sqabs_8b_8b(TyB);
6303    if (1) test_sqneg_2d_2d(TyD);
6304    if (1) test_sqneg_4s_4s(TyS);
6305    if (1) test_sqneg_2s_2s(TyS);
6306    if (1) test_sqneg_8h_8h(TyH);
6307    if (1) test_sqneg_4h_4h(TyH);
6308    if (1) test_sqneg_16b_16b(TyB);
6309    if (1) test_sqneg_8b_8b(TyB);
6310 
6311    // sqadd        d,s,h,b
6312    // uqadd        d,s,h,b
6313    // sqsub        d,s,h,b
6314    // uqsub        d,s,h,b
6315    if (1) test_sqadd_d_d_d(TyD);
6316    if (1) test_sqadd_s_s_s(TyS);
6317    if (1) test_sqadd_h_h_h(TyH);
6318    if (1) test_sqadd_b_b_b(TyB);
6319    if (1) test_uqadd_d_d_d(TyD);
6320    if (1) test_uqadd_s_s_s(TyS);
6321    if (1) test_uqadd_h_h_h(TyH);
6322    if (1) test_uqadd_b_b_b(TyB);
6323    if (1) test_sqsub_d_d_d(TyD);
6324    if (1) test_sqsub_s_s_s(TyS);
6325    if (1) test_sqsub_h_h_h(TyH);
6326    if (1) test_sqsub_b_b_b(TyB);
6327    if (1) test_uqsub_d_d_d(TyD);
6328    if (1) test_uqsub_s_s_s(TyS);
6329    if (1) test_uqsub_h_h_h(TyH);
6330    if (1) test_uqsub_b_b_b(TyB);
6331 
6332    // sqadd        2d,4s,2s,8h,4h,16b,8b
6333    // uqadd        2d,4s,2s,8h,4h,16b,8b
6334    // sqsub        2d,4s,2s,8h,4h,16b,8b
6335    // uqsub        2d,4s,2s,8h,4h,16b,8b
6336    if (1) test_sqadd_2d_2d_2d(TyD);
6337    if (1) test_sqadd_4s_4s_4s(TyS);
6338    if (1) test_sqadd_2s_2s_2s(TyS);
6339    if (1) test_sqadd_8h_8h_8h(TyH);
6340    if (1) test_sqadd_4h_4h_4h(TyH);
6341    if (1) test_sqadd_16b_16b_16b(TyB);
6342    if (1) test_sqadd_8b_8b_8b(TyB);
6343    if (1) test_uqadd_2d_2d_2d(TyD);
6344    if (1) test_uqadd_4s_4s_4s(TyS);
6345    if (1) test_uqadd_2s_2s_2s(TyS);
6346    if (1) test_uqadd_8h_8h_8h(TyH);
6347    if (1) test_uqadd_4h_4h_4h(TyH);
6348    if (1) test_uqadd_16b_16b_16b(TyB);
6349    if (1) test_uqadd_8b_8b_8b(TyB);
6350    if (1) test_sqsub_2d_2d_2d(TyD);
6351    if (1) test_sqsub_4s_4s_4s(TyS);
6352    if (1) test_sqsub_2s_2s_2s(TyS);
6353    if (1) test_sqsub_8h_8h_8h(TyH);
6354    if (1) test_sqsub_4h_4h_4h(TyH);
6355    if (1) test_sqsub_16b_16b_16b(TyB);
6356    if (1) test_sqsub_8b_8b_8b(TyB);
6357    if (1) test_uqsub_2d_2d_2d(TyD);
6358    if (1) test_uqsub_4s_4s_4s(TyS);
6359    if (1) test_uqsub_2s_2s_2s(TyS);
6360    if (1) test_uqsub_8h_8h_8h(TyH);
6361    if (1) test_uqsub_4h_4h_4h(TyH);
6362    if (1) test_uqsub_16b_16b_16b(TyB);
6363    if (1) test_uqsub_8b_8b_8b(TyB);
6364 
6365    // sqdmlal      d_s_s[], s_h_h[]
6366    // sqdmlsl      d_s_s[], s_h_h[]
6367    // sqdmull      d_s_s[], s_h_h[]
6368    if (1) test_sqdmlal_d_s_s0(TyS);
6369    if (1) test_sqdmlal_d_s_s3(TyS);
6370    if (1) test_sqdmlal_s_h_h1(TyH);
6371    if (1) test_sqdmlal_s_h_h5(TyH);
6372    if (1) test_sqdmlsl_d_s_s0(TyS);
6373    if (1) test_sqdmlsl_d_s_s3(TyS);
6374    if (1) test_sqdmlsl_s_h_h1(TyH);
6375    if (1) test_sqdmlsl_s_h_h5(TyH);
6376    if (1) test_sqdmull_d_s_s0(TyS);
6377    if (1) test_sqdmull_d_s_s3(TyS);
6378    if (1) test_sqdmull_s_h_h1(TyH);
6379    if (1) test_sqdmull_s_h_h5(TyH);
6380 
6381    // sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
6382    // sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   // sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
6384    if (1) test_sqdmlal_2d_2s_s0(TyS);
6385    if (1) test_sqdmlal_2d_2s_s3(TyS);
6386    if (1) test_sqdmlal2_2d_4s_s1(TyS);
6387    if (1) test_sqdmlal2_2d_4s_s2(TyS);
6388    if (1) test_sqdmlal_4s_4h_h0(TyH);
6389    if (1) test_sqdmlal_4s_4h_h7(TyH);
6390    if (1) test_sqdmlal2_4s_8h_h1(TyH);
6391    if (1) test_sqdmlal2_4s_8h_h4(TyH);
6392    if (1) test_sqdmlsl_2d_2s_s0(TyS);
6393    if (1) test_sqdmlsl_2d_2s_s3(TyS);
6394    if (1) test_sqdmlsl2_2d_4s_s1(TyS);
6395    if (1) test_sqdmlsl2_2d_4s_s2(TyS);
6396    if (1) test_sqdmlsl_4s_4h_h0(TyH);
6397    if (1) test_sqdmlsl_4s_4h_h7(TyH);
6398    if (1) test_sqdmlsl2_4s_8h_h1(TyH);
6399    if (1) test_sqdmlsl2_4s_8h_h4(TyH);
6400    if (1) test_sqdmull_2d_2s_s0(TyS);
6401    if (1) test_sqdmull_2d_2s_s3(TyS);
6402    if (1) test_sqdmull2_2d_4s_s1(TyS);
6403    if (1) test_sqdmull2_2d_4s_s2(TyS);
6404    if (1) test_sqdmull_4s_4h_h0(TyH);
6405    if (1) test_sqdmull_4s_4h_h7(TyH);
6406    if (1) test_sqdmull2_4s_8h_h1(TyH);
6407    if (1) test_sqdmull2_4s_8h_h4(TyH);
6408 
6409    // sqdmlal      d_s_s, s_h_h
6410    // sqdmlsl      d_s_s, s_h_h
6411    // sqdmull      d_s_s, s_h_h
6412    if (1) test_sqdmlal_d_s_s(TyS);
6413    if (1) test_sqdmlal_s_h_h(TyH);
6414    if (1) test_sqdmlsl_d_s_s(TyS);
6415    if (1) test_sqdmlsl_s_h_h(TyH);
6416    if (1) test_sqdmull_d_s_s(TyS);
6417    if (1) test_sqdmull_s_h_h(TyH);
6418 
6419    // sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
6420    // sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
6421    // sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
6422    if (1) test_sqdmlal_2d_2s_2s(TyS);
6423    if (1) test_sqdmlal2_2d_4s_4s(TyS);
6424    if (1) test_sqdmlal_4s_4h_4h(TyH);
6425    if (1) test_sqdmlal2_4s_8h_8h(TyH);
6426    if (1) test_sqdmlsl_2d_2s_2s(TyS);
6427    if (1) test_sqdmlsl2_2d_4s_4s(TyS);
6428    if (1) test_sqdmlsl_4s_4h_4h(TyH);
6429    if (1) test_sqdmlsl2_4s_8h_8h(TyH);
6430    if (1) test_sqdmull_2d_2s_2s(TyS);
6431    if (1) test_sqdmull2_2d_4s_4s(TyS);
6432    if (1) test_sqdmull_4s_4h_4h(TyH);
6433    if (1) test_sqdmull2_4s_8h_8h(TyH);
6434 
6435    // sqdmulh      s_s_s[], h_h_h[]
6436    // sqrdmulh     s_s_s[], h_h_h[]
6437    if (1) test_sqdmulh_s_s_s1(TyS);
6438    if (1) test_sqdmulh_s_s_s3(TyS);
6439    if (1) test_sqdmulh_h_h_h2(TyH);
6440    if (1) test_sqdmulh_h_h_h7(TyH);
6441    if (1) test_sqrdmulh_s_s_s1(TyS);
6442    if (1) test_sqrdmulh_s_s_s3(TyS);
6443    if (1) test_sqrdmulh_h_h_h2(TyH);
6444    if (1) test_sqrdmulh_h_h_h7(TyH);
6445 
6446    // sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
6447    // sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
6448    if (1) test_sqdmulh_4s_4s_s1(TyS);
6449    if (1) test_sqdmulh_4s_4s_s3(TyS);
6450    if (1) test_sqdmulh_2s_2s_s1(TyS);
6451    if (1) test_sqdmulh_2s_2s_s3(TyS);
6452    if (1) test_sqdmulh_8h_8h_h2(TyH);
6453    if (1) test_sqdmulh_8h_8h_h7(TyH);
6454    if (1) test_sqdmulh_4h_4h_h2(TyH);
6455    if (1) test_sqdmulh_4h_4h_h7(TyH);
6456    if (1) test_sqrdmulh_4s_4s_s1(TyS);
6457    if (1) test_sqrdmulh_4s_4s_s3(TyS);
6458    if (1) test_sqrdmulh_2s_2s_s1(TyS);
6459    if (1) test_sqrdmulh_2s_2s_s3(TyS);
6460    if (1) test_sqrdmulh_8h_8h_h2(TyH);
6461    if (1) test_sqrdmulh_8h_8h_h7(TyH);
6462    if (1) test_sqrdmulh_4h_4h_h2(TyH);
6463    if (1) test_sqrdmulh_4h_4h_h7(TyH);
6464 
6465    // sqdmulh      h,s
6466    // sqrdmulh     h,s
6467    if (1) test_sqdmulh_s_s_s(TyS);
6468    if (1) test_sqdmulh_h_h_h(TyH);
6469    if (1) test_sqrdmulh_s_s_s(TyS);
6470    if (1) test_sqrdmulh_h_h_h(TyH);
6471 
6472    // sqdmulh      4s,2s,8h,4h
6473    // sqrdmulh     4s,2s,8h,4h
6474    if (1) test_sqdmulh_4s_4s_4s(TyS);
6475    if (1) test_sqdmulh_2s_2s_2s(TyS);
6476    if (1) test_sqdmulh_8h_8h_8h(TyH);
6477    if (1) test_sqdmulh_4h_4h_4h(TyH);
6478    if (1) test_sqrdmulh_4s_4s_4s(TyS);
6479    if (1) test_sqrdmulh_2s_2s_2s(TyS);
6480    if (1) test_sqrdmulh_8h_8h_8h(TyH);
6481    if (1) test_sqrdmulh_4h_4h_4h(TyH);
6482 
6483    // sqshl (reg)  d,s,h,b
6484    // uqshl (reg)  d,s,h,b
6485    // sqrshl (reg) d,s,h,b
6486    // uqrshl (reg) d,s,h,b
6487    if (1) test_sqshl_d_d_d(TyD);
6488    if (1) test_sqshl_s_s_s(TyS);
6489    if (1) test_sqshl_h_h_h(TyH);
6490    if (1) test_sqshl_b_b_b(TyB);
6491    if (1) test_uqshl_d_d_d(TyD);
6492    if (1) test_uqshl_s_s_s(TyS);
6493    if (1) test_uqshl_h_h_h(TyH);
6494    if (1) test_uqshl_b_b_b(TyB);
6495    if (1) test_sqrshl_d_d_d(TyD);
6496    if (1) test_sqrshl_s_s_s(TyS);
6497    if (1) test_sqrshl_h_h_h(TyH);
6498    if (1) test_sqrshl_b_b_b(TyB);
6499    if (1) test_uqrshl_d_d_d(TyD);
6500    if (1) test_uqrshl_s_s_s(TyS);
6501    if (1) test_uqrshl_h_h_h(TyH);
6502    if (1) test_uqrshl_b_b_b(TyB);
6503 
6504    // sqshl (reg)  2d,4s,2s,8h,4h,16b,8b
6505    // uqshl (reg)  2d,4s,2s,8h,4h,16b,8b
6506    // sqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
6507    // uqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
6508    if (1) test_sqshl_2d_2d_2d(TyD);
6509    if (1) test_sqshl_4s_4s_4s(TyS);
6510    if (1) test_sqshl_2s_2s_2s(TyS);
6511    if (1) test_sqshl_8h_8h_8h(TyH);
6512    if (1) test_sqshl_4h_4h_4h(TyH);
6513    if (1) test_sqshl_16b_16b_16b(TyB);
6514    if (1) test_sqshl_8b_8b_8b(TyB);
6515    if (1) test_uqshl_2d_2d_2d(TyD);
6516    if (1) test_uqshl_4s_4s_4s(TyS);
6517    if (1) test_uqshl_2s_2s_2s(TyS);
6518    if (1) test_uqshl_8h_8h_8h(TyH);
6519    if (1) test_uqshl_4h_4h_4h(TyH);
6520    if (1) test_uqshl_16b_16b_16b(TyB);
6521    if (1) test_uqshl_8b_8b_8b(TyB);
6522    if (1) test_sqrshl_2d_2d_2d(TyD);
6523    if (1) test_sqrshl_4s_4s_4s(TyS);
6524    if (1) test_sqrshl_2s_2s_2s(TyS);
6525    if (1) test_sqrshl_8h_8h_8h(TyH);
6526    if (1) test_sqrshl_4h_4h_4h(TyH);
6527    if (1) test_sqrshl_16b_16b_16b(TyB);
6528    if (1) test_sqrshl_8b_8b_8b(TyB);
6529    if (1) test_uqrshl_2d_2d_2d(TyD);
6530    if (1) test_uqrshl_4s_4s_4s(TyS);
6531    if (1) test_uqrshl_2s_2s_2s(TyS);
6532    if (1) test_uqrshl_8h_8h_8h(TyH);
6533    if (1) test_uqrshl_4h_4h_4h(TyH);
6534    if (1) test_uqrshl_16b_16b_16b(TyB);
6535    if (1) test_uqrshl_8b_8b_8b(TyB);
6536 
6537    // sqrshrn      s_d, h_s, b_h   #imm
6538    // uqrshrn      s_d, h_s, b_h   #imm
6539    // sqshrn       s_d, h_s, b_h   #imm
6540    // uqshrn       s_d, h_s, b_h   #imm
6541    // sqrshrun     s_d, h_s, b_h   #imm
6542    // sqshrun      s_d, h_s, b_h   #imm
6543    if (1) test_sqrshrn_s_d_1(TyD);
6544    if (1) test_sqrshrn_s_d_17(TyD);
6545    if (1) test_sqrshrn_s_d_32(TyD);
6546    if (1) test_sqrshrn_h_s_1(TyS);
6547    if (1) test_sqrshrn_h_s_9(TyS);
6548    if (1) test_sqrshrn_h_s_16(TyS);
6549    if (1) test_sqrshrn_b_h_1(TyH);
6550    if (1) test_sqrshrn_b_h_4(TyH);
6551    if (1) test_sqrshrn_b_h_8(TyH);
6552    if (1) test_uqrshrn_s_d_1(TyD);
6553    if (1) test_uqrshrn_s_d_17(TyD);
6554    if (1) test_uqrshrn_s_d_32(TyD);
6555    if (1) test_uqrshrn_h_s_1(TyS);
6556    if (1) test_uqrshrn_h_s_9(TyS);
6557    if (1) test_uqrshrn_h_s_16(TyS);
6558    if (1) test_uqrshrn_b_h_1(TyH);
6559    if (1) test_uqrshrn_b_h_4(TyH);
6560    if (1) test_uqrshrn_b_h_8(TyH);
6561    if (1) test_sqshrn_s_d_1(TyD);
6562    if (1) test_sqshrn_s_d_17(TyD);
6563    if (1) test_sqshrn_s_d_32(TyD);
6564    if (1) test_sqshrn_h_s_1(TyS);
6565    if (1) test_sqshrn_h_s_9(TyS);
6566    if (1) test_sqshrn_h_s_16(TyS);
6567    if (1) test_sqshrn_b_h_1(TyH);
6568    if (1) test_sqshrn_b_h_4(TyH);
6569    if (1) test_sqshrn_b_h_8(TyH);
6570    if (1) test_uqshrn_s_d_1(TyD);
6571    if (1) test_uqshrn_s_d_17(TyD);
6572    if (1) test_uqshrn_s_d_32(TyD);
6573    if (1) test_uqshrn_h_s_1(TyS);
6574    if (1) test_uqshrn_h_s_9(TyS);
6575    if (1) test_uqshrn_h_s_16(TyS);
6576    if (1) test_uqshrn_b_h_1(TyH);
6577    if (1) test_uqshrn_b_h_4(TyH);
6578    if (1) test_uqshrn_b_h_8(TyH);
6579    if (1) test_sqrshrun_s_d_1(TyD);
6580    if (1) test_sqrshrun_s_d_17(TyD);
6581    if (1) test_sqrshrun_s_d_32(TyD);
6582    if (1) test_sqrshrun_h_s_1(TyS);
6583    if (1) test_sqrshrun_h_s_9(TyS);
6584    if (1) test_sqrshrun_h_s_16(TyS);
6585    if (1) test_sqrshrun_b_h_1(TyH);
6586    if (1) test_sqrshrun_b_h_4(TyH);
6587    if (1) test_sqrshrun_b_h_8(TyH);
6588    if (1) test_sqshrun_s_d_1(TyD);
6589    if (1) test_sqshrun_s_d_17(TyD);
6590    if (1) test_sqshrun_s_d_32(TyD);
6591    if (1) test_sqshrun_h_s_1(TyS);
6592    if (1) test_sqshrun_h_s_9(TyS);
6593    if (1) test_sqshrun_h_s_16(TyS);
6594    if (1) test_sqshrun_b_h_1(TyH);
6595    if (1) test_sqshrun_b_h_4(TyH);
6596    if (1) test_sqshrun_b_h_8(TyH);
6597 
6598    // sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
6599    // uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
6600    // sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
6601    // uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
6602    // sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
6603    // sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
6604    if (1) test_sqrshrn_2s_2d_1(TyD);
6605    if (1) test_sqrshrn_2s_2d_17(TyD);
6606    if (1) test_sqrshrn_2s_2d_32(TyD);
6607    if (1) test_sqrshrn2_4s_2d_1(TyD);
6608    if (1) test_sqrshrn2_4s_2d_17(TyD);
6609    if (1) test_sqrshrn2_4s_2d_32(TyD);
6610    if (1) test_sqrshrn_4h_4s_1(TyS);
6611    if (1) test_sqrshrn_4h_4s_9(TyS);
6612    if (1) test_sqrshrn_4h_4s_16(TyS);
6613    if (1) test_sqrshrn2_8h_4s_1(TyS);
6614    if (1) test_sqrshrn2_8h_4s_9(TyS);
6615    if (1) test_sqrshrn2_8h_4s_16(TyS);
6616    if (1) test_sqrshrn_8b_8h_1(TyH);
6617    if (1) test_sqrshrn_8b_8h_4(TyH);
6618    if (1) test_sqrshrn_8b_8h_8(TyH);
6619    if (1) test_sqrshrn2_16b_8h_1(TyH);
6620    if (1) test_sqrshrn2_16b_8h_4(TyH);
6621    if (1) test_sqrshrn2_16b_8h_8(TyH);
6622    if (1) test_uqrshrn_2s_2d_1(TyD);
6623    if (1) test_uqrshrn_2s_2d_17(TyD);
6624    if (1) test_uqrshrn_2s_2d_32(TyD);
6625    if (1) test_uqrshrn2_4s_2d_1(TyD);
6626    if (1) test_uqrshrn2_4s_2d_17(TyD);
6627    if (1) test_uqrshrn2_4s_2d_32(TyD);
6628    if (1) test_uqrshrn_4h_4s_1(TyS);
6629    if (1) test_uqrshrn_4h_4s_9(TyS);
6630    if (1) test_uqrshrn_4h_4s_16(TyS);
6631    if (1) test_uqrshrn2_8h_4s_1(TyS);
6632    if (1) test_uqrshrn2_8h_4s_9(TyS);
6633    if (1) test_uqrshrn2_8h_4s_16(TyS);
6634    if (1) test_uqrshrn_8b_8h_1(TyH);
6635    if (1) test_uqrshrn_8b_8h_4(TyH);
6636    if (1) test_uqrshrn_8b_8h_8(TyH);
6637    if (1) test_uqrshrn2_16b_8h_1(TyH);
6638    if (1) test_uqrshrn2_16b_8h_4(TyH);
6639    if (1) test_uqrshrn2_16b_8h_8(TyH);
6640    if (1) test_sqshrn_2s_2d_1(TyD);
6641    if (1) test_sqshrn_2s_2d_17(TyD);
6642    if (1) test_sqshrn_2s_2d_32(TyD);
6643    if (1) test_sqshrn2_4s_2d_1(TyD);
6644    if (1) test_sqshrn2_4s_2d_17(TyD);
6645    if (1) test_sqshrn2_4s_2d_32(TyD);
6646    if (1) test_sqshrn_4h_4s_1(TyS);
6647    if (1) test_sqshrn_4h_4s_9(TyS);
6648    if (1) test_sqshrn_4h_4s_16(TyS);
6649    if (1) test_sqshrn2_8h_4s_1(TyS);
6650    if (1) test_sqshrn2_8h_4s_9(TyS);
6651    if (1) test_sqshrn2_8h_4s_16(TyS);
6652    if (1) test_sqshrn_8b_8h_1(TyH);
6653    if (1) test_sqshrn_8b_8h_4(TyH);
6654    if (1) test_sqshrn_8b_8h_8(TyH);
6655    if (1) test_sqshrn2_16b_8h_1(TyH);
6656    if (1) test_sqshrn2_16b_8h_4(TyH);
6657    if (1) test_sqshrn2_16b_8h_8(TyH);
6658    if (1) test_uqshrn_2s_2d_1(TyD);
6659    if (1) test_uqshrn_2s_2d_17(TyD);
6660    if (1) test_uqshrn_2s_2d_32(TyD);
6661    if (1) test_uqshrn2_4s_2d_1(TyD);
6662    if (1) test_uqshrn2_4s_2d_17(TyD);
6663    if (1) test_uqshrn2_4s_2d_32(TyD);
6664    if (1) test_uqshrn_4h_4s_1(TyS);
6665    if (1) test_uqshrn_4h_4s_9(TyS);
6666    if (1) test_uqshrn_4h_4s_16(TyS);
6667    if (1) test_uqshrn2_8h_4s_1(TyS);
6668    if (1) test_uqshrn2_8h_4s_9(TyS);
6669    if (1) test_uqshrn2_8h_4s_16(TyS);
6670    if (1) test_uqshrn_8b_8h_1(TyH);
6671    if (1) test_uqshrn_8b_8h_4(TyH);
6672    if (1) test_uqshrn_8b_8h_8(TyH);
6673    if (1) test_uqshrn2_16b_8h_1(TyH);
6674    if (1) test_uqshrn2_16b_8h_4(TyH);
6675    if (1) test_uqshrn2_16b_8h_8(TyH);
6676    if (1) test_sqrshrun_2s_2d_1(TyD);
6677    if (1) test_sqrshrun_2s_2d_17(TyD);
6678    if (1) test_sqrshrun_2s_2d_32(TyD);
6679    if (1) test_sqrshrun2_4s_2d_1(TyD);
6680    if (1) test_sqrshrun2_4s_2d_17(TyD);
6681    if (1) test_sqrshrun2_4s_2d_32(TyD);
6682    if (1) test_sqrshrun_4h_4s_1(TyS);
6683    if (1) test_sqrshrun_4h_4s_9(TyS);
6684    if (1) test_sqrshrun_4h_4s_16(TyS);
6685    if (1) test_sqrshrun2_8h_4s_1(TyS);
6686    if (1) test_sqrshrun2_8h_4s_9(TyS);
6687    if (1) test_sqrshrun2_8h_4s_16(TyS);
6688    if (1) test_sqrshrun_8b_8h_1(TyH);
6689    if (1) test_sqrshrun_8b_8h_4(TyH);
6690    if (1) test_sqrshrun_8b_8h_8(TyH);
6691    if (1) test_sqrshrun2_16b_8h_1(TyH);
6692    if (1) test_sqrshrun2_16b_8h_4(TyH);
6693    if (1) test_sqrshrun2_16b_8h_8(TyH);
6694    if (1) test_sqshrun_2s_2d_1(TyD);
6695    if (1) test_sqshrun_2s_2d_17(TyD);
6696    if (1) test_sqshrun_2s_2d_32(TyD);
6697    if (1) test_sqshrun2_4s_2d_1(TyD);
6698    if (1) test_sqshrun2_4s_2d_17(TyD);
6699    if (1) test_sqshrun2_4s_2d_32(TyD);
6700    if (1) test_sqshrun_4h_4s_1(TyS);
6701    if (1) test_sqshrun_4h_4s_9(TyS);
6702    if (1) test_sqshrun_4h_4s_16(TyS);
6703    if (1) test_sqshrun2_8h_4s_1(TyS);
6704    if (1) test_sqshrun2_8h_4s_9(TyS);
6705    if (1) test_sqshrun2_8h_4s_16(TyS);
6706    if (1) test_sqshrun_8b_8h_1(TyH);
6707    if (1) test_sqshrun_8b_8h_4(TyH);
6708    if (1) test_sqshrun_8b_8h_8(TyH);
6709    if (1) test_sqshrun2_16b_8h_1(TyH);
6710    if (1) test_sqshrun2_16b_8h_4(TyH);
6711    if (1) test_sqshrun2_16b_8h_8(TyH);
6712 
6713    // sqshl (imm)  d,s,h,b   _#imm
6714    // uqshl (imm)  d,s,h,b   _#imm
6715    // sqshlu (imm) d,s,h,b   _#imm
6716    if (1) test_sqshl_d_d_0(TyD);
6717    if (1) test_sqshl_d_d_32(TyD);
6718    if (1) test_sqshl_d_d_63(TyD);
6719    if (1) test_sqshl_s_s_0(TyS);
6720    if (1) test_sqshl_s_s_16(TyS);
6721    if (1) test_sqshl_s_s_31(TyS);
6722    if (1) test_sqshl_h_h_0(TyH);
6723    if (1) test_sqshl_h_h_8(TyH);
6724    if (1) test_sqshl_h_h_15(TyH);
6725    if (1) test_sqshl_b_b_0(TyB);
6726    if (1) test_sqshl_b_b_1(TyB);
6727    if (1) test_sqshl_b_b_4(TyB);
6728    if (1) test_sqshl_b_b_6(TyB);
6729    if (1) test_sqshl_b_b_7(TyB);
6730    if (1) test_uqshl_d_d_0(TyD);
6731    if (1) test_uqshl_d_d_32(TyD);
6732    if (1) test_uqshl_d_d_63(TyD);
6733    if (1) test_uqshl_s_s_0(TyS);
6734    if (1) test_uqshl_s_s_16(TyS);
6735    if (1) test_uqshl_s_s_31(TyS);
6736    if (1) test_uqshl_h_h_0(TyH);
6737    if (1) test_uqshl_h_h_8(TyH);
6738    if (1) test_uqshl_h_h_15(TyH);
6739    if (1) test_uqshl_b_b_0(TyB);
6740    if (1) test_uqshl_b_b_1(TyB);
6741    if (1) test_uqshl_b_b_4(TyB);
6742    if (1) test_uqshl_b_b_6(TyB);
6743    if (1) test_uqshl_b_b_7(TyB);
6744    if (1) test_sqshlu_d_d_0(TyD);
6745    if (1) test_sqshlu_d_d_32(TyD);
6746    if (1) test_sqshlu_d_d_63(TyD);
6747    if (1) test_sqshlu_s_s_0(TyS);
6748    if (1) test_sqshlu_s_s_16(TyS);
6749    if (1) test_sqshlu_s_s_31(TyS);
6750    if (1) test_sqshlu_h_h_0(TyH);
6751    if (1) test_sqshlu_h_h_8(TyH);
6752    if (1) test_sqshlu_h_h_15(TyH);
6753    if (1) test_sqshlu_b_b_0(TyB);
6754    if (1) test_sqshlu_b_b_1(TyB);
6755    if (1) test_sqshlu_b_b_2(TyB);
6756    if (1) test_sqshlu_b_b_3(TyB);
6757    if (1) test_sqshlu_b_b_4(TyB);
6758    if (1) test_sqshlu_b_b_5(TyB);
6759    if (1) test_sqshlu_b_b_6(TyB);
6760    if (1) test_sqshlu_b_b_7(TyB);
6761 
6762    // sqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
6763    // uqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
6764    // sqshlu (imm) 2d,4s,2s,8h,4h,16b,8b   _#imm
6765    if (1) test_sqshl_2d_2d_0(TyD);
6766    if (1) test_sqshl_2d_2d_32(TyD);
6767    if (1) test_sqshl_2d_2d_63(TyD);
6768    if (1) test_sqshl_4s_4s_0(TyS);
6769    if (1) test_sqshl_4s_4s_16(TyS);
6770    if (1) test_sqshl_4s_4s_31(TyS);
6771    if (1) test_sqshl_2s_2s_0(TyS);
6772    if (1) test_sqshl_2s_2s_16(TyS);
6773    if (1) test_sqshl_2s_2s_31(TyS);
6774    if (1) test_sqshl_8h_8h_0(TyH);
6775    if (1) test_sqshl_8h_8h_8(TyH);
6776    if (1) test_sqshl_8h_8h_15(TyH);
6777    if (1) test_sqshl_4h_4h_0(TyH);
6778    if (1) test_sqshl_4h_4h_8(TyH);
6779    if (1) test_sqshl_4h_4h_15(TyH);
6780    if (1) test_sqshl_16b_16b_0(TyB);
6781    if (1) test_sqshl_16b_16b_3(TyB);
6782    if (1) test_sqshl_16b_16b_7(TyB);
6783    if (1) test_sqshl_8b_8b_0(TyB);
6784    if (1) test_sqshl_8b_8b_3(TyB);
6785    if (1) test_sqshl_8b_8b_7(TyB);
6786    if (1) test_uqshl_2d_2d_0(TyD);
6787    if (1) test_uqshl_2d_2d_32(TyD);
6788    if (1) test_uqshl_2d_2d_63(TyD);
6789    if (1) test_uqshl_4s_4s_0(TyS);
6790    if (1) test_uqshl_4s_4s_16(TyS);
6791    if (1) test_uqshl_4s_4s_31(TyS);
6792    if (1) test_uqshl_2s_2s_0(TyS);
6793    if (1) test_uqshl_2s_2s_16(TyS);
6794    if (1) test_uqshl_2s_2s_31(TyS);
6795    if (1) test_uqshl_8h_8h_0(TyH);
6796    if (1) test_uqshl_8h_8h_8(TyH);
6797    if (1) test_uqshl_8h_8h_15(TyH);
6798    if (1) test_uqshl_4h_4h_0(TyH);
6799    if (1) test_uqshl_4h_4h_8(TyH);
6800    if (1) test_uqshl_4h_4h_15(TyH);
6801    if (1) test_uqshl_16b_16b_0(TyB);
6802    if (1) test_uqshl_16b_16b_3(TyB);
6803    if (1) test_uqshl_16b_16b_7(TyB);
6804    if (1) test_uqshl_8b_8b_0(TyB);
6805    if (1) test_uqshl_8b_8b_3(TyB);
6806    if (1) test_uqshl_8b_8b_7(TyB);
6807    if (1) test_sqshlu_2d_2d_0(TyD);
6808    if (1) test_sqshlu_2d_2d_32(TyD);
6809    if (1) test_sqshlu_2d_2d_63(TyD);
6810    if (1) test_sqshlu_4s_4s_0(TyS);
6811    if (1) test_sqshlu_4s_4s_16(TyS);
6812    if (1) test_sqshlu_4s_4s_31(TyS);
6813    if (1) test_sqshlu_2s_2s_0(TyS);
6814    if (1) test_sqshlu_2s_2s_16(TyS);
6815    if (1) test_sqshlu_2s_2s_31(TyS);
6816    if (1) test_sqshlu_8h_8h_0(TyH);
6817    if (1) test_sqshlu_8h_8h_8(TyH);
6818    if (1) test_sqshlu_8h_8h_15(TyH);
6819    if (1) test_sqshlu_4h_4h_0(TyH);
6820    if (1) test_sqshlu_4h_4h_8(TyH);
6821    if (1) test_sqshlu_4h_4h_15(TyH);
6822    if (1) test_sqshlu_16b_16b_0(TyB);
6823    if (1) test_sqshlu_16b_16b_3(TyB);
6824    if (1) test_sqshlu_16b_16b_7(TyB);
6825    if (1) test_sqshlu_8b_8b_0(TyB);
6826    if (1) test_sqshlu_8b_8b_3(TyB);
6827    if (1) test_sqshlu_8b_8b_7(TyB);
6828 
6829    // sqxtn        s_d,h_s,b_h
6830    // uqxtn        s_d,h_s,b_h
6831    // sqxtun       s_d,h_s,b_h
6832    if (1) test_sqxtn_s_d(TyD);
6833    if (1) test_sqxtn_h_s(TyS);
6834    if (1) test_sqxtn_b_h(TyH);
6835    if (1) test_uqxtn_s_d(TyD);
6836    if (1) test_uqxtn_h_s(TyS);
6837    if (1) test_uqxtn_b_h(TyH);
6838    if (1) test_sqxtun_s_d(TyD);
6839    if (1) test_sqxtun_h_s(TyS);
6840    if (1) test_sqxtun_b_h(TyH);
6841 
6842    // sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
6843    // uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
6844    // sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
6845    if (1) test_sqxtn_2s_2d(TyD);
6846    if (1) test_sqxtn2_4s_2d(TyD);
6847    if (1) test_sqxtn_4h_4s(TyS);
6848    if (1) test_sqxtn2_8h_4s(TyS);
6849    if (1) test_sqxtn_8b_8h(TyH);
6850    if (1) test_sqxtn2_16b_8h(TyH);
6851    if (1) test_uqxtn_2s_2d(TyD);
6852    if (1) test_uqxtn2_4s_2d(TyD);
6853    if (1) test_uqxtn_4h_4s(TyS);
6854    if (1) test_uqxtn2_8h_4s(TyS);
6855    if (1) test_uqxtn_8b_8h(TyH);
6856    if (1) test_uqxtn2_16b_8h(TyH);
6857    if (1) test_sqxtun_2s_2d(TyD);
6858    if (1) test_sqxtun2_4s_2d(TyD);
6859    if (1) test_sqxtun_4h_4s(TyS);
6860    if (1) test_sqxtun2_8h_4s(TyS);
6861    if (1) test_sqxtun_8b_8h(TyH);
6862    if (1) test_sqxtun2_16b_8h(TyH);
6863 
6864    // srhadd       4s,2s,8h,4h,16b,8b
6865    // urhadd       4s,2s,8h,4h,16b,8b
6866    if (1) test_srhadd_4s_4s_4s(TyS);
6867    if (1) test_srhadd_2s_2s_2s(TyS);
6868    if (1) test_srhadd_8h_8h_8h(TyH);
6869    if (1) test_srhadd_4h_4h_4h(TyH);
6870    if (1) test_srhadd_16b_16b_16b(TyB);
6871    if (1) test_srhadd_8b_8b_8b(TyB);
6872    if (1) test_urhadd_4s_4s_4s(TyS);
6873    if (1) test_urhadd_2s_2s_2s(TyS);
6874    if (1) test_urhadd_8h_8h_8h(TyH);
6875    if (1) test_urhadd_4h_4h_4h(TyH);
6876    if (1) test_urhadd_16b_16b_16b(TyB);
6877    if (1) test_urhadd_8b_8b_8b(TyB);
6878 
6879    // sshl (reg)   d
6880    // ushl (reg)   d
6881    if (1) test_sshl_d_d_d(TyD);
6882    if (1) test_ushl_d_d_d(TyD);
6883 
6884    // sshl (reg)   2d,4s,2s,8h,4h,16b,8b
6885    // ushl (reg)   2d,4s,2s,8h,4h,16b,8b
6886    if (1) test_sshl_2d_2d_2d(TyD);
6887    if (1) test_sshl_4s_4s_4s(TyS);
6888    if (1) test_sshl_2s_2s_2s(TyS);
6889    if (1) test_sshl_8h_8h_8h(TyH);
6890    if (1) test_sshl_4h_4h_4h(TyH);
6891    if (1) test_sshl_16b_16b_16b(TyB);
6892    if (1) test_sshl_8b_8b_8b(TyB);
6893    if (1) test_ushl_2d_2d_2d(TyD);
6894    if (1) test_ushl_4s_4s_4s(TyS);
6895    if (1) test_ushl_2s_2s_2s(TyS);
6896    if (1) test_ushl_8h_8h_8h(TyH);
6897    if (1) test_ushl_4h_4h_4h(TyH);
6898    if (1) test_ushl_16b_16b_16b(TyB);
6899    if (1) test_ushl_8b_8b_8b(TyB);
6900 
6901    // shl  (imm)   d
6902    // sshr (imm)   d
6903    // ushr (imm)   d
6904    if (1) test_shl_d_d_0(TyD);
6905    if (1) test_shl_d_d_32(TyD);
6906    if (1) test_shl_d_d_63(TyD);
6907    if (1) test_sshr_d_d_1(TyD);
6908    if (1) test_sshr_d_d_32(TyD);
6909    if (1) test_sshr_d_d_64(TyD);
6910    if (1) test_ushr_d_d_1(TyD);
6911    if (1) test_ushr_d_d_32(TyD);
6912    if (1) test_ushr_d_d_64(TyD);
6913 
6914    // shl  (imm)   16b,8b,8h,4h,4s,2s,2d
6915    // sshr (imm)   2d,4s,2s,8h,4h,16b,8b
6916    // ushr (imm)   2d,4s,2s,8h,4h,16b,8b
6917    if (1) test_shl_2d_2d_0(TyD);
6918    if (1) test_shl_2d_2d_13(TyD);
6919    if (1) test_shl_2d_2d_63(TyD);
6920    if (1) test_shl_4s_4s_0(TyS);
6921    if (1) test_shl_4s_4s_13(TyS);
6922    if (1) test_shl_4s_4s_31(TyS);
6923    if (1) test_shl_2s_2s_0(TyS);
6924    if (1) test_shl_2s_2s_13(TyS);
6925    if (1) test_shl_2s_2s_31(TyS);
6926    if (1) test_shl_8h_8h_0(TyH);
6927    if (1) test_shl_8h_8h_13(TyH);
6928    if (1) test_shl_8h_8h_15(TyH);
6929    if (1) test_shl_4h_4h_0(TyH);
6930    if (1) test_shl_4h_4h_13(TyH);
6931    if (1) test_shl_4h_4h_15(TyH);
6932    if (1) test_shl_16b_16b_0(TyB);
6933    if (1) test_shl_16b_16b_7(TyB);
6934    if (1) test_shl_8b_8b_0(TyB);
6935    if (1) test_shl_8b_8b_7(TyB);
6936    if (1) test_sshr_2d_2d_1(TyD);
6937    if (1) test_sshr_2d_2d_13(TyD);
6938    if (1) test_sshr_2d_2d_64(TyD);
6939    if (1) test_sshr_4s_4s_1(TyS);
6940    if (1) test_sshr_4s_4s_13(TyS);
6941    if (1) test_sshr_4s_4s_32(TyS);
6942    if (1) test_sshr_2s_2s_1(TyS);
6943    if (1) test_sshr_2s_2s_13(TyS);
6944    if (1) test_sshr_2s_2s_32(TyS);
6945    if (1) test_sshr_8h_8h_1(TyH);
6946    if (1) test_sshr_8h_8h_13(TyH);
6947    if (1) test_sshr_8h_8h_16(TyH);
6948    if (1) test_sshr_4h_4h_1(TyH);
6949    if (1) test_sshr_4h_4h_13(TyH);
6950    if (1) test_sshr_4h_4h_16(TyH);
6951    if (1) test_sshr_16b_16b_1(TyB);
6952    if (1) test_sshr_16b_16b_8(TyB);
6953    if (1) test_sshr_8b_8b_1(TyB);
6954    if (1) test_sshr_8b_8b_8(TyB);
6955    if (1) test_ushr_2d_2d_1(TyD);
6956    if (1) test_ushr_2d_2d_13(TyD);
6957    if (1) test_ushr_2d_2d_64(TyD);
6958    if (1) test_ushr_4s_4s_1(TyS);
6959    if (1) test_ushr_4s_4s_13(TyS);
6960    if (1) test_ushr_4s_4s_32(TyS);
6961    if (1) test_ushr_2s_2s_1(TyS);
6962    if (1) test_ushr_2s_2s_13(TyS);
6963    if (1) test_ushr_2s_2s_32(TyS);
6964    if (1) test_ushr_8h_8h_1(TyH);
6965    if (1) test_ushr_8h_8h_13(TyH);
6966    if (1) test_ushr_8h_8h_16(TyH);
6967    if (1) test_ushr_4h_4h_1(TyH);
6968    if (1) test_ushr_4h_4h_13(TyH);
6969    if (1) test_ushr_4h_4h_16(TyH);
6970    if (1) test_ushr_16b_16b_1(TyB);
6971    if (1) test_ushr_16b_16b_8(TyB);
6972    if (1) test_ushr_8b_8b_1(TyB);
6973    if (1) test_ushr_8b_8b_8(TyB);
6974 
6975    // ssra (imm)   d
6976    // usra (imm)   d
6977    if (1) test_ssra_d_d_1(TyD);
6978    if (1) test_ssra_d_d_32(TyD);
6979    if (1) test_ssra_d_d_64(TyD);
6980    if (1) test_usra_d_d_1(TyD);
6981    if (1) test_usra_d_d_32(TyD);
6982    if (1) test_usra_d_d_64(TyD);
6983 
6984    // ssra (imm)   2d,4s,2s,8h,4h,16b,8b
6985    // usra (imm)   2d,4s,2s,8h,4h,16b,8b
6986    if (1) test_ssra_2d_2d_1(TyD);
6987    if (1) test_ssra_2d_2d_32(TyD);
6988    if (1) test_ssra_2d_2d_64(TyD);
6989    if (1) test_ssra_4s_4s_1(TyS);
6990    if (1) test_ssra_4s_4s_16(TyS);
6991    if (1) test_ssra_4s_4s_32(TyS);
6992    if (1) test_ssra_2s_2s_1(TyS);
6993    if (1) test_ssra_2s_2s_16(TyS);
6994    if (1) test_ssra_2s_2s_32(TyS);
6995    if (1) test_ssra_8h_8h_1(TyH);
6996    if (1) test_ssra_8h_8h_8(TyH);
6997    if (1) test_ssra_8h_8h_16(TyH);
6998    if (1) test_ssra_4h_4h_1(TyH);
6999    if (1) test_ssra_4h_4h_8(TyH);
7000    if (1) test_ssra_4h_4h_16(TyH);
7001    if (1) test_ssra_16b_16b_1(TyB);
7002    if (1) test_ssra_16b_16b_3(TyB);
7003    if (1) test_ssra_16b_16b_8(TyB);
7004    if (1) test_ssra_8b_8b_1(TyB);
7005    if (1) test_ssra_8b_8b_3(TyB);
7006    if (1) test_ssra_8b_8b_8(TyB);
7007    if (1) test_usra_2d_2d_1(TyD);
7008    if (1) test_usra_2d_2d_32(TyD);
7009    if (1) test_usra_2d_2d_64(TyD);
7010    if (1) test_usra_4s_4s_1(TyS);
7011    if (1) test_usra_4s_4s_16(TyS);
7012    if (1) test_usra_4s_4s_32(TyS);
7013    if (1) test_usra_2s_2s_1(TyS);
7014    if (1) test_usra_2s_2s_16(TyS);
7015    if (1) test_usra_2s_2s_32(TyS);
7016    if (1) test_usra_8h_8h_1(TyH);
7017    if (1) test_usra_8h_8h_8(TyH);
7018    if (1) test_usra_8h_8h_16(TyH);
7019    if (1) test_usra_4h_4h_1(TyH);
7020    if (1) test_usra_4h_4h_8(TyH);
7021    if (1) test_usra_4h_4h_16(TyH);
7022    if (1) test_usra_16b_16b_1(TyB);
7023    if (1) test_usra_16b_16b_3(TyB);
7024    if (1) test_usra_16b_16b_8(TyB);
7025    if (1) test_usra_8b_8b_1(TyB);
7026    if (1) test_usra_8b_8b_3(TyB);
7027    if (1) test_usra_8b_8b_8(TyB);
7028 
7029    // srshl (reg)  d
7030    // urshl (reg)  d
7031    if (1) test_srshl_d_d_d(TyD);
7032    if (1) test_urshl_d_d_d(TyD);
7033 
7034    // srshl (reg)  2d,4s,2s,8h,4h,16b,8b
7035    // urshl (reg)  2d,4s,2s,8h,4h,16b,8b
7036    if (1) test_srshl_2d_2d_2d(TyD);
7037    if (1) test_srshl_4s_4s_4s(TyS);
7038    if (1) test_srshl_2s_2s_2s(TyS);
7039    if (1) test_srshl_8h_8h_8h(TyH);
7040    if (1) test_srshl_4h_4h_4h(TyH);
7041    if (1) test_srshl_16b_16b_16b(TyB);
7042    if (1) test_srshl_8b_8b_8b(TyB);
7043    if (1) test_urshl_2d_2d_2d(TyD);
7044    if (1) test_urshl_4s_4s_4s(TyS);
7045    if (1) test_urshl_2s_2s_2s(TyS);
7046    if (1) test_urshl_8h_8h_8h(TyH);
7047    if (1) test_urshl_4h_4h_4h(TyH);
7048    if (1) test_urshl_16b_16b_16b(TyB);
7049    if (1) test_urshl_8b_8b_8b(TyB);
7050 
7051    // srshr (imm)  d
7052    // urshr (imm)  d
7053    if (1) test_srshr_d_d_1(TyD);
7054    if (1) test_srshr_d_d_32(TyD);
7055    if (1) test_srshr_d_d_64(TyD);
7056    if (1) test_urshr_d_d_1(TyD);
7057    if (1) test_urshr_d_d_32(TyD);
7058    if (1) test_urshr_d_d_64(TyD);
7059 
7060    // srshr (imm)  2d,4s,2s,8h,4h,16b,8b
7061    // urshr (imm)  2d,4s,2s,8h,4h,16b,8b
7062    if (1) test_srshr_2d_2d_1(TyD);
7063    if (1) test_srshr_2d_2d_32(TyD);
7064    if (1) test_srshr_2d_2d_64(TyD);
7065    if (1) test_srshr_4s_4s_1(TyS);
7066    if (1) test_srshr_4s_4s_16(TyS);
7067    if (1) test_srshr_4s_4s_32(TyS);
7068    if (1) test_srshr_2s_2s_1(TyS);
7069    if (1) test_srshr_2s_2s_16(TyS);
7070    if (1) test_srshr_2s_2s_32(TyS);
7071    if (1) test_srshr_8h_8h_1(TyH);
7072    if (1) test_srshr_8h_8h_8(TyH);
7073    if (1) test_srshr_8h_8h_16(TyH);
7074    if (1) test_srshr_4h_4h_1(TyH);
7075    if (1) test_srshr_4h_4h_8(TyH);
7076    if (1) test_srshr_4h_4h_16(TyH);
7077    if (1) test_srshr_16b_16b_1(TyB);
7078    if (1) test_srshr_16b_16b_3(TyB);
7079    if (1) test_srshr_16b_16b_8(TyB);
7080    if (1) test_srshr_8b_8b_1(TyB);
7081    if (1) test_srshr_8b_8b_3(TyB);
7082    if (1) test_srshr_8b_8b_8(TyB);
7083    if (1) test_urshr_2d_2d_1(TyD);
7084    if (1) test_urshr_2d_2d_32(TyD);
7085    if (1) test_urshr_2d_2d_64(TyD);
7086    if (1) test_urshr_4s_4s_1(TyS);
7087    if (1) test_urshr_4s_4s_16(TyS);
7088    if (1) test_urshr_4s_4s_32(TyS);
7089    if (1) test_urshr_2s_2s_1(TyS);
7090    if (1) test_urshr_2s_2s_16(TyS);
7091    if (1) test_urshr_2s_2s_32(TyS);
7092    if (1) test_urshr_8h_8h_1(TyH);
7093    if (1) test_urshr_8h_8h_8(TyH);
7094    if (1) test_urshr_8h_8h_16(TyH);
7095    if (1) test_urshr_4h_4h_1(TyH);
7096    if (1) test_urshr_4h_4h_8(TyH);
7097    if (1) test_urshr_4h_4h_16(TyH);
7098    if (1) test_urshr_16b_16b_1(TyB);
7099    if (1) test_urshr_16b_16b_3(TyB);
7100    if (1) test_urshr_16b_16b_8(TyB);
7101    if (1) test_urshr_8b_8b_1(TyB);
7102    if (1) test_urshr_8b_8b_3(TyB);
7103    if (1) test_urshr_8b_8b_8(TyB);
7104 
7105    // srsra (imm)  d
7106    // ursra (imm)  d
7107    if (1) test_srsra_d_d_1(TyD);
7108    if (1) test_srsra_d_d_32(TyD);
7109    if (1) test_srsra_d_d_64(TyD);
7110    if (1) test_ursra_d_d_1(TyD);
7111    if (1) test_ursra_d_d_32(TyD);
7112    if (1) test_ursra_d_d_64(TyD);
7113 
7114    // srsra (imm)  2d,4s,2s,8h,4h,16b,8b
7115    // ursra (imm)  2d,4s,2s,8h,4h,16b,8b
7116    if (1) test_srsra_2d_2d_1(TyD);
7117    if (1) test_srsra_2d_2d_32(TyD);
7118    if (1) test_srsra_2d_2d_64(TyD);
7119    if (1) test_srsra_4s_4s_1(TyS);
7120    if (1) test_srsra_4s_4s_16(TyS);
7121    if (1) test_srsra_4s_4s_32(TyS);
7122    if (1) test_srsra_2s_2s_1(TyS);
7123    if (1) test_srsra_2s_2s_16(TyS);
7124    if (1) test_srsra_2s_2s_32(TyS);
7125    if (1) test_srsra_8h_8h_1(TyH);
7126    if (1) test_srsra_8h_8h_8(TyH);
7127    if (1) test_srsra_8h_8h_16(TyH);
7128    if (1) test_srsra_4h_4h_1(TyH);
7129    if (1) test_srsra_4h_4h_8(TyH);
7130    if (1) test_srsra_4h_4h_16(TyH);
7131    if (1) test_srsra_16b_16b_1(TyB);
7132    if (1) test_srsra_16b_16b_3(TyB);
7133    if (1) test_srsra_16b_16b_8(TyB);
7134    if (1) test_srsra_8b_8b_1(TyB);
7135    if (1) test_srsra_8b_8b_3(TyB);
7136    if (1) test_srsra_8b_8b_8(TyB);
7137    if (1) test_ursra_2d_2d_1(TyD);
7138    if (1) test_ursra_2d_2d_32(TyD);
7139    if (1) test_ursra_2d_2d_64(TyD);
7140    if (1) test_ursra_4s_4s_1(TyS);
7141    if (1) test_ursra_4s_4s_16(TyS);
7142    if (1) test_ursra_4s_4s_32(TyS);
7143    if (1) test_ursra_2s_2s_1(TyS);
7144    if (1) test_ursra_2s_2s_16(TyS);
7145    if (1) test_ursra_2s_2s_32(TyS);
7146    if (1) test_ursra_8h_8h_1(TyH);
7147    if (1) test_ursra_8h_8h_8(TyH);
7148    if (1) test_ursra_8h_8h_16(TyH);
7149    if (1) test_ursra_4h_4h_1(TyH);
7150    if (1) test_ursra_4h_4h_8(TyH);
7151    if (1) test_ursra_4h_4h_16(TyH);
7152    if (1) test_ursra_16b_16b_1(TyB);
7153    if (1) test_ursra_16b_16b_3(TyB);
7154    if (1) test_ursra_16b_16b_8(TyB);
7155    if (1) test_ursra_8b_8b_1(TyB);
7156    if (1) test_ursra_8b_8b_3(TyB);
7157    if (1) test_ursra_8b_8b_8(TyB);
7158 
7159    // sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
7160    // ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
7161    if (1) test_sshll_2d_2s_0(TyS);
7162    if (1) test_sshll_2d_2s_15(TyS);
7163    if (1) test_sshll_2d_2s_31(TyS);
7164    if (1) test_sshll2_2d_4s_0(TyS);
7165    if (1) test_sshll2_2d_4s_15(TyS);
7166    if (1) test_sshll2_2d_4s_31(TyS);
7167    if (1) test_sshll_4s_4h_0(TyH);
7168    if (1) test_sshll_4s_4h_7(TyH);
7169    if (1) test_sshll_4s_4h_15(TyH);
7170    if (1) test_sshll2_4s_8h_0(TyH);
7171    if (1) test_sshll2_4s_8h_7(TyH);
7172    if (1) test_sshll2_4s_8h_15(TyH);
7173    if (1) test_sshll_8h_8b_0(TyB);
7174    if (1) test_sshll_8h_8b_3(TyB);
7175    if (1) test_sshll_8h_8b_7(TyB);
7176    if (1) test_sshll2_8h_16b_0(TyB);
7177    if (1) test_sshll2_8h_16b_3(TyB);
7178    if (1) test_sshll2_8h_16b_7(TyB);
7179    if (1) test_ushll_2d_2s_0(TyS);
7180    if (1) test_ushll_2d_2s_15(TyS);
7181    if (1) test_ushll_2d_2s_31(TyS);
7182    if (1) test_ushll2_2d_4s_0(TyS);
7183    if (1) test_ushll2_2d_4s_15(TyS);
7184    if (1) test_ushll2_2d_4s_31(TyS);
7185    if (1) test_ushll_4s_4h_0(TyH);
7186    if (1) test_ushll_4s_4h_7(TyH);
7187    if (1) test_ushll_4s_4h_15(TyH);
7188    if (1) test_ushll2_4s_8h_0(TyH);
7189    if (1) test_ushll2_4s_8h_7(TyH);
7190    if (1) test_ushll2_4s_8h_15(TyH);
7191    if (1) test_ushll_8h_8b_0(TyB);
7192    if (1) test_ushll_8h_8b_3(TyB);
7193    if (1) test_ushll_8h_8b_7(TyB);
7194    if (1) test_ushll2_8h_16b_0(TyB);
7195    if (1) test_ushll2_8h_16b_3(TyB);
7196    if (1) test_ushll2_8h_16b_7(TyB);
7197 
7198    // suqadd  d,s,h,b
7199    // usqadd  d,s,h,b
7200    if (1) test_suqadd_d_d(TyD);
7201    if (1) test_suqadd_s_s(TyS);
7202    if (1) test_suqadd_h_h(TyH);
7203    if (1) test_suqadd_b_b(TyB);
7204    if (1) test_usqadd_d_d(TyD);
7205    if (1) test_usqadd_s_s(TyS);
7206    if (1) test_usqadd_h_h(TyH);
7207    if (1) test_usqadd_b_b(TyB);
7208 
7209    // suqadd  2d,4s,2s,8h,4h,16b,8b
7210    // usqadd  2d,4s,2s,8h,4h,16b,8b
7211    if (1) test_suqadd_2d_2d(TyD);
7212    if (1) test_suqadd_4s_4s(TyS);
7213    if (1) test_suqadd_2s_2s(TyS);
7214    if (1) test_suqadd_8h_8h(TyH);
7215    if (1) test_suqadd_4h_4h(TyH);
7216    if (1) test_suqadd_16b_16b(TyB);
7217    if (1) test_suqadd_8b_8b(TyB);
7218    if (1) test_usqadd_2d_2d(TyD);
7219    if (1) test_usqadd_4s_4s(TyS);
7220    if (1) test_usqadd_2s_2s(TyS);
7221    if (1) test_usqadd_8h_8h(TyH);
7222    if (1) test_usqadd_4h_4h(TyH);
7223    if (1) test_usqadd_16b_16b(TyB);
7224    if (1) test_usqadd_8b_8b(TyB);
7225 
7226    // tbl     8b_{16b}_8b, 16b_{16b}_16b
7227    // tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
7228    // tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
7229    // tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
7230    if (1) test_tbl_16b_1reg(TyB);
7231    if (1) test_tbl_16b_2reg(TyB);
7232    if (1) test_tbl_16b_3reg(TyB);
7233    if (1) test_tbl_16b_4reg(TyB);
7234    if (1) test_tbl_8b_1reg(TyB);
7235    if (1) test_tbl_8b_2reg(TyB);
7236    if (1) test_tbl_8b_3reg(TyB);
7237    if (1) test_tbl_8b_4reg(TyB);
7238 
7239    // tbx     8b_{16b}_8b, 16b_{16b}_16b
7240    // tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
7241    // tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
7242    // tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
7243    if (1) test_tbx_16b_1reg(TyB);
7244    if (1) test_tbx_16b_2reg(TyB);
7245    if (1) test_tbx_16b_3reg(TyB);
7246    if (1) test_tbx_16b_4reg(TyB);
7247    if (1) test_tbx_8b_1reg(TyB);
7248    if (1) test_tbx_8b_2reg(TyB);
7249    if (1) test_tbx_8b_3reg(TyB);
7250    if (1) test_tbx_8b_4reg(TyB);
7251 
7252    // trn1    2d,4s,2s,8h,4h,16b,8b
7253    // trn2    2d,4s,2s,8h,4h,16b,8b
7254    if (1) test_trn1_2d_2d_2d(TyD);
7255    if (1) test_trn1_4s_4s_4s(TyS);
7256    if (1) test_trn1_2s_2s_2s(TyS);
7257    if (1) test_trn1_8h_8h_8h(TyH);
7258    if (1) test_trn1_4h_4h_4h(TyH);
7259    if (1) test_trn1_16b_16b_16b(TyB);
7260    if (1) test_trn1_8b_8b_8b(TyB);
7261    if (1) test_trn2_2d_2d_2d(TyD);
7262    if (1) test_trn2_4s_4s_4s(TyS);
7263    if (1) test_trn2_2s_2s_2s(TyS);
7264    if (1) test_trn2_8h_8h_8h(TyH);
7265    if (1) test_trn2_4h_4h_4h(TyH);
7266    if (1) test_trn2_16b_16b_16b(TyB);
7267    if (1) test_trn2_8b_8b_8b(TyB);
7268 
7269    // urecpe      4s,2s
7270    // ursqrte     4s,2s
7271    if (1) test_urecpe_4s_4s(TyS);
7272    if (1) test_urecpe_2s_2s(TyS);
7273    if (1) test_ursqrte_4s_4s(TyS);
7274    if (1) test_ursqrte_2s_2s(TyS);
7275 
7276    // uzp1      2d,4s,2s,8h,4h,16b,8b
7277    // uzp2      2d,4s,2s,8h,4h,16b,8b
7278    // zip1      2d,4s,2s,8h,4h,16b,8b
7279    // zip2      2d,4s,2s,8h,4h,16b,8b
7280    if (1) test_uzp1_2d_2d_2d(TyD);
7281    if (1) test_uzp1_4s_4s_4s(TyS);
7282    if (1) test_uzp1_2s_2s_2s(TyS);
7283    if (1) test_uzp1_8h_8h_8h(TyH);
7284    if (1) test_uzp1_4h_4h_4h(TyH);
7285    if (1) test_uzp1_16b_16b_16b(TyB);
7286    if (1) test_uzp1_8b_8b_8b(TyB);
7287    if (1) test_uzp2_2d_2d_2d(TyD);
7288    if (1) test_uzp2_4s_4s_4s(TyS);
7289    if (1) test_uzp2_2s_2s_2s(TyS);
7290    if (1) test_uzp2_8h_8h_8h(TyH);
7291    if (1) test_uzp2_4h_4h_4h(TyH);
7292    if (1) test_uzp2_16b_16b_16b(TyB);
7293    if (1) test_uzp2_8b_8b_8b(TyB);
7294    if (1) test_zip1_2d_2d_2d(TyD);
7295    if (1) test_zip1_4s_4s_4s(TyS);
7296    if (1) test_zip1_2s_2s_2s(TyS);
7297    if (1) test_zip1_8h_8h_8h(TyH);
7298    if (1) test_zip1_4h_4h_4h(TyH);
7299    if (1) test_zip1_16b_16b_16b(TyB);
7300    if (1) test_zip1_8b_8b_8b(TyB);
7301    if (1) test_zip2_2d_2d_2d(TyD);
7302    if (1) test_zip2_4s_4s_4s(TyS);
7303    if (1) test_zip2_2s_2s_2s(TyS);
7304    if (1) test_zip2_8h_8h_8h(TyH);
7305    if (1) test_zip2_4h_4h_4h(TyH);
7306    if (1) test_zip2_16b_16b_16b(TyB);
7307    if (1) test_zip2_8b_8b_8b(TyB);
7308 
7309    // xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
7310    if (1) test_xtn_2s_2d(TyD);
7311    if (1) test_xtn2_4s_2d(TyD);
7312    if (1) test_xtn_4h_4s(TyS);
7313    if (1) test_xtn2_8h_4s(TyS);
7314    if (1) test_xtn_8b_8h(TyH);
7315    if (1) test_xtn2_16b_8h(TyH);
7316 
7317    // ======================== MEM ========================
7318 
7319    // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
7320 
7321    // ld1  (multiple 1-element structures to 1/2/3/4 regs)
7322    // ld1  (single 1-element structure to one lane of 1 reg)
7323    // ld1r (single 1-element structure and rep to all lanes of 1 reg)
7324 
7325    // ld2  (multiple 2-element structures to 2 regs)
7326    // ld2  (single 2-element structure to one lane of 2 regs)
7327    // ld2r (single 2-element structure and rep to all lanes of 2 regs)
7328 
7329    // ld3  (multiple 3-element structures to 3 regs)
7330    // ld3  (single 3-element structure to one lane of 3 regs)
7331    // ld3r (single 3-element structure and rep to all lanes of 3 regs)
7332 
7333    // ld4  (multiple 4-element structures to 4 regs)
7334    // ld4  (single 4-element structure to one lane of 4 regs)
7335    // ld4r (single 4-element structure and rep to all lanes of 4 regs)
7336 
7337    // ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   //       addr = reg + simm7 * reg_size
7339 
7340    // ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
7341    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
7342 
7343    // ldr   q,d,s,h,b from addr
7344    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
7345 
7346    // ldr   q,d,s from  pc+#imm19
7347 
7348    // ldr   q,d,s,h,b from addr
   //       addr = [Xn|SP, R <extend> <shift>]
7350 
7351    // ldur  q,d,s,h,b from addr
7352    //       addr = [Xn|SP,#imm] (unscaled offset)
7353 
7354    // st1 (multiple 1-element structures from 1/2/3/4 regs)
7355    // st1 (single 1-element structure for 1 lane of 1 reg)
7356 
7357    // st2 (multiple 2-element structures from 2 regs)
7358    // st2 (single 2-element structure from 1 lane of 2 regs)
7359 
7360    // st3 (multiple 3-element structures from 3 regs)
7361    // st3 (single 3-element structure from 1 lane of 3 regs)
7362 
7363    // st4 (multiple 4-element structures from 4 regs)
7364    // st4 (single 4-element structure from one lane of 4 regs)
7365 
7366    // stnp q_q_addr, d_d_addr, s_s_addr
7367    //      addr = [Xn|SP, #imm]
7368 
7369    // stp  q_q_addr, d_d_addr, s_s_addr
7370    //      addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
7371 
7372    // str  q,d,s,h,b_addr
7373    //      addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
7374 
7375    // str   q,d,s,h,b_addr
   //       addr = [Xn|SP, R <extend> <shift>]
7377 
7378    // stur  q,d,s,h,b_addr
7379    //       addr = [Xn|SP,#imm] (unscaled offset)
7380 
7381    // ======================== CRYPTO ========================
7382 
7383    // aesd       16b (aes single round decryption)
7384    // aese       16b (aes single round encryption)
7385    // aesimc     16b (aes inverse mix columns)
7386    // aesmc      16b (aes mix columns)
7387    if (1) DO50( test_aesd_16b_16b(TyNONE) );
7388    if (1) DO50( test_aese_16b_16b(TyNONE) );
7389    if (1) DO50( test_aesimc_16b_16b(TyNONE) );
7390    if (1) DO50( test_aesmc_16b_16b(TyNONE) );
7391 
7392    // sha1c      q_s_4s
7393    // sha1h      s_s
7394    // sha1m      q_s_4s
7395    // sha1p      q_s_4s
7396    // sha1su0    4s_4s_4s
7397    // sha1su1    4s_4s
7398    if (1) DO50( test_sha1c_q_s_4s(TyNONE) );
7399    if (1) DO50( test_sha1h_s_s(TyNONE) );
7400    if (1) DO50( test_sha1m_q_s_4s(TyNONE) );
7401    if (1) DO50( test_sha1p_q_s_4s(TyNONE) );
7402    if (1) DO50( test_sha1su0_4s_4s_4s(TyNONE) );
7403    if (1) DO50( test_sha1su1_4s_4s(TyNONE) );
7404 
7405    // sha256h2   q_q_4s
7406    // sha256h    q_q_4s
7407    // sha256su0  4s_4s
7408    // sha256su1  4s_4s_4s
7409    if (1) DO50( test_sha256h2_q_q_4s(TyNONE) );
7410    if (1) DO50( test_sha256h_q_q_4s(TyNONE) );
7411    if (1) DO50( test_sha256su0_4s_4s(TyNONE) );
7412    if (1) DO50( test_sha256su1_4s_4s_4s(TyNONE) );
7413 
7414    // pmull{2} 1q_1d_1d,1q_2d_2d
7415    if (1) test_pmull_1q_1d_1d(TyD);
7416    if (1) test_pmull2_1q_2d_2d(TyD);
7417 
7418 return 0;
7419 }
7420 
7421 
7422 /* ---------------------------------------------------------------- */
7423 /* -- Alphabetical list of insns                                 -- */
7424 /* ---------------------------------------------------------------- */
7425 /*
7426    abs      d
7427    abs      2d,4s,2s,8h,4h,16b,8b
7428    add      d
7429    add      2d,4s,2s,8h,4h,16b,8b
7430    addhn    2s.2d.2d, 4s.2d.2d, h_from_s and b_from_h (add and get high half)
7431    addp     d (add pairs, across)
7432    addp     2d,4s,2s,8h,4h,16b,8b
   addv     4s,8h,4h,16b,8b (reduce across vector)
7434    aesd     16b (aes single round decryption)
7435    aese     16b (aes single round encryption)
7436    aesimc   16b (aes inverse mix columns)
7437    aesmc    16b (aes mix columns)
7438    and      16b,8b
7439 
7440    bic      4s,2s,8h,4h (vector, imm)
7441    also movi, mvni, orr
7442 
7443    bic      16b,8b (vector,reg) (bit clear)
7444    bif      16b,8b (vector) (bit insert if false)
7445    bit      16b,8b (vector) (bit insert if true)
7446    bsl      16b,8b (vector) (bit select)
7447 
7448    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
7449    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
7450 
7451    cmeq     d
7452    cmeq     2d,4s,2s,8h,4h,16b,8b
7453    cmeq_z   d
7454    cmeq_z   2d,4s,2s,8h,4h,16b,8b
7455 
7456    cmge     d
7457    cmge     2d,4s,2s,8h,4h,16b,8b
7458    cmge_z   d
7459    cmge_z   2d,4s,2s,8h,4h,16b,8b
7460 
7461    cmgt     d
7462    cmgt     2d,4s,2s,8h,4h,16b,8b
7463    cmgt_z   d
7464    cmgt_z   2d,4s,2s,8h,4h,16b,8b
7465 
7466    cmhi     d
7467    cmhi     2d,4s,2s,8h,4h,16b,8b
7468 
7469    cmhs     d
7470    cmhs     2d,4s,2s,8h,4h,16b,8b
7471 
7472    cmle_z   d
7473    cmle_z   2d,4s,2s,8h,4h,16b,8b
7474 
7475    cmlt_z   d
7476    cmlt_z   2d,4s,2s,8h,4h,16b,8b
7477 
7478    cmtst    d
7479    cmtst    2d,4s,2s,8h,4h,16b,8b
7480 
7481    cnt      16b,8b (population count per byte)
7482 
7483    dup      d,s,h,b (vec elem to scalar)
7484    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
7485    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
7486 
7487    eor      16b,8b (vector)
7488    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
7489 
7490    fabd     d,s
7491    fabd     2d,4s,2s
7492 
7493    fabs     d,s
7494    fabs     2d,4s,2s
7495 
7496    facge    s,d  (floating abs compare GE)
7497    facge    2d,4s,2s
7498 
   facgt    s,d  (floating abs compare GT)
7500    facgt    2d,4s,2s
7501 
7502    fadd     d,s
7503    fadd     2d,4s,2s
7504 
7505    faddp    d,s (floating add pair)
7506    faddp    2d,4s,2s
7507 
7508    fccmp    d,s (floating point conditional quiet compare)
7509    fccmpe   d,s (floating point conditional signaling compare)
7510 
7511    fcmeq    d,s
7512    fcmeq    2d,4s,2s
7513    fcmeq_z  d,s
7514    fcmeq_z  2d,4s,2s
7515 
7516    fcmge    d,s
7517    fcmge    2d,4s,2s
7518    fcmge_z  d,s
7519    fcmge_z  2d,4s,2s
7520 
7521    fcmgt    d,s
7522    fcmgt    2d,4s,2s
7523    fcmgt_z  d,s
7524    fcmgt_z  2d,4s,2s
7525 
7526    fcmle_z  d,s
7527    fcmle_z  2d,4s,2s
7528 
7529    fcmlt_z  d,s
7530    fcmlt_z  2d,4s,2s
7531 
7532    fcmp     d,s (floating point quiet, set flags)
7533    fcmp_z   d,s
7534    fcmpe    d,s (floating point signaling, set flags)
7535    fcmpe_z  d,s
7536 
7537    fcsel    d,s (fp cond select)
7538 
7539    fcvt     s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
7540 
7541    fcvtas   d,s  (fcvt to signed int, nearest, ties away)
7542    fcvtas   2d,4s,2s
7543    fcvtas   w_s,x_s,w_d,x_d
7544 
7545    fcvtau   d,s  (fcvt to unsigned int, nearest, ties away)
7546    fcvtau   2d,4s,2s
7547    fcvtau   w_s,x_s,w_d,x_d
7548 
7549    fcvtl{2} 4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
7550 
7551    fcvtms   d,s  (fcvt to signed int, minus inf)
7552    fcvtms   2d,4s,2s
7553    fcvtms   w_s,x_s,w_d,x_d
7554 
7555    fcvtmu   d,s  (fcvt to unsigned int, minus inf)
7556    fcvtmu   2d,4s,2s
7557    fcvtmu   w_s,x_s,w_d,x_d
7558 
7559    fcvtn{2} 4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
7560 
7561    fcvtns   d,s  (fcvt to signed int, nearest)
7562    fcvtns   2d,4s,2s
7563    fcvtns   w_s,x_s,w_d,x_d
7564 
7565    fcvtnu   d,s  (fcvt to unsigned int, nearest)
7566    fcvtnu   2d,4s,2s
7567    fcvtnu   w_s,x_s,w_d,x_d
7568 
7569    fcvtps   d,s  (fcvt to signed int, plus inf)
7570    fcvtps   2d,4s,2s
7571    fcvtps   w_s,x_s,w_d,x_d
7572 
7573    fcvtpu   d,s  (fcvt to unsigned int, plus inf)
7574    fcvtpu   2d,4s,2s
7575    fcvtpu   w_s,x_s,w_d,x_d
7576 
7577    fcvtxn   s_d (fcvt to lower prec narrow, rounding to odd)
7578    fcvtxn   2s_2d,4s_2d
7579 
7580    fcvtzs   s,d (fcvt to signed fixedpt, to zero) (w/ #fbits)
7581    fcvtzs   2d,4s,2s
7582 
7583    fcvtzs   s,d (fcvt to signed integer, to zero)
7584    fcvtzs   2d,4s,2s
7585 
7586    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed fixedpt, to zero) (w/ #fbits)
7587 
7588    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed integer, to zero)
7589 
7590    fcvtzu   s,d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
7591    fcvtzu   2d,4s,2s
7592 
7593    fcvtzu   s,d (fcvt to unsigned integer, to zero)
7594    fcvtzu   2d,4s,2s
7595 
7596    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
7597 
7598    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned integer, to zero)
7599 
7600    fdiv     d,s
7601    fdiv     2d,4s,2s
7602 
7603    fmadd    d,s
7604    fnmadd   d,s
7605    fnmsub   d,s
7606    fnmul    d,s
7607 
7608    fmax     d,s
7609    fmin     d,s
7610 
7611    fmax     2d,4s,2s
7612    fmin     2d,4s,2s
7613 
7614    fmaxnm   d,s ("max number")
7615    fminnm   d,s
7616 
7617    fmaxnm   2d,4s,2s
7618    fminnm   2d,4s,2s
7619 
7620    fmaxnmp  d_2d,s_2s ("max number pairwise")
7621    fminnmp  d_2d,s_2s
7622 
7623    fmaxnmp  2d,4s,2s
7624    fminnmp  2d,4s,2s
7625 
7626    fmaxnmv  s_4s (maxnum across vector)
7627    fminnmv  s_4s
7628 
7629    fmaxp    d_2d,s_2s (max of a pair)
   fminp    d_2d,s_2s (min of a pair)
7631 
7632    fmaxp    2d,4s,2s  (max pairwise)
7633    fminp    2d,4s,2s
7634 
7635    fmaxv    s_4s (max across vector)
7636    fminv    s_4s
7637 
7638    fmla     d_d_d[],s_s_s[] (by element)
7639    fmla     2d_2d_d[],4s_4s_s[],2s_2s_s[]
7640 
7641    fmla     2d,4s,2s
7642 
7643    fmls     d_d_d[],s_s_s[] (by element)
7644    fmls     2d_2d_d[],4s_4s_s[],2s_2s_s[]
7645 
7646    fmls     2d,4s,2s
7647 
7648    fmov     2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
7649 
7650    fmov     d_d,s_s
7651 
7652    fmov     s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
7653 
7654    fmov     d,s #imm
7655 
7656    fmsub    d,s
7657 
7658    fmul     d_d_d[],s_s_s[]
7659    fmul     2d_2d_d[],4s_4s_s[],2s_2s_s[]
7660 
7661    fmul     2d,4s,2s
7662    fmul     d,s
7663 
7664    fmulx    d_d_d[],s_s_s[]
7665    fmulx    2d_2d_d[],4s_4s_s[],2s_2s_s[]
7666 
7667    fmulx    d,s
7668    fmulx    2d,4s,2s
7669 
7670    fneg     d,s
7671    fneg     2d,4s,2s
7672 
7673    frecpe   d,s (recip estimate)
7674    frecpe   2d,4s,2s
7675 
7676    frecps   d,s (recip step)
7677    frecps   2d,4s,2s
7678 
7679    frecpx   d,s (recip exponent)
7680 
7681    frinta   2d,4s,2s (round to integral, nearest away)
7682    frinta   d,s
7683 
7684    frinti   2d,4s,2s (round to integral, per FPCR)
7685    frinti   d,s
7686 
7687    frintm   2d,4s,2s (round to integral, minus inf)
7688    frintm   d,s
7689 
7690    frintn   2d,4s,2s (round to integral, nearest, to even)
7691    frintn   d,s
7692 
7693    frintp   2d,4s,2s (round to integral, plus inf)
7694    frintp   d,s
7695 
7696    frintx   2d,4s,2s (round to integral exact, per FPCR)
7697    frintx   d,s
7698 
7699    frintz   2d,4s,2s (round to integral, zero)
7700    frintz   d,s
7701 
7702    frsqrte  d,s (est)
7703    frsqrte  2d,4s,2s
7704 
7705    frsqrts  d,s (step)
7706    frsqrts  2d,4s,2s
7707 
7708    fsqrt    d,s
7709    fsqrt    2d,4s,2s
7710 
7711    fsub     d,s
7712    fsub     2d,4s,2s
7713 
7714    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
7715 
7716    ins      d[]_x, s[]_w, h[]_w, b[]_w
7717 
7718    ld1  (multiple 1-element structures to 1/2/3/4 regs)
7719    ld1  (single 1-element structure to one lane of 1 reg)
7720    ld1r (single 1-element structure and rep to all lanes of 1 reg)
7721 
7722    ld2  (multiple 2-element structures to 2 regs)
7723    ld2  (single 2-element structure to one lane of 2 regs)
7724    ld2r (single 2-element structure and rep to all lanes of 2 regs)
7725 
7726    ld3  (multiple 3-element structures to 3 regs)
7727    ld3  (single 3-element structure to one lane of 3 regs)
7728    ld3r (single 3-element structure and rep to all lanes of 3 regs)
7729 
7730    ld4  (multiple 4-element structures to 4 regs)
7731    ld4  (single 4-element structure to one lane of 4 regs)
7732    ld4r (single 4-element structure and rep to all lanes of 4 regs)
7733 
7734    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
7735          addr = reg + uimm7 * reg_size
7736 
7737    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
7738          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
7739 
7740    ldr   q,d,s,h,b from addr
7741          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
7742 
7743    ldr   q,d,s from  pc+#imm19
7744 
7745    ldr   q,d,s,h,b from addr
7746          addr = [Xn|SP, R <extend> <shift>]
7747 
7748    ldur  q,d,s,h,b from addr
7749          addr = [Xn|SP,#imm] (unscaled offset)
7750 
7751    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
7752    mla   4s,2s,8h,4h,16b,8b
7753 
7754    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
7755    mls   4s,2s,8h,4h,16b,8b
7756 
7757    movi  16b,8b   #imm8, LSL #0
7758    movi  8h,4h    #imm8, LSL #0 or 8
7759    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
7760    movi  4s,2s    #imm8, MSL #8 or 16
7761    movi  d,       #imm64
7762    movi  2d,      #imm64
7763 
7764    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
7765    mul   4s,2s,8h,4h,16b,8b
7766 
7767    mvni  8h,4h    #imm8, LSL #0 or 8
7768    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
7769    mvni  4s,2s    #imm8, MSL #8 or 16
7770 
7771    neg   d
7772    neg   2d,4s,2s,8h,4h,16b,8b
7773 
7774    not   16b,8b
7775 
7776    orn   16b,8b
7777 
7778    orr   8h,4h   #imm8, LSL #0 or 8
7779    orr   4s,2s   #imm8, LSL #0, 8, 16 or 24
7780 
7781    orr   16b,8b
7782 
7783    pmul  16b,8b
7784 
7785    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
7786 
7787    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
7788 
7789    rbit    16b,8b
7790    rev16   16b,8b
7791    rev32   16b,8b,8h,4h
7792    rev64   16b,8b,8h,4h,4s,2s
7793 
7794    rshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
7795 
7796    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
7797 
7798    saba      16b,8b,8h,4h,4s,2s
7799    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
7800 
7801    sabd      16b,8b,8h,4h,4s,2s
7802    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
7803 
7804    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
7805 
7806    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
7807 
7808    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
7809 
7810    saddlv    h_16b/8b, s_8h/4h, d_4s
7811 
7812    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
7813 
7814    scvtf     d,s        _#fbits
7815    scvtf     2d,4s,2s   _#fbits
7816 
7817    scvtf     d,s
7818    scvtf     2d,4s,2s
7819 
7820    scvtf     s_w, d_w, s_x, d_x,   _#fbits
7821    scvtf     s_w, d_w, s_x, d_x
7822 
7823    sha1c       q_s_4s
7824    sha1h       s_s
7825    sha1m       q_s_4s
7826    sha1p       q_s_4s
7827    sha1su0     4s_4s_4s
7828    sha1su1     4s_4s
7829    sha256h2    q_q_4s
7830    sha256h     q_q_4s
7831    sha256su0   4s_4s
7832    sha256su1   4s_4s_4s
7833 
7834    shadd       16b,8b,8h,4h,4s,2s
7835 
7836    shl         d_#imm
7837    shl         16b,8b,8h,4h,4s,2s,2d  _#imm
7838 
7839    shll{2}   8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
7840 
7841    shrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
7842 
7843    shsub       16b,8b,8h,4h,4s,2s
7844 
7845    sli         d_#imm
7846    sli         2d,4s,2s,8h,4h,16b,8b  _#imm
7847 
7848    smax        4s,2s,8h,4h,16b,8b
7849 
7850    smaxp       4s,2s,8h,4h,16b,8b
7851 
7852    smaxv       s_4s,h_8h,h_4h,b_16b,b_8b
7853 
7854    smin        4s,2s,8h,4h,16b,8b
7855 
7856    sminp       4s,2s,8h,4h,16b,8b
7857 
7858    sminv       s_4s,h_8h,h_4h,b_16b,b_8b
7859 
7860    smlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
7861    smlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
7862 
7863    smlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
7864    smlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
7865 
7866    smov        w_b[], w_h[], x_b[], x_h[], x_s[]
7867 
7868    smull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
7869    smull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
7870 
7871    sqabs       d,s,h,b
7872    sqabs       2d,4s,2s,8h,4h,16b,8b
7873 
7874    sqadd       d,s,h,b
7875    sqadd       2d,4s,2s,8h,4h,16b,8b
7876 
7877    sqdmlal     d_s_s[], s_h_h[]
7878    sqdmlal{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
7879 
7880    sqdmlal     d_s_s, s_h_h
7881    sqdmlal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
7882 
7883    sqdmlsl     d_s_s[], s_h_h[]
7884    sqdmlsl{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
7885 
7886    sqdmlsl     d_s_s, s_h_h
7887    sqdmlsl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
7888 
7889    sqdmulh     s_s_s[], h_h_h[]
7890    sqdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
7891 
7892    sqdmulh     h,s
7893    sqdmulh     4s,2s,8h,4h
7894 
7895    sqdmull     d_s_s[], s_h_h[]
7896    sqdmull{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
7897 
7898    sqdmull     d_s_s,s_h_h
7899    sqdmull{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
7900 
7901    sqneg       d,s,h,b
7902    sqneg       2d,4s,2s,8h,4h,16b,8b
7903 
7904    sqrdmulh    s_s_s[], h_h_h[]
7905    sqrdmulh    4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
7906 
7907    sqrdmulh    h,s
7908    sqrdmulh    4s,2s,8h,4h
7909 
7910    sqrshl      d,s,h,b
7911    sqrshl      2d,4s,2s,8h,4h,16b,8b
7912 
7913    sqrshrn     s_d, h_s, b_h   #imm
7914    sqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
7915 
7916    sqrshrun     s_d, h_s, b_h   #imm
7917    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
7918 
7919    sqshl        d,s,h,b   _#imm
7920    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
7921 
7922    sqshl        d,s,h,b
7923    sqshl        2d,4s,2s,8h,4h,16b,8b
7924 
7925    sqshlu       d,s,h,b  _#imm
7926    sqshlu       2d,4s,2s,8h,4h,16b,8b  _#imm
7927 
7928    sqshrn       s_d, h_s, b_h   #imm
7929    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
7930 
7931    sqshrun      s_d, h_s, b_h   #imm
7932    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
7933 
7934    sqsub       d,s,h,b
7935    sqsub       2d,4s,2s,8h,4h,16b,8b
7936 
7937    sqxtn       s_d,h_s,b_h
7938    sqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
7939 
7940    sqxtun      s_d,h_s,b_h
7941    sqxtun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h
7942 
7943    srhadd      4s,2s,8h,4h,16b,8b
7944 
7945    sri         d_#imm
7946    sri         2d,4s,2s,8h,4h,16b,8b  _#imm
7947 
7948    srshl (reg) d
7949    srshl       2d,4s,2s,8h,4h,16b,8b
7950 
7951    srshr (imm) d
7952    srshr       2d,4s,2s,8h,4h,16b,8b
7953 
7954    srsra (imm) d
7955    srsra       2d,4s,2s,8h,4h,16b,8b
7956 
7957    sshl (reg)  d
7958    sshl        2d,4s,2s,8h,4h,16b,8b
7959 
7960    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
7961 
7962    sshr (imm)  d
7963    sshr        2d,4s,2s,8h,4h,16b,8b
7964 
7965    ssra (imm)  d
7966    ssra        2d,4s,2s,8h,4h,16b,8b
7967 
7968    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
7969 
7970    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
7971 
7972    st1 (multiple 1-element structures from 1/2/3/4 regs)
7973    st1 (single 1-element structure from 1 lane of 1 reg)
7974 
7975    st2 (multiple 2-element structures from 2 regs)
7976    st2 (single 2-element structure from 1 lane of 2 regs)
7977 
7978    st3 (multiple 3-element structures from 3 regs)
7979    st3 (single 3-element structure from 1 lane of 3 regs)
7980 
7981    st4 (multiple 4-element structures from 4 regs)
7982    st4 (single 4-element structure from one lane of 4 regs)
7983 
7984    stnp q_q_addr, d_d_addr, s_s_addr
7985         addr = [Xn|SP, #imm]
7986 
7987    stp  q_q_addr, d_d_addr, s_s_addr
7988         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
7989 
7990    str  q,d,s,h,b_addr
7991         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
7992 
7993    str   q,d,s,h,b_addr
7994          addr = [Xn|SP, R <extend> <shift>]
7995 
7996    stur  q,d,s,h,b_addr
7997          addr = [Xn|SP,#imm] (unscaled offset)
7998 
7999    sub   d
8000    sub   2d,4s,2s,8h,4h,16b,8b
8001 
8002    subhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
8003 
8004    suqadd  d,s,h,b
8005    suqadd  2d,4s,2s,8h,4h,16b,8b
8006 
8007    tbl     8b_{16b}_8b, 16b_{16b}_16b
8008    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
8009    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
8010    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
8011 
8012    tbx     8b_{16b}_8b, 16b_{16b}_16b
8013    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
8014    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
8015    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
8016 
8017    trn1    2d,4s,2s,8h,4h,16b,8b
8018    trn2    2d,4s,2s,8h,4h,16b,8b
8019 
8020    uaba      16b,8b,8h,4h,4s,2s
8021    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8022 
8023    uabd      16b,8b,8h,4h,4s,2s
8024    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8025 
8026    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
8027 
8028    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8029 
8030    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
8031 
8032    uaddlv    h_16b/8b, s_8h/4h, d_4s
8033 
8034    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
8035 
8036    ucvtf     d,s        _#fbits
8037    ucvtf     2d,4s,2s   _#fbits
8038 
8039    ucvtf     d,s
8040    ucvtf     2d,4s,2s
8041 
8042    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
8043    ucvtf     s_w, d_w, s_x, d_x
8044 
8045    uhadd       16b,8b,8h,4h,4s,2s
8046 
8047    uhsub       16b,8b,8h,4h,4s,2s
8048 
8049    umax        4s,2s,8h,4h,16b,8b
8050 
8051    umaxp       4s,2s,8h,4h,16b,8b
8052 
8053    umaxv       s_4s,h_8h,h_4h,b_16b,b_8b
8054 
8055    umin        4s,2s,8h,4h,16b,8b
8056 
8057    uminp       4s,2s,8h,4h,16b,8b
8058 
8059    uminv       s_4s,h_8h,h_4h,b_16b,b_8b
8060 
8061    umlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
8062    umlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8063 
8064    umlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
8065    umlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8066 
8067    umov        w_b[], w_h[], w_s[], x_d[]
8068 
8069    umull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
8070    umull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8071 
8072    uqadd       d,s,h,b
8073    uqadd       2d,4s,2s,8h,4h,16b,8b
8074 
8075    uqrshl      d,s,h,b
8076    uqrshl      2d,4s,2s,8h,4h,16b,8b
8077 
8078    uqrshrn     s_d, h_s, b_h   #imm
8079    uqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8080 
8081    uqshl        d,s,h,b   _#imm
8082    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
8083 
8084    uqshl        d,s,h,b
8085    uqshl        2d,4s,2s,8h,4h,16b,8b
8086 
8087    uqshrn       s_d, h_s, b_h   #imm
8088    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8089 
8090    uqsub       d,s,h,b
8091    uqsub       2d,4s,2s,8h,4h,16b,8b
8092 
8093    uqxtn       s_d,h_s,b_h
8094    uqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
8095 
8096    urecpe      4s,2s
8097 
8098    urhadd      4s,2s,8h,4h,16b,8b
8099 
8100    urshl (reg) d
8101    urshl       2d,4s,2s,8h,4h,16b,8b
8102 
8103    urshr (imm) d
8104    urshr       2d,4s,2s,8h,4h,16b,8b
8105 
8106    ursqrte     4s,2s
8107 
8108    ursra (imm) d
8109    ursra       2d,4s,2s,8h,4h,16b,8b
8110 
8111    ushl (reg)  d
8112    ushl        2d,4s,2s,8h,4h,16b,8b
8113 
8114    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
8115 
8116    ushr (imm)  d
8117    ushr        2d,4s,2s,8h,4h,16b,8b
8118 
8119    usqadd      d,s,h,b
8120    usqadd      2d,4s,2s,8h,4h,16b,8b
8121 
8122    usra (imm)  d
8123    usra        2d,4s,2s,8h,4h,16b,8b
8124 
8125    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8126 
8127    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
8128 
8129    uzp1      2d,4s,2s,8h,4h,16b,8b
8130    uzp2      2d,4s,2s,8h,4h,16b,8b
8131 
8132    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
8133 
8134    zip1      2d,4s,2s,8h,4h,16b,8b
8135    zip2      2d,4s,2s,8h,4h,16b,8b
8136 */
8137 
8138 
8139 /* ---------------------------------------------------------------- */
8140 /* -- List of insns, grouped somewhat by laneage configuration   -- */
8141 /* ---------------------------------------------------------------- */
8142 /*
8143    ======================== FP ========================
8144 
8145    fabs      d,s
8146    fabs      2d,4s,2s
8147 
8148    fneg      d,s
8149    fneg      2d,4s,2s
8150 
8151    fsqrt     d,s
8152    fsqrt     2d,4s,2s
8153 
8154    fadd      d,s
8155    fsub      d,s
8156 
8157    fadd      2d,4s,2s
8158    fsub      2d,4s,2s
8159 
8160    fabd      d,s
8161    fabd      2d,4s,2s
8162 
8163    faddp     d,s (floating add pair)
8164    faddp     2d,4s,2s
8165 
8166    fccmp     d,s (floating point conditional quiet compare)
8167    fccmpe    d,s (floating point conditional signaling compare)
8168 
8169    fcmeq     d,s
8170    fcmge     d,s
8171    fcmgt     d,s
8172    facgt     d,s  (floating abs compare GT)
8173    facge     d,s  (floating abs compare GE)
8174 
8175    fcmeq     2d,4s,2s
8176    fcmge     2d,4s,2s
8177    fcmgt     2d,4s,2s
8178    facge     2d,4s,2s
8179    facgt     2d,4s,2s
8180 
8181    fcmeq_z   d,s
8182    fcmge_z   d,s
8183    fcmgt_z   d,s
8184    fcmle_z   d,s
8185    fcmlt_z   d,s
8186 
8187    fcmeq_z   2d,4s,2s
8188    fcmge_z   2d,4s,2s
8189    fcmgt_z   2d,4s,2s
8190    fcmle_z   2d,4s,2s
8191    fcmlt_z   2d,4s,2s
8192 
8193    fcmp_z    d,s
8194    fcmpe_z   d,s
8195    fcmp      d,s (floating point quiet, set flags)
8196    fcmpe     d,s (floating point signaling, set flags)
8197 
8198    fcsel     d,s (fp cond select)
8199 
8200    fdiv      d,s
8201    fdiv      2d,4s,2s
8202 
8203    fmadd     d,s
8204    fnmadd    d,s
8205    fmsub     d,s
8206    fnmsub    d,s
8207 
8208    fnmul     d,s
8209 
8210    fmax      d,s
8211    fmin      d,s
8212    fmaxnm    d,s ("max number")
8213    fminnm    d,s
8214 
8215    fmax      2d,4s,2s
8216    fmin      2d,4s,2s
8217    fmaxnm    2d,4s,2s
8218    fminnm    2d,4s,2s
8219 
8220    fmaxnmp   d_2d,s_2s ("max number pairwise")
8221    fminnmp   d_2d,s_2s
8222 
8223    fmaxnmp   2d,4s,2s
8224    fminnmp   2d,4s,2s
8225 
8226    fmaxnmv   s_4s (maxnum across vector)
8227    fminnmv   s_4s
8228 
8229    fmaxp     d_2d,s_2s (max of a pair)
8230    fminp     d_2d,s_2s (min of a pair)
8231 
8232    fmaxp     2d,4s,2s  (max pairwise)
8233    fminp     2d,4s,2s
8234 
8235    fmaxv     s_4s (max across vector)
8236    fminv     s_4s
8237 
8238    fmla      2d,4s,2s
8239    fmls      2d,4s,2s
8240 
8241    fmla      d_d_d[],s_s_s[] (by element)
8242    fmls      d_d_d[],s_s_s[] (by element)
8243 
8244    fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
8245    fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
8246 
8247    fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
8248 
8249    fmov      d_d,s_s
8250 
8251    fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
8252 
8253    fmov      d,s #imm
8254 
8255    fmul      d_d_d[],s_s_s[]
8256    fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
8257 
8258    fmul      2d,4s,2s
8259    fmul      d,s
8260 
8261    fmulx     d_d_d[],s_s_s[]
8262    fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
8263 
8264    fmulx     d,s
8265    fmulx     2d,4s,2s
8266 
8267    frecpe    d,s (recip estimate)
8268    frecpe    2d,4s,2s
8269 
8270    frecps    d,s (recip step)
8271    frecps    2d,4s,2s
8272 
8273    frecpx    d,s (recip exponent)
8274 
8275    frinta    d,s
8276    frinti    d,s
8277    frintm    d,s
8278    frintn    d,s
8279    frintp    d,s
8280    frintx    d,s
8281    frintz    d,s
8282 
8283    frinta    2d,4s,2s (round to integral, nearest away)
8284    frinti    2d,4s,2s (round to integral, per FPCR)
8285    frintm    2d,4s,2s (round to integral, minus inf)
8286    frintn    2d,4s,2s (round to integral, nearest, to even)
8287    frintp    2d,4s,2s (round to integral, plus inf)
8288    frintx    2d,4s,2s (round to integral exact, per FPCR)
8289    frintz    2d,4s,2s (round to integral, zero)
8290 
8291    frsqrte   d,s (est)
8292    frsqrte   2d,4s,2s
8293 
8294    frsqrts   d,s (step)
8295    frsqrts   2d,4s,2s
8296 
8297    ======================== CONV ========================
8298 
8299    fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
8300 
8301    fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
8302 
8303    fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
8304 
8305    fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
8306    fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
8307    fcvtas    2d,4s,2s
8308    fcvtau    2d,4s,2s
8309    fcvtas    w_s,x_s,w_d,x_d
8310    fcvtau    w_s,x_s,w_d,x_d
8311 
8312    fcvtms    d,s  (fcvt to signed int,   minus inf)
8313    fcvtmu    d,s  (fcvt to unsigned int, minus inf)
8314    fcvtms    2d,4s,2s
8315    fcvtmu    2d,4s,2s
8316    fcvtms    w_s,x_s,w_d,x_d
8317    fcvtmu    w_s,x_s,w_d,x_d
8318 
8319    fcvtns    d,s  (fcvt to signed int,   nearest)
8320    fcvtnu    d,s  (fcvt to unsigned int, nearest)
8321    fcvtns    2d,4s,2s
8322    fcvtnu    2d,4s,2s
8323    fcvtns    w_s,x_s,w_d,x_d
8324    fcvtnu    w_s,x_s,w_d,x_d
8325 
8326    fcvtps    d,s  (fcvt to signed int,   plus inf)
8327    fcvtpu    d,s  (fcvt to unsigned int, plus inf)
8328    fcvtps    2d,4s,2s
8329    fcvtpu    2d,4s,2s
8330    fcvtps    w_s,x_s,w_d,x_d
8331    fcvtpu    w_s,x_s,w_d,x_d
8332 
8333    fcvtzs    d,s (fcvt to signed integer,   to zero)
8334    fcvtzu    d,s (fcvt to unsigned integer, to zero)
8335    fcvtzs    2d,4s,2s
8336    fcvtzu    2d,4s,2s
8337    fcvtzs    w_s,x_s,w_d,x_d
8338    fcvtzu    w_s,x_s,w_d,x_d
8339 
8340    fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
8341    fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
8342    fcvtzs    2d,4s,2s
8343    fcvtzu    2d,4s,2s
8344    fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
8345    fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
8346 
8347    fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
8348    fcvtxn    2s_2d,4s_2d
8349 
8350    scvtf     d,s        _#fbits
8351    ucvtf     d,s        _#fbits
8352 
8353    scvtf     2d,4s,2s   _#fbits
8354    ucvtf     2d,4s,2s   _#fbits
8355 
8356    scvtf     d,s
8357    ucvtf     d,s
8358 
8359    scvtf     2d,4s,2s
8360    ucvtf     2d,4s,2s
8361 
8362    scvtf     s_w, d_w, s_x, d_x,   _#fbits
8363    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
8364 
8365    scvtf     s_w, d_w, s_x, d_x
8366    ucvtf     s_w, d_w, s_x, d_x
8367 
8368    ======================== INT ========================
8369 
8370    abs       d
8371    neg       d
8372 
8373    abs       2d,4s,2s,8h,4h,16b,8b
8374    neg       2d,4s,2s,8h,4h,16b,8b
8375 
8376    add       d
8377    sub       d
8378 
8379    add       2d,4s,2s,8h,4h,16b,8b
8380    sub       2d,4s,2s,8h,4h,16b,8b
8381 
8382    addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
8383    subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
8384    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
8385    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
8386 
8387    addp     d (add pairs, across)
8388    addp     2d,4s,2s,8h,4h,16b,8b
8389    addv     4s,8h,4h,16b,8b (reduce across vector)
8390 
8391    and      16b,8b
8392 
8393    orr      8h,4h   #imm8, LSL #0 or 8
8394    orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
8395    bic      8h,4h   #imm8, LSL #0 or 8
8396    bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
8397    also movi, mvni
8398 
8399    bic      16b,8b (vector,reg) (bit clear)
8400    bif      16b,8b (vector) (bit insert if false)
8401    bit      16b,8b (vector) (bit insert if true)
8402    bsl      16b,8b (vector) (bit select)
8403 
8404    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
8405    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
8406 
8407    cmeq     d
8408    cmge     d
8409    cmgt     d
8410    cmhi     d
8411    cmhs     d
8412    cmtst    d
8413 
8414    cmeq     2d,4s,2s,8h,4h,16b,8b
8415    cmge     2d,4s,2s,8h,4h,16b,8b
8416    cmgt     2d,4s,2s,8h,4h,16b,8b
8417    cmhi     2d,4s,2s,8h,4h,16b,8b
8418    cmhs     2d,4s,2s,8h,4h,16b,8b
8419    cmtst    2d,4s,2s,8h,4h,16b,8b
8420 
8421    cmeq_z   d
8422    cmge_z   d
8423    cmgt_z   d
8424    cmle_z   d
8425    cmlt_z   d
8426 
8427    cmeq_z   2d,4s,2s,8h,4h,16b,8b
8428    cmge_z   2d,4s,2s,8h,4h,16b,8b
8429    cmgt_z   2d,4s,2s,8h,4h,16b,8b
8430    cmle_z   2d,4s,2s,8h,4h,16b,8b
8431    cmlt_z   2d,4s,2s,8h,4h,16b,8b
8432 
8433    cnt      16b,8b (population count per byte)
8434 
8435    dup      d,s,h,b (vec elem to scalar)
8436    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
8437    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
8438 
8439    eor      16b,8b (vector)
8440    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
8441 
8442    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
8443 
8444    ins      d[]_x, s[]_w, h[]_w, b[]_w
8445 
8446    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
8447    mla   4s,2s,8h,4h,16b,8b
8448 
8449    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
8450    mls   4s,2s,8h,4h,16b,8b
8451 
8452    movi  16b,8b   #imm8, LSL #0
8453    movi  8h,4h    #imm8, LSL #0 or 8
8454    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
8455    movi  4s,2s    #imm8, MSL #8 or 16
8456    movi  d,       #imm64
8457    movi  2d,      #imm64
8458 
8459    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
8460    mul   4s,2s,8h,4h,16b,8b
8461 
8462    mvni  8h,4h    #imm8, LSL #0 or 8
8463    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
8464    mvni  4s,2s    #imm8, MSL #8 or 16
8465 
8466    not   16b,8b
8467 
8468    orn   16b,8b
8469    orr   16b,8b
8470 
8471    pmul  16b,8b
8472 
8473    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
8474 
8475    rbit    16b,8b
8476    rev16   16b,8b
8477    rev32   16b,8b,8h,4h
8478    rev64   16b,8b,8h,4h,4s,2s
8479 
8480    saba      16b,8b,8h,4h,4s,2s
8481    uaba      16b,8b,8h,4h,4s,2s
8482 
8483    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8484    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8485 
8486    sabd      16b,8b,8h,4h,4s,2s
8487    uabd      16b,8b,8h,4h,4s,2s
8488 
8489    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8490    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8491 
8492    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
8493    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
8494 
8495    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8496    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8497    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8498    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8499 
8500    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
8501    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
8502 
8503    saddlv    h_16b/8b, s_8h/4h, d_4s
8504    uaddlv    h_16b/8b, s_8h/4h, d_4s
8505 
8506    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
8507    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
8508    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
8509    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
8510 
8511    shadd        16b,8b,8h,4h,4s,2s
8512    uhadd        16b,8b,8h,4h,4s,2s
8513    shsub        16b,8b,8h,4h,4s,2s
8514    uhsub        16b,8b,8h,4h,4s,2s
8515 
8516    shl          d_#imm
8517    shl          16b,8b,8h,4h,4s,2s,2d  _#imm
8518 
8519    shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
8520 
8521    shrn{2}      2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
8522    rshrn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
8523 
8524    sli          d_#imm
8525    sri          d_#imm
8526 
8527    sli          2d,4s,2s,8h,4h,16b,8b  _#imm
8528    sri          2d,4s,2s,8h,4h,16b,8b  _#imm
8529 
8530    smax         4s,2s,8h,4h,16b,8b
8531    umax         4s,2s,8h,4h,16b,8b
8532    smin         4s,2s,8h,4h,16b,8b
8533    umin         4s,2s,8h,4h,16b,8b
8534 
8535    smaxp        4s,2s,8h,4h,16b,8b
8536    umaxp        4s,2s,8h,4h,16b,8b
8537    sminp        4s,2s,8h,4h,16b,8b
8538    uminp        4s,2s,8h,4h,16b,8b
8539 
8540    smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
8541    umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
8542    sminv        s_4s,h_8h,h_4h,b_16b,b_8b
8543    uminv        s_4s,h_8h,h_4h,b_16b,b_8b
8544 
8545    smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
8546    umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
8547    smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
8548    umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
8549    smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
8550    umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
8551 
8552    smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8553    umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8554    smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8555    umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8556    smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8557    umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
8558 
8559    smov         w_b[], w_h[], x_b[], x_h[], x_s[]
8560    umov         w_b[], w_h[], w_s[], x_d[]
8561 
8562    sqabs        d,s,h,b
8563    sqneg        d,s,h,b
8564 
8565    sqabs        2d,4s,2s,8h,4h,16b,8b
8566    sqneg        2d,4s,2s,8h,4h,16b,8b
8567 
8568    sqadd        d,s,h,b
8569    uqadd        d,s,h,b
8570    sqsub        d,s,h,b
8571    uqsub        d,s,h,b
8572 
8573    sqadd        2d,4s,2s,8h,4h,16b,8b
8574    uqadd        2d,4s,2s,8h,4h,16b,8b
8575    sqsub        2d,4s,2s,8h,4h,16b,8b
8576    uqsub        2d,4s,2s,8h,4h,16b,8b
8577 
8578    sqdmlal      d_s_s[], s_h_h[]
8579    sqdmlsl      d_s_s[], s_h_h[]
8580    sqdmull      d_s_s[], s_h_h[]
8581 
8582    sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
8583    sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
8584    sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
8585 
8586    sqdmlal      d_s_s, s_h_h
8587    sqdmlsl      d_s_s, s_h_h
8588    sqdmull      d_s_s, s_h_h
8589 
8590    sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
8591    sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
8592    sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
8593 
8594    sqdmulh      s_s_s[], h_h_h[]
8595    sqrdmulh     s_s_s[], h_h_h[]
8596 
8597    sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
8598    sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
8599 
8600    sqdmulh      h,s
8601    sqrdmulh     h,s
8602 
8603    sqdmulh      4s,2s,8h,4h
8604    sqrdmulh     4s,2s,8h,4h
8605 
8606    sqshl        d,s,h,b
8607    uqshl        d,s,h,b
8608    sqrshl       d,s,h,b
8609    uqrshl       d,s,h,b
8610 
8611    sqshl        2d,4s,2s,8h,4h,16b,8b
8612    uqshl        2d,4s,2s,8h,4h,16b,8b
8613    sqrshl       2d,4s,2s,8h,4h,16b,8b
8614    uqrshl       2d,4s,2s,8h,4h,16b,8b
8615 
8616    sqrshrn      s_d, h_s, b_h   #imm
8617    uqrshrn      s_d, h_s, b_h   #imm
8618    sqshrn       s_d, h_s, b_h   #imm
8619    uqshrn       s_d, h_s, b_h   #imm
8620 
8621    sqrshrun     s_d, h_s, b_h   #imm
8622    sqshrun      s_d, h_s, b_h   #imm
8623 
8624    sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8625    uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8626    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8627    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8628 
8629    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8630    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
8631 
8632    sqshl        d,s,h,b   _#imm
8633    uqshl        d,s,h,b   _#imm
8634    sqshlu       d,s,h,b   _#imm
8635 
8636    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
8637    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
8638    sqshlu       2d,4s,2s,8h,4h,16b,8b   _#imm
8639 
8640    sqxtn        s_d,h_s,b_h
8641    uqxtn        s_d,h_s,b_h
8642    sqxtun       s_d,h_s,b_h
8643 
8644    sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
8645    uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
8646    sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
8647 
8648    srhadd       4s,2s,8h,4h,16b,8b
8649    urhadd       4s,2s,8h,4h,16b,8b
8650 
8651    sshl (reg)   d
8652    ushl (reg)   d
8653    sshr (imm)   d
8654    ushr (imm)   d
8655    ssra (imm)   d
8656    usra (imm)   d
8657 
8658    srshl (reg)  d
8659    urshl (reg)  d
8660    srshr (imm)  d
8661    urshr (imm)  d
8662    srsra (imm)  d
8663    ursra (imm)  d
8664 
8665    sshl         2d,4s,2s,8h,4h,16b,8b
8666    ushl         2d,4s,2s,8h,4h,16b,8b
8667    sshr         2d,4s,2s,8h,4h,16b,8b
8668    ushr         2d,4s,2s,8h,4h,16b,8b
8669    ssra         2d,4s,2s,8h,4h,16b,8b
8670    usra         2d,4s,2s,8h,4h,16b,8b
8671 
8672    srshl        2d,4s,2s,8h,4h,16b,8b
8673    urshl        2d,4s,2s,8h,4h,16b,8b
8674    srshr        2d,4s,2s,8h,4h,16b,8b
8675    urshr        2d,4s,2s,8h,4h,16b,8b
8676    srsra        2d,4s,2s,8h,4h,16b,8b
8677    ursra        2d,4s,2s,8h,4h,16b,8b
8678 
8679    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
8680    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
8681 
8682    suqadd  d,s,h,b
8683    suqadd  2d,4s,2s,8h,4h,16b,8b
8684 
8685    tbl     8b_{16b}_8b, 16b_{16b}_16b
8686    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
8687    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
8688    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
8689 
8690    tbx     8b_{16b}_8b, 16b_{16b}_16b
8691    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
8692    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
8693    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
8694 
8695    trn1    2d,4s,2s,8h,4h,16b,8b
8696    trn2    2d,4s,2s,8h,4h,16b,8b
8697 
8698    urecpe      4s,2s
8699 
8700    ursqrte     4s,2s
8701 
8702    usqadd      d,s,h,b
8703    usqadd      2d,4s,2s,8h,4h,16b,8b
8704 
8705    uzp1      2d,4s,2s,8h,4h,16b,8b
8706    uzp2      2d,4s,2s,8h,4h,16b,8b
8707 
8708    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
8709 
8710    zip1      2d,4s,2s,8h,4h,16b,8b
8711    zip2      2d,4s,2s,8h,4h,16b,8b
8712 
8713    ======================== MEM ========================
8714 
8715    ld1  (multiple 1-element structures to 1/2/3/4 regs)
8716    ld1  (single 1-element structure to one lane of 1 reg)
8717    ld1r (single 1-element structure and rep to all lanes of 1 reg)
8718 
8719    ld2  (multiple 2-element structures to 2 regs)
8720    ld2  (single 2-element structure to one lane of 2 regs)
8721    ld2r (single 2-element structure and rep to all lanes of 2 regs)
8722 
8723    ld3  (multiple 3-element structures to 3 regs)
8724    ld3  (single 3-element structure to one lane of 3 regs)
8725    ld3r (single 3-element structure and rep to all lanes of 3 regs)
8726 
8727    ld4  (multiple 4-element structures to 4 regs)
8728    ld4  (single 4-element structure to one lane of 4 regs)
8729    ld4r (single 4-element structure and rep to all lanes of 4 regs)
8730 
8731    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
8732          addr = reg + simm7 * reg_size
8733 
8734    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
8735          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
8736 
8737    ldr   q,d,s,h,b from addr
8738          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
8739 
8740    ldr   q,d,s from  pc+#imm19
8741 
8742    ldr   q,d,s,h,b from addr
8743          addr = [Xn|SP, R <extend> <shift>]
8744 
8745    ldur  q,d,s,h,b from addr
8746          addr = [Xn|SP,#imm] (unscaled offset)
8747 
8748    st1 (multiple 1-element structures from 1/2/3/4 regs)
8749    st1 (single 1-element structure from 1 lane of 1 reg)
8750 
8751    st2 (multiple 2-element structures from 2 regs)
8752    st2 (single 2-element structure from 1 lane of 2 regs)
8753 
8754    st3 (multiple 3-element structures from 3 regs)
8755    st3 (single 3-element structure from 1 lane of 3 regs)
8756 
8757    st4 (multiple 4-element structures from 4 regs)
8758    st4 (single 4-element structure from one lane of 4 regs)
8759 
8760    stnp q_q_addr, d_d_addr, s_s_addr
8761         addr = [Xn|SP, #imm]
8762 
8763    stp  q_q_addr, d_d_addr, s_s_addr
8764         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
8765 
8766    str  q,d,s,h,b_addr
8767         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
8768 
8769    str   q,d,s,h,b_addr
8770          addr = [Xn|SP, R <extend> <shift>]
8771 
8772    stur  q,d,s,h,b_addr
8773          addr = [Xn|SP,#imm] (unscaled offset)
8774 
8775    ======================== CRYPTO ========================
8776 
8777    aesd       16b (aes single round decryption)
8778    aese       16b (aes single round encryption)
8779    aesimc     16b (aes inverse mix columns)
8780    aesmc      16b (aes mix columns)
8781 
8782    sha1c      q_s_4s
8783    sha1h      s_s
8784    sha1m      q_s_4s
8785    sha1p      q_s_4s
8786    sha1su0    4s_4s_4s
8787    sha1su1    4s_4s
8788 
8789    sha256h2   q_q_4s
8790    sha256h    q_q_4s
8791    sha256su0  4s_4s
8792    sha256su1  4s_4s_4s
8793 */
8794