1 /* { dg-do compile } */
2 /* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
3 
4 #include <stdint.h>
5 
/* Define test_<DATA_TYPE>_<OTHER_TYPE>_<NAME>, a noinline/noclone function
   that walks the arrays two elements at a time.

   For each even index I < N:
     - X[I] is Y[I] OP Z1 when PRED[I] != 1, otherwise a plain copy of Y[I];
     - X[I + 1] is the same selection using Z2 and element I + 1;
     - FOO[I] is incremented by 1 and FOO[I + 1] by 2.

   DATA_TYPE is the element type of X, Y and PRED and the type of Z1 and Z2;
   OTHER_TYPE is the element type of FOO.  Both are pasted into the function
   name, so each must be a single identifier.  OP is the binary operator
   applied to the selected elements.

   Element I + 1 is accessed whenever I < N, so for odd N the loop touches
   index N as well.  The file is compile-only (dg-do compile); the dg-final
   directives at the end of the file inspect the generated assembly.  */
6 #define TEST(DATA_TYPE, OTHER_TYPE, NAME, OP)				\
7   void __attribute__ ((noinline, noclone))				\
8   test_##DATA_TYPE##_##OTHER_TYPE##_##NAME (DATA_TYPE *__restrict x,	\
9 					    DATA_TYPE *__restrict y,	\
10 					    DATA_TYPE z1, DATA_TYPE z2,	\
11 					    DATA_TYPE *__restrict pred,	\
12 					    OTHER_TYPE *__restrict foo,	\
13 					    int n)			\
14   {									\
15     for (int i = 0; i < n; i += 2)					\
16       {									\
17 	x[i] = (pred[i] != 1 ? y[i] OP z1 : y[i]);			\
18 	x[i + 1] = (pred[i + 1] != 1 ? y[i + 1] OP z2 : y[i + 1]);	\
19 	foo[i] += 1;							\
20 	foo[i + 1] += 2;						\
21       }									\
22   }
23 
/* Instantiate TEST for an integer DATA_TYPE.  Only division is generated
   for integer element types.  */
24 #define TEST_INT_TYPE(DATA_TYPE, OTHER_TYPE) \
25   TEST (DATA_TYPE, OTHER_TYPE, div, /)
26 
/* Instantiate TEST for a floating-point DATA_TYPE, once for each of
   addition, subtraction, multiplication and division.  */
27 #define TEST_FP_TYPE(DATA_TYPE, OTHER_TYPE) \
28   TEST (DATA_TYPE, OTHER_TYPE, add, +) \
29   TEST (DATA_TYPE, OTHER_TYPE, sub, -) \
30   TEST (DATA_TYPE, OTHER_TYPE, mul, *) \
31   TEST (DATA_TYPE, OTHER_TYPE, div, /)
32 
/* Every (DATA_TYPE, OTHER_TYPE) combination under test.  OTHER_TYPE is
   always a signed integer of 8, 16 or 32 bits; in the integer pairs it is
   narrower than DATA_TYPE.  The instruction counts in the dg-final
   directives at the end of the file are tied to this exact list, so the
   two must be updated together.  */
33 #define TEST_ALL \
34   TEST_INT_TYPE (int32_t, int8_t) \
35   TEST_INT_TYPE (int32_t, int16_t) \
36   TEST_INT_TYPE (uint32_t, int8_t) \
37   TEST_INT_TYPE (uint32_t, int16_t) \
38   TEST_INT_TYPE (int64_t, int8_t) \
39   TEST_INT_TYPE (int64_t, int16_t) \
40   TEST_INT_TYPE (int64_t, int32_t) \
41   TEST_INT_TYPE (uint64_t, int8_t) \
42   TEST_INT_TYPE (uint64_t, int16_t) \
43   TEST_INT_TYPE (uint64_t, int32_t) \
44   TEST_FP_TYPE (float, int8_t) \
45   TEST_FP_TYPE (float, int16_t) \
46   TEST_FP_TYPE (double, int8_t) \
47   TEST_FP_TYPE (double, int16_t) \
48   TEST_FP_TYPE (double, int32_t)
49 
/* Emit the definition of every test function listed in TEST_ALL.  */
50 TEST_ALL
51 
52 /* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
53 /* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
54 /* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
55 /* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
56 
57 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
58 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
59 
60 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
61 /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
62 
63 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
64 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
65 
66 /* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */
67 /* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */
68 
69 /* The load XFAILs for fixed-length SVE account for extra loads from the
70    constant pool.  */
71 /* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z,} 12 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
72 /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7],} 12 } } */
73 
74 /* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z,} 12 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
75 /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7],} 12 } } */
76 
77 /* 72 for x operations (the loads of y and pred), 6 for foo operations.  */
78 /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z,} 78 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
79 /* 36 for x operations (the stores to x), 6 for foo operations.  */
80 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 42 } } */
81 
82 /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z,} 168 } } */
83 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7],} 84 } } */
84 
85 /* { dg-final { scan-assembler-not {\tsel\t} } } */
86