/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */

#include <stdint.h>

/* Function-like wrappers around the C binary operators, so that an
   operation can be passed to DEF_LOOP by name and applied there as
   OP (src1[i], src2[i]).  Each one expands to a single, fully
   parenthesized expression.  max and min mention their arguments twice,
   so they are only suitable for operands without side effects.  */
#define add(A, B) ((A) + (B))
#define sub(A, B) ((A) - (B))
#define mul(A, B) ((A) * (B))
#define div(A, B) ((A) / (B))
#define max(A, B) ((A) > (B) ? (A) : (B))
#define min(A, B) ((A) < (B) ? (A) : (B))
#define and(A, B) ((A) & (B))
#define ior(A, B) ((A) | (B))
#define xor(A, B) ((A) ^ (B))

/* Define the function f_<OP>_<TYPE>.  For each of the N elements it
   stores OP (src1[i], src2[i]) to dest[i] when cond[i] < limit, and
   src2[i] otherwise.  CMPTYPE is the type of both the cond elements and
   limit.  The value selected when the comparison fails is src2[i], i.e.
   the second operand of OP.  The loop is kept in exactly this form
   because the scan-assembler directives at the end of the file check the
   instructions generated for it.  */
#define DEF_LOOP(TYPE, CMPTYPE, OP)				\
  void __attribute__((noipa))					\
  f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond,	\
		   CMPTYPE limit, TYPE *restrict src1,		\
		   TYPE *restrict src2, unsigned int n)		\
  {								\
    for (unsigned int i = 0; i < n; ++i)			\
      {								\
	TYPE truev = OP (src1[i], src2[i]);			\
	dest[i] = cond[i] < limit ? truev : src2[i];		\
      }								\
  }

/* Invoke T (TYPE, TYPE, OP) once for each of the eight integer
   operations: add, sub, mul, max, min, and, ior and xor.  TYPE is passed
   as both the data type and the comparison type.  */
#define FOR_EACH_INT_TYPE(T, TYPE) \
  T (TYPE, TYPE, add) \
  T (TYPE, TYPE, sub) \
  T (TYPE, TYPE, mul) \
  T (TYPE, TYPE, max) \
  T (TYPE, TYPE, min) \
  T (TYPE, TYPE, and) \
  T (TYPE, TYPE, ior) \
  T (TYPE, TYPE, xor)

/* Invoke T (TYPE, CMPTYPE, OP) once for each of the six floating-point
   operations: add, sub, mul, div, and the two built-ins whose names are
   formed by appending SUFFIX to __builtin_fmax and __builtin_fmin.  */
#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
  T (TYPE, CMPTYPE, add) \
  T (TYPE, CMPTYPE, sub) \
  T (TYPE, CMPTYPE, mul) \
  T (TYPE, CMPTYPE, div) \
  T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
  T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)

/* Invoke T for every type/operation combination covered by the test:
   the integer operations for the signed and unsigned 8-, 16-, 32- and
   64-bit types, followed by the floating-point operations for _Float16
   (with uint16_t as the comparison type), float and double.  */
#define FOR_EACH_LOOP(T) \
  FOR_EACH_INT_TYPE (T, int8_t) \
  FOR_EACH_INT_TYPE (T, int16_t) \
  FOR_EACH_INT_TYPE (T, int32_t) \
  FOR_EACH_INT_TYPE (T, int64_t) \
  FOR_EACH_INT_TYPE (T, uint8_t) \
  FOR_EACH_INT_TYPE (T, uint16_t) \
  FOR_EACH_INT_TYPE (T, uint32_t) \
  FOR_EACH_INT_TYPE (T, uint64_t) \
  FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
  FOR_EACH_FP_TYPE (T, float, float, f32) \
  FOR_EACH_FP_TYPE (T, double, double, f64)

60 FOR_EACH_LOOP (DEF_LOOP)
61 
/* { dg-final { scan-assembler-not {\tsel\t} } } */
/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */

/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */

/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */

/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, p[0-7]/m,} 2 } } */

/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */

/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */

/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */

/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */

/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */