1 /* Test vneg works correctly.  */
2 /* { dg-do run } */
3 /* { dg-options "-std=gnu99 -O3 -Wno-div-by-zero --save-temps" } */
4 
5 #include <arm_neon.h>
6 #include <limits.h>
7 
/* force_simd (V1): route V1 through a volatile asm statement whose input
   and output operands both use the "w" constraint, which forces the
   variable to a SIMD register.  It is also a stronger inhibitor of
   optimization than INHIB_OPTIMIZATION below; that is necessary for
   int64x1_t because more of its implementation is written in terms of GCC
   vector extensions (which support constant propagation) than is the case
   for the other types.

   The expansion deliberately keeps its trailing ';': FORCE_SIMD_INST is
   invoked without one.

   Both macros use the __asm__ / __volatile__ spellings so that they are
   also accepted in strict ISO modes, where plain 'asm' is not a keyword.  */
#define force_simd(V1)   __asm__ __volatile__ ("mov %d0, %1.d[0]"	\
	   : "=w"(V1)						\
	   : "w"(V1)						\
	   : /* No clobbers */);

/* INHIB_OPTIMIZATION: empty asm statement with a "memory" clobber.  */
#define INHIB_OPTIMIZATION __asm__ __volatile__ ("" : : : "memory")
17 
/* Shared input values (TESTn) paired with the expected negation (ANSWn).
   Negative values are parenthesized so that each macro expands to a single
   primary expression.  */
#define TEST0 0
#define ANSW0 0

#define TEST1 1
#define ANSW1 (-1)

#define TEST2 (-1)
#define ANSW2 1

#define TEST3 10
#define ANSW3 (-10)

#define TEST4 (-10)
#define ANSW4 10

#define TEST5 0
#define ANSW5 0
31 
32 extern void abort (void);
33 
/* Token pasting in two steps: CONCAT1 lets its arguments be macro-expanded
   first, then CONCAT glues the results together.  */
#define CONCAT(lhs, rhs) lhs##rhs
#define CONCAT1(lhs, rhs) CONCAT (lhs, rhs)

/* Name fragment selected by register length: "_" for 64, "q_" for 128.  */
#define REG_INFEX64 _
#define REG_INFEX128 q_
#define REG_INFEX(width) REG_INFEX##width

/* Intrinsic name suffix, e.g. POSTFIX (64, 8) is _s8 and POSTFIX (128, 32)
   is q_s32.  */
#define POSTFIX(width, bits) \
  CONCAT1 (REG_INFEX (width), s##bits)

/* Element-type lookup keyed by data length.  */
#define DATA_TYPE_32 float
#define DATA_TYPE_64 double
#define DATA_TYPE(bits) DATA_TYPE_##bits

/* Per-(register length, data length) hooks.  Only the 64/64 combination
   expands to force_simd; all the others expand to nothing.  */
#define FORCE_SIMD_INST64_8(v)
#define FORCE_SIMD_INST64_16(v)
#define FORCE_SIMD_INST64_32(v)
#define FORCE_SIMD_INST64_64(v) force_simd (v)
#define FORCE_SIMD_INST128_8(v)
#define FORCE_SIMD_INST128_16(v)
#define FORCE_SIMD_INST128_32(v)
#define FORCE_SIMD_INST128_64(v)

/* Dispatch to the hook matching WIDTH and BITS.  */
#define FORCE_SIMD_INST(width, bits, v) \
  CONCAT1 (FORCE_SIMD_INST, width##_##bits) (v)

/* Build the load and negate intrinsic names, e.g. vld1_s8 / vnegq_s64.  */
#define LOAD_INST(width, bits) \
  CONCAT1 (vld1, POSTFIX (width, bits))
#define NEG_INST(width, bits) \
  CONCAT1 (vneg, POSTFIX (width, bits))
60 
/* RUN_TEST: load TEST_SET into vector A and ANSW_SET into vector B with the
   vld1 intrinsic selected by REG_LEN/DATA_LEN, negate A with the matching
   vneg intrinsic, then compare the first N lanes of A and B.  The enclosing
   function returns 1 on the first lane that differs.  FORCE_SIMD_INST is
   applied to A before and after the negation.  */
#define RUN_TEST(test_set, answ_set, reg_len, data_len, n, a, b)	\
  do									\
    {									\
      int lane;								\
      INHIB_OPTIMIZATION;						\
      (a) = LOAD_INST (reg_len, data_len) (test_set);			\
      (b) = LOAD_INST (reg_len, data_len) (answ_set);			\
      FORCE_SIMD_INST (reg_len, data_len, a)				\
      (a) = NEG_INST (reg_len, data_len) (a);				\
      FORCE_SIMD_INST (reg_len, data_len, a)				\
      for (lane = 0; lane < (n); lane++)				\
	{								\
	  INHIB_OPTIMIZATION;						\
	  if ((a)[lane] != (b)[lane])					\
	    return 1;							\
	}								\
    }									\
  while (0)
77 
/* RUN_TEST_SCALAR: store TEST_VAL in A and ANSW_VAL in B, pass both through
   force_simd, negate A with vnegd_s64 and pass the result through
   force_simd as well.  The enclosing function returns 1 when the result
   differs from B; without this comparison the scalar test could never
   fail.  */
#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
  {                                                   \
    int64_t res;                                      \
    INHIB_OPTIMIZATION;                               \
    a = test_val;                                     \
    b = answ_val;                                     \
    force_simd (b);                                   \
    force_simd (a);                                   \
    res = vnegd_s64 (a);                              \
    force_simd (res);                                 \
    if (res != b)                                     \
      return 1;                                       \
  }
89 
90 int __attribute__ ((noinline))
test_vneg_s8()91 test_vneg_s8 ()
92 {
93   int8x8_t a;
94   int8x8_t b;
95 
96   int8_t test_set0[8] = {
97     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SCHAR_MAX, SCHAR_MIN
98   };
99   int8_t answ_set0[8] = {
100     ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SCHAR_MIN + 1, SCHAR_MIN
101   };
102 
103   RUN_TEST (test_set0, answ_set0, 64, 8, 8, a, b);
104 
105   return 0;
106 }
107 
108 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
109 
110 int __attribute__ ((noinline))
test_vneg_s16()111 test_vneg_s16 ()
112 {
113   int16x4_t a;
114   int16x4_t b;
115 
116   int16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
117   int16_t test_set1[4] = { TEST4, TEST5, SHRT_MAX, SHRT_MIN };
118 
119   int16_t answ_set0[4] = { ANSW0, ANSW1, ANSW2, ANSW3 };
120   int16_t answ_set1[4] = { ANSW4, ANSW5, SHRT_MIN + 1, SHRT_MIN };
121 
122   RUN_TEST (test_set0, answ_set0, 64, 16, 4, a, b);
123   RUN_TEST (test_set1, answ_set1, 64, 16, 4, a, b);
124 
125   return 0;
126 }
127 
128 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 2 } } */
129 
130 int __attribute__ ((noinline))
test_vneg_s32()131 test_vneg_s32 ()
132 {
133   int32x2_t a;
134   int32x2_t b;
135 
136   int32_t test_set0[2] = { TEST0, TEST1 };
137   int32_t test_set1[2] = { TEST2, TEST3 };
138   int32_t test_set2[2] = { TEST4, TEST5 };
139   int32_t test_set3[2] = { INT_MAX, INT_MIN };
140 
141   int32_t answ_set0[2] = { ANSW0, ANSW1 };
142   int32_t answ_set1[2] = { ANSW2, ANSW3 };
143   int32_t answ_set2[2] = { ANSW4, ANSW5 };
144   int32_t answ_set3[2] = { INT_MIN + 1, INT_MIN };
145 
146   RUN_TEST (test_set0, answ_set0, 64, 32, 2, a, b);
147   RUN_TEST (test_set1, answ_set1, 64, 32, 2, a, b);
148   RUN_TEST (test_set2, answ_set2, 64, 32, 2, a, b);
149   RUN_TEST (test_set3, answ_set3, 64, 32, 2, a, b);
150 
151   return 0;
152 }
153 
154 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 4 } } */
155 
156 int __attribute__ ((noinline))
test_vneg_s64()157 test_vneg_s64 ()
158 {
159   int64x1_t a;
160   int64x1_t b;
161 
162   int64_t test_set0[1] = { TEST0 };
163   int64_t test_set1[1] = { TEST1 };
164   int64_t test_set2[1] = { TEST2 };
165   int64_t test_set3[1] = { TEST3 };
166   int64_t test_set4[1] = { TEST4 };
167   int64_t test_set5[1] = { TEST5 };
168   int64_t test_set6[1] = { LLONG_MAX };
169   int64_t test_set7[1] = { LLONG_MIN };
170 
171   int64_t answ_set0[1] = { ANSW0 };
172   int64_t answ_set1[1] = { ANSW1 };
173   int64_t answ_set2[1] = { ANSW2 };
174   int64_t answ_set3[1] = { ANSW3 };
175   int64_t answ_set4[1] = { ANSW4 };
176   int64_t answ_set5[1] = { ANSW5 };
177   int64_t answ_set6[1] = { LLONG_MIN + 1 };
178   int64_t answ_set7[1] = { LLONG_MIN };
179 
180   RUN_TEST (test_set0, answ_set0, 64, 64, 1, a, b);
181   RUN_TEST (test_set1, answ_set1, 64, 64, 1, a, b);
182   RUN_TEST (test_set2, answ_set2, 64, 64, 1, a, b);
183   RUN_TEST (test_set3, answ_set3, 64, 64, 1, a, b);
184   RUN_TEST (test_set4, answ_set4, 64, 64, 1, a, b);
185   RUN_TEST (test_set5, answ_set5, 64, 64, 1, a, b);
186   RUN_TEST (test_set6, answ_set6, 64, 64, 1, a, b);
187   RUN_TEST (test_set7, answ_set7, 64, 64, 1, a, b);
188 
189   return 0;
190 }
191 
192 int __attribute__ ((noinline))
test_vnegd_s64()193 test_vnegd_s64 ()
194 {
195   int64_t a, b;
196 
197   RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
198   RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
199   RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
200   RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
201   RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
202   RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
203   RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
204   RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
205 
206   return 0;
207 }
208 
209 /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 16 } } */
210 
211 int __attribute__ ((noinline))
test_vnegq_s8()212 test_vnegq_s8 ()
213 {
214   int8x16_t a;
215   int8x16_t b;
216 
217   int8_t test_set0[16] = {
218     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SCHAR_MAX, SCHAR_MIN,
219     4, 8, 15, 16, 23, 42, -1, -2
220   };
221 
222   int8_t answ_set0[16] = {
223     ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SCHAR_MIN + 1, SCHAR_MIN,
224     -4, -8, -15, -16, -23, -42, 1, 2
225   };
226 
227   RUN_TEST (test_set0, answ_set0, 128, 8, 8, a, b);
228 
229   return 0;
230 }
231 
232 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
233 
234 int __attribute__ ((noinline))
test_vnegq_s16()235 test_vnegq_s16 ()
236 {
237   int16x8_t a;
238   int16x8_t b;
239 
240   int16_t test_set0[8] = {
241     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SHRT_MAX, SHRT_MIN
242   };
243   int16_t answ_set0[8] = {
244     ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SHRT_MIN + 1, SHRT_MIN
245   };
246 
247   RUN_TEST (test_set0, answ_set0, 128, 16, 8, a, b);
248 
249   return 0;
250 }
251 
252 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
253 
254 int __attribute__ ((noinline))
test_vnegq_s32()255 test_vnegq_s32 ()
256 {
257   int32x4_t a;
258   int32x4_t b;
259 
260   int32_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
261   int32_t test_set1[4] = { TEST4, TEST5, INT_MAX, INT_MIN };
262 
263   int32_t answ_set0[4] = { ANSW0, ANSW1, ANSW2, ANSW3 };
264   int32_t answ_set1[4] = { ANSW4, ANSW5, INT_MIN + 1, INT_MIN };
265 
266   RUN_TEST (test_set0, answ_set0, 128, 32, 4, a, b);
267   RUN_TEST (test_set1, answ_set1, 128, 32, 4, a, b);
268 
269   return 0;
270 }
271 
272 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 2 } } */
273 
274 int __attribute__ ((noinline))
test_vnegq_s64()275 test_vnegq_s64 ()
276 {
277   int64x2_t a;
278   int64x2_t b;
279 
280   int64_t test_set0[2] = { TEST0, TEST1 };
281   int64_t test_set1[2] = { TEST2, TEST3 };
282   int64_t test_set2[2] = { TEST4, TEST5 };
283   int64_t test_set3[2] = { LLONG_MAX, LLONG_MIN };
284 
285   int64_t answ_set0[2] = { ANSW0, ANSW1 };
286   int64_t answ_set1[2] = { ANSW2, ANSW3 };
287   int64_t answ_set2[2] = { ANSW4, ANSW5 };
288   int64_t answ_set3[2] = { LLONG_MIN + 1, LLONG_MIN };
289 
290   RUN_TEST (test_set0, answ_set0, 128, 64, 2, a, b);
291   RUN_TEST (test_set1, answ_set1, 128, 64, 2, a, b);
292   RUN_TEST (test_set2, answ_set2, 128, 64, 2, a, b);
293   RUN_TEST (test_set3, answ_set3, 128, 64, 2, a, b);
294 
295   return 0;
296 }
297 
298 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 4 } } */
299 
/* Run every vneg check in turn and abort on the first one that returns
   non-zero.  Returns 0 when all of them pass.  */
int
main (int argc, char **argv)
{
  /* Short-circuit evaluation keeps the original order and stops calling
     further checks once one has reported a failure.  */
  if (test_vneg_s8 ()
      || test_vneg_s16 ()
      || test_vneg_s32 ()
      || test_vneg_s64 ()
      || test_vnegd_s64 ()
      || test_vnegq_s8 ()
      || test_vnegq_s16 ()
      || test_vnegq_s32 ()
      || test_vnegq_s64 ())
    abort ();

  return 0;
}
332 
333