/* Test vdup_lane intrinsics work correctly. */
/* { dg-do run } */
/* { dg-options "--save-temps -O1" } */

#include <arm_neon.h>

extern void abort (void);

/* Noinline wrapper so vdup_lane_f32 (lane 0) is compiled in isolation
   and its "dup" instruction can be counted by the scan-assembler
   checks at the end of the file.  */
float32x2_t __attribute__ ((noinline))
wrap_vdup_lane_f32_0 (float32x2_t a)
{
  return vdup_lane_f32 (a, 0);
}

/* Noinline wrapper for vdup_lane_f32 with lane 1.  */
float32x2_t __attribute__ ((noinline))
wrap_vdup_lane_f32_1 (float32x2_t a)
{
  return vdup_lane_f32 (a, 1);
}

/* Check vdup_lane_f32: duplicating lane 0 (then lane 1) of A must
   replicate that lane into every element of the 64-bit result.
   Returns 0 on success, 1 on mismatch.  */
int __attribute__ ((noinline))
test_vdup_lane_f32 ()
{
  float32x2_t a;
  float32x2_t b;
  int i;
  float32_t c[2] = { 0.0 , 3.14 };
  float32_t d[2];

  a = vld1_f32 (c);
  /* Lane 0: every element of the result must equal c[0].  */
  b = wrap_vdup_lane_f32_0 (a);
  vst1_f32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1: every element of the result must equal c[1].  */
  b = wrap_vdup_lane_f32_1 (a);
  vst1_f32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdupq_lane_f32 (128-bit result) with lane 0.  */
float32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_f32_0 (float32x2_t a)
{
  return vdupq_lane_f32 (a, 0);
}

/* Noinline wrapper for vdupq_lane_f32 (128-bit result) with lane 1.  */
float32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_f32_1 (float32x2_t a)
{
  return vdupq_lane_f32 (a, 1);
}

/* Check vdupq_lane_f32: each lane of the 64-bit input must be
   replicated across all four elements of the 128-bit result.
   Returns 0 on success, 1 on mismatch.  */
int __attribute__ ((noinline))
test_vdupq_lane_f32 ()
{
  float32x2_t a;
  float32x4_t b;
  int i;
  float32_t c[2] = { 0.0 , 3.14 };
  float32_t d[4];

  a = vld1_f32 (c);
  /* Lane 0 broadcast to all four result elements.  */
  b = wrap_vdupq_lane_f32_0 (a);
  vst1q_f32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1 broadcast to all four result elements.  */
  b = wrap_vdupq_lane_f32_1 (a);
  vst1q_f32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdup_lane_s8 with lane 0.  */
int8x8_t __attribute__ ((noinline))
wrap_vdup_lane_s8_0 (int8x8_t a)
{
  return vdup_lane_s8 (a, 0);
}

/* Noinline wrapper for vdup_lane_s8 with lane 1.  */
int8x8_t __attribute__ ((noinline))
wrap_vdup_lane_s8_1 (int8x8_t a)
{
  return vdup_lane_s8 (a, 1);
}

/* Check vdup_lane_s8: lanes 0 and 1 must each broadcast across all
   eight elements of the 64-bit result.  Returns 0 on success, 1 on
   mismatch.  */
int __attribute__ ((noinline))
test_vdup_lane_s8 ()
{
  int8x8_t a;
  int8x8_t b;
  int i;
  /* Only two first cases are interesting. */
  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int8_t d[8];

  a = vld1_s8 (c);
  /* Lane 0 broadcast.  */
  b = wrap_vdup_lane_s8_0 (a);
  vst1_s8 (d, b);
  for (i = 0; i < 8; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1 broadcast.  */
  b = wrap_vdup_lane_s8_1 (a);
  vst1_s8 (d, b);
  for (i = 0; i < 8; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdupq_lane_s8 (128-bit result) with lane 0.  */
int8x16_t __attribute__ ((noinline))
wrap_vdupq_lane_s8_0 (int8x8_t a)
{
  return vdupq_lane_s8 (a, 0);
}

/* Noinline wrapper for vdupq_lane_s8 (128-bit result) with lane 1.  */
int8x16_t __attribute__ ((noinline))
wrap_vdupq_lane_s8_1 (int8x8_t a)
{
  return vdupq_lane_s8 (a, 1);
}

/* Check vdupq_lane_s8: lanes 0 and 1 of the 64-bit input must each
   broadcast across all sixteen elements of the 128-bit result.
   Returns 0 on success, 1 on mismatch.  */
int __attribute__ ((noinline))
test_vdupq_lane_s8 ()
{
  int8x8_t a;
  int8x16_t b;
  int i;
  /* Only two first cases are interesting. */
  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int8_t d[16];

  a = vld1_s8 (c);
  /* Lane 0 broadcast.  */
  b = wrap_vdupq_lane_s8_0 (a);
  vst1q_s8 (d, b);
  for (i = 0; i < 16; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1 broadcast.  */
  b = wrap_vdupq_lane_s8_1 (a);
  vst1q_s8 (d, b);
  for (i = 0; i < 16; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdup_lane_s16 with lane 0.  */
int16x4_t __attribute__ ((noinline))
wrap_vdup_lane_s16_0 (int16x4_t a)
{
  return vdup_lane_s16 (a, 0);
}

/* Noinline wrapper for vdup_lane_s16 with lane 1.  */
int16x4_t __attribute__ ((noinline))
wrap_vdup_lane_s16_1 (int16x4_t a)
{
  return vdup_lane_s16 (a, 1);
}

/* Check vdup_lane_s16: lanes 0 and 1 must each broadcast across all
   four elements of the 64-bit result.  Returns 0 on success, 1 on
   mismatch.  */
int __attribute__ ((noinline))
test_vdup_lane_s16 ()
{
  int16x4_t a;
  int16x4_t b;
  int i;
  /* Only two first cases are interesting. */
  int16_t c[4] = { 0, 1, 2, 3 };
  int16_t d[4];

  a = vld1_s16 (c);
  /* Lane 0 broadcast.  */
  b = wrap_vdup_lane_s16_0 (a);
  vst1_s16 (d, b);
  for (i = 0; i < 4; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1 broadcast.  */
  b = wrap_vdup_lane_s16_1 (a);
  vst1_s16 (d, b);
  for (i = 0; i < 4; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdupq_lane_s16 (128-bit result) with lane 0.  */
int16x8_t __attribute__ ((noinline))
wrap_vdupq_lane_s16_0 (int16x4_t a)
{
  return vdupq_lane_s16 (a, 0);
}

/* Noinline wrapper for vdupq_lane_s16 (128-bit result) with lane 1.  */
int16x8_t __attribute__ ((noinline))
wrap_vdupq_lane_s16_1 (int16x4_t a)
{
  return vdupq_lane_s16 (a, 1);
}

/* Check vdupq_lane_s16: lanes 0 and 1 of the 64-bit input must each
   broadcast across all eight elements of the 128-bit result.
   Returns 0 on success, 1 on mismatch.  */
int __attribute__ ((noinline))
test_vdupq_lane_s16 ()
{
  int16x4_t a;
  int16x8_t b;
  int i;
  /* Only two first cases are interesting. */
  int16_t c[4] = { 0, 1, 2, 3 };
  int16_t d[8];

  a = vld1_s16 (c);
  /* Lane 0 broadcast.  */
  b = wrap_vdupq_lane_s16_0 (a);
  vst1q_s16 (d, b);
  for (i = 0; i < 8; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1 broadcast.  */
  b = wrap_vdupq_lane_s16_1 (a);
  vst1q_s16 (d, b);
  for (i = 0; i < 8; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdup_lane_s32 with lane 0.  */
int32x2_t __attribute__ ((noinline))
wrap_vdup_lane_s32_0 (int32x2_t a)
{
  return vdup_lane_s32 (a, 0);
}

/* Noinline wrapper for vdup_lane_s32 with lane 1.  */
int32x2_t __attribute__ ((noinline))
wrap_vdup_lane_s32_1 (int32x2_t a)
{
  return vdup_lane_s32 (a, 1);
}

/* Check vdup_lane_s32: lanes 0 and 1 must each broadcast across both
   elements of the 64-bit result.  Returns 0 on success, 1 on
   mismatch.  */
int __attribute__ ((noinline))
test_vdup_lane_s32 ()
{
  int32x2_t a;
  int32x2_t b;
  int i;
  int32_t c[2] = { 0, 1 };
  int32_t d[2];

  a = vld1_s32 (c);
  /* Lane 0 broadcast.  */
  b = wrap_vdup_lane_s32_0 (a);
  vst1_s32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1 broadcast.  */
  b = wrap_vdup_lane_s32_1 (a);
  vst1_s32 (d, b);
  for (i = 0; i < 2; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdupq_lane_s32 (128-bit result) with lane 0.  */
int32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_s32_0 (int32x2_t a)
{
  return vdupq_lane_s32 (a, 0);
}

/* Noinline wrapper for vdupq_lane_s32 (128-bit result) with lane 1.  */
int32x4_t __attribute__ ((noinline))
wrap_vdupq_lane_s32_1 (int32x2_t a)
{
  return vdupq_lane_s32 (a, 1);
}

/* Check vdupq_lane_s32: lanes 0 and 1 of the 64-bit input must each
   broadcast across all four elements of the 128-bit result.
   Returns 0 on success, 1 on mismatch.  */
int __attribute__ ((noinline))
test_vdupq_lane_s32 ()
{
  int32x2_t a;
  int32x4_t b;
  int i;
  int32_t c[2] = { 0, 1 };
  int32_t d[4];

  a = vld1_s32 (c);
  /* Lane 0 broadcast.  */
  b = wrap_vdupq_lane_s32_0 (a);
  vst1q_s32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[0] != d[i])
      return 1;

  /* Lane 1 broadcast.  */
  b = wrap_vdupq_lane_s32_1 (a);
  vst1q_s32 (d, b);
  for (i = 0; i < 4; i++)
    if (c[1] != d[i])
      return 1;
  return 0;
}

/* Noinline wrapper for vdup_lane_s64.  A single-element vector has
   only lane 0, so there is no lane-1 variant.  */
int64x1_t __attribute__ ((noinline))
wrap_vdup_lane_s64_0 (int64x1_t a)
{
  return vdup_lane_s64 (a, 0);
}

/* Check vdup_lane_s64: duplicating lane 0 of a one-element vector
   must reproduce the element.  Returns 0 on success, 1 on mismatch.  */
int __attribute__ ((noinline))
test_vdup_lane_s64 ()
{
  int64x1_t a;
  int64x1_t b;
  int64_t c[1];
  int64_t d[1];

  c[0] = 0;
  a = vld1_s64 (c);
  b = wrap_vdup_lane_s64_0 (a);
  vst1_s64 (d, b);
  if (c[0] != d[0])
    return 1;

  return 0;
}

/* Noinline wrapper for vdupq_lane_s64 (128-bit result) with lane 0.  */
int64x2_t __attribute__ ((noinline))
wrap_vdupq_lane_s64_0 (int64x1_t a)
{
  return vdupq_lane_s64 (a, 0);
}

/* Check vdupq_lane_s64: lane 0 of the one-element input must be
   broadcast into both elements of the 128-bit result.  Returns 0 on
   success, 1 on mismatch.  */
int __attribute__ ((noinline))
test_vdupq_lane_s64 ()
{
  int64x1_t a;
  int64x2_t b;
  int i;
  int64_t c[1];
  int64_t d[2];

  c[0] = 0;
  a = vld1_s64 (c);
  b = wrap_vdupq_lane_s64_0 (a);
  vst1q_s64 (d, b);
  for (i = 0; i < 2; i++)
    if (c[0] != d[i])
      return 1;
  return 0;
}

/* Run every per-type test; abort (making the dg-do run test FAIL)
   on the first one that reports a mismatch.  */
int
main ()
{

  if (test_vdup_lane_f32 ())
    abort ();
  if (test_vdup_lane_s8 ())
    abort ();
  if (test_vdup_lane_s16 ())
    abort ();
  if (test_vdup_lane_s32 ())
    abort ();
  if (test_vdup_lane_s64 ())
    abort ();
  if (test_vdupq_lane_f32 ())
    abort ();
  if (test_vdupq_lane_s8 ())
    abort ();
  if (test_vdupq_lane_s16 ())
    abort ();
  if (test_vdupq_lane_s32 ())
    abort ();
  if (test_vdupq_lane_s64 ())
    abort ();

  return 0;
}

/* Asm check for test_vdup_lane_s8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */

/* Asm check for test_vdupq_lane_s8. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */

/* Asm check for test_vdup_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */

/* Asm check for test_vdupq_lane_s16. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */

/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */

/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
