/* Test the vmul_n / vmulq_n AArch64 SIMD intrinsics for f32, s32, u32,
   s16 and u16, and vmulq_n_f64.  */

/* { dg-do run } */
/* { dg-options "-O2 --save-temps" } */

6 #include "arm_neon.h"
7
8 extern void abort (void);
9
10 #define A (132.4f)
11 #define B (-0.0f)
12 #define C (-34.8f)
13 #define D (289.34f)
14 float32_t expected2_1[2] = {A * A, B * A};
15 float32_t expected2_2[2] = {A * B, B * B};
16 float32_t expected4_1[4] = {A * A, B * A, C * A, D * A};
17 float32_t expected4_2[4] = {A * B, B * B, C * B, D * B};
18 float32_t expected4_3[4] = {A * C, B * C, C * C, D * C};
19 float32_t expected4_4[4] = {A * D, B * D, C * D, D * D};
20 float32_t _elemA = A;
21 float32_t _elemB = B;
22 float32_t _elemC = C;
23 float32_t _elemD = D;
24
25 #define AD (1234.5)
26 #define BD (-0.0)
27 #define CD (71.3)
28 #define DD (-1024.4)
29 float64_t expectedd2_1[2] = {AD * CD, BD * CD};
30 float64_t expectedd2_2[2] = {AD * DD, BD * DD};
31 float64_t _elemdC = CD;
32 float64_t _elemdD = DD;
33
34
/* Signed 32-bit test values.  {AS, BS} and {AS, BS, CS, DS} are the
   vector operands used by check_v2si and check_v4si; the same four
   values are also the scalar multipliers.  */
#define AS (1024)
#define BS (-31)
#define CS (0)
#define DS (655)

/* expectedsN_K[i] is lane i of the N-lane input vector multiplied by the
   K-th scalar (AS, BS, CS, DS in that order).  */
int32_t expecteds2_1[2] = {AS * AS, BS * AS};
int32_t expecteds2_2[2] = {AS * BS, BS * BS};
int32_t expecteds4_1[4] = {AS * AS, BS * AS, CS * AS, DS * AS};
int32_t expecteds4_2[4] = {AS * BS, BS * BS, CS * BS, DS * BS};
int32_t expecteds4_3[4] = {AS * CS, BS * CS, CS * CS, DS * CS};
int32_t expecteds4_4[4] = {AS * DS, BS * DS, CS * DS, DS * DS};

/* Scalar multipliers that main passes to the check functions.  */
int32_t _elemsA = AS;
int32_t _elemsB = BS;
int32_t _elemsC = CS;
int32_t _elemsD = DS;

/* Signed 16-bit test values.  {AH..DH} and {AH..HH} are the vector
   operands used by check_v4hi and check_v8hi; the same values are also
   the scalar multipliers.  */
#define AH ((int16_t) 0)
#define BH ((int16_t) -32)
#define CH ((int16_t) 102)
#define DH ((int16_t) -51)
#define EH ((int16_t) 71)
#define FH ((int16_t) -91)
#define GH ((int16_t) 48)
#define HH ((int16_t) 255)

/* expectedhN_K[i] is lane i of the N-lane input vector multiplied by the
   K-th scalar (AH, BH, ... in that order).  The products are computed
   in int and then narrowed to int16_t by the initializer; HH * HH
   (65025) does not fit, so that entry depends on the implementation's
   narrowing conversion.  */
int16_t expectedh4_1[4] = {AH * AH, BH * AH, CH * AH, DH * AH};
int16_t expectedh4_2[4] = {AH * BH, BH * BH, CH * BH, DH * BH};
int16_t expectedh4_3[4] = {AH * CH, BH * CH, CH * CH, DH * CH};
int16_t expectedh4_4[4] = {AH * DH, BH * DH, CH * DH, DH * DH};
int16_t expectedh8_1[8] = {AH * AH, BH * AH, CH * AH, DH * AH,
                           EH * AH, FH * AH, GH * AH, HH * AH};
int16_t expectedh8_2[8] = {AH * BH, BH * BH, CH * BH, DH * BH,
                           EH * BH, FH * BH, GH * BH, HH * BH};
int16_t expectedh8_3[8] = {AH * CH, BH * CH, CH * CH, DH * CH,
                           EH * CH, FH * CH, GH * CH, HH * CH};
int16_t expectedh8_4[8] = {AH * DH, BH * DH, CH * DH, DH * DH,
                           EH * DH, FH * DH, GH * DH, HH * DH};
int16_t expectedh8_5[8] = {AH * EH, BH * EH, CH * EH, DH * EH,
                           EH * EH, FH * EH, GH * EH, HH * EH};
int16_t expectedh8_6[8] = {AH * FH, BH * FH, CH * FH, DH * FH,
                           EH * FH, FH * FH, GH * FH, HH * FH};
int16_t expectedh8_7[8] = {AH * GH, BH * GH, CH * GH, DH * GH,
                           EH * GH, FH * GH, GH * GH, HH * GH};
int16_t expectedh8_8[8] = {AH * HH, BH * HH, CH * HH, DH * HH,
                           EH * HH, FH * HH, GH * HH, HH * HH};

/* Scalar multipliers that main passes to the check functions.  */
int16_t _elemhA = AH;
int16_t _elemhB = BH;
int16_t _elemhC = CH;
int16_t _elemhD = DH;
int16_t _elemhE = EH;
int16_t _elemhF = FH;
int16_t _elemhG = GH;
int16_t _elemhH = HH;

/* Unsigned 32-bit test values.  {AUS, BUS} and {AUS, BUS, CUS, DUS} are
   the vector operands used by check_v2si_unsigned and
   check_v4si_unsigned; the same four values are also the scalar
   multipliers.  */
#define AUS (1024)
#define BUS (31)
#define CUS (0)
#define DUS (655)

/* expectedusN_K[i] is lane i of the N-lane input vector multiplied by
   the K-th scalar (AUS, BUS, CUS, DUS in that order).  */
uint32_t expectedus2_1[2] = {AUS * AUS, BUS * AUS};
uint32_t expectedus2_2[2] = {AUS * BUS, BUS * BUS};
uint32_t expectedus4_1[4] = {AUS * AUS, BUS * AUS, CUS * AUS, DUS * AUS};
uint32_t expectedus4_2[4] = {AUS * BUS, BUS * BUS, CUS * BUS, DUS * BUS};
uint32_t expectedus4_3[4] = {AUS * CUS, BUS * CUS, CUS * CUS, DUS * CUS};
uint32_t expectedus4_4[4] = {AUS * DUS, BUS * DUS, CUS * DUS, DUS * DUS};

/* Scalar multipliers that main passes to the check functions.  */
uint32_t _elemusA = AUS;
uint32_t _elemusB = BUS;
uint32_t _elemusC = CUS;
uint32_t _elemusD = DUS;

/* Unsigned 16-bit test values.  {AUH..DUH} and {AUH..HUH} are the vector
   operands used by check_v4hi_unsigned and check_v8hi_unsigned; the
   same values are also the scalar multipliers.  */
#define AUH ((uint16_t) 0)
#define BUH ((uint16_t) 32)
#define CUH ((uint16_t) 102)
#define DUH ((uint16_t) 51)
#define EUH ((uint16_t) 71)
#define FUH ((uint16_t) 91)
#define GUH ((uint16_t) 48)
#define HUH ((uint16_t) 255)

/* expecteduhN_K[i] is lane i of the N-lane input vector multiplied by
   the K-th scalar (AUH, BUH, ... in that order).  The largest product,
   HUH * HUH = 65025, still fits in uint16_t.  */
uint16_t expecteduh4_1[4] = {AUH * AUH, BUH * AUH, CUH * AUH, DUH * AUH};
uint16_t expecteduh4_2[4] = {AUH * BUH, BUH * BUH, CUH * BUH, DUH * BUH};
uint16_t expecteduh4_3[4] = {AUH * CUH, BUH * CUH, CUH * CUH, DUH * CUH};
uint16_t expecteduh4_4[4] = {AUH * DUH, BUH * DUH, CUH * DUH, DUH * DUH};
uint16_t expecteduh8_1[8] = {AUH * AUH, BUH * AUH, CUH * AUH, DUH * AUH,
                             EUH * AUH, FUH * AUH, GUH * AUH, HUH * AUH};
uint16_t expecteduh8_2[8] = {AUH * BUH, BUH * BUH, CUH * BUH, DUH * BUH,
                             EUH * BUH, FUH * BUH, GUH * BUH, HUH * BUH};
uint16_t expecteduh8_3[8] = {AUH * CUH, BUH * CUH, CUH * CUH, DUH * CUH,
                             EUH * CUH, FUH * CUH, GUH * CUH, HUH * CUH};
uint16_t expecteduh8_4[8] = {AUH * DUH, BUH * DUH, CUH * DUH, DUH * DUH,
                             EUH * DUH, FUH * DUH, GUH * DUH, HUH * DUH};
uint16_t expecteduh8_5[8] = {AUH * EUH, BUH * EUH, CUH * EUH, DUH * EUH,
                             EUH * EUH, FUH * EUH, GUH * EUH, HUH * EUH};
uint16_t expecteduh8_6[8] = {AUH * FUH, BUH * FUH, CUH * FUH, DUH * FUH,
                             EUH * FUH, FUH * FUH, GUH * FUH, HUH * FUH};
uint16_t expecteduh8_7[8] = {AUH * GUH, BUH * GUH, CUH * GUH, DUH * GUH,
                             EUH * GUH, FUH * GUH, GUH * GUH, HUH * GUH};
uint16_t expecteduh8_8[8] = {AUH * HUH, BUH * HUH, CUH * HUH, DUH * HUH,
                             EUH * HUH, FUH * HUH, GUH * HUH, HUH * HUH};

/* Scalar multipliers that main passes to the check functions.  */
uint16_t _elemuhA = AUH;
uint16_t _elemuhB = BUH;
uint16_t _elemuhC = CUH;
uint16_t _elemuhD = DUH;
uint16_t _elemuhE = EUH;
uint16_t _elemuhF = FUH;
uint16_t _elemuhG = GUH;
uint16_t _elemuhH = HUH;

139 void
check_v2sf(float32_t elemA,float32_t elemB)140 check_v2sf (float32_t elemA, float32_t elemB)
141 {
142 int32_t indx;
143 const float32_t vec32x2_buf[2] = {A, B};
144 float32x2_t vec32x2_src = vld1_f32 (vec32x2_buf);
145 float32_t vec32x2_res[2];
146
147 vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemA));
148
149 for (indx = 0; indx < 2; indx++)
150 if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_1[indx])
151 abort ();
152
153 vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemB));
154
155 for (indx = 0; indx < 2; indx++)
156 if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_2[indx])
157 abort ();
158
159 /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
160 }
161
162 void
check_v4sf(float32_t elemA,float32_t elemB,float32_t elemC,float32_t elemD)163 check_v4sf (float32_t elemA, float32_t elemB, float32_t elemC, float32_t elemD)
164 {
165 int32_t indx;
166 const float32_t vec32x4_buf[4] = {A, B, C, D};
167 float32x4_t vec32x4_src = vld1q_f32 (vec32x4_buf);
168 float32_t vec32x4_res[4];
169
170 vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemA));
171
172 for (indx = 0; indx < 4; indx++)
173 if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_1[indx])
174 abort ();
175
176 vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemB));
177
178 for (indx = 0; indx < 4; indx++)
179 if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_2[indx])
180 abort ();
181
182 vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemC));
183
184 for (indx = 0; indx < 4; indx++)
185 if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_3[indx])
186 abort ();
187
188 vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemD));
189
190 for (indx = 0; indx < 4; indx++)
191 if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_4[indx])
192 abort ();
193
194 /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 4 } } */
195 }
196
197 void
check_v2df(float64_t elemdC,float64_t elemdD)198 check_v2df (float64_t elemdC, float64_t elemdD)
199 {
200 int32_t indx;
201 const float64_t vec64x2_buf[2] = {AD, BD};
202 float64x2_t vec64x2_src = vld1q_f64 (vec64x2_buf);
203 float64_t vec64x2_res[2];
204
205 vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdC));
206
207 for (indx = 0; indx < 2; indx++)
208 if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_1[indx])
209 abort ();
210
211 vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdD));
212
213 for (indx = 0; indx < 2; indx++)
214 if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_2[indx])
215 abort ();
216
217 /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[0\\\]" 2 } } */
218 }
219
220 void
check_v2si(int32_t elemsA,int32_t elemsB)221 check_v2si (int32_t elemsA, int32_t elemsB)
222 {
223 int32_t indx;
224 const int32_t vecs32x2_buf[2] = {AS, BS};
225 int32x2_t vecs32x2_src = vld1_s32 (vecs32x2_buf);
226 int32_t vecs32x2_res[2];
227
228 vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsA));
229
230 for (indx = 0; indx < 2; indx++)
231 if (vecs32x2_res[indx] != expecteds2_1[indx])
232 abort ();
233
234 vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsB));
235
236 for (indx = 0; indx < 2; indx++)
237 if (vecs32x2_res[indx] != expecteds2_2[indx])
238 abort ();
239 }
240
241 void
check_v2si_unsigned(uint32_t elemusA,uint32_t elemusB)242 check_v2si_unsigned (uint32_t elemusA, uint32_t elemusB)
243 {
244 int indx;
245 const uint32_t vecus32x2_buf[2] = {AUS, BUS};
246 uint32x2_t vecus32x2_src = vld1_u32 (vecus32x2_buf);
247 uint32_t vecus32x2_res[2];
248
249 vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusA));
250
251 for (indx = 0; indx < 2; indx++)
252 if (vecus32x2_res[indx] != expectedus2_1[indx])
253 abort ();
254
255 vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusB));
256
257 for (indx = 0; indx < 2; indx++)
258 if (vecus32x2_res[indx] != expectedus2_2[indx])
259 abort ();
260
261 /* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 4 } } */
262 }
263
264 void
check_v4si(int32_t elemsA,int32_t elemsB,int32_t elemsC,int32_t elemsD)265 check_v4si (int32_t elemsA, int32_t elemsB, int32_t elemsC, int32_t elemsD)
266 {
267 int32_t indx;
268 const int32_t vecs32x4_buf[4] = {AS, BS, CS, DS};
269 int32x4_t vecs32x4_src = vld1q_s32 (vecs32x4_buf);
270 int32_t vecs32x4_res[4];
271
272 vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsA));
273
274 for (indx = 0; indx < 4; indx++)
275 if (vecs32x4_res[indx] != expecteds4_1[indx])
276 abort ();
277
278 vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsB));
279
280 for (indx = 0; indx < 4; indx++)
281 if (vecs32x4_res[indx] != expecteds4_2[indx])
282 abort ();
283
284 vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsC));
285
286 for (indx = 0; indx < 4; indx++)
287 if (vecs32x4_res[indx] != expecteds4_3[indx])
288 abort ();
289
290 vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsD));
291
292 for (indx = 0; indx < 4; indx++)
293 if (vecs32x4_res[indx] != expecteds4_4[indx])
294 abort ();
295 }
296
297 void
check_v4si_unsigned(uint32_t elemusA,uint32_t elemusB,uint32_t elemusC,uint32_t elemusD)298 check_v4si_unsigned (uint32_t elemusA, uint32_t elemusB, uint32_t elemusC,
299 uint32_t elemusD)
300 {
301 int indx;
302 const uint32_t vecus32x4_buf[4] = {AUS, BUS, CUS, DUS};
303 uint32x4_t vecus32x4_src = vld1q_u32 (vecus32x4_buf);
304 uint32_t vecus32x4_res[4];
305
306 vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusA));
307
308 for (indx = 0; indx < 4; indx++)
309 if (vecus32x4_res[indx] != expectedus4_1[indx])
310 abort ();
311
312 vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusB));
313
314 for (indx = 0; indx < 4; indx++)
315 if (vecus32x4_res[indx] != expectedus4_2[indx])
316 abort ();
317
318 vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusC));
319
320 for (indx = 0; indx < 4; indx++)
321 if (vecus32x4_res[indx] != expectedus4_3[indx])
322 abort ();
323
324 vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusD));
325
326 for (indx = 0; indx < 4; indx++)
327 if (vecus32x4_res[indx] != expectedus4_4[indx])
328 abort ();
329
330 /* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 8 } } */
331 }
332
333
334 void
check_v4hi(int16_t elemhA,int16_t elemhB,int16_t elemhC,int16_t elemhD)335 check_v4hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD)
336 {
337 int32_t indx;
338 const int16_t vech16x4_buf[4] = {AH, BH, CH, DH};
339 int16x4_t vech16x4_src = vld1_s16 (vech16x4_buf);
340 int16_t vech16x4_res[4];
341
342 vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhA));
343
344 for (indx = 0; indx < 4; indx++)
345 if (vech16x4_res[indx] != expectedh4_1[indx])
346 abort ();
347
348 vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhB));
349
350 for (indx = 0; indx < 4; indx++)
351 if (vech16x4_res[indx] != expectedh4_2[indx])
352 abort ();
353
354 vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhC));
355
356 for (indx = 0; indx < 4; indx++)
357 if (vech16x4_res[indx] != expectedh4_3[indx])
358 abort ();
359
360 vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhD));
361
362 for (indx = 0; indx < 4; indx++)
363 if (vech16x4_res[indx] != expectedh4_4[indx])
364 abort ();
365 }
366
367 void
check_v4hi_unsigned(uint16_t elemuhA,uint16_t elemuhB,uint16_t elemuhC,uint16_t elemuhD)368 check_v4hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
369 uint16_t elemuhD)
370 {
371 int indx;
372 const uint16_t vecuh16x4_buf[4] = {AUH, BUH, CUH, DUH};
373 uint16x4_t vecuh16x4_src = vld1_u16 (vecuh16x4_buf);
374 uint16_t vecuh16x4_res[4];
375
376 vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhA));
377
378 for (indx = 0; indx < 4; indx++)
379 if (vecuh16x4_res[indx] != expecteduh4_1[indx])
380 abort ();
381
382 vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhB));
383
384 for (indx = 0; indx < 4; indx++)
385 if (vecuh16x4_res[indx] != expecteduh4_2[indx])
386 abort ();
387
388 vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhC));
389
390 for (indx = 0; indx < 4; indx++)
391 if (vecuh16x4_res[indx] != expecteduh4_3[indx])
392 abort ();
393
394 vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhD));
395
396 for (indx = 0; indx < 4; indx++)
397 if (vecuh16x4_res[indx] != expecteduh4_4[indx])
398 abort ();
399
400 /* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.4h, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 8 } } */
401 }
402
403 void
check_v8hi(int16_t elemhA,int16_t elemhB,int16_t elemhC,int16_t elemhD,int16_t elemhE,int16_t elemhF,int16_t elemhG,int16_t elemhH)404 check_v8hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD,
405 int16_t elemhE, int16_t elemhF, int16_t elemhG, int16_t elemhH)
406 {
407 int32_t indx;
408 const int16_t vech16x8_buf[8] = {AH, BH, CH, DH, EH, FH, GH, HH};
409 int16x8_t vech16x8_src = vld1q_s16 (vech16x8_buf);
410 int16_t vech16x8_res[8];
411
412 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhA));
413
414 for (indx = 0; indx < 8; indx++)
415 if (vech16x8_res[indx] != expectedh8_1[indx])
416 abort ();
417
418 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhB));
419
420 for (indx = 0; indx < 8; indx++)
421 if (vech16x8_res[indx] != expectedh8_2[indx])
422 abort ();
423
424 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhC));
425
426 for (indx = 0; indx < 8; indx++)
427 if (vech16x8_res[indx] != expectedh8_3[indx])
428 abort ();
429
430 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhD));
431
432 for (indx = 0; indx < 8; indx++)
433 if (vech16x8_res[indx] != expectedh8_4[indx])
434 abort ();
435
436 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhE));
437
438 for (indx = 0; indx < 8; indx++)
439 if (vech16x8_res[indx] != expectedh8_5[indx])
440 abort ();
441
442 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhF));
443
444 for (indx = 0; indx < 8; indx++)
445 if (vech16x8_res[indx] != expectedh8_6[indx])
446 abort ();
447
448 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhG));
449
450 for (indx = 0; indx < 8; indx++)
451 if (vech16x8_res[indx] != expectedh8_7[indx])
452 abort ();
453
454 vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhH));
455
456 for (indx = 0; indx < 8; indx++)
457 if (vech16x8_res[indx] != expectedh8_8[indx])
458 abort ();
459 }
460
461 void
check_v8hi_unsigned(uint16_t elemuhA,uint16_t elemuhB,uint16_t elemuhC,uint16_t elemuhD,uint16_t elemuhE,uint16_t elemuhF,uint16_t elemuhG,uint16_t elemuhH)462 check_v8hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
463 uint16_t elemuhD, uint16_t elemuhE, uint16_t elemuhF,
464 uint16_t elemuhG, uint16_t elemuhH)
465 {
466 int indx;
467 const uint16_t vecuh16x8_buf[8] = {AUH, BUH, CUH, DUH, EUH, FUH, GUH, HUH};
468 uint16x8_t vecuh16x8_src = vld1q_u16 (vecuh16x8_buf);
469 uint16_t vecuh16x8_res[8];
470
471 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhA));
472
473 for (indx = 0; indx < 8; indx++)
474 if (vecuh16x8_res[indx] != expecteduh8_1[indx])
475 abort ();
476
477 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhB));
478
479 for (indx = 0; indx < 8; indx++)
480 if (vecuh16x8_res[indx] != expecteduh8_2[indx])
481 abort ();
482
483 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhC));
484
485 for (indx = 0; indx < 8; indx++)
486 if (vecuh16x8_res[indx] != expecteduh8_3[indx])
487 abort ();
488
489 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhD));
490
491 for (indx = 0; indx < 8; indx++)
492 if (vecuh16x8_res[indx] != expecteduh8_4[indx])
493 abort ();
494
495 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhE));
496
497 for (indx = 0; indx < 8; indx++)
498 if (vecuh16x8_res[indx] != expecteduh8_5[indx])
499 abort ();
500
501 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhF));
502
503 for (indx = 0; indx < 8; indx++)
504 if (vecuh16x8_res[indx] != expecteduh8_6[indx])
505 abort ();
506
507 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhG));
508
509 for (indx = 0; indx < 8; indx++)
510 if (vecuh16x8_res[indx] != expecteduh8_7[indx])
511 abort ();
512
513 vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhH));
514
515 for (indx = 0; indx < 8; indx++)
516 if (vecuh16x8_res[indx] != expecteduh8_8[indx])
517 abort ();
518
519 /* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 16 } } */
520 }
521
522 int
main(void)523 main (void)
524 {
525 check_v2sf (_elemA, _elemB);
526 check_v4sf (_elemA, _elemB, _elemC, _elemD);
527 check_v2df (_elemdC, _elemdD);
528 check_v2si (_elemsA, _elemsB);
529 check_v4si (_elemsA, _elemsB, _elemsC, _elemsD);
530 check_v4hi (_elemhA, _elemhB, _elemhC, _elemhD);
531 check_v8hi (_elemhA, _elemhB, _elemhC, _elemhD,
532 _elemhE, _elemhF, _elemhG, _elemhH);
533 check_v2si_unsigned (_elemusA, _elemusB);
534 check_v4si_unsigned (_elemusA, _elemusB, _elemusC, _elemusD);
535 check_v4hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD);
536 check_v8hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD,
537 _elemuhE, _elemuhF, _elemuhG, _elemuhH);
538
539 return 0;
540 }
541
542