/* Test the vmul_n and vmulq_n AArch64 SIMD intrinsics.  */
2 
3 /* { dg-do run } */
4 /* { dg-options "-O2 --save-temps" } */
5 
6 #include "arm_neon.h"
7 
8 extern void abort (void);
9 
10 #define A (132.4f)
11 #define B (-0.0f)
12 #define C (-34.8f)
13 #define D (289.34f)
14 float32_t expected2_1[2] = {A * A, B * A};
15 float32_t expected2_2[2] = {A * B, B * B};
16 float32_t expected4_1[4] = {A * A, B * A, C * A, D * A};
17 float32_t expected4_2[4] = {A * B, B * B, C * B, D * B};
18 float32_t expected4_3[4] = {A * C, B * C, C * C, D * C};
19 float32_t expected4_4[4] = {A * D, B * D, C * D, D * D};
20 float32_t _elemA = A;
21 float32_t _elemB = B;
22 float32_t _elemC = C;
23 float32_t _elemD = D;
24 
25 #define AD (1234.5)
26 #define BD (-0.0)
27 #define CD (71.3)
28 #define DD (-1024.4)
29 float64_t expectedd2_1[2] = {AD * CD, BD * CD};
30 float64_t expectedd2_2[2] = {AD * DD, BD * DD};
31 float64_t _elemdC = CD;
32 float64_t _elemdD = DD;
33 
34 
/* Signed 32-bit vector lanes; each value also serves as a scalar operand.  */
#define AS (1024)
#define BS (-31)
#define CS (0)
#define DS (655)

/* Expected products of the 2-lane / 4-lane input with the scalar S.  */
#define S32_PRODUCTS_X2(S) {AS * (S), BS * (S)}
#define S32_PRODUCTS_X4(S) {AS * (S), BS * (S), CS * (S), DS * (S)}

int32_t expecteds2_1[2] = S32_PRODUCTS_X2 (AS);
int32_t expecteds2_2[2] = S32_PRODUCTS_X2 (BS);

int32_t expecteds4_1[4] = S32_PRODUCTS_X4 (AS);
int32_t expecteds4_2[4] = S32_PRODUCTS_X4 (BS);
int32_t expecteds4_3[4] = S32_PRODUCTS_X4 (CS);
int32_t expecteds4_4[4] = S32_PRODUCTS_X4 (DS);

#undef S32_PRODUCTS_X2
#undef S32_PRODUCTS_X4

/* Scalar operands, held in writable file-scope objects and handed to the
   check functions by main.  */
int32_t _elemsA = AS;
int32_t _elemsB = BS;
int32_t _elemsC = CS;
int32_t _elemsD = DS;
49 
/* Signed 16-bit vector lanes; each value also serves as a scalar operand.  */
#define AH ((int16_t) 0)
#define BH ((int16_t) -32)
#define CH ((int16_t) 102)
#define DH ((int16_t) -51)
#define EH ((int16_t) 71)
#define FH ((int16_t) -91)
#define GH ((int16_t) 48)
#define HH ((int16_t) 255)

/* Expected products of the 4-lane / 8-lane input with the scalar S.  The
   operands promote to int, so each product is converted back to int16_t
   when it initializes its array element.  */
#define S16_PRODUCTS_X4(S) {AH * (S), BH * (S), CH * (S), DH * (S)}
#define S16_PRODUCTS_X8(S) \
  {AH * (S), BH * (S), CH * (S), DH * (S), \
   EH * (S), FH * (S), GH * (S), HH * (S)}

int16_t expectedh4_1[4] = S16_PRODUCTS_X4 (AH);
int16_t expectedh4_2[4] = S16_PRODUCTS_X4 (BH);
int16_t expectedh4_3[4] = S16_PRODUCTS_X4 (CH);
int16_t expectedh4_4[4] = S16_PRODUCTS_X4 (DH);

int16_t expectedh8_1[8] = S16_PRODUCTS_X8 (AH);
int16_t expectedh8_2[8] = S16_PRODUCTS_X8 (BH);
int16_t expectedh8_3[8] = S16_PRODUCTS_X8 (CH);
int16_t expectedh8_4[8] = S16_PRODUCTS_X8 (DH);
int16_t expectedh8_5[8] = S16_PRODUCTS_X8 (EH);
int16_t expectedh8_6[8] = S16_PRODUCTS_X8 (FH);
int16_t expectedh8_7[8] = S16_PRODUCTS_X8 (GH);
int16_t expectedh8_8[8] = S16_PRODUCTS_X8 (HH);

#undef S16_PRODUCTS_X4
#undef S16_PRODUCTS_X8

/* Scalar operands, held in writable file-scope objects and handed to the
   check functions by main.  */
int16_t _elemhA = AH;
int16_t _elemhB = BH;
int16_t _elemhC = CH;
int16_t _elemhD = DH;
int16_t _elemhE = EH;
int16_t _elemhF = FH;
int16_t _elemhG = GH;
int16_t _elemhH = HH;
86 
/* Unsigned 32-bit vector lanes; each value also serves as a scalar
   operand.  */
#define AUS (1024)
#define BUS (31)
#define CUS (0)
#define DUS (655)

/* Expected products of the 2-lane / 4-lane input with the scalar S.  */
#define U32_PRODUCTS_X2(S) {AUS * (S), BUS * (S)}
#define U32_PRODUCTS_X4(S) {AUS * (S), BUS * (S), CUS * (S), DUS * (S)}

uint32_t expectedus2_1[2] = U32_PRODUCTS_X2 (AUS);
uint32_t expectedus2_2[2] = U32_PRODUCTS_X2 (BUS);

uint32_t expectedus4_1[4] = U32_PRODUCTS_X4 (AUS);
uint32_t expectedus4_2[4] = U32_PRODUCTS_X4 (BUS);
uint32_t expectedus4_3[4] = U32_PRODUCTS_X4 (CUS);
uint32_t expectedus4_4[4] = U32_PRODUCTS_X4 (DUS);

#undef U32_PRODUCTS_X2
#undef U32_PRODUCTS_X4

/* Scalar operands, held in writable file-scope objects and handed to the
   check functions by main.  */
uint32_t _elemusA = AUS;
uint32_t _elemusB = BUS;
uint32_t _elemusC = CUS;
uint32_t _elemusD = DUS;
101 
/* Unsigned 16-bit vector lanes; each value also serves as a scalar
   operand.  */
#define AUH ((uint16_t) 0)
#define BUH ((uint16_t) 32)
#define CUH ((uint16_t) 102)
#define DUH ((uint16_t) 51)
#define EUH ((uint16_t) 71)
#define FUH ((uint16_t) 91)
#define GUH ((uint16_t) 48)
#define HUH ((uint16_t) 255)

/* Expected products of the 4-lane / 8-lane input with the scalar S.  */
#define U16_PRODUCTS_X4(S) {AUH * (S), BUH * (S), CUH * (S), DUH * (S)}
#define U16_PRODUCTS_X8(S) \
  {AUH * (S), BUH * (S), CUH * (S), DUH * (S), \
   EUH * (S), FUH * (S), GUH * (S), HUH * (S)}

uint16_t expecteduh4_1[4] = U16_PRODUCTS_X4 (AUH);
uint16_t expecteduh4_2[4] = U16_PRODUCTS_X4 (BUH);
uint16_t expecteduh4_3[4] = U16_PRODUCTS_X4 (CUH);
uint16_t expecteduh4_4[4] = U16_PRODUCTS_X4 (DUH);

uint16_t expecteduh8_1[8] = U16_PRODUCTS_X8 (AUH);
uint16_t expecteduh8_2[8] = U16_PRODUCTS_X8 (BUH);
uint16_t expecteduh8_3[8] = U16_PRODUCTS_X8 (CUH);
uint16_t expecteduh8_4[8] = U16_PRODUCTS_X8 (DUH);
uint16_t expecteduh8_5[8] = U16_PRODUCTS_X8 (EUH);
uint16_t expecteduh8_6[8] = U16_PRODUCTS_X8 (FUH);
uint16_t expecteduh8_7[8] = U16_PRODUCTS_X8 (GUH);
uint16_t expecteduh8_8[8] = U16_PRODUCTS_X8 (HUH);

#undef U16_PRODUCTS_X4
#undef U16_PRODUCTS_X8

/* Scalar operands, held in writable file-scope objects and handed to the
   check functions by main.  */
uint16_t _elemuhA = AUH;
uint16_t _elemuhB = BUH;
uint16_t _elemuhC = CUH;
uint16_t _elemuhD = DUH;
uint16_t _elemuhE = EUH;
uint16_t _elemuhF = FUH;
uint16_t _elemuhG = GUH;
uint16_t _elemuhH = HUH;
138 
139 void
check_v2sf(float32_t elemA,float32_t elemB)140 check_v2sf (float32_t elemA, float32_t elemB)
141 {
142   int32_t indx;
143   const float32_t vec32x2_buf[2] = {A, B};
144   float32x2_t vec32x2_src = vld1_f32 (vec32x2_buf);
145   float32_t vec32x2_res[2];
146 
147   vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemA));
148 
149   for (indx = 0; indx < 2; indx++)
150     if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_1[indx])
151       abort ();
152 
153   vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemB));
154 
155   for (indx = 0; indx < 2; indx++)
156     if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_2[indx])
157       abort ();
158 
159 /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
160 }
161 
162 void
check_v4sf(float32_t elemA,float32_t elemB,float32_t elemC,float32_t elemD)163 check_v4sf (float32_t elemA, float32_t elemB, float32_t elemC, float32_t elemD)
164 {
165   int32_t indx;
166   const float32_t vec32x4_buf[4] = {A, B, C, D};
167   float32x4_t vec32x4_src = vld1q_f32 (vec32x4_buf);
168   float32_t vec32x4_res[4];
169 
170   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemA));
171 
172   for (indx = 0; indx < 4; indx++)
173     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_1[indx])
174       abort ();
175 
176   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemB));
177 
178   for (indx = 0; indx < 4; indx++)
179     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_2[indx])
180       abort ();
181 
182   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemC));
183 
184   for (indx = 0; indx < 4; indx++)
185     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_3[indx])
186       abort ();
187 
188   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemD));
189 
190   for (indx = 0; indx < 4; indx++)
191     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_4[indx])
192       abort ();
193 
194 /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 4 } } */
195 }
196 
197 void
check_v2df(float64_t elemdC,float64_t elemdD)198 check_v2df (float64_t elemdC, float64_t elemdD)
199 {
200   int32_t indx;
201   const float64_t vec64x2_buf[2] = {AD, BD};
202   float64x2_t vec64x2_src = vld1q_f64 (vec64x2_buf);
203   float64_t vec64x2_res[2];
204 
205   vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdC));
206 
207   for (indx = 0; indx < 2; indx++)
208     if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_1[indx])
209       abort ();
210 
211   vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdD));
212 
213   for (indx = 0; indx < 2; indx++)
214     if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_2[indx])
215       abort ();
216 
217 /* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[0\\\]" 2 } } */
218 }
219 
220 void
check_v2si(int32_t elemsA,int32_t elemsB)221 check_v2si (int32_t elemsA, int32_t elemsB)
222 {
223   int32_t indx;
224   const int32_t vecs32x2_buf[2] = {AS, BS};
225   int32x2_t vecs32x2_src = vld1_s32 (vecs32x2_buf);
226   int32_t vecs32x2_res[2];
227 
228   vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsA));
229 
230   for (indx = 0; indx < 2; indx++)
231     if (vecs32x2_res[indx] != expecteds2_1[indx])
232       abort ();
233 
234   vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsB));
235 
236   for (indx = 0; indx < 2; indx++)
237     if (vecs32x2_res[indx] != expecteds2_2[indx])
238       abort ();
239 }
240 
241 void
check_v2si_unsigned(uint32_t elemusA,uint32_t elemusB)242 check_v2si_unsigned (uint32_t elemusA, uint32_t elemusB)
243 {
244   int indx;
245   const uint32_t vecus32x2_buf[2] = {AUS, BUS};
246   uint32x2_t vecus32x2_src = vld1_u32 (vecus32x2_buf);
247   uint32_t vecus32x2_res[2];
248 
249   vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusA));
250 
251   for (indx = 0; indx < 2; indx++)
252     if (vecus32x2_res[indx] != expectedus2_1[indx])
253       abort ();
254 
255   vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusB));
256 
257   for (indx = 0; indx < 2; indx++)
258     if (vecus32x2_res[indx] != expectedus2_2[indx])
259       abort ();
260 
261 /* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 4 } } */
262 }
263 
264 void
check_v4si(int32_t elemsA,int32_t elemsB,int32_t elemsC,int32_t elemsD)265 check_v4si (int32_t elemsA, int32_t elemsB, int32_t elemsC, int32_t elemsD)
266 {
267   int32_t indx;
268   const int32_t vecs32x4_buf[4] = {AS, BS, CS, DS};
269   int32x4_t vecs32x4_src = vld1q_s32 (vecs32x4_buf);
270   int32_t vecs32x4_res[4];
271 
272   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsA));
273 
274   for (indx = 0; indx < 4; indx++)
275     if (vecs32x4_res[indx] != expecteds4_1[indx])
276       abort ();
277 
278   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsB));
279 
280   for (indx = 0; indx < 4; indx++)
281     if (vecs32x4_res[indx] != expecteds4_2[indx])
282       abort ();
283 
284   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsC));
285 
286   for (indx = 0; indx < 4; indx++)
287     if (vecs32x4_res[indx] != expecteds4_3[indx])
288       abort ();
289 
290   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsD));
291 
292   for (indx = 0; indx < 4; indx++)
293     if (vecs32x4_res[indx] != expecteds4_4[indx])
294       abort ();
295 }
296 
297 void
check_v4si_unsigned(uint32_t elemusA,uint32_t elemusB,uint32_t elemusC,uint32_t elemusD)298 check_v4si_unsigned (uint32_t elemusA, uint32_t elemusB, uint32_t elemusC,
299 		     uint32_t elemusD)
300 {
301   int indx;
302   const uint32_t vecus32x4_buf[4] = {AUS, BUS, CUS, DUS};
303   uint32x4_t vecus32x4_src = vld1q_u32 (vecus32x4_buf);
304   uint32_t vecus32x4_res[4];
305 
306   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusA));
307 
308   for (indx = 0; indx < 4; indx++)
309     if (vecus32x4_res[indx] != expectedus4_1[indx])
310       abort ();
311 
312   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusB));
313 
314   for (indx = 0; indx < 4; indx++)
315     if (vecus32x4_res[indx] != expectedus4_2[indx])
316       abort ();
317 
318   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusC));
319 
320   for (indx = 0; indx < 4; indx++)
321     if (vecus32x4_res[indx] != expectedus4_3[indx])
322       abort ();
323 
324   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusD));
325 
326   for (indx = 0; indx < 4; indx++)
327     if (vecus32x4_res[indx] != expectedus4_4[indx])
328       abort ();
329 
330 /* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 8 } } */
331 }
332 
333 
334 void
check_v4hi(int16_t elemhA,int16_t elemhB,int16_t elemhC,int16_t elemhD)335 check_v4hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD)
336 {
337   int32_t indx;
338   const int16_t vech16x4_buf[4] = {AH, BH, CH, DH};
339   int16x4_t vech16x4_src = vld1_s16 (vech16x4_buf);
340   int16_t vech16x4_res[4];
341 
342   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhA));
343 
344   for (indx = 0; indx < 4; indx++)
345     if (vech16x4_res[indx] != expectedh4_1[indx])
346       abort ();
347 
348   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhB));
349 
350   for (indx = 0; indx < 4; indx++)
351     if (vech16x4_res[indx] != expectedh4_2[indx])
352       abort ();
353 
354   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhC));
355 
356   for (indx = 0; indx < 4; indx++)
357     if (vech16x4_res[indx] != expectedh4_3[indx])
358       abort ();
359 
360   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhD));
361 
362   for (indx = 0; indx < 4; indx++)
363     if (vech16x4_res[indx] != expectedh4_4[indx])
364       abort ();
365 }
366 
367 void
check_v4hi_unsigned(uint16_t elemuhA,uint16_t elemuhB,uint16_t elemuhC,uint16_t elemuhD)368 check_v4hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
369 		     uint16_t elemuhD)
370 {
371   int indx;
372   const uint16_t vecuh16x4_buf[4] = {AUH, BUH, CUH, DUH};
373   uint16x4_t vecuh16x4_src = vld1_u16 (vecuh16x4_buf);
374   uint16_t vecuh16x4_res[4];
375 
376   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhA));
377 
378   for (indx = 0; indx < 4; indx++)
379     if (vecuh16x4_res[indx] != expecteduh4_1[indx])
380       abort ();
381 
382   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhB));
383 
384   for (indx = 0; indx < 4; indx++)
385     if (vecuh16x4_res[indx] != expecteduh4_2[indx])
386       abort ();
387 
388   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhC));
389 
390   for (indx = 0; indx < 4; indx++)
391     if (vecuh16x4_res[indx] != expecteduh4_3[indx])
392       abort ();
393 
394   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhD));
395 
396   for (indx = 0; indx < 4; indx++)
397     if (vecuh16x4_res[indx] != expecteduh4_4[indx])
398       abort ();
399 
400 /* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.4h, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 8 } } */
401 }
402 
403 void
check_v8hi(int16_t elemhA,int16_t elemhB,int16_t elemhC,int16_t elemhD,int16_t elemhE,int16_t elemhF,int16_t elemhG,int16_t elemhH)404 check_v8hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD,
405 	    int16_t elemhE, int16_t elemhF, int16_t elemhG, int16_t elemhH)
406 {
407   int32_t indx;
408   const int16_t vech16x8_buf[8] = {AH, BH, CH, DH, EH, FH, GH, HH};
409   int16x8_t vech16x8_src = vld1q_s16 (vech16x8_buf);
410   int16_t vech16x8_res[8];
411 
412   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhA));
413 
414   for (indx = 0; indx < 8; indx++)
415     if (vech16x8_res[indx] != expectedh8_1[indx])
416       abort ();
417 
418   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhB));
419 
420   for (indx = 0; indx < 8; indx++)
421     if (vech16x8_res[indx] != expectedh8_2[indx])
422       abort ();
423 
424   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhC));
425 
426   for (indx = 0; indx < 8; indx++)
427     if (vech16x8_res[indx] != expectedh8_3[indx])
428       abort ();
429 
430   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhD));
431 
432   for (indx = 0; indx < 8; indx++)
433     if (vech16x8_res[indx] != expectedh8_4[indx])
434       abort ();
435 
436   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhE));
437 
438   for (indx = 0; indx < 8; indx++)
439     if (vech16x8_res[indx] != expectedh8_5[indx])
440       abort ();
441 
442   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhF));
443 
444   for (indx = 0; indx < 8; indx++)
445     if (vech16x8_res[indx] != expectedh8_6[indx])
446       abort ();
447 
448   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhG));
449 
450   for (indx = 0; indx < 8; indx++)
451     if (vech16x8_res[indx] != expectedh8_7[indx])
452       abort ();
453 
454   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhH));
455 
456   for (indx = 0; indx < 8; indx++)
457     if (vech16x8_res[indx] != expectedh8_8[indx])
458       abort ();
459 }
460 
461 void
check_v8hi_unsigned(uint16_t elemuhA,uint16_t elemuhB,uint16_t elemuhC,uint16_t elemuhD,uint16_t elemuhE,uint16_t elemuhF,uint16_t elemuhG,uint16_t elemuhH)462 check_v8hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
463 		     uint16_t elemuhD, uint16_t elemuhE, uint16_t elemuhF,
464 		     uint16_t elemuhG, uint16_t elemuhH)
465 {
466   int indx;
467   const uint16_t vecuh16x8_buf[8] = {AUH, BUH, CUH, DUH, EUH, FUH, GUH, HUH};
468   uint16x8_t vecuh16x8_src = vld1q_u16 (vecuh16x8_buf);
469   uint16_t vecuh16x8_res[8];
470 
471   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhA));
472 
473   for (indx = 0; indx < 8; indx++)
474     if (vecuh16x8_res[indx] != expecteduh8_1[indx])
475       abort ();
476 
477   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhB));
478 
479   for (indx = 0; indx < 8; indx++)
480     if (vecuh16x8_res[indx] != expecteduh8_2[indx])
481       abort ();
482 
483   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhC));
484 
485   for (indx = 0; indx < 8; indx++)
486     if (vecuh16x8_res[indx] != expecteduh8_3[indx])
487       abort ();
488 
489   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhD));
490 
491   for (indx = 0; indx < 8; indx++)
492     if (vecuh16x8_res[indx] != expecteduh8_4[indx])
493       abort ();
494 
495   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhE));
496 
497   for (indx = 0; indx < 8; indx++)
498     if (vecuh16x8_res[indx] != expecteduh8_5[indx])
499       abort ();
500 
501   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhF));
502 
503   for (indx = 0; indx < 8; indx++)
504     if (vecuh16x8_res[indx] != expecteduh8_6[indx])
505       abort ();
506 
507   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhG));
508 
509   for (indx = 0; indx < 8; indx++)
510     if (vecuh16x8_res[indx] != expecteduh8_7[indx])
511       abort ();
512 
513   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhH));
514 
515   for (indx = 0; indx < 8; indx++)
516     if (vecuh16x8_res[indx] != expecteduh8_8[indx])
517       abort ();
518 
519 /* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 16 } } */
520 }
521 
522 int
main(void)523 main (void)
524 {
525   check_v2sf (_elemA, _elemB);
526   check_v4sf (_elemA, _elemB, _elemC, _elemD);
527   check_v2df (_elemdC, _elemdD);
528   check_v2si (_elemsA, _elemsB);
529   check_v4si (_elemsA, _elemsB, _elemsC, _elemsD);
530   check_v4hi (_elemhA, _elemhB, _elemhC, _elemhD);
531   check_v8hi (_elemhA, _elemhB, _elemhC, _elemhD,
532 	      _elemhE, _elemhF, _elemhG, _elemhH);
533   check_v2si_unsigned (_elemusA, _elemusB);
534   check_v4si_unsigned (_elemusA, _elemusB, _elemusC, _elemusD);
535   check_v4hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD);
536   check_v8hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD,
537 		       _elemuhE, _elemuhF, _elemuhG, _elemuhH);
538 
539   return 0;
540 }
541 
542