1; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
3declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
4
5declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
6
7declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
8
9declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
10
11declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
12
13declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
14
15declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
16
17declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
18
19declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
20
21declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
22
23declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
24
25declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
26
27declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
28
29declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
30
31declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
32
33declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
34
35declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
36
37declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
38
39declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
40
41declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
42
43declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
44
; a + b * splat(v[3]) on <4 x i16>, with %v a 64-bit vector: expects one by-element mla.
define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
55
; a + b * splat(v[3]) on <8 x i16>, lane taken from a <4 x i16> vector: expects one by-element mla.
define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the narrow %v across all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
66
; a + b * splat(v[1]) on <2 x i32>: expects one by-element mla.
define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v across both result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
77
; a + b * splat(v[1]) on <4 x i32>, lane taken from a <2 x i32> vector: expects one by-element mla.
define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of the narrow %v across all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
88
; a + b * splat(v[7]) on <4 x i16>, lane taken from an <8 x i16> vector: expects one by-element mla.
define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of the wide %v into a four-lane vector.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
99
; a + b * splat(v[7]) on <8 x i16>: expects one by-element mla.
define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v across all eight result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
110
; a + b * splat(v[3]) on <2 x i32>, lane taken from a <4 x i32> vector: expects one by-element mla.
define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the wide %v into a two-lane vector.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
121
; a + b * splat(v[3]) on <4 x i32>: expects one by-element mla.
define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
132
; a - b * splat(v[3]) on <4 x i16>: expects one by-element mls.
define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
143
; a - b * splat(v[3]) on <8 x i16>, lane taken from a <4 x i16> vector: expects one by-element mls.
define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the narrow %v across all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
154
; a - b * splat(v[1]) on <2 x i32>: expects one by-element mls.
define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v across both result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
165
; a - b * splat(v[1]) on <4 x i32>, lane taken from a <2 x i32> vector: expects one by-element mls.
define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of the narrow %v across all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
176
; a - b * splat(v[7]) on <4 x i16>, lane taken from an <8 x i16> vector: expects one by-element mls.
define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of the wide %v into a four-lane vector.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
187
; a - b * splat(v[7]) on <8 x i16>: expects one by-element mls.
define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v across all eight result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
198
; a - b * splat(v[3]) on <2 x i32>, lane taken from a <4 x i32> vector: expects one by-element mls.
define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the wide %v into a two-lane vector.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
209
; a - b * splat(v[3]) on <4 x i32>: expects one by-element mls.
define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
220
; a * splat(v[3]) on <4 x i16>: expects one by-element mul.
define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
230
; a * splat(v[3]) on <8 x i16>, lane taken from a <4 x i16> vector: expects one by-element mul.
define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the narrow %v across all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
240
; a * splat(v[1]) on <2 x i32>: expects one by-element mul.
define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v across both result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
250
; a * splat(v[1]) on <4 x i32>, lane taken from a <2 x i32> vector: expects one by-element mul.
define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of the narrow %v across all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
260
; u16-named variant: a * splat(v[3]) on <4 x i16>; the IR uses the same sign-agnostic mul and expects a by-element mul.
define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
270
; u16-named variant: a * splat(v[3]) on <8 x i16>, lane taken from a <4 x i16> vector; expects a by-element mul.
define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the narrow %v across all eight result lanes.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
280
; u32-named variant: a * splat(v[1]) on <2 x i32>; expects a by-element mul.
define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v across both result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
290
; u32-named variant: a * splat(v[1]) on <4 x i32>, lane taken from a <2 x i32> vector; expects a by-element mul.
define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of the narrow %v across all four result lanes.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
300
; a * splat(v[7]) on <4 x i16>, lane taken from an <8 x i16> vector: expects one by-element mul.
define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of the wide %v into a four-lane vector.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
310
; a * splat(v[7]) on <8 x i16>: expects one by-element mul.
define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v across all eight result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
320
; a * splat(v[3]) on <2 x i32>, lane taken from a <4 x i32> vector: expects one by-element mul.
define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the wide %v into a two-lane vector.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
330
; a * splat(v[3]) on <4 x i32>: expects one by-element mul.
define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
340
; u16-named variant: a * splat(v[7]) on <4 x i16>, lane taken from an <8 x i16> vector; expects a by-element mul.
define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of the wide %v into a four-lane vector.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
350
; u16-named variant: a * splat(v[7]) on <8 x i16>; expects a by-element mul.
define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of %v across all eight result lanes.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
360
; u32-named variant: a * splat(v[3]) on <2 x i32>, lane taken from a <4 x i32> vector; expects a by-element mul.
define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the wide %v into a two-lane vector.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
370
; u32-named variant: a * splat(v[3]) on <4 x i32>; expects a by-element mul.
define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four result lanes.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
380
; fma(splat(v[1]), b, a) on <2 x float>: expects one by-element fmla.
define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v, then feed it to the generic fma intrinsic.
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
390
391declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
392
; fma(splat(v[1]), b, a) on <4 x float>, lane taken from a <2 x float> vector: expects one by-element fmla.
define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of the narrow %v across all four lanes.
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
402
403declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
404
; fma(splat(v[3]), b, a) on <2 x float>, lane taken from a <4 x float> vector: expects one by-element fmla.
define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the wide %v into a two-lane vector.
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
414
; fma(splat(v[3]), b, a) on <4 x float>: expects one by-element fmla.
define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v across all four lanes.
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
424
; fma(splat((-v)[1]), b, a) on <2 x float>: the negation should fold so a single by-element fmls is emitted.
define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Negate %v as (-0.0 - v) before the lane is broadcast.
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
435
; fma(splat((-v)[1]), b, a) on <4 x float>, lane from a <2 x float> vector: expects one by-element fmls.
define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Negate %v as (-0.0 - v) before the lane is broadcast.
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
446
; fma(splat((-v)[3]), b, a) on <2 x float>, lane from a <4 x float> vector: expects one by-element fmls.
define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Negate %v as (-0.0 - v) before the lane is broadcast.
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
457
; fma(splat((-v)[3]), b, a) on <4 x float>: expects one by-element fmls.
define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Negate %v as (-0.0 - v) before the lane is broadcast.
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
468
; fma(splat(v[0]), b, a) on <2 x double>, lane from a <1 x double> vector: expects one by-element fmla.
define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmaq_lane_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; The zeroinitializer mask duplicates the only element of %v into both lanes.
  %splat = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fma
}
478
479declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
480
; fma(splat(v[1]), b, a) on <2 x double>: expects one by-element fmla.
define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v across both lanes.
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fma
}
490
; fma(splat((-v)[0]), b, a) on <2 x double>, lane from a <1 x double> vector: expects one by-element fmls.
define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsq_lane_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  ; Negate %v as (-0.0 - v), then duplicate its only element into both lanes.
  %neg = fsub <1 x double> <double -0.000000e+00>, %v
  %splat = shufflevector <1 x double> %neg, <1 x double> undef, <2 x i32> zeroinitializer
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fma
}
501
; fma(splat((-v)[1]), b, a) on <2 x double>: expects one by-element fmls.
define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  ; Negate %v as (-0.0 - v) before the lane is broadcast.
  %neg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %splat = shufflevector <2 x double> %neg, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fma
}
512
; Scalar fma(b, v[3], a) with the multiplicand extracted from a <4 x float>:
; expects the scalar by-element fmla form.
; The label pattern ends in ':' (as in the vector tests of this file) so it
; anchors on the function's label rather than any line mentioning the symbol.
define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmas_laneq_f32:
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %elt = extractelement <4 x float> %v, i32 3
  %fma = tail call float @llvm.fma.f32(float %b, float %elt, float %a)
  ret float %fma
}
522
523declare float @llvm.fma.f32(float, float, float)
524
; Scalar fma(b, -v[0], a) with the multiplicand taken from a <1 x double>:
; the checks expect the plain scalar fmsub (all-d-register form), not a
; by-element instruction.
; The label pattern ends in ':' (as in the vector tests of this file) so it
; anchors on the function's label rather than any line mentioning the symbol.
define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64:
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %elt = extractelement <1 x double> %v, i32 0
  ; Negation is written as (-0.0 - x).
  %neg.elt = fsub double -0.000000e+00, %elt
  %fma = tail call double @llvm.fma.f64(double %b, double %neg.elt, double %a)
  ret double %fma
}
535
536declare double @llvm.fma.f64(double, double, double)
537
; Scalar fma(b, -v[3], a) with the multiplicand extracted from a <4 x float>:
; expects the scalar by-element fmls form.
; The label pattern ends in ':' (as in the vector tests of this file) so it
; anchors on the function's label rather than any line mentioning the symbol.
define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %elt = extractelement <4 x float> %v, i32 3
  ; Negation is written as (-0.0 - x).
  %neg.elt = fsub float -0.000000e+00, %elt
  %fma = tail call float @llvm.fma.f32(float %b, float %neg.elt, float %a)
  ret float %fma
}
548
; Scalar fma(b, -v[1], a) with the multiplicand extracted from a <2 x double>:
; expects the scalar by-element fmls form.
; The label pattern ends in ':' (as in the vector tests of this file) so it
; anchors on the function's label rather than any line mentioning the symbol.
define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64:
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %elt = extractelement <2 x double> %v, i32 1
  ; Negation is written as (-0.0 - x).
  %neg.elt = fsub double -0.000000e+00, %elt
  %fma = tail call double @llvm.fma.f64(double %b, double %neg.elt, double %a)
  ret double %fma
}
559
; a + smull(b, splat(v[3])) widening <4 x i16> to <4 x i32>: the check matches a by-element "mlal".
define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v, widen-multiply with %b, then accumulate into %a.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
570
; a + smull(b, splat(v[1])) widening <2 x i32> to <2 x i64>: the check matches a by-element "mlal".
define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v, widen-multiply with %b, then accumulate into %a.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
581
; a + smull(b, splat(v[7])), lane taken from an <8 x i16> vector: the check matches a by-element "mlal".
define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of the wide %v, widen-multiply with %b, then accumulate into %a.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
592
; a + smull(b, splat(v[3])), lane taken from a <4 x i32> vector: the check matches a by-element "mlal".
define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the wide %v, widen-multiply with %b, then accumulate into %a.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
603
; a + smull(high_half(b), splat(v[3])): the check matches a by-element "mlal2" reading the full <8 x i16> %b.
define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %b form the high half.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
615
; a + smull(high_half(b), splat(v[1])): the check matches a by-element "mlal2" reading the full <4 x i32> %b.
define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %b form the high half.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
627
; a + smull(high_half(b), splat(v[7])), lane from an <8 x i16> vector: the check matches a by-element "mlal2".
define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %b form the high half.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
639
; a + smull(high_half(b), splat(v[3])), lane from a <4 x i32> vector: the check matches a by-element "mlal2".
define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %b form the high half.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
651
; a - smull(b, splat(v[3])) widening <4 x i16> to <4 x i32>: the check matches a by-element "mlsl".
define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v, widen-multiply with %b, then subtract from %a.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
662
; a - smull(b, splat(v[1])) widening <2 x i32> to <2 x i64>: the check matches a by-element "mlsl".
define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v, widen-multiply with %b, then subtract from %a.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
673
; a - smull(b, splat(v[7])), lane taken from an <8 x i16> vector: the check matches a by-element "mlsl".
define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 7 of the wide %v, widen-multiply with %b, then subtract from %a.
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
684
; a - smull(b, splat(v[3])), lane taken from a <4 x i32> vector: the check matches a by-element "mlsl".
define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of the wide %v, widen-multiply with %b, then subtract from %a.
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
695
; a - smull(high_half(b), splat(v[3])): the check matches a by-element "mlsl2" reading the full <8 x i16> %b.
define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %b form the high half.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
707
; a - smull(high_half(b), splat(v[1])): the check matches a by-element "mlsl2" reading the full <4 x i32> %b.
define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %b form the high half.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
719
; a - smull(high_half(b), splat(v[7])), lane from an <8 x i16> vector: the check matches a by-element "mlsl2".
define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  ; Elements 4..7 of %b form the high half.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
731
; a - smull(high_half(b), splat(v[3])), lane from a <4 x i32> vector: the check matches a by-element "mlsl2".
define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  ; Elements 2..3 of %b form the high half.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
743
; a + umull(b, splat(v[3])) widening <4 x i16> to <4 x i32>: the check matches a by-element "mlal".
define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 3 of %v, unsigned widen-multiply with %b, then accumulate into %a.
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %wide = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
754
; a + umull(b, splat(v[1])) widening <2 x i32> to <2 x i64>: the check matches a by-element "mlal".
define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  ; Broadcast lane 1 of %v, unsigned widen-multiply with %b, then accumulate into %a.
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %wide = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
765
; vmlal_laneq_u16: umull(%b, splat of lane 7 of the 128-bit %v) + %a.
; The full unsigned mnemonic is matched; a bare "mlal" would also accept
; smlal or sqdmlal.
define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
776
; vmlal_laneq_u32: umull(%b, splat of lane 3 of the 128-bit %v) + %a.
; The full unsigned mnemonic is matched; a bare "mlal" would also accept
; smlal or sqdmlal.
define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
787
; vmlal_high_lane_u16: umull(upper four lanes of %b, splat of %v lane 3) + %a.
; The full unsigned mnemonic is matched; a bare "mlal2" would also accept
; smlal2 or sqdmlal2.
define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
799
; vmlal_high_lane_u32: umull(upper two lanes of %b, splat of %v lane 1) + %a.
; The full unsigned mnemonic is matched; a bare "mlal2" would also accept
; smlal2 or sqdmlal2.
define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
811
; vmlal_high_laneq_u16: umull(upper four lanes of %b, splat of %v lane 7) + %a.
; The full unsigned mnemonic is matched; a bare "mlal2" would also accept
; smlal2 or sqdmlal2.
define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
823
; vmlal_high_laneq_u32: umull(upper two lanes of %b, splat of %v lane 3) + %a.
; The full unsigned mnemonic is matched; a bare "mlal2" would also accept
; smlal2 or sqdmlal2.
define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
835
; vmlsl_lane_u16: %a - umull(%b, splat of %v lane 3).
; The full unsigned mnemonic is matched; a bare "mlsl" would also accept
; smlsl or sqdmlsl.
define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
846
; vmlsl_lane_u32: %a - umull(%b, splat of %v lane 1).
; The full unsigned mnemonic is matched; a bare "mlsl" would also accept
; smlsl or sqdmlsl.
define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
857
; vmlsl_laneq_u16: %a - umull(%b, splat of lane 7 of the 128-bit %v).
; The full unsigned mnemonic is matched; a bare "mlsl" would also accept
; smlsl or sqdmlsl.
define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
868
; vmlsl_laneq_u32: %a - umull(%b, splat of lane 3 of the 128-bit %v).
; The full unsigned mnemonic is matched; a bare "mlsl" would also accept
; smlsl or sqdmlsl.
define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
879
; vmlsl_high_lane_u16: %a - umull(upper four lanes of %b, splat of %v lane 3).
; The full unsigned mnemonic is matched; a bare "mlsl2" would also accept
; smlsl2 or sqdmlsl2.
define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
891
; vmlsl_high_lane_u32: %a - umull(upper two lanes of %b, splat of %v lane 1).
; The full unsigned mnemonic is matched; a bare "mlsl2" would also accept
; smlsl2 or sqdmlsl2.
define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
903
; vmlsl_high_laneq_u16: %a - umull(upper four lanes of %b, splat of %v lane 7).
; The full unsigned mnemonic is matched; a bare "mlsl2" would also accept
; smlsl2 or sqdmlsl2.
define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
915
; vmlsl_high_laneq_u32: %a - umull(upper two lanes of %b, splat of %v lane 3).
; The full unsigned mnemonic is matched; a bare "mlsl2" would also accept
; smlsl2 or sqdmlsl2.
define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
927
; vmull_lane_s16: smull(%a, splat of %v lane 3).
; The full signed mnemonic is matched; a bare "mull" would also accept
; umull or sqdmull.
define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
937
; vmull_lane_s32: smull(%a, splat of %v lane 1).
; The full signed mnemonic is matched; a bare "mull" would also accept
; umull or sqdmull.
define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
947
; vmull_lane_u16: umull(%a, splat of %v lane 3).
; The full unsigned mnemonic is matched; a bare "mull" would also accept
; smull or sqdmull.
define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
957
; vmull_lane_u32: umull(%a, splat of %v lane 1).
; The full unsigned mnemonic is matched; a bare "mull" would also accept
; smull or sqdmull.
define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
967
; vmull_high_lane_s16: smull(upper four lanes of %a, splat of %v lane 3).
; The full signed mnemonic is matched; a bare "mull2" would also accept
; umull2 or sqdmull2.
define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
978
; vmull_high_lane_s32: smull(upper two lanes of %a, splat of %v lane 1).
; The full signed mnemonic is matched; a bare "mull2" would also accept
; umull2 or sqdmull2.
define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
989
; vmull_high_lane_u16: umull(upper four lanes of %a, splat of %v lane 3).
; The full unsigned mnemonic is matched; a bare "mull2" would also accept
; smull2 or sqdmull2.
define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1000
; vmull_high_lane_u32: umull(upper two lanes of %a, splat of %v lane 1).
; The full unsigned mnemonic is matched; a bare "mull2" would also accept
; smull2 or sqdmull2.
define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1011
; vmull_laneq_s16: smull(%a, splat of lane 7 of the 128-bit %v).
; The full signed mnemonic is matched; a bare "mull" would also accept
; umull or sqdmull.
define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1021
; vmull_laneq_s32: smull(%a, splat of lane 3 of the 128-bit %v).
; The full signed mnemonic is matched; a bare "mull" would also accept
; umull or sqdmull.
define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1031
; vmull_laneq_u16: umull(%a, splat of lane 7 of the 128-bit %v).
; The full unsigned mnemonic is matched; a bare "mull" would also accept
; smull or sqdmull.
define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1041
; vmull_laneq_u32: umull(%a, splat of lane 3 of the 128-bit %v).
; The full unsigned mnemonic is matched; a bare "mull" would also accept
; smull or sqdmull.
define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1051
; vmull_high_laneq_s16: smull(upper four lanes of %a, splat of %v lane 7).
; The full signed mnemonic is matched; a bare "mull2" would also accept
; umull2 or sqdmull2.
define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1062
; vmull_high_laneq_s32: smull(upper two lanes of %a, splat of %v lane 3).
; The full signed mnemonic is matched; a bare "mull2" would also accept
; umull2 or sqdmull2.
define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1073
; vmull_high_laneq_u16: umull(upper four lanes of %a, splat of %v lane 7).
; The full unsigned mnemonic is matched; a bare "mull2" would also accept
; smull2 or sqdmull2.
define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
1084
; vmull_high_laneq_u32: umull(upper two lanes of %a, splat of %v lane 3).
; The full unsigned mnemonic is matched; a bare "mull2" would also accept
; smull2 or sqdmull2.
define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
1095
; vqdmlal_lane_s16: sqadd(%a, sqdmull(%b, splat of %v lane 3)).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}
1106
; vqdmlal_lane_s32: sqadd(%a, sqdmull(%b, splat of %v lane 1)).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}
1117
; vqdmlal_high_lane_s16: sqadd(%a, sqdmull(upper four lanes of %b, splat of
; %v lane 3)). The pattern carries the leading "s" so the whole mnemonic is
; matched rather than a suffix of it.
define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}
1129
; vqdmlal_high_lane_s32: sqadd(%a, sqdmull(upper two lanes of %b, splat of
; %v lane 1)). The pattern carries the leading "s" so the whole mnemonic is
; matched rather than a suffix of it.
define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}
1141
; vqdmlsl_lane_s16: sqsub(%a, sqdmull(%b, splat of %v lane 3)).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}
1152
; vqdmlsl_lane_s32: sqsub(%a, sqdmull(%b, splat of %v lane 1)).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}
1163
; vqdmlsl_high_lane_s16: sqsub(%a, sqdmull(upper four lanes of %b, splat of
; %v lane 3)). The pattern carries the leading "s" so the whole mnemonic is
; matched rather than a suffix of it.
define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}
1175
; vqdmlsl_high_lane_s32: sqsub(%a, sqdmull(upper two lanes of %b, splat of
; %v lane 1)). The pattern carries the leading "s" so the whole mnemonic is
; matched rather than a suffix of it.
define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}
1187
; vqdmull_lane_s16: sqdmull(%a, splat of %v lane 3).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1197
; vqdmull_lane_s32: sqdmull(%a, splat of %v lane 1).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1207
; vqdmull_laneq_s16: sqdmull(%a, splat of lane 7 of the 128-bit %v).
; Lane 7 is used (rather than lane 3) so the index reaches the upper half of
; the 128-bit source, which a 64-bit lane operand cannot express. The pattern
; also carries the leading "s" so the whole mnemonic is matched.
define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1217
; vqdmull_laneq_s32: sqdmull(%a, splat of lane 3 of the 128-bit %v).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1227
; vqdmull_high_lane_s16: sqdmull(upper four lanes of %a, splat of %v lane 3).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1238
; vqdmull_high_lane_s32: sqdmull(upper two lanes of %a, splat of %v lane 1).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1249
; vqdmull_high_laneq_s16: sqdmull(upper four lanes of %a, splat of %v lane 7).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
1260
; vqdmull_high_laneq_s32: sqdmull(upper two lanes of %a, splat of %v lane 3).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
1271
; vqdmulh_lane_s16: sqdmulh(%a, splat of %v lane 3).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}
1281
; vqdmulhq_lane_s16: sqdmulh(%a, lane 3 of the 64-bit %v splatted to 8 lanes).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}
1291
; vqdmulh_lane_s32: sqdmulh(%a, splat of %v lane 1).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}
1301
; vqdmulhq_lane_s32: sqdmulh(%a, lane 1 of the 64-bit %v splatted to 4 lanes).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}
1311
; vqrdmulh_lane_s16: sqrdmulh(%a, splat of %v lane 3).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}
1321
; vqrdmulhq_lane_s16: sqrdmulh(%a, lane 3 of the 64-bit %v splatted to 8 lanes).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}
1331
; vqrdmulh_lane_s32: sqrdmulh(%a, splat of %v lane 1).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}
1341
; vqrdmulhq_lane_s32: sqrdmulh(%a, lane 1 of the 64-bit %v splatted to 4 lanes).
; The pattern carries the leading "s" so the whole mnemonic is matched rather
; than a suffix of it.
define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}
1351
; vmul_lane_f32: lane 1 of %v is broadcast and multiplied with %a; the
; expected selection is a single by-element fmul.
define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane.splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %product = fmul <2 x float> %lane.splat, %a
  ret <2 x float> %product
}
1361
; vmul_lane_f64: %a is reinterpreted as a scalar double (via an <8 x i8>
; round trip), multiplied by element 0 of %v, and re-wrapped as <1 x double>.
; The expected selection is a scalar d-register fmul.
define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmul_lane_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %a.bytes = bitcast <1 x double> %a to <8 x i8>
  %a.scalar = bitcast <8 x i8> %a.bytes to double
  %v.elt = extractelement <1 x double> %v, i32 0
  %product = fmul double %a.scalar, %v.elt
  %result = insertelement <1 x double> undef, double %product, i32 0
  ret <1 x double> %result
}
1374
; vmulq_lane_f32: lane 1 of the 64-bit %v is broadcast to four lanes and
; multiplied with %a; the expected selection is a single by-element fmul.
define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane.splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %product = fmul <4 x float> %lane.splat, %a
  ret <4 x float> %product
}
1384
; vmulq_lane_f64: the single element of %v is broadcast to two lanes and
; multiplied with %a; the expected selection is a by-element fmul on d[0].
define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulq_lane_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %lane.splat = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %product = fmul <2 x double> %lane.splat, %a
  ret <2 x double> %product
}
1394
; vmul_laneq_f32: lane 3 of the 128-bit %v is broadcast to two lanes and
; multiplied with %a; the expected selection is a single by-element fmul.
define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane.splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %product = fmul <2 x float> %lane.splat, %a
  ret <2 x float> %product
}
1404
; vmul_laneq_f64: %a is reinterpreted as a scalar double (via an <8 x i8>
; round trip), multiplied by element 1 of the 128-bit %v, and re-wrapped as
; <1 x double>. The expected selection is a scalar fmul indexing d[1].
define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %a.bytes = bitcast <1 x double> %a to <8 x i8>
  %a.scalar = bitcast <8 x i8> %a.bytes to double
  %v.elt = extractelement <2 x double> %v, i32 1
  %product = fmul double %a.scalar, %v.elt
  %result = insertelement <1 x double> undef, double %product, i32 0
  ret <1 x double> %result
}
1417
; vmulq_laneq_f32: lane 3 of %v is broadcast to all four lanes and multiplied
; with %a; the expected selection is a single by-element fmul.
define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane.splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %product = fmul <4 x float> %lane.splat, %a
  ret <4 x float> %product
}
1427
; Lane 1 of the 2 x double %v is splatted to both lanes and multiplied with %a;
; expected: one by-element fmul on .2d using d[1].
define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x double> %splat, %a
  ret <2 x double> %prod
}
1437
; Lane 1 of the 2 x float %v is splatted and passed as the second operand of
; the fmulx.v2f32 intrinsic; expected: one by-element fmulx on .2s using s[1].
; The pattern spells the complete mnemonic instead of the bare "mulx" suffix.
define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %mulx = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %splat)
  ret <2 x float> %mulx
}
1447
; Lane 1 of the 2 x float %v is splatted to four lanes and passed to the
; fmulx.v4f32 intrinsic; expected: one by-element fmulx on .4s using s[1].
; The pattern spells the complete mnemonic instead of the bare "mulx" suffix.
define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mulx = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %splat)
  ret <4 x float> %mulx
}
1457
; The single element of the 1 x double %v is splatted to two lanes and passed
; to the fmulx.v2f64 intrinsic; expected: one by-element fmulx on .2d using d[0].
; The pattern spells the complete mnemonic instead of the bare "mulx" suffix.
define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %mulx = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %splat)
  ret <2 x double> %mulx
}
1467
; Lane 3 of the 4 x float %v is splatted to two lanes and passed to the
; fmulx.v2f32 intrinsic; expected: one by-element fmulx on .2s using s[3].
; The pattern spells the complete mnemonic instead of the bare "mulx" suffix.
define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %mulx = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %splat)
  ret <2 x float> %mulx
}
1477
; Lane 3 of the 4 x float %v is splatted to four lanes and passed to the
; fmulx.v4f32 intrinsic; expected: one by-element fmulx on .4s using s[3].
; The pattern spells the complete mnemonic instead of the bare "mulx" suffix.
define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mulx = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %splat)
  ret <4 x float> %mulx
}
1487
; Lane 1 of the 2 x double %v is splatted to both lanes and passed to the
; fmulx.v2f64 intrinsic; expected: one by-element fmulx on .2d using d[1].
; The pattern spells the complete mnemonic instead of the bare "mulx" suffix.
define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %mulx = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %splat)
  ret <2 x double> %mulx
}
1497
; Lane 0 of the 4 x i16 %v is splatted, multiplied with %b and added to %a;
; expected: one by-element mla on .4h using h[0].
define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16_0:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
1508
; Lane 0 of the 4 x i16 %v is splatted to eight lanes, multiplied with %b and
; added to %a; expected: one by-element mla on .8h using h[0].
define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16_0:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
1519
; Lane 0 of the 2 x i32 %v is splatted, multiplied with %b and added to %a;
; expected: one by-element mla on .2s using s[0].
define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32_0:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
1530
; Lane 0 of the 2 x i32 %v is splatted to four lanes, multiplied with %b and
; added to %a; expected: one by-element mla on .4s using s[0].
define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32_0:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
1541
; Lane 0 of the 8 x i16 %v is splatted to four lanes, multiplied with %b and
; added to %a; expected: one by-element mla on .4h using h[0].
define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16_0:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}
1552
; Lane 0 of the 8 x i16 %v is splatted to all eight lanes, multiplied with %b
; and added to %a; expected: one by-element mla on .8h using h[0].
define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16_0:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}
1563
; Lane 0 of the 4 x i32 %v is splatted to two lanes, multiplied with %b and
; added to %a; expected: one by-element mla on .2s using s[0].
define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32_0:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}
1574
; Lane 0 of the 4 x i32 %v is splatted to all four lanes, multiplied with %b
; and added to %a; expected: one by-element mla on .4s using s[0].
define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32_0:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
1585
; Lane 0 of the 4 x i16 %v is splatted, multiplied with %b and the product is
; subtracted from %a; expected: one by-element mls on .4h using h[0].
define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16_0:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
1596
; Lane 0 of the 4 x i16 %v is splatted to eight lanes, multiplied with %b and
; the product is subtracted from %a; expected: one by-element mls on .8h.
define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16_0:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1607
; Lane 0 of the 2 x i32 %v is splatted, multiplied with %b and the product is
; subtracted from %a; expected: one by-element mls on .2s using s[0].
define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32_0:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
1618
; Lane 0 of the 2 x i32 %v is splatted to four lanes, multiplied with %b and
; the product is subtracted from %a; expected: one by-element mls on .4s.
define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32_0:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1629
; Lane 0 of the 8 x i16 %v is splatted to four lanes, multiplied with %b and
; the product is subtracted from %a; expected: one by-element mls on .4h.
define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16_0:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}
1640
; Lane 0 of the 8 x i16 %v is splatted to all eight lanes, multiplied with %b
; and the product is subtracted from %a; expected: one by-element mls on .8h.
define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16_0:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}
1651
; Lane 0 of the 4 x i32 %v is splatted to two lanes, multiplied with %b and
; the product is subtracted from %a; expected: one by-element mls on .2s.
define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %b
  %diff = sub <2 x i32> %a, %prod
  ret <2 x i32> %diff
}
1662
; Lane 0 of the 4 x i32 %v is splatted to all four lanes, multiplied with %b
; and the product is subtracted from %a; expected: one by-element mls on .4s.
define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %b
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}
1673
; Lane 0 of the 4 x i16 %v is splatted and multiplied with %a; expected: one
; by-element mul on .4h using h[0].
define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1683
; Lane 0 of the 4 x i16 %v is splatted to eight lanes and multiplied with %a;
; expected: one by-element mul on .8h using h[0].
define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1693
; Lane 0 of the 2 x i32 %v is splatted and multiplied with %a; expected: one
; by-element mul on .2s using s[0].
define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1703
; Lane 0 of the 2 x i32 %v is splatted to four lanes and multiplied with %a;
; expected: one by-element mul on .4s using s[0].
define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1713
; u16-named variant: lane 0 of the 4 x i16 %v is splatted and multiplied with
; %a using a plain integer mul; expected: one by-element mul on .4h.
define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1723
; u16-named variant: lane 0 of the 4 x i16 %v is splatted to eight lanes and
; multiplied with %a; expected: one by-element mul on .8h using h[0].
define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1733
; u32-named variant: lane 0 of the 2 x i32 %v is splatted and multiplied with
; %a; expected: one by-element mul on .2s using s[0].
define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1743
; u32-named variant: lane 0 of the 2 x i32 %v is splatted to four lanes and
; multiplied with %a; expected: one by-element mul on .4s using s[0].
define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1753
; Lane 0 of the 8 x i16 %v is splatted to four lanes and multiplied with %a;
; expected: one by-element mul on .4h using h[0].
define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1763
; Lane 0 of the 8 x i16 %v is splatted to all eight lanes and multiplied with
; %a; expected: one by-element mul on .8h using h[0].
define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1773
; Lane 0 of the 4 x i32 %v is splatted to two lanes and multiplied with %a;
; expected: one by-element mul on .2s using s[0].
define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1783
; Lane 0 of the 4 x i32 %v is splatted to all four lanes and multiplied with
; %a; expected: one by-element mul on .4s using s[0].
define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1793
; u16-named variant: lane 0 of the 8 x i16 %v is splatted to four lanes and
; multiplied with %a; expected: one by-element mul on .4h using h[0].
define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}
1803
; u16-named variant: lane 0 of the 8 x i16 %v is splatted to all eight lanes
; and multiplied with %a; expected: one by-element mul on .8h using h[0].
define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}
1813
; u32-named variant: lane 0 of the 4 x i32 %v is splatted to two lanes and
; multiplied with %a; expected: one by-element mul on .2s using s[0].
define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}
1823
; u32-named variant: lane 0 of the 4 x i32 %v is splatted to all four lanes
; and multiplied with %a; expected: one by-element mul on .4s using s[0].
define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}
1833
; Lane 0 of the 2 x float %v is splatted and fed to llvm.fma.v2f32 together
; with %b and the addend %a; expected: one by-element fmla on .2s using s[0].
define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 0, i32 0>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
1843
; Lane 0 of the 2 x float %v is splatted to four lanes and fed to
; llvm.fma.v4f32 with %b and the addend %a; expected: one by-element fmla on .4s.
define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
1853
; Lane 0 of the 4 x float %v is splatted to two lanes and fed to
; llvm.fma.v2f32 with %b and the addend %a; expected: one by-element fmla on .2s.
define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 0, i32 0>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
1863
; Lane 0 of the 4 x float %v is splatted to all four lanes and fed to
; llvm.fma.v4f32 with %b and the addend %a; expected: one by-element fmla on .4s.
define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
1873
; %v is negated (fsub from -0.0), lane 0 of the result is splatted and fed to
; llvm.fma.v2f32 with %b and %a; expected: one by-element fmls on .2s using s[0].
define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <2 x i32> <i32 0, i32 0>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
1884
; %v is negated (fsub from -0.0), lane 0 of the result is splatted to four
; lanes and fed to llvm.fma.v4f32; expected: one by-element fmls on .4s.
define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
1895
; The 4 x float %v is negated (fsub from -0.0), lane 0 of the result is
; splatted to two lanes and fed to llvm.fma.v2f32; expected: by-element fmls on .2s.
define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <2 x i32> <i32 0, i32 0>
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %fma
}
1906
; The 4 x float %v is negated (fsub from -0.0), lane 0 of the result is
; splatted to four lanes and fed to llvm.fma.v4f32; expected: by-element fmls on .4s.
define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %fma
}
1917
; Lane 0 of the 2 x double %v is splatted and fed to llvm.fma.v2f64 with %b
; and the addend %a; expected: one by-element fmla on .2d using d[0].
define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64_0:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fma
}
1927
; The 2 x double %v is negated (fsub from -0.0), lane 0 of the result is
; splatted and fed to llvm.fma.v2f64; expected: one by-element fmls on .2d.
define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64_0:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %splat = shufflevector <2 x double> %neg, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %fma
}
1938
; Lane 0 of the 4 x i16 %v is splatted, widened-multiplied with %b through the
; signed smull.v4i32 intrinsic and added to %a. The pattern requires the signed
; mnemonic (smlal) so that an unsigned umlal cannot satisfy this test.
define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
1949
; Lane 0 of the 2 x i32 %v is splatted, widened-multiplied with %b through the
; signed smull.v2i64 intrinsic and added to %a. The pattern requires the signed
; mnemonic (smlal) so that an unsigned umlal cannot satisfy this test.
define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
1960
; Lane 0 of the 8 x i16 %v is splatted to four lanes, widened-multiplied with
; %b through the signed smull.v4i32 intrinsic and added to %a. The pattern
; requires the signed mnemonic (smlal) so an unsigned umlal cannot match.
define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
1971
; Lane 0 of the 4 x i32 %v is splatted to two lanes, widened-multiplied with
; %b through the signed smull.v2i64 intrinsic and added to %a. The pattern
; requires the signed mnemonic (smlal) so an unsigned umlal cannot match.
define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
1982
; The upper four lanes of %b are widened-multiplied (signed smull.v4i32) with
; a splat of lane 0 of %v and added to %a. The pattern requires the signed
; high-half mnemonic (smlal2) so an unsigned umlal2 cannot match.
define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
1994
; The upper two lanes of %b are widened-multiplied (signed smull.v2i64) with a
; splat of lane 0 of %v and added to %a. The pattern requires the signed
; high-half mnemonic (smlal2) so an unsigned umlal2 cannot match.
define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
2006
; The upper four lanes of %b are widened-multiplied (signed smull.v4i32) with
; a splat of lane 0 of the 8 x i16 %v and added to %a. The pattern requires
; the signed high-half mnemonic (smlal2) so an unsigned umlal2 cannot match.
define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
2018
; The upper two lanes of %b are widened-multiplied (signed smull.v2i64) with a
; splat of lane 0 of the 4 x i32 %v and added to %a. The pattern requires the
; signed high-half mnemonic (smlal2) so an unsigned umlal2 cannot match.
define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
2030
; Lane 0 of the 4 x i16 %v is splatted, widened-multiplied with %b through the
; signed smull.v4i32 intrinsic and subtracted from %a. The pattern requires the
; signed mnemonic (smlsl) so that an unsigned umlsl cannot satisfy this test.
define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
2041
; Lane 0 of the 2 x i32 %v is splatted, widened-multiplied with %b through the
; signed smull.v2i64 intrinsic and subtracted from %a. The pattern requires the
; signed mnemonic (smlsl) so that an unsigned umlsl cannot satisfy this test.
define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
2052
; Lane 0 of the 8 x i16 %v is splatted to four lanes, widened-multiplied with
; %b through the signed smull.v4i32 intrinsic and subtracted from %a. The
; pattern requires the signed mnemonic (smlsl) so an unsigned umlsl cannot match.
define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
2063
; Lane 0 of the 4 x i32 %v is splatted to two lanes, widened-multiplied with
; %b through the signed smull.v2i64 intrinsic and subtracted from %a. The
; pattern requires the signed mnemonic (smlsl) so an unsigned umlsl cannot match.
define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
2074
; The upper four lanes of %b are widened-multiplied (signed smull.v4i32) with
; a splat of lane 0 of %v and subtracted from %a. The pattern requires the
; signed high-half mnemonic (smlsl2) so an unsigned umlsl2 cannot match.
define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
2086
; The upper two lanes of %b are widened-multiplied (signed smull.v2i64) with a
; splat of lane 0 of %v and subtracted from %a. The pattern requires the
; signed high-half mnemonic (smlsl2) so an unsigned umlsl2 cannot match.
define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
2098
; The upper four lanes of %b are widened-multiplied (signed smull.v4i32) with
; a splat of lane 0 of the 8 x i16 %v and subtracted from %a. The pattern
; requires the signed high-half mnemonic (smlsl2) so umlsl2 cannot match.
define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %splat)
  %diff = sub <4 x i32> %a, %wide
  ret <4 x i32> %diff
}
2110
; The upper two lanes of %b are widened-multiplied (signed smull.v2i64) with a
; splat of lane 0 of the 4 x i32 %v and subtracted from %a. The pattern
; requires the signed high-half mnemonic (smlsl2) so umlsl2 cannot match.
define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %splat)
  %diff = sub <2 x i64> %a, %wide
  ret <2 x i64> %diff
}
2122
; Lane 0 of the 4 x i16 %v is splatted, widened-multiplied with %b through the
; unsigned umull.v4i32 intrinsic and added to %a. The pattern requires the
; unsigned mnemonic (umlal) so that a signed smlal cannot satisfy this test.
define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
2133
; Lane 0 of the 2 x i32 %v is splatted, widened-multiplied with %b through the
; unsigned umull.v2i64 intrinsic and added to %a. The pattern requires the
; unsigned mnemonic (umlal) so that a signed smlal cannot satisfy this test.
define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
2144
; Lane 0 of the 8 x i16 %v is splatted to four lanes, widened-multiplied with
; %b through the unsigned umull.v4i32 intrinsic and added to %a. The pattern
; requires the unsigned mnemonic (umlal) so a signed smlal cannot match.
define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}
2155
; Lane 0 of the 4 x i32 %v is splatted to two lanes, widened-multiplied with
; %b through the unsigned umull.v2i64 intrinsic and added to %a. The pattern
; requires the unsigned mnemonic (umlal) so a signed smlal cannot match.
define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}
2166
; vmlal_high_lane_u16, lane 0: the upper half of %b (elements 4..7) is
; multiplied with a splat of %v[0] via the umull intrinsic and added to %a.
; The pattern names the full `umlal2` mnemonic; a bare `mlal2` would also accept
; `smlal2` or `sqdmlal2` and hide a wrong-signedness selection.
define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2178
; vmlal_high_lane_u32, lane 0: the upper half of %b (elements 2..3) is
; multiplied with a splat of %v[0] via the umull intrinsic and added to %a.
; The pattern names the full `umlal2` mnemonic; a bare `mlal2` would also accept
; `smlal2` or `sqdmlal2` and hide a wrong-signedness selection.
define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2190
; vmlal_high_laneq_u16, lane 0: the upper half of %b (elements 4..7) is
; multiplied with a splat of element 0 of the 128-bit %v via the umull
; intrinsic and added to %a.
; The pattern names the full `umlal2` mnemonic; a bare `mlal2` would also accept
; `smlal2` or `sqdmlal2` and hide a wrong-signedness selection.
define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}
2202
; vmlal_high_laneq_u32, lane 0: the upper half of %b (elements 2..3) is
; multiplied with a splat of element 0 of the 128-bit %v via the umull
; intrinsic and added to %a.
; The pattern names the full `umlal2` mnemonic; a bare `mlal2` would also accept
; `smlal2` or `sqdmlal2` and hide a wrong-signedness selection.
define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}
2214
; vmlsl_lane_u16, lane 0: element 0 of %v is splatted, multiplied with %b via
; the widening umull intrinsic, and the product is subtracted from %a.
; The pattern names the full `umlsl` mnemonic; a bare `mlsl` would also accept
; `smlsl` or `sqdmlsl` and hide a wrong-signedness selection.
define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2225
; vmlsl_lane_u32, lane 0: element 0 of %v is splatted, multiplied with %b via
; the widening umull intrinsic, and the product is subtracted from %a.
; The pattern names the full `umlsl` mnemonic; a bare `mlsl` would also accept
; `smlsl` or `sqdmlsl` and hide a wrong-signedness selection.
define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2236
; vmlsl_laneq_u16, lane 0: element 0 of the 128-bit %v is splatted to four
; lanes, multiplied with %b via the umull intrinsic, and subtracted from %a.
; The pattern names the full `umlsl` mnemonic; a bare `mlsl` would also accept
; `smlsl` or `sqdmlsl` and hide a wrong-signedness selection.
define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2247
; vmlsl_laneq_u32, lane 0: element 0 of the 128-bit %v is splatted to two
; lanes, multiplied with %b via the umull intrinsic, and subtracted from %a.
; The pattern names the full `umlsl` mnemonic; a bare `mlsl` would also accept
; `smlsl` or `sqdmlsl` and hide a wrong-signedness selection.
define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2258
; vmlsl_high_lane_u16, lane 0: the upper half of %b (elements 4..7) is
; multiplied with a splat of %v[0] via the umull intrinsic and subtracted
; from %a.
; The pattern names the full `umlsl2` mnemonic; a bare `mlsl2` would also accept
; `smlsl2` or `sqdmlsl2` and hide a wrong-signedness selection.
define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2270
; vmlsl_high_lane_u32, lane 0: the upper half of %b (elements 2..3) is
; multiplied with a splat of %v[0] via the umull intrinsic and subtracted
; from %a.
; The pattern names the full `umlsl2` mnemonic; a bare `mlsl2` would also accept
; `smlsl2` or `sqdmlsl2` and hide a wrong-signedness selection.
define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2282
; vmlsl_high_laneq_u16, lane 0: the upper half of %b (elements 4..7) is
; multiplied with a splat of element 0 of the 128-bit %v via the umull
; intrinsic and subtracted from %a.
; The pattern names the full `umlsl2` mnemonic; a bare `mlsl2` would also accept
; `smlsl2` or `sqdmlsl2` and hide a wrong-signedness selection.
define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}
2294
; vmlsl_high_laneq_u32, lane 0: the upper half of %b (elements 2..3) is
; multiplied with a splat of element 0 of the 128-bit %v via the umull
; intrinsic and subtracted from %a.
; The pattern names the full `umlsl2` mnemonic; a bare `mlsl2` would also accept
; `smlsl2` or `sqdmlsl2` and hide a wrong-signedness selection.
define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}
2306
; vmull_lane_s16, lane 0: %a is multiplied with a splat of %v[0] through the
; widening smull intrinsic.
; The pattern names the full `smull` mnemonic; a bare `mull` would also accept
; `umull` or `sqdmull`, so the signed variant was not actually verified.
define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2316
; vmull_lane_s32, lane 0: %a is multiplied with a splat of %v[0] through the
; widening smull intrinsic.
; The pattern names the full `smull` mnemonic; a bare `mull` would also accept
; `umull` or `sqdmull`, so the signed variant was not actually verified.
define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2326
; vmull_lane_u16, lane 0: %a is multiplied with a splat of %v[0] through the
; widening umull intrinsic.
; The pattern names the full `umull` mnemonic; a bare `mull` would also accept
; `smull` or `sqdmull`, so the unsigned variant was not actually verified.
define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2336
; vmull_lane_u32, lane 0: %a is multiplied with a splat of %v[0] through the
; widening umull intrinsic.
; The pattern names the full `umull` mnemonic; a bare `mull` would also accept
; `smull` or `sqdmull`, so the unsigned variant was not actually verified.
define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2346
; vmull_high_lane_s16, lane 0: the upper half of %a (elements 4..7) is
; multiplied with a splat of %v[0] through the widening smull intrinsic.
; The pattern names the full `smull2` mnemonic; a bare `mull2` would also accept
; `umull2` or `sqdmull2`, so the signed variant was not actually verified.
define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2357
; vmull_high_lane_s32, lane 0: the upper half of %a (elements 2..3) is
; multiplied with a splat of %v[0] through the widening smull intrinsic.
; The pattern names the full `smull2` mnemonic; a bare `mull2` would also accept
; `umull2` or `sqdmull2`, so the signed variant was not actually verified.
define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2368
; vmull_high_lane_u16, lane 0: the upper half of %a (elements 4..7) is
; multiplied with a splat of %v[0] through the widening umull intrinsic.
; The pattern names the full `umull2` mnemonic; a bare `mull2` would also accept
; `smull2` or `sqdmull2`, so the unsigned variant was not actually verified.
define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2379
; vmull_high_lane_u32, lane 0: the upper half of %a (elements 2..3) is
; multiplied with a splat of %v[0] through the widening umull intrinsic.
; The pattern names the full `umull2` mnemonic; a bare `mull2` would also accept
; `smull2` or `sqdmull2`, so the unsigned variant was not actually verified.
define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2390
; vmull_laneq_s16, lane 0: %a is multiplied with a four-lane splat of element 0
; of the 128-bit %v through the widening smull intrinsic.
; The pattern names the full `smull` mnemonic; a bare `mull` would also accept
; `umull` or `sqdmull`, so the signed variant was not actually verified.
define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2400
; vmull_laneq_s32, lane 0: %a is multiplied with a two-lane splat of element 0
; of the 128-bit %v through the widening smull intrinsic.
; The pattern names the full `smull` mnemonic; a bare `mull` would also accept
; `umull` or `sqdmull`, so the signed variant was not actually verified.
define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2410
; vmull_laneq_u16, lane 0: %a is multiplied with a four-lane splat of element 0
; of the 128-bit %v through the widening umull intrinsic.
; The pattern names the full `umull` mnemonic; a bare `mull` would also accept
; `smull` or `sqdmull`, so the unsigned variant was not actually verified.
define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2420
; vmull_laneq_u32, lane 0: %a is multiplied with a two-lane splat of element 0
; of the 128-bit %v through the widening umull intrinsic.
; The pattern names the full `umull` mnemonic; a bare `mull` would also accept
; `smull` or `sqdmull`, so the unsigned variant was not actually verified.
define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32_0:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2430
; vmull_high_laneq_s16, lane 0: the upper half of %a (elements 4..7) is
; multiplied with a splat of element 0 of the 128-bit %v through the widening
; smull intrinsic.
; The pattern names the full `smull2` mnemonic; a bare `mull2` would also accept
; `umull2` or `sqdmull2`, so the signed variant was not actually verified.
define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16_0:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2441
; vmull_high_laneq_s32, lane 0: the upper half of %a (elements 2..3) is
; multiplied with a splat of element 0 of the 128-bit %v through the widening
; smull intrinsic.
; The pattern names the full `smull2` mnemonic; a bare `mull2` would also accept
; `umull2` or `sqdmull2`, so the signed variant was not actually verified.
define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32_0:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2452
; vmull_high_laneq_u16, lane 0: the upper half of %a (elements 4..7) is
; multiplied with a splat of element 0 of the 128-bit %v through the widening
; umull intrinsic.
; The pattern names the full `umull2` mnemonic; a bare `mull2` would also accept
; `smull2` or `sqdmull2`, so the unsigned variant was not actually verified.
define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16_0:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}
2463
; vmull_high_laneq_u32, lane 0: the upper half of %a (elements 2..3) is
; multiplied with a splat of element 0 of the 128-bit %v through the widening
; umull intrinsic.
; The pattern names the full `umull2` mnemonic; a bare `mull2` would also accept
; `smull2` or `sqdmull2`, so the unsigned variant was not actually verified.
define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32_0:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}
2474
; vqdmlal_lane_s16, lane 0: the sqdmull intrinsic multiplies %b with a splat of
; %v[0], and the sqadd intrinsic accumulates the product into %a.
; The pattern names the full `sqdmlal` mnemonic rather than relying on a
; substring match of the truncated `qdmlal`.
define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16_0:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}
2485
; vqdmlal_lane_s32, lane 0: the sqdmull intrinsic multiplies %b with a splat of
; %v[0], and the sqadd intrinsic accumulates the product into %a.
; The pattern names the full `sqdmlal` mnemonic rather than relying on a
; substring match of the truncated `qdmlal`.
define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32_0:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}
2496
; vqdmlal_high_lane_s16, lane 0: the sqdmull intrinsic multiplies the upper half
; of %b (elements 4..7) with a splat of %v[0]; sqadd accumulates into %a.
; The pattern names the full `sqdmlal2` mnemonic rather than relying on a
; substring match of the truncated `qdmlal2`.
define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}
2508
; vqdmlal_high_lane_s32, lane 0: the sqdmull intrinsic multiplies the upper half
; of %b (elements 2..3) with a splat of %v[0]; sqadd accumulates into %a.
; The pattern names the full `sqdmlal2` mnemonic rather than relying on a
; substring match of the truncated `qdmlal2`.
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}
2520
; vqdmlsl_lane_s16, lane 0: the sqdmull intrinsic multiplies %b with a splat of
; %v[0], and the sqsub intrinsic subtracts the product from %a.
; The pattern names the full `sqdmlsl` mnemonic rather than relying on a
; substring match of the truncated `qdmlsl`.
define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}
2531
; vqdmlsl_lane_s32, lane 0: the sqdmull intrinsic multiplies %b with a splat of
; %v[0], and the sqsub intrinsic subtracts the product from %a.
; The pattern names the full `sqdmlsl` mnemonic rather than relying on a
; substring match of the truncated `qdmlsl`.
define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}
2542
; vqdmlsl_high_lane_s16, lane 0: the sqdmull intrinsic multiplies the upper half
; of %b (elements 4..7) with a splat of %v[0]; sqsub subtracts it from %a.
; The pattern names the full `sqdmlsl2` mnemonic rather than relying on a
; substring match of the truncated `qdmlsl2`.
define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}
2554
; vqdmlsl_high_lane_s32, lane 0: the sqdmull intrinsic multiplies the upper half
; of %b (elements 2..3) with a splat of %v[0]; sqsub subtracts it from %a.
; The pattern names the full `sqdmlsl2` mnemonic rather than relying on a
; substring match of the truncated `qdmlsl2`.
define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}
2566
; vqdmull_lane_s16, lane 0: %a is multiplied with a splat of %v[0] through the
; sqdmull intrinsic.
; The pattern names the full `sqdmull` mnemonic rather than relying on a
; substring match of the truncated `qdmull`.
define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16_0:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
2576
; vqdmull_lane_s32, lane 0: %a is multiplied with a splat of %v[0] through the
; sqdmull intrinsic.
; The pattern names the full `sqdmull` mnemonic rather than relying on a
; substring match of the truncated `qdmull`.
define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32_0:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
2586
; vqdmull_laneq_s16, lane 0: %a is multiplied with a four-lane splat of element
; 0 of the 128-bit %v through the sqdmull intrinsic.
; The pattern names the full `sqdmull` mnemonic rather than relying on a
; substring match of the truncated `qdmull`.
define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16_0:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
2596
; vqdmull_laneq_s32, lane 0: %a is multiplied with a two-lane splat of element
; 0 of the 128-bit %v through the sqdmull intrinsic.
; The pattern names the full `sqdmull` mnemonic rather than relying on a
; substring match of the truncated `qdmull`.
define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32_0:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
2606
; vqdmull_high_lane_s16, lane 0: the upper half of %a (elements 4..7) is
; multiplied with a splat of %v[0] through the sqdmull intrinsic.
; The pattern names the full `sqdmull2` mnemonic rather than relying on a
; substring match of the truncated `qdmull2`.
define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
2617
; vqdmull_high_lane_s32, lane 0: the upper half of %a (elements 2..3) is
; multiplied with a splat of %v[0] through the sqdmull intrinsic.
; The pattern names the full `sqdmull2` mnemonic rather than relying on a
; substring match of the truncated `qdmull2`.
define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
2628
; vqdmull_high_laneq_s16, lane 0: the upper half of %a (elements 4..7) is
; multiplied with a splat of element 0 of the 128-bit %v through the sqdmull
; intrinsic.
; The pattern names the full `sqdmull2` mnemonic rather than relying on a
; substring match of the truncated `qdmull2`.
define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}
2639
; vqdmull_high_laneq_s32, lane 0: the upper half of %a (elements 2..3) is
; multiplied with a splat of element 0 of the 128-bit %v through the sqdmull
; intrinsic.
; The pattern names the full `sqdmull2` mnemonic rather than relying on a
; substring match of the truncated `qdmull2`.
define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}
2650
; vqdmulh_lane_s16, lane 0: %a is combined with a splat of %v[0] through the
; sqdmulh intrinsic.
; The pattern names the full `sqdmulh` mnemonic rather than relying on a
; substring match of the truncated `qdmulh`.
define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16_0:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}
2660
; vqdmulhq_lane_s16, lane 0: %a is combined with an eight-lane splat of %v[0]
; through the sqdmulh intrinsic.
; The pattern names the full `sqdmulh` mnemonic rather than relying on a
; substring match of the truncated `qdmulh`.
define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}
2670
; vqdmulh_lane_s32, lane 0: %a is combined with a splat of %v[0] through the
; sqdmulh intrinsic.
; The pattern names the full `sqdmulh` mnemonic rather than relying on a
; substring match of the truncated `qdmulh`.
define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32_0:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}
2680
; vqdmulhq_lane_s32, lane 0: %a is combined with a four-lane splat of %v[0]
; through the sqdmulh intrinsic.
; The pattern names the full `sqdmulh` mnemonic rather than relying on a
; substring match of the truncated `qdmulh`.
define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}
2690
; vqrdmulh_lane_s16, lane 0: %a is combined with a splat of %v[0] through the
; sqrdmulh intrinsic.
; The pattern names the full `sqrdmulh` mnemonic rather than relying on a
; substring match of the truncated `qrdmulh`.
define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}
2700
; vqrdmulhq_lane_s16, lane 0: %a is combined with an eight-lane splat of %v[0]
; through the sqrdmulh intrinsic.
; The pattern names the full `sqrdmulh` mnemonic rather than relying on a
; substring match of the truncated `qrdmulh`.
define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}
2710
; vqrdmulh_lane_s32, lane 0: %a is combined with a splat of %v[0] through the
; sqrdmulh intrinsic.
; The pattern names the full `sqrdmulh` mnemonic rather than relying on a
; substring match of the truncated `qrdmulh`.
define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}
2720
; vqrdmulhq_lane_s32, lane 0: %a is combined with a four-lane splat of %v[0]
; through the sqrdmulh intrinsic.
; The pattern names the full `sqrdmulh` mnemonic rather than relying on a
; substring match of the truncated `qrdmulh`.
define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}
2730
; vmul_lane_f32, lane 0: element 0 of %v is broadcast to both lanes and
; multiplied with %a; a single by-element fmul followed by ret is expected.
define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane0.splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %product = fmul <2 x float> %lane0.splat, %a
  ret <2 x float> %product
}
2740
; vmulq_lane_f32, lane 0: element 0 of the 64-bit %v is broadcast to four lanes
; and multiplied with %a; a single by-element fmul followed by ret is expected.
define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane0.splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %product = fmul <4 x float> %lane0.splat, %a
  ret <4 x float> %product
}
2750
; vmul_laneq_f32, lane 0: element 0 of the 128-bit %v is broadcast to two lanes
; and multiplied with %a; a single by-element fmul followed by ret is expected.
define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane0.splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %product = fmul <2 x float> %lane0.splat, %a
  ret <2 x float> %product
}
2760
; vmul_laneq_f64, lane 0: %a is reinterpreted as a scalar double (through an
; <8 x i8> bitcast), multiplied with element 0 of %v, and the scalar product is
; re-wrapped in a <1 x double>. The scalar by-element fmul form is expected.
define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64_0:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %a.bytes = bitcast <1 x double> %a to <8 x i8>
  %a.scalar = bitcast <8 x i8> %a.bytes to double
  %v.lane0 = extractelement <2 x double> %v, i32 0
  %product = fmul double %a.scalar, %v.lane0
  %result = insertelement <1 x double> undef, double %product, i32 0
  ret <1 x double> %result
}
2773
; vmulq_laneq_f32, lane 0: element 0 of %v is broadcast to all four lanes and
; multiplied with %a; a single by-element fmul followed by ret is expected.
define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane0.splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %product = fmul <4 x float> %lane0.splat, %a
  ret <4 x float> %product
}
2783
; vmulq_laneq_f64, lane 0: element 0 of %v is broadcast to both lanes and
; multiplied with %a; a single by-element fmul followed by ret is expected.
define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64_0:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %lane0.splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %product = fmul <2 x double> %lane0.splat, %a
  ret <2 x double> %product
}
2793
; vmulx_lane_f32, lane 0: %a is combined with a splat of %v[0] through the
; fmulx intrinsic.
; The pattern names the full `fmulx` mnemonic, consistent with the complete
; `fmul` patterns in this file, instead of the truncated `mulx`.
define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}
2803
; vmulxq_lane_f32, lane 0: %a is combined with a four-lane splat of element 0
; of the 64-bit %v through the fmulx intrinsic.
; The pattern names the full `fmulx` mnemonic, consistent with the complete
; `fmul` patterns in this file, instead of the truncated `mulx`.
define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}
2813
; vmulxq_lane_f64, lane 0: the single element of %v is splatted to two lanes
; and combined with %a through the fmulx intrinsic.
; The pattern names the full `fmulx` mnemonic, consistent with the complete
; `fmul` patterns in this file, instead of the truncated `mulx`.
define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}
2823
; vmulx_laneq_f32, lane 0: %a is combined with a two-lane splat of element 0 of
; the 128-bit %v through the fmulx intrinsic.
; The pattern names the full `fmulx` mnemonic, consistent with the complete
; `fmul` patterns in this file, instead of the truncated `mulx`.
define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}
2833
; vmulxq_laneq_f32, lane 0: %a is combined with a splat of %v[0] through the
; fmulx intrinsic.
; The pattern names the full `fmulx` mnemonic, consistent with the complete
; `fmul` patterns in this file, instead of the truncated `mulx`.
define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}
2843
; vmulxq_laneq_f64, lane 0: %a is combined with a splat of %v[0] through the
; fmulx intrinsic.
; The pattern names the full `fmulx` mnemonic, consistent with the complete
; `fmul` patterns in this file, instead of the truncated `mulx`.
define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}
2853
2854