1; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
; Generic fused multiply-add, used by the vfma/vfms tests.
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

; Widening multiplies (signed / unsigned).
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

; Saturating doubling widening multiply.
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

; Saturating add / subtract, combined with sqdmull by the vqdmlal/vqdmlsl tests.
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
26
; Signed widening multiply of the upper four i16 lanes of %a by the scalar %b
; broadcast to every lane. Expected lowering: DUP from w0, then SMULL2 that
; consumes the duplicated register.
define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vmull_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %a.
  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Build the splat of %b one lane at a time.
  %splat0 = insertelement <4 x i16> undef, i16 %b, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %b, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %b, i32 2
  %splat = insertelement <4 x i16> %splat2, i16 %b, i32 3
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ret <4 x i32> %prod
}
40
; Signed widening multiply of the upper two i32 lanes of %a by the scalar %b
; broadcast to both lanes. Expected lowering: DUP from w0, then SMULL2.
define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vmull_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %a.
  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat0 = insertelement <2 x i32> undef, i32 %b, i32 0
  %splat = insertelement <2 x i32> %splat0, i32 %b, i32 1
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ret <2 x i64> %prod
}
52
; Unsigned widening multiply of the upper four i16 lanes of %a by the scalar
; %b broadcast to every lane. Expected lowering: DUP from w0, then UMULL2.
define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vmull_high_n_u16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %a.
  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat0 = insertelement <4 x i16> undef, i16 %b, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %b, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %b, i32 2
  %splat = insertelement <4 x i16> %splat2, i16 %b, i32 3
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ret <4 x i32> %prod
}
66
; Unsigned widening multiply of the upper two i32 lanes of %a by the scalar
; %b broadcast to both lanes. Expected lowering: DUP from w0, then UMULL2.
define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vmull_high_n_u32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %a.
  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat0 = insertelement <2 x i32> undef, i32 %b, i32 0
  %splat = insertelement <2 x i32> %splat0, i32 %b, i32 1
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ret <2 x i64> %prod
}
78
; Saturating doubling widening multiply of the upper four i16 lanes of %a by
; the scalar %b broadcast to every lane. Expected lowering: DUP from w0, then
; SQDMULL2.
define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vqdmull_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %a.
  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat0 = insertelement <4 x i16> undef, i16 %b, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %b, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %b, i32 2
  %splat = insertelement <4 x i16> %splat2, i16 %b, i32 3
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ret <4 x i32> %prod
}
92
; Saturating doubling widening multiply of the upper two i32 lanes of %a by
; the scalar %b broadcast to both lanes. Expected lowering: DUP from w0, then
; SQDMULL2.
define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqdmull_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %a.
  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat0 = insertelement <2 x i32> undef, i32 %b, i32 0
  %splat = insertelement <2 x i32> %splat0, i32 %b, i32 1
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ret <2 x i64> %prod
}
104
; Signed widening multiply-accumulate: the upper four i16 lanes of %b times
; the splat of %c, added to %a. Expected lowering: DUP from w0, then SMLAL2.
define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlal_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %b.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat0 = insertelement <4 x i16> undef, i16 %c, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %c, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %c, i32 2
  %splat = insertelement <4 x i16> %splat2, i16 %c, i32 3
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ; Plain integer add of the product and the accumulator.
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
119
; Signed widening multiply-accumulate: the upper two i32 lanes of %b times the
; splat of %c, added to %a. Expected lowering: DUP from w0, then SMLAL2.
define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlal_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %b.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat0 = insertelement <2 x i32> undef, i32 %c, i32 0
  %splat = insertelement <2 x i32> %splat0, i32 %c, i32 1
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ; Plain integer add of the product and the accumulator.
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}
132
; Unsigned widening multiply-accumulate: the upper four i16 lanes of %b times
; the splat of %c, added to %a. Expected lowering: DUP from w0, then UMLAL2.
define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlal_high_n_u16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %b.
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat0 = insertelement <4 x i16> undef, i16 %c, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %c, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %c, i32 2
  %splat = insertelement <4 x i16> %splat2, i16 %c, i32 3
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ; Plain integer add of the product and the accumulator.
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}
147
; Unsigned widening multiply-accumulate: the upper two i32 lanes of %b times
; the splat of %c, added to %a. Expected lowering: DUP from w0, then UMLAL2.
define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlal_high_n_u32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %b.
  %hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat0 = insertelement <2 x i32> undef, i32 %c, i32 0
  %splat = insertelement <2 x i32> %splat0, i32 %c, i32 1
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ; Plain integer add of the product and the accumulator.
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}
160
; Saturating doubling widening multiply-accumulate: sqdmull of the upper four
; i16 lanes of %b with the splat of %c, then a saturating add onto %a.
; The checks require the DUP from w0 and tie the multiplier operand of
; SQDMLAL2 to the duplicated register, so a wrong operand is not accepted.
define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vqdmlal_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast %c into all four lanes.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
  ret <4 x i32> %vqdmlal17.i.i
}
174
; Saturating doubling widening multiply-accumulate: sqdmull of the upper two
; i32 lanes of %b with the splat of %c, then a saturating add onto %a.
; The checks require the DUP from w0 and tie the multiplier operand of
; SQDMLAL2 to the duplicated register, so a wrong operand is not accepted.
define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vqdmlal_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast %c into both lanes.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
  ret <2 x i64> %vqdmlal11.i.i
}
186
; Signed widening multiply-subtract: the upper four i16 lanes of %b times the
; splat of %c, subtracted from %a.
; The checks require the DUP from w0 and tie the multiplier operand of SMLSL2
; to the duplicated register, so a wrong operand is not accepted.
define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlsl_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast %c into all four lanes.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}
200
; Signed widening multiply-subtract: the upper two i32 lanes of %b times the
; splat of %c, subtracted from %a.
; The checks require the DUP from w0 and tie the multiplier operand of SMLSL2
; to the duplicated register, so a wrong operand is not accepted.
define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlsl_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast %c into both lanes.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}
212
; Unsigned widening multiply-subtract: the upper four i16 lanes of %b times
; the splat of %c, subtracted from %a.
; The checks require the DUP from w0 and tie the multiplier operand of UMLSL2
; to the duplicated register, so a wrong operand is not accepted.
define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlsl_high_n_u16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast %c into all four lanes.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}
226
; Unsigned widening multiply-subtract: the upper two i32 lanes of %b times the
; splat of %c, subtracted from %a.
; The checks require the DUP from w0 and tie the multiplier operand of UMLSL2
; to the duplicated register, so a wrong operand is not accepted.
define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlsl_high_n_u32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast %c into both lanes.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}
238
; Saturating doubling widening multiply-subtract: sqdmull of the upper four
; i16 lanes of %b with the splat of %c, then a saturating subtract from %a.
; The checks require the DUP from w0 and tie the multiplier operand of
; SQDMLSL2 to the duplicated register, so a wrong operand is not accepted.
define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vqdmlsl_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 form the high half of %b.
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Broadcast %c into all four lanes.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
  ret <4 x i32> %vqdmlsl17.i.i
}
252
; Saturating doubling widening multiply-subtract: sqdmull of the upper two
; i32 lanes of %b with the splat of %c, then a saturating subtract from %a.
; The checks require the DUP from w0 and tie the multiplier operand of
; SQDMLSL2 to the duplicated register, so a wrong operand is not accepted.
define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vqdmlsl_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 form the high half of %b.
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Broadcast %c into both lanes.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
  ret <2 x i64> %vqdmlsl11.i.i
}
264
; Multiply a <2 x float> vector by the scalar %b broadcast to both lanes.
; Expected lowering: a single by-element FMUL reading lane 0, with no
; separate splat instruction required by the checks.
define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
; CHECK-LABEL: test_vmul_n_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %splat0 = insertelement <2 x float> undef, float %b, i32 0
  %splat = insertelement <2 x float> %splat0, float %b, i32 1
  ; The splat is the left operand of the multiply.
  %prod = fmul <2 x float> %splat, %a
  ret <2 x float> %prod
}
274
; Multiply a <4 x float> vector by the scalar %b broadcast to all four lanes.
; Expected lowering: a single by-element FMUL reading lane 0.
define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
; CHECK-LABEL: test_vmulq_n_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %splat0 = insertelement <4 x float> undef, float %b, i32 0
  %splat1 = insertelement <4 x float> %splat0, float %b, i32 1
  %splat2 = insertelement <4 x float> %splat1, float %b, i32 2
  %splat = insertelement <4 x float> %splat2, float %b, i32 3
  ; The splat is the left operand of the multiply.
  %prod = fmul <4 x float> %splat, %a
  ret <4 x float> %prod
}
286
; Multiply a <2 x double> vector by the scalar %b broadcast to both lanes.
; Expected lowering: a single by-element FMUL reading lane 0.
define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
; CHECK-LABEL: test_vmulq_n_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
entry:
  %splat0 = insertelement <2 x double> undef, double %b, i32 0
  %splat = insertelement <2 x double> %splat0, double %b, i32 1
  ; The splat is the left operand of the multiply.
  %prod = fmul <2 x double> %splat, %a
  ret <2 x double> %prod
}
296
; Fused multiply-add of %b and the splat of scalar %n onto accumulator %a.
; The splat is built from a scalar argument, so the by-element FMLA must read
; lane 0; the check pins that lane (as the fmul-by-scalar tests in this file
; do) instead of accepting any lane index.
define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
; CHECK-LABEL: test_vfma_n_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  ; Broadcast %n into both lanes.
  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
  ret <2 x float> %0
}
306
; Fused multiply-add of %b and the splat of scalar %n onto accumulator %a
; (four-lane form). The splat is built from a scalar argument, so the
; by-element FMLA must read lane 0; the check pins that lane instead of
; accepting any lane index.
define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
; CHECK-LABEL: test_vfmaq_n_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  ; Broadcast %n into all four lanes.
  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
  ret <4 x float> %0
}
318
; Fused multiply-subtract: %b is negated (written as -0.0 - %b) and fed to
; fma together with the splat of scalar %n and accumulator %a.
; The splat is built from a scalar argument, so the by-element FMLS must read
; lane 0; the check pins that lane instead of accepting any lane index.
define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
; CHECK-LABEL: test_vfms_n_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  ; Broadcast %n into both lanes.
  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
  ; Negation of %b expressed as a subtraction from negative zero.
  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
  %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
  ret <2 x float> %1
}
329
; Fused multiply-subtract (four-lane form): %b is negated (written as
; -0.0 - %b) and fed to fma together with the splat of scalar %n and
; accumulator %a. The splat is built from a scalar argument, so the
; by-element FMLS must read lane 0; the check pins that lane instead of
; accepting any lane index.
define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
; CHECK-LABEL: test_vfmsq_n_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  ; Broadcast %n into all four lanes.
  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
  ; Negation of %b expressed as a subtraction from negative zero.
  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
  ret <4 x float> %1
}
342