; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

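; Checks that the AArch64 across-lanes reduction intrinsics
; (llvm.aarch64.neon.{s,u}addv, {s,u}addlv, {s,u}maxv, {s,u}minv and the
; floating-point fmaxv/fminv/fmaxnmv/fminnmv) are selected to the matching
; ADDV, SADDLV/UADDLV, SMAXV/UMAXV, SMINV/UMINV and FMAXV/FMINV/FMAXNMV/FMINNMV
; instructions.
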
declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)

declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)

declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

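; vaddlv/vaddlvq: the widening add-across-lanes intrinsics should select
; SADDLV/UADDLV with the arrangement matching the source vector; narrow
; results are truncated from the i32 intrinsic return value.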
define i16 @test_vaddlv_s8(<8 x i8> %a) {
; CHECK: test_vaddlv_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_s16(<4 x i16> %a) {
; CHECK: test_vaddlv_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %saddlvv.i
}

define i16 @test_vaddlv_u8(<8 x i8> %a) {
; CHECK: test_vaddlv_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_u16(<4 x i16> %a) {
; CHECK: test_vaddlv_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %uaddlvv.i
}

define i16 @test_vaddlvq_s8(<16 x i8> %a) {
; CHECK: test_vaddlvq_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_s16(<8 x i16> %a) {
; CHECK: test_vaddlvq_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %saddlvv.i
}

define i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK: test_vaddlvq_s32:
; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %saddlvv.i
}

define i16 @test_vaddlvq_u8(<16 x i8> %a) {
; CHECK: test_vaddlvq_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_u16(<8 x i16> %a) {
; CHECK: test_vaddlvq_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %uaddlvv.i
}

define i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK: test_vaddlvq_u32:
; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %uaddlvv.i
}

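; vmaxv/vmaxvq: signed/unsigned maximum across all lanes should select
; SMAXV/UMAXV.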
define i8 @test_vmaxv_s8(<8 x i8> %a) {
; CHECK: test_vmaxv_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_s16(<4 x i16> %a) {
; CHECK: test_vmaxv_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxv_u8(<8 x i8> %a) {
; CHECK: test_vmaxv_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_u16(<4 x i16> %a) {
; CHECK: test_vmaxv_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxvq_s8(<16 x i8> %a) {
; CHECK: test_vmaxvq_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_s16(<8 x i16> %a) {
; CHECK: test_vmaxvq_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a) {
; CHECK: test_vmaxvq_s32:
; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %smaxv.i
}

define i8 @test_vmaxvq_u8(<16 x i8> %a) {
; CHECK: test_vmaxvq_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_u16(<8 x i16> %a) {
; CHECK: test_vmaxvq_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_u32(<4 x i32> %a) {
; CHECK: test_vmaxvq_u32:
; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %umaxv.i
}

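; vminv/vminvq: signed/unsigned minimum across all lanes should select
; SMINV/UMINV.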
define i8 @test_vminv_s8(<8 x i8> %a) {
; CHECK: test_vminv_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_s16(<4 x i16> %a) {
; CHECK: test_vminv_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i8 @test_vminv_u8(<8 x i8> %a) {
; CHECK: test_vminv_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_u16(<4 x i16> %a) {
; CHECK: test_vminv_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i8 @test_vminvq_s8(<16 x i8> %a) {
; CHECK: test_vminvq_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_s16(<8 x i16> %a) {
; CHECK: test_vminvq_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_s32(<4 x i32> %a) {
; CHECK: test_vminvq_s32:
; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a)
  ret i32 %sminv.i
}

define i8 @test_vminvq_u8(<16 x i8> %a) {
; CHECK: test_vminvq_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_u16(<8 x i16> %a) {
; CHECK: test_vminvq_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_u32(<4 x i32> %a) {
; CHECK: test_vminvq_u32:
; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a)
  ret i32 %uminv.i
}

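; vaddv/vaddvq: add across all lanes. ADDV has no signed/unsigned form, so
; both the signed and unsigned variants below go through the saddv intrinsic
; and should select ADDV.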
define i8 @test_vaddv_s8(<8 x i8> %a) {
; CHECK: test_vaddv_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_s16(<4 x i16> %a) {
; CHECK: test_vaddv_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddv_u8(<8 x i8> %a) {
; CHECK: test_vaddv_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_u16(<4 x i16> %a) {
; CHECK: test_vaddv_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK: test_vaddvq_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK: test_vaddvq_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK: test_vaddvq_s32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

define i8 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK: test_vaddvq_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK: test_vaddvq_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK: test_vaddvq_u32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

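; Floating-point reductions on <4 x float> should select FMAXV/FMINV and the
; maxNum/minNum variants FMAXNMV/FMINNMV.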
define float @test_vmaxvq_f32(<4 x float> %a) {
; CHECK: test_vmaxvq_f32:
; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminvq_f32(<4 x float> %a) {
; CHECK: test_vminvq_f32:
; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vmaxnmvq_f32(<4 x float> %a) {
; CHECK: test_vmaxnmvq_f32:
; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminnmvq_f32(<4 x float> %a) {
; CHECK: test_vminnmvq_f32:
; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}