1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
3
4declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
5declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
6declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
7declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
8declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
9declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
10declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
11declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
12declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
13declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
14declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
15declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
16
; Plain signed-max reduction of <16 x i8>. The checks expect r0 to be seeded
; with -128 (mvn r0, #127), the smallest i8, ahead of vmaxv.s8.
define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vmaxv_s_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mvn r0, #127
; CHECK-NEXT:    vmaxv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %max = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
  ret i8 %max
}
26
; Plain signed-max reduction of <8 x i16>. The checks expect r0 to be seeded
; with -32768 (movw #32768 + movt #65535), the smallest i16, ahead of vmaxv.s16.
define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vmaxv_s_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, #32768
; CHECK-NEXT:    movt r0, #65535
; CHECK-NEXT:    vmaxv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %max = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
  ret i16 %max
}
37
; Plain signed-max reduction of <4 x i32>. The checks expect r0 to be seeded
; with -2147483648, the smallest i32, ahead of vmaxv.s32.
define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vmaxv_s_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov.w r0, #-2147483648
; CHECK-NEXT:    vmaxv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %max = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
  ret i32 %max
}
47
; Plain unsigned-max reduction of <16 x i8>. The checks expect r0 to be
; zeroed ahead of vmaxv.u8.
define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vmaxv_u_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmaxv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %max = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
  ret i8 %max
}
57
; Plain unsigned-max reduction of <8 x i16>. The checks expect r0 to be
; zeroed ahead of vmaxv.u16.
define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vmaxv_u_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmaxv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %max = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
  ret i16 %max
}
67
; Plain unsigned-max reduction of <4 x i32>. The checks expect r0 to be
; zeroed ahead of vmaxv.u32.
define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vmaxv_u_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmaxv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %max = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
  ret i32 %max
}
77
; Plain signed-min reduction of <16 x i8>. The checks expect r0 to be seeded
; with 127, the largest i8, ahead of vminv.s8.
define arm_aapcs_vfpcc i8 @vminv_s_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vminv_s_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #127
; CHECK-NEXT:    vminv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %min = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
  ret i8 %min
}
87
; Plain signed-min reduction of <8 x i16>. The checks expect r0 to be seeded
; with 32767, the largest i16, ahead of vminv.s16.
define arm_aapcs_vfpcc i16 @vminv_s_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vminv_s_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, #32767
; CHECK-NEXT:    vminv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %min = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
  ret i16 %min
}
97
; Plain signed-min reduction of <4 x i32>. The checks expect r0 to be seeded
; with 2147483647 (mvn of #-2147483648), the largest i32, ahead of vminv.s32.
define arm_aapcs_vfpcc i32 @vminv_s_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vminv_s_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mvn r0, #-2147483648
; CHECK-NEXT:    vminv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %min = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
  ret i32 %min
}
107
; Plain unsigned-min reduction of <16 x i8>. The checks expect r0 to be seeded
; with 255, the largest unsigned i8, ahead of vminv.u8.
define arm_aapcs_vfpcc i8 @vminv_u_v16i8(<16 x i8> %s1) {
; CHECK-LABEL: vminv_u_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r0, #255
; CHECK-NEXT:    vminv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %min = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
  ret i8 %min
}
117
; Plain unsigned-min reduction of <8 x i16>. The checks expect r0 to be seeded
; with 65535, the largest unsigned i16, ahead of vminv.u16.
define arm_aapcs_vfpcc i16 @vminv_u_v8i16(<8 x i16> %s1) {
; CHECK-LABEL: vminv_u_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r0, #65535
; CHECK-NEXT:    vminv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %min = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
  ret i16 %min
}
127
; Plain unsigned-min reduction of <4 x i32>. The checks expect r0 to be seeded
; with all ones (mov.w r0, #-1), the largest unsigned i32, ahead of vminv.u32.
define arm_aapcs_vfpcc i32 @vminv_u_v4i32(<4 x i32> %s1) {
; CHECK-LABEL: vminv_u_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov.w r0, #-1
; CHECK-NEXT:    vminv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %min = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
  ret i32 %min
}
137
138
139
; Signed max of an i8 scalar and the smax reduction of <16 x i8>, written as
; icmp sgt + select. The checks expect a lone vmaxv.s8 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vmaxv_s_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
  %red.gt = icmp sgt i8 %red, %s2
  %res = select i1 %red.gt, i8 %red, i8 %s2
  ret i8 %res
}
150
; smax reduction of <16 x i8>, sign-extended to i32 and then max'ed against an
; i32 scalar. The checks expect the reduction to keep its own -128 seed in r1,
; followed by an explicit sxtb / cmp / csel gt against r0.
define arm_aapcs_vfpcc i32 @vmaxv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_s_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mvn r1, #127
; CHECK-NEXT:    vmaxv.s8 r1, q0
; CHECK-NEXT:    sxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, gt
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
  %wide = sext i8 %red to i32
  %wide.gt = icmp sgt i32 %wide, %s2
  %res = select i1 %wide.gt, i32 %wide, i32 %s2
  ret i32 %res
}
166
; Signed max of an i16 scalar and the smax reduction of <8 x i16>, written as
; icmp sgt + select. The checks expect a lone vmaxv.s16 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vmaxv_s_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
  %red.gt = icmp sgt i16 %red, %s2
  %res = select i1 %red.gt, i16 %red, i16 %s2
  ret i16 %res
}
177
; smax reduction of <8 x i16>, sign-extended to i32 and then max'ed against an
; i32 scalar. The checks expect the reduction to keep its own -32768 seed in
; r1, followed by an explicit sxth / cmp / csel gt against r0.
define arm_aapcs_vfpcc i32 @vmaxv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_s_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r1, #32768
; CHECK-NEXT:    movt r1, #65535
; CHECK-NEXT:    vmaxv.s16 r1, q0
; CHECK-NEXT:    sxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, gt
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
  %wide = sext i16 %red to i32
  %wide.gt = icmp sgt i32 %wide, %s2
  %res = select i1 %wide.gt, i32 %wide, i32 %s2
  ret i32 %res
}
194
; Signed max of an i32 scalar and the smax reduction of <4 x i32>, written as
; icmp sgt + select. The checks expect a lone vmaxv.s32 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_s_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
  %red.gt = icmp sgt i32 %red, %s2
  %res = select i1 %red.gt, i32 %red, i32 %s2
  ret i32 %res
}
205
; Unsigned max of an i8 scalar and the umax reduction of <16 x i8>, written as
; icmp ugt + select. The checks expect a lone vmaxv.u8 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vmaxv_u_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
  %red.hi = icmp ugt i8 %red, %s2
  %res = select i1 %red.hi, i8 %red, i8 %s2
  ret i8 %res
}
216
; umax reduction of <16 x i8>, zero-extended to i32 and then max'ed against an
; i32 scalar. The checks expect the reduction to keep its own zero seed in r1,
; followed by an explicit uxtb / cmp / csel hi against r0.
define arm_aapcs_vfpcc i32 @vmaxv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_u_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vmaxv.u8 r1, q0
; CHECK-NEXT:    uxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, hi
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
  %wide = zext i8 %red to i32
  %wide.hi = icmp ugt i32 %wide, %s2
  %res = select i1 %wide.hi, i32 %wide, i32 %s2
  ret i32 %res
}
232
; Unsigned max of an i16 scalar and the umax reduction of <8 x i16>, written as
; icmp ugt + select. The checks expect a lone vmaxv.u16 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vmaxv_u_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
  %red.hi = icmp ugt i16 %red, %s2
  %res = select i1 %red.hi, i16 %red, i16 %s2
  ret i16 %res
}
243
; umax reduction of <8 x i16>, zero-extended to i32 and then max'ed against an
; i32 scalar. The checks expect the reduction to keep its own zero seed in r1,
; followed by an explicit uxth / cmp / csel hi against r0.
define arm_aapcs_vfpcc i32 @vmaxv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_u_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vmaxv.u16 r1, q0
; CHECK-NEXT:    uxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, hi
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
  %wide = zext i16 %red to i32
  %wide.hi = icmp ugt i32 %wide, %s2
  %res = select i1 %wide.hi, i32 %wide, i32 %s2
  ret i32 %res
}
259
; Unsigned max of an i32 scalar and the umax reduction of <4 x i32>, written as
; icmp ugt + select. The checks expect a lone vmaxv.u32 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vmaxv_u_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmaxv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
  %red.hi = icmp ugt i32 %red, %s2
  %res = select i1 %red.hi, i32 %red, i32 %s2
  ret i32 %res
}
270
; Signed min of an i8 scalar and the smin reduction of <16 x i8>, written as
; icmp slt + select. The checks expect a lone vminv.s8 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vminv_s_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.s8 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
  %red.lt = icmp slt i8 %red, %s2
  %res = select i1 %red.lt, i8 %red, i8 %s2
  ret i8 %res
}
281
; smin reduction of <16 x i8>, sign-extended to i32 and then min'ed against an
; i32 scalar. The checks expect the reduction to keep its own 127 seed in r1,
; followed by an explicit sxtb / cmp / csel lt against r0.
define arm_aapcs_vfpcc i32 @vminv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vminv_s_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #127
; CHECK-NEXT:    vminv.s8 r1, q0
; CHECK-NEXT:    sxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lt
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
  %wide = sext i8 %red to i32
  %wide.lt = icmp slt i32 %wide, %s2
  %res = select i1 %wide.lt, i32 %wide, i32 %s2
  ret i32 %res
}
297
; Signed min of an i16 scalar and the smin reduction of <8 x i16>, written as
; icmp slt + select. The checks expect a lone vminv.s16 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vminv_s_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.s16 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
  %red.lt = icmp slt i16 %red, %s2
  %res = select i1 %red.lt, i16 %red, i16 %s2
  ret i16 %res
}
308
; smin reduction of <8 x i16>, sign-extended to i32 and then min'ed against an
; i32 scalar. The checks expect the reduction to keep its own 32767 seed in r1,
; followed by an explicit sxth / cmp / csel lt against r0.
define arm_aapcs_vfpcc i32 @vminv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vminv_s_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r1, #32767
; CHECK-NEXT:    vminv.s16 r1, q0
; CHECK-NEXT:    sxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lt
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
  %wide = sext i16 %red to i32
  %wide.lt = icmp slt i32 %wide, %s2
  %res = select i1 %wide.lt, i32 %wide, i32 %s2
  ret i32 %res
}
324
; Signed min of an i32 scalar and the smin reduction of <4 x i32>, written as
; icmp slt + select. The checks expect a lone vminv.s32 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vminv_s_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.s32 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
  %red.lt = icmp slt i32 %red, %s2
  %res = select i1 %red.lt, i32 %red, i32 %s2
  ret i32 %res
}
335
; Unsigned min of an i8 scalar and the umin reduction of <16 x i8>, written as
; icmp ult + select. The checks expect a lone vminv.u8 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) {
; CHECK-LABEL: vminv_u_v16i8_i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.u8 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
  %red.lo = icmp ult i8 %red, %s2
  %res = select i1 %red.lo, i8 %red, i8 %s2
  ret i8 %res
}
346
; umin reduction of <16 x i8>, zero-extended to i32 and then min'ed against an
; i32 scalar. The checks expect the reduction to keep its own 255 seed in r1,
; followed by an explicit uxtb / cmp / csel lo against r0.
define arm_aapcs_vfpcc i32 @vminv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) {
; CHECK-LABEL: vminv_u_v16i8_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #255
; CHECK-NEXT:    vminv.u8 r1, q0
; CHECK-NEXT:    uxtb r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lo
; CHECK-NEXT:    bx lr
  %red = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
  %wide = zext i8 %red to i32
  %wide.lo = icmp ult i32 %wide, %s2
  %res = select i1 %wide.lo, i32 %wide, i32 %s2
  ret i32 %res
}
362
; Unsigned min of an i16 scalar and the umin reduction of <8 x i16>, written as
; icmp ult + select. The checks expect a lone vminv.u16 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) {
; CHECK-LABEL: vminv_u_v8i16_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.u16 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
  %red.lo = icmp ult i16 %red, %s2
  %res = select i1 %red.lo, i16 %red, i16 %s2
  ret i16 %res
}
373
; umin reduction of <8 x i16>, zero-extended to i32 and then min'ed against an
; i32 scalar. The checks expect the reduction to keep its own 65535 seed in r1,
; followed by an explicit uxth / cmp / csel lo against r0.
define arm_aapcs_vfpcc i32 @vminv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) {
; CHECK-LABEL: vminv_u_v8i16_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movw r1, #65535
; CHECK-NEXT:    vminv.u16 r1, q0
; CHECK-NEXT:    uxth r1, r1
; CHECK-NEXT:    cmp r1, r0
; CHECK-NEXT:    csel r0, r1, r0, lo
; CHECK-NEXT:    bx lr
  %red = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
  %wide = zext i16 %red to i32
  %wide.lo = icmp ult i32 %wide, %s2
  %res = select i1 %wide.lo, i32 %wide, i32 %s2
  ret i32 %res
}
389
; Unsigned min of an i32 scalar and the umin reduction of <4 x i32>, written as
; icmp ult + select. The checks expect a lone vminv.u32 on r0: no seed constant
; and no separate compare/select.
define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) {
; CHECK-LABEL: vminv_u_v4i32_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vminv.u32 r0, q0
; CHECK-NEXT:    bx lr
  %red = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
  %red.lo = icmp ult i32 %red, %s2
  %res = select i1 %red.lo, i32 %red, i32 %s2
  ret i32 %res
}
400