; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; ANDV

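; The i8 element type is promoted to i16, so the reduction is performed as an
; ANDV on .h elements; the narrow result is read back through fmov.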
define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
; CHECK-LABEL: andv_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    andv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
  ret i8 %res
}

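; nxv8i32 is wider than one SVE register. The two halves are combined with an
; unpredicated AND (legal at .d for any bitwise op) before a single ANDV.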
define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: andv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    andv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; ORV

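; nxv2i32 has its elements promoted to i64, so ORV runs on .d lanes; the kill
; comment records the implicit truncation of the x0 result back to w0.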
define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: orv_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

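; Four-register input: unpredicated ORRs fold the operands into one register
; before a single ORV performs the in-register reduction.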
define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: orv_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z1.d, z1.d, z3.d
; CHECK-NEXT:    orr z0.d, z0.d, z2.d
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

; XORV

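; As with orv_nxv2i32, the i16 elements are promoted to i64, so the EORV is
; performed on .d lanes and the result truncated back to i16.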
define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
; CHECK-LABEL: xorv_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    eorv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
  ret i16 %res
}

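; Split type: EOR the two halves, then one EORV on .s elements.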
define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: xorv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    eorv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UADDV

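; UADDV always accumulates into a 64-bit scalar in a D register, hence the
; fmov from d0 and the truncation of x0 back to the narrow result type.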
define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: uaddv_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
  ret i16 %res
}

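; Split type: ADD the two halves first, then a single UADDV.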
define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: uaddv_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

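; Four-register input: pairwise ADDs reduce to one register before the UADDV.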
define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-LABEL: uaddv_nxv16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z1.s, z1.s, z3.s
; CHECK-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
  ret i32 %res
}

; UMINV

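; For unsigned min the promoted i32 elements must be zero-extended first (the
; AND with #0xffffffff) so stale high bits cannot yield a spuriously small
; element.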
define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: umin_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

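; Split type: a predicated UMIN combines the halves before the UMINV.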
define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: umin_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
  ret i64 %res
}

; SMINV

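; For signed min the promoted i8 elements must be sign-extended (SXTB) so the
; comparison sees correctly signed .s values.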
define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
; CHECK-LABEL: smin_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
  ret i8 %res
}

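; Split type: predicated SMIN on the halves, then SMINV.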
define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: smin_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UMAXV

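; Split type: predicated UMAX combines the halves, then a single UMAXV.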
define i16 @umax_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: umax_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

; SMAXV

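; Four-register input: pairwise predicated SMAXs reduce to one register before
; the SMAXV.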
define i64 @smax_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: smax_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)

declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)