1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
3; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
4
5; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
6; WARN-NOT: warning
7
8; ANDV
9
; i8 elements are promoted to i16 lanes, so the reduction runs as ANDV on
; z0.h under an all-true .h predicate; the scalar comes back via fmov.
define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
; CHECK-LABEL: andv_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    andv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
  ret i8 %res
}
20
; nxv8i32 spans two Z registers: a single vector AND combines the halves,
; then one ANDV reduces the combined vector.
define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: andv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    andv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}
32
33; ORV
34
; i32 elements promoted to i64 lanes; ORV reduces on .d and the i32 result
; is taken from the low half of x0 (hence the kill of the w0 super-reg use).
define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: orv_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}
46
; nxv8i64 spans four Z registers: a tree of vector ORRs combines them,
; then one ORV performs the final reduction.
define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: orv_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z1.d, z1.d, z3.d
; CHECK-NEXT:    orr z0.d, z0.d, z2.d
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}
60
61; XORV
62
; i16 elements promoted to i64 lanes; EORV reduces on .d and the i16 result
; is read from the low bits of x0.
define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
; CHECK-LABEL: xorv_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    eorv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
  ret i16 %res
}
74
; nxv8i32 spans two Z registers: one vector EOR combines the halves,
; then a single EORV reduces the result.
define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: xorv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    eorv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}
86
87; UADDV
88
; i16 elements promoted to i32 lanes; UADDV always produces a D-register
; result, so the i16 value is read back from the low bits of x0.
define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: uaddv_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
  ret i16 %res
}
100
; nxv16i16 spans two Z registers: one vector ADD combines the halves,
; then a single UADDV (D-register result) performs the reduction.
define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: uaddv_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}
113
; nxv16i32 spans four Z registers: a tree of vector ADDs combines them,
; then one UADDV performs the final reduction.
define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-LABEL: uaddv_nxv16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z1.s, z1.s, z3.s
; CHECK-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
  ret i32 %res
}
128
129; UMINV
130
; i32 elements promoted to i64 lanes: unsigned min requires the high half
; of each lane zeroed (the AND with 0xffffffff) before UMINV on .d.
define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: umin_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}
143
; nxv4i64 spans two Z registers: a predicated vector UMIN combines the
; halves, then a single UMINV reduces the result.
define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: umin_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
  ret i64 %res
}
155
156; SMINV
157
; i8 elements promoted to i32 lanes: signed min requires sign-extension of
; each lane (sxtb) before SMINV on .s.
define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
; CHECK-LABEL: smin_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
  ret i8 %res
}
169
; nxv8i32 spans two Z registers: a predicated vector SMIN combines the
; halves, then a single SMINV reduces the result.
define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: smin_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}
181
182; UMAXV
183
; Renamed from smin_nxv16i16: this test exercises llvm.vector.reduce.umax
; (see the UMAXV section header); the old name was a copy-paste error.
; nxv16i16 spans two Z registers: one vector UMAX combines the halves,
; then a single UMAXV reduces the result.
define i16 @umax_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: umax_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}
195
196; SMAXV
197
; Renamed from smin_nxv8i64: this test exercises llvm.vector.reduce.smax
; (see the SMAXV section header); the old name was a copy-paste error.
; nxv8i64 spans four Z registers: a tree of predicated SMAXes combines
; them, then one SMAXV performs the final reduction.
define i64 @smax_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: smax_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}
211
212declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
213declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)
214
215declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
216declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)
217
218declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
219declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)
220
221declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
222declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
223declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)
224
225declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
226declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
227
228declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
229declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)
230
231declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)
232
233declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)
234