; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

; AND-reduction intrinsics over i1 vectors of 1, 2, 4, 8, 16 and 32 lanes.
; Each takes one vector of i1 and returns a single i1.
declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)

; OR-reduction intrinsics over i1 vectors of 1, 2, 4, 8, 16 and 32 lanes.
; Each takes one vector of i1 and returns a single i1.
declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)

; AND-reduce the <1 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the single byte lane is moved to a GPR with sign extension
; and compared against zero directly; no vector reduction instruction appears.
define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0 // =0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %x = icmp slt <1 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; AND-reduce the <2 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: each 32-bit lane is sign-extended from its low 8 bits
; (shl/sshr by 24), compared against zero, folded with a pairwise unsigned
; minimum, and bit 0 of the result drives the conditional select.
define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <2 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; AND-reduce the <4 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: each 16-bit lane is sign-extended from its low 8 bits
; (shl/sshr by 8), compared against zero, folded with an across-lanes unsigned
; minimum, and bit 0 of the result drives the conditional select.
define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    uminv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <4 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; AND-reduce the <8 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the eight byte lanes are compared against zero, folded
; with an across-lanes unsigned minimum, and bit 0 of the result drives the
; conditional select.
define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    uminv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <8 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; AND-reduce the <16 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the sixteen byte lanes are compared against zero, folded
; with an across-lanes unsigned minimum, and bit 0 of the result drives the
; conditional select.
define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <16 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; AND-reduce the <32 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the input occupies two 16-byte registers; each half is
; compared against zero, the two masks are combined with a vector AND, then
; folded with an across-lanes unsigned minimum, and bit 0 of the result drives
; the conditional select.
define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <32 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; OR-reduce the <1 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the single byte lane is moved to a GPR with sign extension
; and compared against zero directly; no vector reduction instruction appears.
define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0 // =0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %x = icmp slt <1 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; OR-reduce the <2 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: each 32-bit lane is sign-extended from its low 8 bits
; (shl/sshr by 24), compared against zero, folded with a pairwise unsigned
; maximum, and bit 0 of the result drives the conditional select.
define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <2 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; OR-reduce the <4 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: each 16-bit lane is sign-extended from its low 8 bits
; (shl/sshr by 8), compared against zero, folded with an across-lanes unsigned
; maximum, and bit 0 of the result drives the conditional select.
define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <4 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; OR-reduce the <8 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the eight byte lanes are compared against zero, folded
; with an across-lanes unsigned maximum, and bit 0 of the result drives the
; conditional select.
define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <8 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; OR-reduce the <16 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the sixteen byte lanes are compared against zero, folded
; with an across-lanes unsigned maximum, and bit 0 of the result drives the
; conditional select.
define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <16 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; OR-reduce the <32 x i1> mask of (signed %a0 < 0) and use the resulting i1 to
; select %a1 (true) or %a2 (false).
; Expected assembly: the input occupies two 16-byte registers; each half is
; compared against zero, the two masks are combined with a vector ORR, then
; folded with an across-lanes unsigned maximum, and bit 0 of the result drives
; the conditional select.
define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <32 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}
