; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -disable-expand-reductions -stop-after=irtranslator %s -o - | FileCheck %s

declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)

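; Without fast-math flags the fadd reduction must stay ordered, so it is
; translated to G_VECREDUCE_SEQ_FADD with the start value as an explicit operand.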
define float @fadd_seq(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}

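; With reassoc the reduction may be reassociated: it becomes an unordered
; G_VECREDUCE_FADD followed by a G_FADD with the start value, and the flag is
; propagated to both instructions.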
define float @fadd_fast(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[BITCAST]](<4 x s32>)
  ; CHECK:   [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
  ; CHECK:   $s0 = COPY [[FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}

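; Ordered fmul reduction over <4 x double>: the vector arrives split across
; $q1/$q2 and is reassembled with G_CONCAT_VECTORS before feeding
; G_VECREDUCE_SEQ_FMUL.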
define double @fmul_seq(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}

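; Reassociable fmul reduction: unordered G_VECREDUCE_FMUL plus a trailing
; G_FMUL with the start value.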
define double @fmul_fast(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
  ; CHECK:   $d0 = COPY [[FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call reassoc double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)

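; fmax/fmin reductions take no start value and map directly onto
; G_VECREDUCE_FMAX/G_VECREDUCE_FMIN.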
define float @fmax(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %vec)
  ret float %res
}

define float @fmin(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}

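; Fast-math flags on the call (here nnan) are propagated to the generic
; reduction instruction.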
define float @fmin_nnan(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin_nnan
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}

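; Integer reductions take a single vector operand and translate 1:1 to the
; corresponding G_VECREDUCE_* opcode.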
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

define i32 @add(<4 x i32> %vec) {
  ; CHECK-LABEL: name: add
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_ADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)

define i32 @mul(<4 x i32> %vec) {
  ; CHECK-LABEL: name: mul
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_MUL]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)

define i32 @and(<4 x i32> %vec) {
  ; CHECK-LABEL: name: and
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_AND]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)

define i32 @or(<4 x i32> %vec) {
  ; CHECK-LABEL: name: or
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_OR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)

define i32 @xor(<4 x i32> %vec) {
  ; CHECK-LABEL: name: xor
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_XOR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)

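; Signed and unsigned min/max reductions likewise translate directly to their
; G_VECREDUCE_* counterparts.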
define i32 @smax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @smin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @umax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_UMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @umin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_UMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec)
  ret i32 %res
}