1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -instcombine -S -o - %s | FileCheck %s
3
4target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
5
6declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
7declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
8declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
9
10declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
11declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
12declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
13
14; Round-trip conversions from predicate vector to i32 back to the same
15; size of vector should be eliminated.
16
; Same-size round trip <4 x i1> -> i32 -> <4 x i1>: both calls must fold
; away, returning the original predicate unchanged.
define <4 x i1> @v2i2v_4(<4 x i1> %vin) {
; CHECK-LABEL: @v2i2v_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> [[VIN:%.*]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %int)
  ret <4 x i1> %vout
}
27
; Same-size round trip <8 x i1> -> i32 -> <8 x i1>: both calls must fold
; away, returning the original predicate unchanged.
define <8 x i1> @v2i2v_8(<8 x i1> %vin) {
; CHECK-LABEL: @v2i2v_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> [[VIN:%.*]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %int)
  ret <8 x i1> %vout
}
38
; Same-size round trip <16 x i1> -> i32 -> <16 x i1>: both calls must fold
; away, returning the original predicate unchanged.
define <16 x i1> @v2i2v_16(<16 x i1> %vin) {
; CHECK-LABEL: @v2i2v_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> [[VIN:%.*]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %int)
  ret <16 x i1> %vout
}
49
50; Conversions from a predicate vector to i32 and then to a _different_
51; size of predicate vector should be left alone.
52
; Width-changing round trip <4 x i1> -> i32 -> <16 x i1>: both calls must
; survive; the v2i call's i32 result carries a !range annotation.
define <16 x i1> @v2i2v_4_16(<4 x i1> %vin) {
; CHECK-LABEL: @v2i2v_4_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[VOUT:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[INT]])
; CHECK-NEXT:    ret <16 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %int)
  ret <16 x i1> %vout
}
65
; Width-changing round trip <8 x i1> -> i32 -> <4 x i1>: both calls must
; survive; the v2i call's i32 result carries a !range annotation.
define <4 x i1> @v2i2v_8_4(<8 x i1> %vin) {
; CHECK-LABEL: @v2i2v_8_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[INT]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %int)
  ret <4 x i1> %vout
}
78
; Width-changing round trip <16 x i1> -> i32 -> <8 x i1>: both calls must
; survive; the v2i call's i32 result carries a !range annotation.
define <8 x i1> @v2i2v_16_8(<16 x i1> %vin) {
; CHECK-LABEL: @v2i2v_16_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[VOUT:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[INT]])
; CHECK-NEXT:    ret <8 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %int)
  ret <8 x i1> %vout
}
91
92; Round-trip conversions from i32 to predicate vector back to i32
93; should be eliminated.
94
; Reverse round trip i32 -> <4 x i1> -> i32: both calls must fold away,
; returning the original integer unchanged.
define i32 @i2v2i_4(i32 %iin) {
; CHECK-LABEL: @i2v2i_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i32 [[IIN:%.*]]
;
entry:
  %vec = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %iin)
  %iout = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vec)
  ret i32 %iout
}
105
; Reverse round trip i32 -> <8 x i1> -> i32: both calls must fold away,
; returning the original integer unchanged.
define i32 @i2v2i_8(i32 %iin) {
; CHECK-LABEL: @i2v2i_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i32 [[IIN:%.*]]
;
entry:
  %vec = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %iin)
  %iout = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vec)
  ret i32 %iout
}
116
; Reverse round trip i32 -> <16 x i1> -> i32: both calls must fold away,
; returning the original integer unchanged.
define i32 @i2v2i_16(i32 %iin) {
; CHECK-LABEL: @i2v2i_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i32 [[IIN:%.*]]
;
entry:
  %vec = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %iin)
  %iout = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vec)
  ret i32 %iout
}
127
128; v2i leaves the top 16 bits clear. So a trunc/zext pair applied to
129; its output, going via i16, can be completely eliminated - but not
130; one going via i8. Similarly with other methods of clearing the top
131; bits, like bitwise and.
132
; trunc/zext through i16 after v2i: removable, because v2i's result is
; known to fit in 16 bits, so only the bare call remains.
define i32 @v2i_truncext_i16(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_truncext_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    ret i32 [[WIDE1]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %narrow = trunc i32 %wide1 to i16
  %wide2 = zext i16 %narrow to i32
  ret i32 %wide2
}
145
; trunc/zext through i8 after v2i: NOT removable (bits 8-15 may be set);
; the pair is canonicalized to a masking 'and' with 255 instead.
define i32 @v2i_truncext_i8(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_truncext_i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1]], 255
; CHECK-NEXT:    ret i32 [[WIDE2]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %narrow = trunc i32 %wide1 to i8
  %wide2 = zext i8 %narrow to i32
  ret i32 %wide2
}
159
; 'and' with 0xFFFF after v2i is a no-op (top 16 bits already clear) and
; must be removed.
define i32 @v2i_and_16(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_and_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    ret i32 [[WIDE1]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %wide2 = and i32 %wide1, 65535
  ret i32 %wide2
}
171
; 'and' with 0x7FFF after v2i clears bit 15, which is not known zero, so
; the mask must be kept.
define i32 @v2i_and_15(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_and_15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1]], 32767
; CHECK-NEXT:    ret i32 [[WIDE2]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %wide2 = and i32 %wide1, 32767
  ret i32 %wide2
}
184
185; i2v doesn't use the top bits of its input. So the same operations
186; on a value that's about to be passed to i2v can be eliminated.
187
; trunc/zext through i16 before i2v is removable: i2v only demands the
; low 16 bits of its operand.
define <4 x i1> @i2v_truncext_i16(i32 %wide1) {
; CHECK-LABEL: @i2v_truncext_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE1:%.*]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %narrow = trunc i32 %wide1 to i16
  %wide2 = zext i16 %narrow to i32
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}
200
; trunc/zext through i8 before i2v is NOT removable (it clears bits 8-15,
; which i2v uses); it becomes an 'and' with 255.
define <4 x i1> @i2v_truncext_i8(i32 %wide1) {
; CHECK-LABEL: @i2v_truncext_i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1:%.*]], 255
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE2]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %narrow = trunc i32 %wide1 to i8
  %wide2 = zext i8 %narrow to i32
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}
214
; 'and' with 0xFFFF before i2v is a no-op (i2v ignores the top 16 bits)
; and must be removed.
define <4 x i1> @i2v_and_16(i32 %wide1) {
; CHECK-LABEL: @i2v_and_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE1:%.*]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %wide2 = and i32 %wide1, 65535
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}
226
; 'and' with 0x7FFF before i2v clears bit 15, which i2v demands, so the
; mask must be kept.
define <4 x i1> @i2v_and_15(i32 %wide1) {
; CHECK-LABEL: @i2v_and_15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1:%.*]], 32767
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE2]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %wide2 = and i32 %wide1, 32767
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}
239
240; If a predicate vector is round-tripped to an integer and back, and
241; complemented while it's in integer form, we should collapse that to
242; a complement of the vector itself. (Rationale: this is likely to
243; allow it to be code-generated as MVE VPNOT.)
244
; v2i / xor 0xFFFF / i2v on <4 x i1> collapses to a vector-level 'not'
; (xor with all-ones), matching the MVE VPNOT pattern.
define <4 x i1> @vpnot_4(<4 x i1> %vin) {
; CHECK-LABEL: @vpnot_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %flipped = xor i32 %int, 65535
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %flipped)
  ret <4 x i1> %vout
}
257
; v2i / xor 0xFFFF / i2v on <8 x i1> collapses to a vector-level 'not'
; (xor with all-ones), matching the MVE VPNOT pattern.
define <8 x i1> @vpnot_8(<8 x i1> %vin) {
; CHECK-LABEL: @vpnot_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <8 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %flipped = xor i32 %int, 65535
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %flipped)
  ret <8 x i1> %vout
}
270
; v2i / xor 0xFFFF / i2v on <16 x i1> collapses to a vector-level 'not'
; (xor with all-ones), matching the MVE VPNOT pattern.
define <16 x i1> @vpnot_16(<16 x i1> %vin) {
; CHECK-LABEL: @vpnot_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <16 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %flipped = xor i32 %int, 65535
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %flipped)
  ret <16 x i1> %vout
}
283
284; And this still works even if the i32 is narrowed to i16 and back on
285; opposite sides of the xor.
286
; As @vpnot_4, but the complement is done at i16 width via a trunc/zext
; pair around the xor; the whole chain must still collapse to a vector 'not'.
define <4 x i1> @vpnot_narrow_4(<4 x i1> %vin) {
; CHECK-LABEL: @vpnot_narrow_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %narrow = trunc i32 %int to i16
  %flipped_narrow = xor i16 %narrow, -1
  %flipped = zext i16 %flipped_narrow to i32
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %flipped)
  ret <4 x i1> %vout
}
301
; As @vpnot_8, but the complement is done at i16 width via a trunc/zext
; pair around the xor; the whole chain must still collapse to a vector 'not'.
define <8 x i1> @vpnot_narrow_8(<8 x i1> %vin) {
; CHECK-LABEL: @vpnot_narrow_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <8 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %narrow = trunc i32 %int to i16
  %flipped_narrow = xor i16 %narrow, -1
  %flipped = zext i16 %flipped_narrow to i32
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %flipped)
  ret <8 x i1> %vout
}
316
; As @vpnot_16, but the complement is done at i16 width via a trunc/zext
; pair around the xor; the whole chain must still collapse to a vector 'not'.
define <16 x i1> @vpnot_narrow_16(<16 x i1> %vin) {
; CHECK-LABEL: @vpnot_narrow_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <16 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %narrow = trunc i32 %int to i16
  %flipped_narrow = xor i16 %narrow, -1
  %flipped = zext i16 %flipped_narrow to i32
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %flipped)
  ret <16 x i1> %vout
}
331