; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BIGENDIAN
; RUN: opt < %s -instsimplify -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LITTLEENDIAN

; If the known bits of the shift amount force it to be greater than or equal
; to the number of bits in the type, the result of the shift is poison.

; The `or` forces bit 5 of the amount on, so the amount is always >= 32, the
; bit width of i32. The shift is expected to fold to poison.
define i32 @shl_amount_is_known_bogus(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_amount_is_known_bogus(
; CHECK-NEXT:    ret i32 poison
;
  %or = or i32 %b, 32
  %shl = shl i32 %a, %or
  ret i32 %shl
}

; Check some non-power-of-2 integer widths and the other shift opcodes.

; Or-ing in 31 forces the low five bits of the amount on, so the amount is
; always >= 31, the bit width of i31. The shift is expected to fold to poison.
define i31 @lshr_amount_is_known_bogus(i31 %a, i31 %b) {
; CHECK-LABEL: @lshr_amount_is_known_bogus(
; CHECK-NEXT:    ret i31 poison
;
  %or = or i31 %b, 31
  %shr = lshr i31 %a, %or
  ret i31 %shr
}

; 33 is 0b100001, so with those bits forced on the amount is always >= 33, the
; bit width of i33. The shift is expected to fold to poison.
define i33 @ashr_amount_is_known_bogus(i33 %a, i33 %b) {
; CHECK-LABEL: @ashr_amount_is_known_bogus(
; CHECK-NEXT:    ret i33 poison
;
  %or = or i33 %b, 33
  %shr = ashr i33 %a, %or
  ret i33 %shr
}


; If all bits that can encode an in-range shift amount are known 0, there's no
; shift. It doesn't matter if higher bits are set, because that would make the
; amount out of range and the result poison.
; Therefore, the only possible valid result of these shifts is %a.

; In-range amounts for i16 are 0..15, which occupy the low 4 bits. The mask
; clears exactly those bits, so the only in-range amount left is 0 and the
; shift is expected to fold to %a.
define i16 @ashr_amount_is_zero(i16 %a, i16 %b) {
; CHECK-LABEL: @ashr_amount_is_zero(
; CHECK-NEXT:    ret i16 [[A:%.*]]
;
  %and = and i16 %b, 65520 ; 0xfff0
  %shr = ashr i16 %a, %and
  ret i16 %shr
}

; The mask keeps only bit 11, so the amount is either 0 or 2048. 2048 is out of
; range for i300, so the shift is expected to fold to %a.
define i300 @lshr_amount_is_zero(i300 %a, i300 %b) {
; CHECK-LABEL: @lshr_amount_is_zero(
; CHECK-NEXT:    ret i300 [[A:%.*]]
;
  %and = and i300 %b, 2048
  %shr = lshr i300 %a, %and
  ret i300 %shr
}

; In-range amounts for i9 are 0..8, which need the low 4 bits. The mask 0x1f0
; clears all four, so the shift is expected to fold to %a.
define i9 @shl_amount_is_zero(i9 %a, i9 %b) {
; CHECK-LABEL: @shl_amount_is_zero(
; CHECK-NEXT:    ret i9 [[A:%.*]]
;
  %and = and i9 %b, 496 ; 0x1f0
  %shl = shl i9 %a, %and
  ret i9 %shl
}


; Verify that we've calculated the log2 boundary of valid shift-amount bits
; correctly for a type whose width is not a power of 2.

; Negative test: the mask 0x1f8 keeps bit 3, so an amount of 8 is possible and
; is in range for i9. The shift must stay. (504 is printed as -8 in i9.)
define i9 @shl_amount_is_not_known_zero(i9 %a, i9 %b) {
; CHECK-LABEL: @shl_amount_is_not_known_zero(
; CHECK-NEXT:    [[AND:%.*]] = and i9 [[B:%.*]], -8
; CHECK-NEXT:    [[SHL:%.*]] = shl i9 [[A:%.*]], [[AND]]
; CHECK-NEXT:    ret i9 [[SHL]]
;
  %and = and i9 %b, 504 ; 0x1f8
  %shl = shl i9 %a, %and
  ret i9 %shl
}


; For vectors, every element must meet the requirements for the fold to apply.

; Both lanes have bit 5 of the amount forced on, so every lane's amount is
; >= 32. The whole vector shift is expected to fold to poison.
define <2 x i32> @ashr_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @ashr_vector_bogus(
; CHECK-NEXT:    ret <2 x i32> poison
;
  %or = or <2 x i32> %b, <i32 32, i32 32>
  %shr = ashr <2 x i32> %a, %or
  ret <2 x i32> %shr
}

; FIXME: Every lane over-shifts, so this could fold to poison, but computeKnownBits doesn't handle the union.
; Lane 0 has bit 5 forced on and lane 1 has bit 6 forced on, so each lane's
; amount is >= 32, but no single bit is forced on in both lanes. The
; assertions record that the shift is currently left in place.
define <2 x i32> @shl_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @shl_vector_bogus(
; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[B:%.*]], <i32 32, i32 64>
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[OR]]
; CHECK-NEXT:    ret <2 x i32> [[SHL]]
;
  %or = or <2 x i32> %b, <i32 32, i32 64>
  %shl = shl <2 x i32> %a, %or
  ret <2 x i32> %shl
}

; Each lane's mask keeps a single bit (bit 6 in lane 0, bit 8 in lane 1), so a
; nonzero amount would be 64 or 256, both out of range for i32. The only
; in-range amount is 0, so the shift is expected to fold to %a.
define <2 x i32> @lshr_vector_zero(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @lshr_vector_zero(
; CHECK-NEXT:    ret <2 x i32> [[A:%.*]]
;
  %and = and <2 x i32> %b, <i32 64, i32 256>
  %shr = lshr <2 x i32> %a, %and
  ret <2 x i32> %shr
}

; Make sure that vectors with a non-power-of-2 element width work too.
; The mask keeps only bit 10, so each lane's amount is 0 or 1024. 1024 is out
; of range for i15, so the shift is expected to fold to %a.
define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) {
; CHECK-LABEL: @shl_vector_zero(
; CHECK-NEXT:    ret <2 x i15> [[A:%.*]]
;
  %and = and <2 x i15> %b, <i15 1024, i15 1024>
  %shl = shl <2 x i15> %a, %and
  ret <2 x i15> %shl
}

; Negative test: masking with 3 leaves amounts 0..3, all in range for i32, so
; neither the mask nor the shift can be removed.
define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @shl_vector_for_real(
; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[B:%.*]], <i32 3, i32 3>
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[AND]]
; CHECK-NEXT:    ret <2 x i32> [[SHL]]
;
  %and = and <2 x i32> %b, <i32 3, i32 3> ; a necessary mask op
  %shl = shl <2 x i32> %a, %and
  ret <2 x i32> %shl
}


; We calculate the valid bits of the shift amount using log2, and log2 of 1 (the type width) is 0.
; That is fine: the shift amount is either 0 or out of range (1), so we can always return %a.

; The only in-range shift amount for i1 is 0, so the shift is expected to fold
; to %a without any mask on %b.
define i1 @shl_i1(i1 %a, i1 %b) {
; CHECK-LABEL: @shl_i1(
; CHECK-NEXT:    ret i1 [[A:%.*]]
;
  %shl = shl i1 %a, %b
  ret i1 %shl
}

; The following cases are not folded by InstSimplify; they are only folded by
; InstCombine, see InstCombine/lshr.ll.

; Declarations of the count-trailing-zeros / count-leading-zeros intrinsics
; called by the lshr_ctlz_* and lshr_cttz_* tests in this file.
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone

; ctlz is called with its i1 flag set to true and the result is shifted right
; by 5. The assertions expect both instructions to be left unchanged.
define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
; CHECK-NEXT:    ret i32 [[SH]]
;
  %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
  %sh = lshr i32 %ct, 5
  ret i32 %sh
}

; cttz is called with its i1 flag set to true and the result is shifted right
; by 5. The assertions expect both instructions to be left unchanged.
define i32 @lshr_cttz_zero_is_undef(i32 %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef(
; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
; CHECK-NEXT:    ret i32 [[SH]]
;
  %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  %sh = lshr i32 %ct, 5
  ret i32 %sh
}

; Vector form of the ctlz case: <2 x i8> ctlz with its i1 flag set to true,
; shifted right by a splat of 3. The assertions expect no change.
define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
; CHECK-NEXT:    ret <2 x i8> [[SH]]
;
  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
  ret <2 x i8> %sh
}

; Non-splat variant: the ctlz result is shifted by <3, 0> and only lane 0 (the
; lane shifted by 3) is extracted. The assertions expect no change.
define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
; CHECK-NEXT:    ret i8 [[EX]]
;
  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
  %ex = extractelement <2 x i8> %sh, i32 0
  ret i8 %ex
}

; Vector form of the cttz case: <2 x i8> cttz with its i1 flag set to true,
; shifted right by a splat of 3. The assertions expect no change.
define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
; CHECK-NEXT:    ret <2 x i8> [[SH]]
;
  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
  ret <2 x i8> %sh
}

; Non-splat variant: the cttz result is shifted by <3, 0> and only lane 0 (the
; lane shifted by 3) is extracted. The assertions expect no change.
define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
; CHECK-NEXT:    ret i8 [[EX]]
;
  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
  %ex = extractelement <2 x i8> %sh, i32 0
  ret i8 %ex
}

; Both the low and the high byte of the shift amount are 0, so the low byte is
; known zero for either endianness. The middle byte doesn't matter.

; Mask <3, 1, 3> takes elements 0 and 2 from element 0 of %c (the constant 0),
; so the first and the last byte of the i24 amount are both zero. The low byte
; is therefore zero for either endianness, leaving 0 as the only in-range
; amount, and the shift is expected to fold to %v2.
define i24 @bitcast_noshift_scalar(<3 x i8> %v1, i24 %v2) {
; CHECK-LABEL: @bitcast_noshift_scalar(
; CHECK-NEXT:    ret i24 [[V2:%.*]]
;
  %c = insertelement <3 x i8> poison, i8 0, i64 0
  %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 3>
  %b = bitcast <3 x i8> %s to i24
  %r = shl i24 %v2, %b
  ret i24 %r
}

; The low byte of the shift amount is 0 on big-endian and unknown on little-endian.

; Mask <0, 1, 3> makes only the last byte (element 2) zero. That is the low
; byte on big-endian, where the shift is expected to fold to %v2. On
; little-endian the low byte comes from %v1, and the assertions expect no fold.
define i24 @bitcast_noshift_scalar_bigend(<3 x i8> %v1, i24 %v2) {
; BIGENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
; BIGENDIAN-NEXT:    ret i24 [[V2:%.*]]
;
; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 0, i32 1, i32 3>
; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
; LITTLEENDIAN-NEXT:    ret i24 [[R]]
;
  %c = insertelement <3 x i8> poison, i8 0, i64 0
  %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 0, i32 1, i32 3>
  %b = bitcast <3 x i8> %s to i24
  %r = shl i24 %v2, %b
  ret i24 %r
}

; The low byte of the shift amount is 0 on little-endian and unknown on big-endian.

; Mask <3, 1, 2> makes only the first byte (element 0) zero. That is the low
; byte on little-endian, where the shift is expected to fold to %v2. On
; big-endian the low byte comes from %v1, and the assertions expect no fold.
define i24 @bitcast_noshift_scalar_littleend(<3 x i8> %v1, i24 %v2) {
; BIGENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 2>
; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
; BIGENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
; BIGENDIAN-NEXT:    ret i24 [[R]]
;
; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
; LITTLEENDIAN-NEXT:    ret i24 [[V2:%.*]]
;
  %c = insertelement <3 x i8> poison, i8 0, i64 0
  %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 2>
  %b = bitcast <3 x i8> %s to i24
  %r = shl i24 %v2, %b
  ret i24 %r
}

; In every vector element, the low byte of the shift amount is known to be 24
; on little-endian, and the high byte is known to be 24 (i.e. 24<<16) on
; big-endian, so it's an overshift either way.

; Elements 0, 3 and 6 (the first byte of each i24) are replaced with the
; constant 24. Whether that byte is the low or the high byte, every amount is
; >= 24, the bit width of i24, so the shift is expected to fold to poison for
; both data layouts.
define <3 x i24> @bitcast_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
; CHECK-LABEL: @bitcast_overshift_vector(
; CHECK-NEXT:    ret <3 x i24> poison
;
  %c = insertelement <9 x i8> poison, i8 24, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; In every vector element, the low byte of the shift amount is known to be 23
; on little-endian (which may still be in range), and the high byte is known to
; be 23 (i.e. 23<<16) on big-endian, so it's a known overshift only for big-endian.

; Elements 0, 3 and 6 (the first byte of each i24) are replaced with the
; constant 23. On big-endian that is the high byte, so every amount is far out
; of range and the shift is expected to fold to poison. On little-endian it is
; the low byte, so an amount of exactly 23 is possible and the assertions
; expect no fold.
define <3 x i24> @bitcast_overshift_vector_bigend(<9 x i8> %v1, <3 x i24> %v2) {
; BIGENDIAN-LABEL: @bitcast_overshift_vector_bigend(
; BIGENDIAN-NEXT:    ret <3 x i24> poison
;
; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_bigend(
; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
; LITTLEENDIAN-NEXT:    ret <3 x i24> [[R]]
;
  %c = insertelement <9 x i8> poison, i8 23, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; In every vector element, the low byte of the shift amount is known to be 23
; on big-endian (which may still be in range), and the high byte is known to be
; 23 (i.e. 23<<16) on little-endian, so it's a known overshift only for little-endian.

; Elements 2, 5 and 8 (the last byte of each i24) are replaced with the
; constant 23. On little-endian that is the high byte, so every amount is far
; out of range and the shift is expected to fold to poison. On big-endian it is
; the low byte, so an amount of exactly 23 is possible and the assertions
; expect no fold.
define <3 x i24> @bitcast_overshift_vector_littleend(<9 x i8> %v1, <3 x i24> %v2) {
; BIGENDIAN-LABEL: @bitcast_overshift_vector_littleend(
; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
; BIGENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
; BIGENDIAN-NEXT:    ret <3 x i24> [[R]]
;
; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_littleend(
; LITTLEENDIAN-NEXT:    ret <3 x i24> poison
;
  %c = insertelement <9 x i8> poison, i8 23, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; Negative test - a byte of the shift amount is known to be 24 (as 24 or 24<<16) in only 2 out of 3 elements.

; Only elements 0 and 3 are replaced with the constant 24; the third i24
; (elements 6..8) comes entirely from %v1, so its amount is unknown. The
; assertions expect the shift to be left in place for both data layouts.
define <3 x i24> @bitcast_partial_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
; CHECK-LABEL: @bitcast_partial_overshift_vector(
; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 24, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
; CHECK-NEXT:    ret <3 x i24> [[R]]
;
  %c = insertelement <9 x i8> poison, i8 24, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; Negative test - we don't know how to look through a cast from a non-integer type (but we could handle this...).

; Element 0 of the <2 x float> source is replaced with 0.0 before the vector is
; bitcast to <1 x i64> and used as the shift amount. The bitcast source is a
; floating-point vector, and the assertions expect the shift to be left in
; place.
define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> %v2) {
; CHECK-LABEL: @bitcast_noshift_vector_wrong_type(
; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x float> [[S]] to <1 x i64>
; CHECK-NEXT:    [[R:%.*]] = shl <1 x i64> [[V2:%.*]], [[B]]
; CHECK-NEXT:    ret <1 x i64> [[R]]
;
  %c = insertelement <2 x float> poison, float 0.0, i64 0
  %s = shufflevector <2 x float> %v1, <2 x float> %c, <2 x i32> <i32 2, i32 1>
  %b = bitcast <2 x float> %s to <1 x i64>
  %r = shl <1 x i64> %v2, %b
  ret <1 x i64> %r
}
363