; RUN: opt -S -instcombine < %s | FileCheck %s

; return mul(zext x, zext y) > MAX
;
; Both i32 operands are zero-extended to i64 and multiplied; the product is
; compared (unsigned >) against 4294967295. The checks expect instcombine to
; drop the i32 zexts, call @llvm.umul.with.overflow.i32 on the original
; operands, and read the overflow bit (field 1) of its result.
define i32 @pr4917_1(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4917_1(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
; CHECK-NOT: zext i32
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %overflow = icmp ugt i64 %mul64, 4294967295
; CHECK: extractvalue { i32, i1 } [[MUL]], 1
  %retval = zext i1 %overflow to i32
  ret i32 %retval
}

; return mul(zext x, zext y) >= MAX+1
;
; Same pattern as an unsigned "> 4294967295" test, but spelled as
; "uge 4294967296". The checks expect the same result: no i32 zexts left, a
; call to @llvm.umul.with.overflow.i32 on %x and %y, and an extract of the
; overflow bit (field 1).
define i32 @pr4917_1a(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4917_1a(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
; CHECK-NOT: zext i32
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %overflow = icmp uge i64 %mul64, 4294967296
; CHECK: extractvalue { i32, i1 } [[MUL]], 1
  %retval = zext i1 %overflow to i32
  ret i32 %retval
}

; mul(zext x, zext y) > MAX
; mul(x, y) is used
;
; Besides the overflow comparison, the i64 product is truncated back to i32
; and that truncated value feeds the select. The checks expect both fields of
; the @llvm.umul.with.overflow.i32 result to be extracted: field 0 as the
; selected value and field 1 as the select condition. The two extracts may
; appear in either order.
define i32 @pr4917_2(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4917_2(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
; CHECK-NOT: zext i32
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %overflow = icmp ugt i64 %mul64, 4294967295
; CHECK-DAG: [[VAL:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
  %mul32 = trunc i64 %mul64 to i32
; CHECK-DAG: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
  %retval = select i1 %overflow, i32 %mul32, i32 111
; CHECK: select i1 [[OVFL]], i32 [[VAL]]
  ret i32 %retval
}

; mul(zext x, zext y) > MAX
; mul is used in non-truncate
;
; Negative test: the full i64 product is used directly as a select operand
; (not through a trunc), so the checks require that no call to
; umul.with.overflow.i32 appears in this function.
define i64 @pr4917_3(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4917_3(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
  %mul64 = mul i64 %l, %r
; CHECK-NOT: umul.with.overflow.i32
  %overflow = icmp ugt i64 %mul64, 4294967295
  %retval = select i1 %overflow, i64 %mul64, i64 111
  ret i64 %retval
}

; return mul(zext x, zext y) <= MAX
;
; Inverted form of the overflow test (unsigned <= 4294967295, i.e. "did not
; overflow"). The checks expect the overflow bit (field 1) of
; @llvm.umul.with.overflow.i32 to be extracted and then followed by an xor.
define i32 @pr4917_4(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4917_4(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
; CHECK-NOT: zext i32
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %overflow = icmp ule i64 %mul64, 4294967295
; CHECK: extractvalue { i32, i1 } [[MUL]], 1
; CHECK: xor
  %retval = zext i1 %overflow to i32
  ret i32 %retval
}

; return mul(zext x, zext y) < MAX+1
;
; "Did not overflow" test spelled as "ult 4294967296". The checks expect the
; overflow bit (field 1) of @llvm.umul.with.overflow.i32 to be extracted and
; then followed by an xor.
define i32 @pr4917_4a(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4917_4a(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
; CHECK-NOT: zext i32
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %overflow = icmp ult i64 %mul64, 4294967296
; CHECK: extractvalue { i32, i1 } [[MUL]], 1
; CHECK: xor
  %retval = zext i1 %overflow to i32
  ret i32 %retval
}

; operands of mul are of different size
;
; %x is i32 while %y is only i8; both are zero-extended to i64 before the
; multiply. The checks expect %y to be zero-extended to i32 instead, passed
; with %x to @llvm.umul.with.overflow.i32, and both result fields to be
; extracted (field 0 as the selected value, field 1 as the select condition).
define i32 @pr4917_5(i32 %x, i8 %y) nounwind {
; CHECK-LABEL: @pr4917_5(
entry:
  %l = zext i32 %x to i64
  %r = zext i8 %y to i64
; CHECK: [[Y:%.*]] = zext i8 %y to i32
  %mul64 = mul i64 %l, %r
  %overflow = icmp ugt i64 %mul64, 4294967295
  %mul32 = trunc i64 %mul64 to i32
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 [[Y]])
; CHECK-DAG: [[VAL:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; CHECK-DAG: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
  %retval = select i1 %overflow, i32 %mul32, i32 111
; CHECK: select i1 [[OVFL]], i32 [[VAL]]
  ret i32 %retval
}

; mul(zext x, zext y) != zext trunc mul
;
; Overflow is detected by truncating the i64 product to i32, zero-extending
; it back, and comparing it (ne) with the full product. The checks expect a
; call to @llvm.umul.with.overflow.i32 on %x and %y, and an extract of the
; overflow bit (field 1) from that same call.
define i32 @pr4918_1(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4918_1(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %part32 = trunc i64 %mul64 to i32
  %part64 = zext i32 %part32 to i64
  %overflow = icmp ne i64 %mul64, %part64
; CHECK: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
  %retval = zext i1 %overflow to i32
  ret i32 %retval
}

; mul(zext x, zext y) == zext trunc mul
;
; "Did not overflow" form of the trunc/zext round-trip comparison (eq). The
; checks expect a call to @llvm.umul.with.overflow.i32 on %x and %y, an
; extract of its overflow bit (field 1), and a following xor.
define i32 @pr4918_2(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4918_2(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %part32 = trunc i64 %mul64 to i32
  %part64 = zext i32 %part32 to i64
  %overflow = icmp eq i64 %mul64, %part64
; CHECK: extractvalue { i32, i1 } [[MUL]], 1
  %retval = zext i1 %overflow to i32
; CHECK: xor
  ret i32 %retval
}

; zext trunc mul != mul(zext x, zext y)
;
; Same trunc/zext round-trip comparison (ne), but with the icmp operands
; swapped: the round-tripped value is on the left and the full product on the
; right. The checks expect a call to @llvm.umul.with.overflow.i32 on %x and %y
; and an extract of its overflow bit (field 1).
define i32 @pr4918_3(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: @pr4918_3(
entry:
  %l = zext i32 %x to i64
  %r = zext i32 %y to i64
  %mul64 = mul i64 %l, %r
; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
  %part32 = trunc i64 %mul64 to i32
  %part64 = zext i32 %part32 to i64
  %overflow = icmp ne i64 %part64, %mul64
; CHECK: extractvalue { i32, i1 } [[MUL]], 1
  %retval = zext i1 %overflow to i32
  ret i32 %retval
}

; Vector case: two <4 x i16> values are zero-extended to <4 x i32>,
; multiplied, and the product is compared (signed >=) against zero. The
; checks require that no "mul.with.overflow" intrinsic appears before the ret.
define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: @pr20113(
; CHECK-NOT: mul.with.overflow
; CHECK: ret
  %vmovl.i.i726 = zext <4 x i16> %a to <4 x i32>
  %vmovl.i.i712 = zext <4 x i16> %b to <4 x i32>
  %mul.i703 = mul <4 x i32> %vmovl.i.i712, %vmovl.i.i726
  %tmp = icmp sge <4 x i32> %mul.i703, zeroinitializer
  %vcgez.i = sext <4 x i1> %tmp to <4 x i32>
  ret <4 x i32> %vcgez.i
}


; The last test needs this unusual datalayout.
target datalayout = "i32:8:8"
; Without it, InstCombine assumes the pointer is aligned to 4 bytes.
; The known-zero low bits that result from that alignment allow it to
; turn:
;    and i32 %mul, 255
; into:
;    and i32 %mul, 252
; The mask is then no longer of the form 2^n-1, which prevents the
; transformation under test.

@pr21445_data = external global i32

; The i8 argument and an i8 constant (the address of @pr21445_data converted
; with ptrtoint) are zero-extended to i32 and multiplied; overflow of the i8
; range is detected by comparing the product with itself masked to its low
; 8 bits. The checks expect exactly three instructions: a call to
; @llvm.umul.with.overflow.i8, an extract of its overflow bit (field 1), and
; a ret of that bit.
define i1 @pr21445(i8 %a) {
; CHECK-LABEL: @pr21445(
; CHECK-NEXT:  %[[umul:.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 ptrtoint (i32* @pr21445_data to i8))
; CHECK-NEXT:  %[[cmp:.*]] = extractvalue { i8, i1 } %[[umul]], 1
; CHECK-NEXT:  ret i1 %[[cmp]]
  %ext = zext i8 %a to i32
  %mul = mul i32 %ext, zext (i8 ptrtoint (i32* @pr21445_data to i8) to i32)
  %and = and i32 %mul, 255
  %cmp = icmp ne i32 %mul, %and
  ret i1 %cmp
}
