; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE

; rdar://7304838
; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
;
; CHECK-LABEL: foo:
; CHECK: movsbl ({{%rdi|%rcx}}), %eax
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; %t has two users: this compare and the zext in %true.
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  ; The extension starts out in a different block than the load it extends.
  %s = zext i8 %t to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; The add sits between the load and the zext; its other operand is a
  ; constant, so only %t needs an explicit extension.
  %add = add nuw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; Same shape as @promoteOneArg, but with nsw on the add and a sext user.
  %add = add nsw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  ; Both operands are non-constant: %t (the load) and the argument %b.
  %add = add nuw i8 %t, %b
  ; The comparison referred to in the header comment.
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  ; Same shape as @promoteTwoArgZext, but with nsw on the add and a sext user.
  %add = add nsw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; With -stress-cgp-ext-ld-promotion the promotion is expected anyway.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %t = load i8, i8* %p
  ; Two chained adds bring in two non-constant operands (%b and %c) in
  ; addition to the loaded value.
  %tmp = add nuw i8 %t, %b
  %add = add nuw i8 %tmp, %c
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  ; First extension: i8 -> i16, feeding the add.
  %ext = zext i8 %t to i16
  %add = add nuw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  ; Second extension: i16 -> i32, applied to the result of the add.
  %s = zext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form an extended load after promoting and merging
; two extensions.
; Version with sext as the outer extension. The load itself is zero-extended
; in the input, so the merged extension of the load is still a zext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  ; Inner extension is a zext (i8 -> i16) ...
  %ext = zext i8 %t to i16
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  ; ... while the outer extension of the add result is a sext (i16 -> i32).
  %s = sext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we removed one sext but
;       introduced one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta can be combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is dead-code eliminated.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zexts of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]]
; OPT-NEXT: [[ZEXTLD1_4:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTLD1_4]], [[ZEXTLD1_2]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  ; %zextld is shared by all three adds below.
  %ld = load i8, i8* %addr1
  %zextld = zext i8 %ld to i32
  ; Case 1: other operand is a second load.
  %ld2 = load i32, i32* %addr2
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64
  ; Case 2: other operand is a zext of the argument %a.
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64
  ; Case 3: other operand is the plain argument %b.
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64
  ; Pass all three extended sums to a call so each one has a user.
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; External function taking three i64 values; @severalPromotions calls it with
; its three sign-extended sums.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  ; A vector shift feeding a vector zext; both are expected to appear
  ; unchanged in the output of every opt run.
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>
  ret void
}

; Globals whose addresses are compared in the constant expression used by
; @promotionOfArgEndsUpInValue.
@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; PR21978.
; Make sure we support promotion of operands that produces a Value as opposed
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr

; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %val = load i16, i16* %addr
  ; The second operand is a constant expression (zext of an icmp on two global
  ; addresses), i.e. a value that is not an instruction.
  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %conv3 = sext i16 %add to i32
  ret i32 %conv3
}

; Check that we see that one zext can be derived from the other for free.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p

; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  ; %zextt feeds both adds below.
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %add2 = add nuw i32 %zextt, 12
  store i32 %add, i32 *%addr
  ; Only %add2 is extended a second time, to i64.
  %s = zext i32 %add2 to i64
  store i64 %s, i64* %q
  ret void
}
