1; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
2; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
3; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE
4
5; CodeGenPrepare should move the zext into the block with the load
6; so that SelectionDAG can select it with the load.
7;
8; OPTALL-LABEL: @foo
9; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
10; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
11; OPTALL: store i32 [[ZEXT]], i32* %q
12; OPTALL: ret
; Test input: the i8 load is in %entry while its only extension (the zext to
; i32) is in the %true successor; the loaded value also feeds the compare.
13define void @foo(i8* %p, i32* %q) {
14entry:
15  %t = load i8, i8* %p
16  %a = icmp slt i8 %t, 20
17  br i1 %a, label %true, label %false
18true:
19  %s = zext i8 %t to i32
20  store i32 %s, i32* %q
21  ret void
22false:
23  ret void
24}
25
26; Check that we manage to form a zextload if an operation with only one
27; argument to explicitly extend is in the way.
28; OPTALL-LABEL: @promoteOneArg
29; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
30; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
31; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
32; Make sure the operation is not promoted when the promotion pass is disabled.
33; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
34; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
35; OPTALL: store i32 [[RES]], i32* %q
36; OPTALL: ret
; Test input: an i8 add (nuw) with a constant second operand sits between the
; load and the zext in %true; the load is also used directly by the compare.
37define void @promoteOneArg(i8* %p, i32* %q) {
38entry:
39  %t = load i8, i8* %p
40  %add = add nuw i8 %t, 2
41  %a = icmp slt i8 %t, 20
42  br i1 %a, label %true, label %false
43true:
44  %s = zext i8 %add to i32
45  store i32 %s, i32* %q
46  ret void
47false:
48  ret void
49}
50
51; Check that we manage to form a sextload if an operation with only one
52; argument to explicitly extend is in the way.
53; Version with sext.
54; OPTALL-LABEL: @promoteOneArgSExt
55; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
56; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
57; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
58; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
59; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
60; OPTALL: store i32 [[RES]], i32* %q
61; OPTALL: ret
; Test input: an i8 add (nsw) with a constant second operand sits between the
; load and the sext in %true; the load is also used directly by the compare.
62define void @promoteOneArgSExt(i8* %p, i32* %q) {
63entry:
64  %t = load i8, i8* %p
65  %add = add nsw i8 %t, 2
66  %a = icmp slt i8 %t, 20
67  br i1 %a, label %true, label %false
68true:
69  %s = sext i8 %add to i32
70  store i32 %s, i32* %q
71  ret void
72false:
73  ret void
74}
75
76; Check that we manage to form a zextload if an operation with two
77; arguments to explicitly extend is in the way.
78; Extending %add will create two extensions:
79; 1. One for %b.
80; 2. One for %t.
81; #1 will not be removed as we do not know anything about %b.
82; #2 may not be merged with the load because %t is used in a comparison.
83; Since two extensions may be emitted in the end instead of one before the
84; transformation, the regular heuristic does not apply the optimization.
85;
86; OPTALL-LABEL: @promoteTwoArgZext
87; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
88;
89; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
90; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
91; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
92;
93; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
94; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
95;
96; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
97; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
98;
99; OPTALL: store i32 [[RES]], i32* %q
100; OPTALL: ret
; Test input: the i8 add (nuw) combines the loaded value with the argument %b,
; so neither operand is a constant; the sum is zero-extended in %true.
101define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
102entry:
103  %t = load i8, i8* %p
104  %add = add nuw i8 %t, %b
105  %a = icmp slt i8 %t, 20
106  br i1 %a, label %true, label %false
107true:
108  %s = zext i8 %add to i32
109  store i32 %s, i32* %q
110  ret void
111false:
112  ret void
113}
114
115; Check that we manage to form a sextload if an operation with two
116; arguments to explicitly extend is in the way.
117; Version with sext.
118; OPTALL-LABEL: @promoteTwoArgSExt
119; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
120;
121; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
122; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
123; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
124;
125; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
126; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
127;
128; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
129; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
130; OPTALL: store i32 [[RES]], i32* %q
131; OPTALL: ret
; Test input: the i8 add (nsw) combines the loaded value with the argument %b,
; so neither operand is a constant; the sum is sign-extended in %true.
132define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
133entry:
134  %t = load i8, i8* %p
135  %add = add nsw i8 %t, %b
136  %a = icmp slt i8 %t, 20
137  br i1 %a, label %true, label %false
138true:
139  %s = sext i8 %add to i32
140  store i32 %s, i32* %q
141  ret void
142false:
143  ret void
144}
145
146; Check that we do not form a zextload if we need to introduce more than
147; one additional extension.
148; OPTALL-LABEL: @promoteThreeArgZext
149; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
150;
151; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
152; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
153; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
154; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
155; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
156;
157; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
158; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
159; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
160;
161; DISABLE: add nuw i8
162; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
163; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
164;
165; OPTALL: store i32 [[RES]], i32* %q
166; OPTALL: ret
; Test input: two chained i8 adds (nuw) combine the loaded value with the
; arguments %b and %c before the zext in %true.
167define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
168entry:
169  %t = load i8, i8* %p
170  %tmp = add nuw i8 %t, %b
171  %add = add nuw i8 %tmp, %c
172  %a = icmp slt i8 %t, 20
173  br i1 %a, label %true, label %false
174true:
175  %s = zext i8 %add to i32
176  store i32 %s, i32* %q
177  ret void
178false:
179  ret void
180}
181
182; Check that we manage to form a zextload after promoting and merging
183; two extensions.
184; OPTALL-LABEL: @promoteMergeExtArgZExt
185; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
186;
187; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
188; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
189; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
190;
191; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
192; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
193; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
194;
195; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
196; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
197; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
198;
199; OPTALL: store i32 [[RES]], i32* %q
200; OPTALL: ret
; Test input: the loaded i8 is first zero-extended to i16, added (nuw) to the
; i16 argument %b, and the sum is zero-extended again to i32 in %true.
201define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
202entry:
203  %t = load i8, i8* %p
204  %ext = zext i8 %t to i16
205  %add = add nuw i16 %ext, %b
206  %a = icmp slt i8 %t, 20
207  br i1 %a, label %true, label %false
208true:
209  %s = zext i16 %add to i32
210  store i32 %s, i32* %q
211  ret void
212false:
213  ret void
214}
215
216; Check that we manage to form a sextload after promoting and merging
217; two extensions.
218; Version with sext.
219; OPTALL-LABEL: @promoteMergeExtArgSExt
220; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
221;
222; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
223; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
224; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
225;
226; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
227; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
228; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
229;
230; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
231; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
232; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
233; OPTALL: store i32 [[RES]], i32* %q
234; OPTALL: ret
; Test input: the loaded i8 is first zero-extended to i16, added (nsw) to the
; i16 argument %b, and the sum is sign-extended to i32 in %true.
235define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
236entry:
237  %t = load i8, i8* %p
238  %ext = zext i8 %t to i16
239  %add = add nsw i16 %ext, %b
240  %a = icmp slt i8 %t, 20
241  br i1 %a, label %true, label %false
242true:
243  %s = sext i16 %add to i32
244  store i32 %s, i32* %q
245  ret void
246false:
247  ret void
248}
249
250; Check that we manage to catch all the extload opportunities that are exposed
251; by the different iterations of codegen prepare.
252; Moreover, check that we do not promote more than we need to.
253; Here is what is happening in this test (not necessarily in this order):
254; 1. We try to promote the operand of %sextadd.
255;    a. This creates one sext of %ld2 and one of %zextld
256;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
257;       introduce one. This is fine with the current heuristic: neutral.
258;    => We have one zext of %zextld left and we created one sext of %ld2.
259; 2. We try to promote the operand of %sextaddza.
260;    a. This creates one sext of %zexta and one of %zextld
261;    b. The sext of %zexta can be combined with the zext of %a.
262;    c. The sext of %zextld leads to %ld and can be combined with it. This is
263;       done by promoting %zextld. This is fine with the current heuristic:
264;       neutral.
265;    => We have created a new zext of %ld and we created one sext of %zexta.
266; 3. We try to promote the operand of %sextaddb.
267;    a. This creates one sext of %b and one of %zextld
268;    b. The sext of %b is a dead-end, nothing to be done.
269;    c. Same thing as 2.c. happens.
270;    => We have created a new zext of %ld and we created one sext of %b.
271; 4. We try to promote the operand of the zext of %zextld introduced in #1.
272;    a. Same thing as 2.c. happens.
273;    b. %zextld does not have any other uses. It is removed as dead code.
274;    => We have created a new zext of %ld and we removed a zext of %zextld and
275;       a zext of %ld.
276; Currently we do not try to reuse existing extensions, so in the end we have
277; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
278;
279; OPTALL-LABEL: @severalPromotions
280; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
281; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
282; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
283; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
284; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
285; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
286; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]]
287; OPT-NEXT: [[ZEXTLD1_4:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64
288; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTLD1_4]], [[ZEXTLD1_2]]
289; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
290; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]]
291;
292; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
293; DISABLE: [[RES:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADD]] to i64
294; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
295; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDZA]] to i64
296; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
297; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDB]] to i64
298;
299; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
300; OPTALL: ret
; Test input: a single-block function in which %zextld (the zext of the i8 load
; to i32) is shared by three i32 adds (nsw); each sum is sign-extended to i64
; and passed to @dummy.
301define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
302  %ld = load i8, i8* %addr1
303  %zextld = zext i8 %ld to i32
304  %ld2 = load i32, i32* %addr2
305  %add = add nsw i32 %ld2, %zextld
306  %sextadd = sext i32 %add to i64
307  %zexta = zext i8 %a to i32
308  %addza = add nsw i32 %zexta, %zextld
309  %sextaddza = sext i32 %addza to i64
310  %addb = add nsw i32 %b, %zextld
311  %sextaddb = sext i32 %addb to i64
312  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
313  ret void
314}
315
; External function taking three i64 values; @severalPromotions calls it with
; its three sign-extended sums.
316declare void @dummy(i64, i64, i64)
317
318; Make sure we do not try to promote vector types since the type promotion
319; helper does not support them for now.
320; OPTALL-LABEL: @vectorPromotion
321; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
322; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
323; OPTALL: ret
; Test input: a <2 x i32> shl of a constant vector followed by a zext to
; <2 x i64>; the extended value has no users.
324define void @vectorPromotion() {
325entry:
326  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
327  %b = zext <2 x i32> %a to <2 x i64>
328  ret void
329}
330
; Globals whose addresses are compared in the constant expression used by
; @promotionOfArgEndsUpInValue.
331@a = common global i32 0, align 4
332@c = common global [2 x i32] zeroinitializer, align 4
333
334; Make sure we support promotion of operands that produces a Value as opposed
335; to an instruction.
336; This used to cause a crash.
337; OPTALL-LABEL: @promotionOfArgEndsUpInValue
338; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
339;
340; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
341; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
342;
343; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
344; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
345;
346; OPTALL-NEXT: ret i32 [[RES]]
; Test input: the second operand of the i16 add is a constant expression (a
; zext of an icmp between two global addresses) rather than an instruction.
347define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
348entry:
349  %val = load i16, i16* %addr
350  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
351  %conv3 = sext i16 %add to i32
352  ret i32 %conv3
353}
354
355; Check that we see that one zext can be derived from the other for free.
356; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
357; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
358;
359; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
360; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
361; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
362; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
363; OPT-NEXT: store i32 [[RES32]], i32* %addr
364; OPT-NEXT: store i64 [[RES64]], i64* %q
365;
366; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
367; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
368; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
369; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
370; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
371; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
372;
373; OPTALL-NEXT: ret void
; Test input: %zextt (zext i8 to i32) has two users. %add is stored as i32,
; while %add2 (add nuw with the constant 12) is zero-extended again to i64
; before being stored.
374define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
375entry:
376  %t = load i8, i8* %p
377  %zextt = zext i8 %t to i32
378  %add = add nuw i32 %zextt, %b
379  %add2 = add nuw i32 %zextt, 12
380  store i32 %add, i32 *%addr
381  %s = zext i32 %add2 to i64
382  store i64 %s, i64* %q
383  ret void
384}
385
386; Check that we do not increase the cost of the code.
387; The input has one free zext and one free sext. If we would have promoted
388; all the way through the load we would end up with a free zext and a
389; non-free sext (of %b).
390; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
391; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
392;
393; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
394; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
395; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
396; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
397;
398; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
399; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
400; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
401;
402; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
403; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
404; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
405;
406; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
407; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
408; OPTALL-NEXT: ret void
; Test input: the sign-extended i32 sum indexes a getelementptr over i32
; elements, and the i32 sum itself is the value stored at that address.
409define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
410entry:
411  %t = load i8, i8* %p
412  %zextt = zext i8 %t to i32
413  %add = add nsw i32 %zextt, %b
414  %idx64 = sext i32 %add to i64
415  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
416  store i32 %add, i32 *%staddr
417  ret void
418}
419
420; Check that we do not increase the cost of the code.
421; The input has one free zext and one free sext. If we would have promoted
422; all the way through the load we would end up with a free zext and a
423; non-free sext (of %b).
424; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
425; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
426;
427; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
428; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
429; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
430;
431; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
432; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
433; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
434;
435; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
436; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
437; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
438;
439; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
440; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
441; OPTALL-NEXT: ret void
; Test input: the sign-extended i32 sum indexes a getelementptr over i64
; elements; the stored value is the separate argument %stuff, so the i32 sum
; has no user other than the sext.
442define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
443entry:
444  %t = load i8, i8* %p
445  %zextt = zext i8 %t to i32
446  %add = add nsw i32 %zextt, %b
447  %idx64 = sext i32 %add to i64
448  %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
449  store i64 %stuff, i64 *%staddr
450  ret void
451}
452
453; Check that we do not increase the cost of the code.
454; The input has one free zext and one free sext. If we would have promoted
455; all the way through the load we would end up with a free zext and a
456; non-free sext (of %b).
457; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
458; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
459;
460; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
461; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
462; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
463;
464; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
465; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
466; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
467;
468; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
469; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
470; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
471;
472; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
473; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
474; OPTALL-NEXT: ret void
; Test input: the sign-extended i32 sum indexes a getelementptr over i128
; elements; the stored value is the separate argument %stuff.
475define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
476entry:
477  %t = load i8, i8* %p
478  %zextt = zext i8 %t to i32
479  %add = add nsw i32 %zextt, %b
480  %idx64 = sext i32 %add to i64
481  %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
482  store i128 %stuff, i128 *%staddr
483  ret void
484}
485
486
487; Check that we do not increase the cost of the code.
488; The input has one free zext and one free sext. If we would have promoted
489; all the way through the load we would end up with a free zext and a
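490; non-free sext (of %b).
; NOTE(review): unlike the i32/i64/i128 variants above, the checks below expect
; the promotion to happen in regular mode as well - presumably because the
; sext cannot be folded into the addressing mode for i256 elements, so it is
; not free to begin with. Confirm and reword the description above accordingly.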
491; OPTALL-LABEL: @promoteSExtFromAddrMode256
492; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
493;
494; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
495; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
496; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
497;
498; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
499; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
500; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
501;
502; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
503; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
504; OPTALL-NEXT: ret void
; Test input: the sign-extended i32 sum indexes a getelementptr over i256
; elements; the stored value is the separate argument %stuff.
505define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
506entry:
507  %t = load i8, i8* %p
508  %zextt = zext i8 %t to i32
509  %add = add nsw i32 %zextt, %b
510  %idx64 = sext i32 %add to i64
511  %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
512  store i256 %stuff, i256 *%staddr
513  ret void
514}
515
516; Check that we do not increase the cost of the code.
517; The input has one free zext and one free zext.
518; When we promote all the way through the load, we end up with
519; a free zext and a non-free zext (of %b).
520; However, the current target lowering says zext i32 to i64 is free
521; so the promotion happens because the cost did not change and may
522; expose more opportunities.
523; This would need to be fixed at some point.
524; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
525; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
526;
527; This transformation should really happen only for stress mode.
528; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
529; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
530; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
531; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
532;
533; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
534; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
535; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
536;
537; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
538; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
539; OPTALL-NEXT: ret void
; Test input: the zero-extended i32 sum (add nuw) indexes a getelementptr over
; i32 elements, and the i32 sum itself is the value stored at that address.
540define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
541entry:
542  %t = load i8, i8* %p
543  %zextt = zext i8 %t to i32
544  %add = add nuw i32 %zextt, %b
545  %idx64 = zext i32 %add to i64
546  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
547  store i32 %add, i32 *%staddr
548  ret void
549}
550
; Shift variant of the address-mode tests: the sign-extended i32 sum is
; consumed by a shl (by 12) instead of a getelementptr.
; NOTE(review): the expected regular-mode output keeps the sext after the i32
; add, presumably because the extension is treated as free when it feeds the
; shift - confirm.
551; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
552; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
553;
554; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
555; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
556; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
557;
558; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
559; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
560; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
561;
562; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
563; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
564; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
565;
566; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; The returned value must be the shl result captured just above; matching the
; capture keeps the check independent of the value's name.
567; OPTALL-NEXT: ret i64 [[RES64]]
568define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
569entry:
570  %t = load i8, i8* %p
571  %zextt = zext i8 %t to i32
572  %add = add nsw i32 %zextt, %b
573  %idx64 = sext i32 %add to i64
574  %staddr = shl i64 %idx64, 12
575  ret i64 %staddr
576}
577
578; Same comment as doNotPromoteFreeZExtFromAddrMode.
; Here the zero-extended i32 sum is consumed by a shl (by 12) instead of a
; getelementptr.
579; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
580; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
581;
582; This transformation should really happen only for stress mode.
583; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
584; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
585; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
586;
587; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
588; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
589; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
590;
591; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; The returned value must be the shl result captured just above; matching the
; capture keeps the check independent of the value's name.
592; OPTALL-NEXT: ret i64 [[RES64]]
593define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
594entry:
595  %t = load i8, i8* %p
596  %zextt = zext i8 %t to i32
597  %add = add nuw i32 %zextt, %b
598  %idx64 = zext i32 %add to i64
599  %staddr = shl i64 %idx64, 12
600  ret i64 %staddr
601}
602
603; The input has one free zext and one non-free sext.
604; When we promote all the way through to the load, we end up with
605; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
606; However, when we generate a load pair, the free sext (of %ld1) becomes
607; non-free. So technically, we trade one non-free sext for two non-free
608; sexts.
609; This would need to be fixed at some point.
610; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
611; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
612; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
613; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
614;
615; This transformation should really happen only for stress mode.
616; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
617; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
618; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
619;
620; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
621; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
622;
623; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
624; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
625; OPTALL-NEXT: ret i64 [[FINAL]]
; Test input: two i32 loads from adjacent elements (%p and %p + 1). The second
; is added (nsw) to %cst and sign-extended, the first is zero-extended, and
; the two i64 values are summed and returned.
626define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
627  %ld0 = load i32, i32* %p
628  %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
629  %ld1 = load i32, i32* %idxLd1
630  %res = add nsw i32 %ld1, %cst
631  %sextres = sext i32 %res to i64
632  %zextLd0 = zext i32 %ld0 to i64
633  %final = add i64 %sextres, %zextLd0
634  ret i64 %final
635}
636
; Check that a sext of a (shl nsw) of a zext'ed load is promoted: with the
; promotion enabled the load gets a single zext to i64 in the entry block and
; the shift is performed on i64 in %if.then. With the promotion disabled the
; original sext stays in %if.then.
637define i64 @promoteZextShl(i1 %c, i16* %P) {
638entry:
639; OPTALL-LABEL: @promoteZextShl
640; OPTALL: entry:
641; OPT: %[[LD:.*]] = load i16, i16* %P
642; OPT: %[[EXT:.*]] = zext i16 %[[LD]] to i64
643; OPT: if.then:
644; OPT: shl nsw i64 %[[EXT]], 1
645; DISABLE: if.then:
646; DISABLE: %r = sext i32 %shl2 to i64
647  %ld = load i16, i16* %P
648  br i1 %c, label %end, label %if.then
649if.then:
650  %z = zext i16 %ld to i32
651  %shl2 = shl nsw i32 %z, 1
652  %r = sext i32 %shl2 to i64
653  ret i64 %r
654end:
655  ret i64 0
656}
657