; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE

; The three invocations above run CodeGenPrepare in its default mode, with
; -stress-cgp-ext-ld-promotion, and with -disable-cgp-ext-ld-promotion.
; The 'OPTALL' prefix is enabled in every invocation; 'OPT' is enabled in the
; first two only.

; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; %t has a second user (this compare) besides the zext in %true.
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  ; In the input, the extension lives in a different block than the load.
  %s = zext i8 %t to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; The nuw add sits between the load and the zext; its other operand is a
  ; constant, so only the loaded value needs an explicit extension.
  %add = add nuw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; Same shape as @promoteOneArg, but with an nsw add feeding a sext.
  %add = add nsw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  ; Both add operands are non-constant: the loaded value and the argument %b.
  %add = add nuw i8 %t, %b
  ; The compare is a second user of the narrow load.
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  ; Both add operands are non-constant: the loaded value and the argument %b.
  %add = add nsw i8 %t, %b
  ; The compare is a second user of the narrow load.
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %t = load i8, i8* %p
  ; Chain of two nuw adds: promoting through both would require extending
  ; %b and %c in addition to the loaded value.
  %tmp = add nuw i8 %t, %b
  %add = add nuw i8 %tmp, %c
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  ; The load is first widened to i16, added to the i16 argument, and the sum
  ; is widened again to i32 in %true: two zexts on the same path.
  %ext = zext i8 %t to i16
  %add = add nuw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload after promoting and merging
; two extensions.
; Version with sext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  ; The load is zero-extended to i16, added (nsw) to the i16 argument, and the
  ; sum is sign-extended to i32 in %true.
  %ext = zext i8 %t to i16
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
;       introduced one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta can be combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is dead coded.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]]
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTA]], [[ZEXTLD1_2]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  %ld = load i8, i8* %addr1
  ; %zextld is shared by the three adds below.
  %zextld = zext i8 %ld to i32
  %ld2 = load i32, i32* %addr2
  ; Case 1: other operand is a second load.
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64
  ; Case 2: other operand is a zext of an argument.
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64
  ; Case 3: other operand is a plain argument.
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; External sink that keeps the three extended sums of @severalPromotions live.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>
  ret void
}

; Globals referenced by the constant expression in
; @promotionOfArgEndsUpInValue.
@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; Make sure we support promotion of operands that produces a Value as opposed
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
;
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %val = load i16, i16* %addr
  ; The second add operand is a constant expression, not an instruction.
  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %conv3 = sext i16 %add to i32
  ret i32 %conv3
}

; Check that we see that one zext can be derived from the other for free.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  ; %zextt has two users: %add (stored as i32) and %add2 (later zero-extended
  ; to i64).
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %add2 = add nuw i32 %zextt, 12
  store i32 %add, i32 *%addr
  %s = zext i32 %add2 to i64
  store i64 %s, i64* %q
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  ; %add is used twice: as the stored i32 value and, sign-extended, as the
  ; index of the i32 GEP.
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; Here only the GEP index depends on %add; the stored value is the i64
  ; argument %stuff and the GEP element type is i64.
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
  store i64 %stuff, i64 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; Same shape as the i64 variant, with an i128 element type for the GEP and
  ; the store.
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
  store i128 %stuff, i128 *%staddr
  ret void
}


; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; NOTE(review): unlike the i32/i64/i128 element-type variants of this test,
; the checks below expect the promoted form (zext to i64 + sext of %b) in both
; runs where promotion is enabled, not only in the stress run. Presumably the
; sext is not foldable into the addressing mode for an i256 access - confirm
; against the target hooks.
; OPTALL-LABEL: @promoteSExtFromAddrMode256
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
; OPTALL-NEXT: ret void
define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; The sign-extended sum indexes an i256 GEP; the stored value is %stuff.
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
  store i256 %stuff, i256 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free zext.
; When we promote all the way through the load, we end up with
; a free zext and a non-free zext (of %b).
; However, the current target lowering says zext i32 to i64 is free
; so the promotion happens because the cost did not change and may
; expose more opportunities.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  ; %add is used twice: as the stored i32 value and, zero-extended, as the
  ; index of the i32 GEP.
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Same pattern as the addressing-mode tests, but the extended sum feeds a
; shift instead of a GEP.
; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; The sign-extended sum is consumed by a left shift by 12.
  %idx64 = sext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; Same comment as doNotPromoteFreeZExtFromAddrMode.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  ; The zero-extended sum is consumed by a left shift by 12.
  %idx64 = zext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; The input has one free zext and one non-free sext.
; When we promote all the way through to the load, we end up with
; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
; However, when we generate a load pair, the free sext (%ld1) becomes
; non-free. So technically, we trade one non-free sext for two non-free
; sexts.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
;
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
;
; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
; OPTALL-NEXT: ret i64 [[FINAL]]
define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
  ; Two i32 loads from adjacent addresses (%p and %p + 1 element).
  %ld0 = load i32, i32* %p
  %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
  %ld1 = load i32, i32* %idxLd1
  ; The second load goes through an nsw add before being sign-extended.
  %res = add nsw i32 %ld1, %cst
  %sextres = sext i32 %res to i64
  ; The first load is zero-extended directly.
  %zextLd0 = zext i32 %ld0 to i64
  %final = add i64 %sextres, %zextLd0
  ret i64 %final
}

; The i16 load in entry is zero-extended to i32 in if.then, shifted left
; (nsw), and the result is sign-extended to i64. With promotion enabled the
; checks expect a zext of the load straight to i64, placed before if.then,
; and an i64 shl; with promotion disabled the sext of the i32 shl must remain.
define i64 @promoteZextShl(i1 %c, i16* %P) {
entry:
; OPTALL-LABEL: @promoteZextShl
; OPTALL: entry:
; OPT: %[[LD:.*]] = load i16, i16* %P
; OPT: %[[EXT:.*]] = zext i16 %[[LD]] to i64
; OPT: if.then:
; OPT: shl nsw i64 %[[EXT]], 1
; DISABLE: if.then:
; DISABLE: %r = sext i32 %shl2 to i64
  %ld = load i16, i16* %P
  br i1 %c, label %end, label %if.then
if.then:
  %z = zext i16 %ld to i32
  %shl2 = shl nsw i32 %z, 1
  %r = sext i32 %shl2 to i64
  ret i64 %r
end:
  ret i64 0
}