1; RUN: opt < %s -jump-threading -S | FileCheck %s 2; RUN: opt < %s -aa-pipeline=basic-aa -passes=jump-threading -S | FileCheck %s 3 4target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" 5target triple = "i386-apple-darwin7" 6 7; Test that we can thread through the block with the partially redundant load (%2). 8; rdar://6402033 9define i32 @test1(i32* %P) nounwind { 10; CHECK-LABEL: @test1( 11entry: 12 %0 = tail call i32 (...) @f1() nounwind ; <i32> [#uses=1] 13 %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] 14 br i1 %1, label %bb1, label %bb 15 16bb: ; preds = %entry 17; CHECK: bb1.thread: 18; CHECK: store 19; CHECK: br label %bb3 20 store i32 42, i32* %P, align 4 21 br label %bb1 22 23bb1: ; preds = %entry, %bb 24 %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] ; <i32> [#uses=2] 25 %2 = load i32, i32* %P, align 4 ; <i32> [#uses=1] 26 %3 = icmp sgt i32 %2, 36 ; <i1> [#uses=1] 27 br i1 %3, label %bb3, label %bb2 28 29bb2: ; preds = %bb1 30 %4 = tail call i32 (...) @f2() nounwind ; <i32> [#uses=0] 31 ret i32 %res.0 32 33bb3: ; preds = %bb1 34; CHECK: bb3: 35; CHECK: %res.02 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ] 36; CHECK: ret i32 %res.02 37 ret i32 %res.0 38} 39 40declare i32 @f1(...) 41 42declare i32 @f2(...) 43 44 45;; Check that we preserve TBAA information. 46; rdar://11039258 47 48define i32 @test2(i32* %P) nounwind { 49; CHECK-LABEL: @test2( 50entry: 51 %0 = tail call i32 (...) @f1() nounwind ; <i32> [#uses=1] 52 %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] 53 br i1 %1, label %bb1, label %bb 54 55bb: ; preds = %entry 56; CHECK: bb1.thread: 57; CHECK: store{{.*}}, !tbaa !0 58; CHECK: br label %bb3 59 store i32 42, i32* %P, align 4, !tbaa !0 60 br label %bb1 61 62bb1: ; preds = %entry, %bb 63 %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 64 %2 = load i32, i32* %P, align 4, !tbaa !0 65 %3 = icmp sgt i32 %2, 36 66 br i1 %3, label %bb3, label %bb2 67 68bb2: ; preds = %bb1 69 %4 = tail call i32 (...) @f2() nounwind 70 ret i32 %res.0 71 72bb3: ; preds = %bb1 73; CHECK: bb3: 74; CHECK: %res.02 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ] 75; CHECK: ret i32 %res.02 76 ret i32 %res.0 77} 78 79define i32 @test3(i8** %x, i1 %f) { 80; Correctly thread loads of different (but compatible) types, placing bitcasts 81; as necessary in the predecessors. This is especially tricky because the same 82; predecessor ends up with two entries in the PHI node and they must share 83; a single cast. 84; CHECK-LABEL: @test3( 85entry: 86 %0 = bitcast i8** %x to i32** 87 %1 = load i32*, i32** %0, align 8 88 br i1 %f, label %if.end57, label %if.then56 89; CHECK: %[[LOAD:.*]] = load i32*, i32** 90; CHECK: %[[CAST:.*]] = bitcast i32* %[[LOAD]] to i8* 91 92if.then56: 93 br label %if.end57 94 95if.end57: 96 %2 = load i8*, i8** %x, align 8 97 %tobool59 = icmp eq i8* %2, null 98 br i1 %tobool59, label %return, label %if.then60 99; CHECK: %[[PHI:.*]] = phi i8* [ %[[CAST]], %[[PRED:[^ ]+]] ], [ %[[CAST]], %[[PRED]] ] 100; CHECK-NEXT: %[[CMP:.*]] = icmp eq i8* %[[PHI]], null 101; CHECK-NEXT: br i1 %[[CMP]] 102 103if.then60: 104 ret i32 42 105 106return: 107 ret i32 13 108} 109 110define i32 @test4(i32* %P) { 111; CHECK-LABEL: @test4( 112entry: 113 %v0 = tail call i32 (...) @f1() 114 %v1 = icmp eq i32 %v0, 0 115 br i1 %v1, label %bb1, label %bb 116 117bb: 118; CHECK: bb1.thread: 119; CHECK: store atomic 120; CHECK: br label %bb3 121 store atomic i32 42, i32* %P unordered, align 4 122 br label %bb1 123 124bb1: 125; CHECK: bb1: 126; CHECK-NOT: phi 127; CHECK: load atomic 128 %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 129 %v2 = load atomic i32, i32* %P unordered, align 4 130 %v3 = icmp sgt i32 %v2, 36 131 br i1 %v3, label %bb3, label %bb2 132 133bb2: 134 %v4 = tail call i32 (...) @f2() 135 ret i32 %res.0 136 137bb3: 138 ret i32 %res.0 139} 140 141define i32 @test5(i32* %P) { 142; Negative test 143 144; CHECK-LABEL: @test5( 145entry: 146 %v0 = tail call i32 (...) @f1() 147 %v1 = icmp eq i32 %v0, 0 148 br i1 %v1, label %bb1, label %bb 149 150bb: 151; CHECK: bb: 152; CHECK-NEXT: store atomic i32 42, i32* %P release, align 4 153; CHECK-NEXT: br label %bb1 154 store atomic i32 42, i32* %P release, align 4 155 br label %bb1 156 157bb1: 158; CHECK: bb1: 159; CHECK-NEXT: %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 160; CHECK-NEXT: %v2 = load atomic i32, i32* %P acquire, align 4 161; CHECK-NEXT: %v3 = icmp sgt i32 %v2, 36 162; CHECK-NEXT: br i1 %v3, label %bb3, label %bb2 163 164 %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 165 %v2 = load atomic i32, i32* %P acquire, align 4 166 %v3 = icmp sgt i32 %v2, 36 167 br i1 %v3, label %bb3, label %bb2 168 169bb2: 170 %v4 = tail call i32 (...) @f2() 171 ret i32 %res.0 172 173bb3: 174 ret i32 %res.0 175} 176 177define i32 @test6(i32* %P) { 178; Negative test 179 180; CHECK-LABEL: @test6( 181entry: 182 %v0 = tail call i32 (...) @f1() 183 %v1 = icmp eq i32 %v0, 0 184 br i1 %v1, label %bb1, label %bb 185 186bb: 187; CHECK: bb: 188; CHECK-NEXT: store i32 42, i32* %P 189; CHECK-NEXT: br label %bb1 190 store i32 42, i32* %P 191 br label %bb1 192 193bb1: 194; CHECK: bb1: 195; CHECK-NEXT: %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 196; CHECK-NEXT: %v2 = load atomic i32, i32* %P acquire, align 4 197; CHECK-NEXT: %v3 = icmp sgt i32 %v2, 36 198; CHECK-NEXT: br i1 %v3, label %bb3, label %bb2 199 200 %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 201 %v2 = load atomic i32, i32* %P acquire, align 4 202 %v3 = icmp sgt i32 %v2, 36 203 br i1 %v3, label %bb3, label %bb2 204 205bb2: 206 %v4 = tail call i32 (...) @f2() 207 ret i32 %res.0 208 209bb3: 210 ret i32 %res.0 211} 212 213define i32 @test7(i32* %P) { 214; Negative test 215 216; CHECK-LABEL: @test7( 217entry: 218 %v0 = tail call i32 (...) @f1() 219 %v1 = icmp eq i32 %v0, 0 220 br i1 %v1, label %bb1, label %bb 221 222bb: 223; CHECK: bb: 224; CHECK-NEXT: %val = load i32, i32* %P 225; CHECK-NEXT: br label %bb1 226 %val = load i32, i32* %P 227 br label %bb1 228 229bb1: 230; CHECK: bb1: 231; CHECK-NEXT: %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 232; CHECK-NEXT: %v2 = load atomic i32, i32* %P acquire, align 4 233; CHECK-NEXT: %v3 = icmp sgt i32 %v2, 36 234; CHECK-NEXT: br i1 %v3, label %bb3, label %bb2 235 236 %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ] 237 %v2 = load atomic i32, i32* %P acquire, align 4 238 %v3 = icmp sgt i32 %v2, 36 239 br i1 %v3, label %bb3, label %bb2 240 241bb2: 242 %v4 = tail call i32 (...) @f2() 243 ret i32 %res.0 244 245bb3: 246 ret i32 %res.0 247} 248 249; Make sure we merge the aliasing metadata. We keep the range metadata for the 250; first load, as it dominates the second load. Hence we can eliminate the 251; branch. 252define void @test8(i32*, i32*, i32*) { 253; CHECK-LABEL: @test8( 254; CHECK: %a = load i32, i32* %0, !range ![[RANGE4:[0-9]+]] 255; CHECK-NEXT: store i32 %a 256; CHECK-NEXT: %xxx = tail call i32 (...) @f1() 257; CHECK-NEXT: ret void 258 %a = load i32, i32* %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10 259 %b = load i32, i32* %0, !range !5 260 store i32 %a, i32* %1 261 %c = icmp eq i32 %b, 8 262 br i1 %c, label %ret1, label %ret2 263 264ret1: 265 ret void 266 267ret2: 268 %xxx = tail call i32 (...) @f1() nounwind 269 ret void 270} 271 272; Make sure we merge/PRE aliasing metadata correctly. That means that 273; we need to remove metadata from the existing load, and add appropriate 274; metadata to the newly inserted load. 275define void @test9(i32*, i32*, i32*, i1 %c) { 276; CHECK-LABEL: @test9( 277 br i1 %c, label %d1, label %d2 278 279; CHECK: d1: 280; CHECK-NEXT: %a = load i32, i32* %0{{$}} 281d1: 282 %a = load i32, i32* %0, !range !4, !alias.scope !9, !noalias !10 283 br label %d3 284 285; CHECK: d2: 286; CHECK-NEXT: %xxxx = tail call i32 (...) @f1() 287; CHECK-NEXT: %b.pr = load i32, i32* %0, !tbaa !0{{$}} 288d2: 289 %xxxx = tail call i32 (...) @f1() nounwind 290 br label %d3 291 292d3: 293 %p = phi i32 [ 1, %d2 ], [ %a, %d1 ] 294 %b = load i32, i32* %0, !tbaa !0 295 store i32 %p, i32* %1 296 %c2 = icmp eq i32 %b, 8 297 br i1 %c2, label %ret1, label %ret2 298 299ret1: 300 ret void 301 302ret2: 303 %xxx = tail call i32 (...) @f1() nounwind 304 ret void 305} 306 307define i32 @fn_noalias(i1 %c2,i64* noalias %P, i64* noalias %P2) { 308; CHECK-LABEL: @fn_noalias 309; CHECK-LABEL: cond1: 310; CHECK: %[[LD1:.*]] = load i64, i64* %P 311; CHECK: br i1 %c, label %[[THREAD:.*]], label %end 312; CHECK-LABEL: cond2: 313; CHECK: %[[LD2:.*]] = load i64, i64* %P 314; CHECK-LABEL: cond3: 315; CHECK: %[[PHI:.*]] = phi i64 [ %[[LD1]], %[[THREAD]] ], [ %[[LD2]], %cond2 ] 316; CHECK: call void @fn3(i64 %[[PHI]]) 317entry: 318 br i1 %c2, label %cond2, label %cond1 319 320cond1: 321 %l1 = load i64, i64* %P 322 store i64 42, i64* %P2 323 %c = icmp eq i64 %l1, 0 324 br i1 %c, label %cond2, label %end 325 326cond2: 327 %l2 = load i64, i64* %P 328 call void @fn2(i64 %l2) 329 %c3 = icmp eq i64 %l2, 0 330 br i1 %c3, label %cond3, label %end 331 332cond3: 333 call void @fn3(i64 %l2) 334 br label %end 335 336end: 337 ret i32 0 338} 339 340; This tests if we can thread from %sw.bb.i to %do.body.preheader.i67 through 341; %sw.bb21.i. To make this happen, %l2 should be detected as a partically 342; redundant load with %l3 across the store to %phase in %sw.bb21.i. 343 344%struct.NEXT_MOVE = type { i32, i32, i32* } 345@hash_move = unnamed_addr global [65 x i32] zeroinitializer, align 4 346@current_move = internal global [65 x i32] zeroinitializer, align 4 347@last = internal unnamed_addr global [65 x i32*] zeroinitializer, align 8 348@next_status = internal unnamed_addr global [65 x %struct.NEXT_MOVE] zeroinitializer, align 8 349define fastcc i32 @Search(i64 %idxprom.i, i64 %idxprom.i89, i32 %c) { 350; CHECK-LABEL: @Search 351; CHECK-LABEL: sw.bb.i: 352; CHECK: %[[LD1:.*]] = load i32, i32* %arrayidx185, align 4 353; CHECK: %[[C1:.*]] = icmp eq i32 %[[LD1]], 0 354; CHECK: br i1 %[[C1]], label %sw.bb21.i.thread, label %if.then.i64 355; CHECK-LABEL: sw.bb21.i.thread: 356; CHECK: br label %[[THREAD_TO:.*]] 357; CHECK-LABEL: sw.bb21.i: 358; CHECK: %[[LD2:.*]] = load i32, i32* %arrayidx185, align 4 359; CHECK: %[[C2:.*]] = icmp eq i32 %[[LD2]], 0 360; CHECK:br i1 %[[C2]], label %[[THREAD_TO]], label %cleanup 361entry: 362 %arrayidx185 = getelementptr inbounds [65 x i32], [65 x i32]* @hash_move, i64 0, i64 %idxprom.i 363 %arrayidx307 = getelementptr inbounds [65 x i32], [65 x i32]* @current_move, i64 0, i64 %idxprom.i 364 %arrayidx89 = getelementptr inbounds [65 x i32*], [65 x i32*]* @last, i64 0, i64 %idxprom.i 365 %phase = getelementptr inbounds [65 x %struct.NEXT_MOVE], [65 x %struct.NEXT_MOVE]* @next_status, i64 0, i64 %idxprom.i, i32 0 366 br label %cond.true282 367 368cond.true282: 369 switch i32 %c, label %sw.default.i [ 370 i32 1, label %sw.bb.i 371 i32 0, label %sw.bb21.i 372 ] 373 374sw.default.i: 375 br label %cleanup 376 377sw.bb.i: 378 %call.i62 = call fastcc i32* @GenerateCheckEvasions() 379 store i32* %call.i62, i32** %arrayidx89, align 8 380 %l2 = load i32, i32* %arrayidx185, align 4 381 %tobool.i63 = icmp eq i32 %l2, 0 382 br i1 %tobool.i63, label %sw.bb21.i, label %if.then.i64 383 384if.then.i64: ; preds = %sw.bb.i 385 store i32 7, i32* %phase, align 8 386 store i32 %l2, i32* %arrayidx307, align 4 387 %call16.i = call fastcc i32 @ValidMove(i32 %l2) 388 %tobool17.i = icmp eq i32 %call16.i, 0 389 br i1 %tobool17.i, label %if.else.i65, label %cleanup 390 391if.else.i65: 392 call void @f65() 393 br label %sw.bb21.i 394 395sw.bb21.i: 396 store i32 10, i32* %phase, align 8 397 %l3= load i32, i32* %arrayidx185, align 4 398 %tobool27.i = icmp eq i32 %l3, 0 399 br i1 %tobool27.i, label %do.body.preheader.i67, label %cleanup 400 401do.body.preheader.i67: 402 call void @f67() 403 ret i32 67 404 405cleanup: 406 call void @Cleanup() 407 ret i32 0 408} 409 410declare fastcc i32* @GenerateCheckEvasions() 411declare fastcc i32 @ValidMove(i32 %move) 412declare void @f67() 413declare void @Cleanup() 414declare void @f65() 415 416define i32 @fn_SinglePred(i1 %c2,i64* %P) { 417; CHECK-LABEL: @fn_SinglePred 418; CHECK-LABEL: entry: 419; CHECK: %[[L1:.*]] = load i64, i64* %P 420; CHECK: br i1 %c, label %cond3, label %cond1 421; CHECK-LABEL: cond2: 422; CHECK-NOT: load 423; CHECK: %[[PHI:.*]] = phi i64 [ %[[L1]], %cond1 ] 424; CHECK: call void @fn2(i64 %[[PHI]]) 425; CHECK: br label %end 426; CHECK-LABEL: cond3: 427; CHECK: call void @fn2(i64 %l1) 428; CHECK: call void @fn3(i64 %l1) 429 430entry: 431 %l1 = load i64, i64* %P 432 %c = icmp eq i64 %l1, 0 433 br i1 %c, label %cond2, label %cond1 434 435cond1: 436 br i1 %c2, label %cond2, label %end 437 438cond2: 439 %l2 = load i64, i64* %P 440 call void @fn2(i64 %l2) 441 %c3 = icmp eq i64 %l2, 0 442 br i1 %c3, label %cond3, label %end 443 444cond3: 445 call void @fn3(i64 %l2) 446 br label %end 447 448end: 449 ret i32 0 450} 451 452define i32 @fn_SinglePredMultihop(i1 %c1, i1 %c2,i64* %P) { 453; CHECK-LABEL: @fn_SinglePredMultihop 454; CHECK-LABEL: entry: 455; CHECK: %[[L1:.*]] = load i64, i64* %P 456; CHECK: br i1 %c0, label %cond3, label %cond0 457; CHECK-LABEL: cond2: 458; CHECK-NOT: load 459; CHECK: %[[PHI:.*]] = phi i64 [ %[[L1]], %cond1 ] 460; CHECK: call void @fn2(i64 %[[PHI]]) 461; CHECK: br label %end 462; CHECK-LABEL: cond3: 463; CHECK: call void @fn2(i64 %l1) 464; CHECK: call void @fn3(i64 %l1) 465 466entry: 467 %l1 = load i64, i64* %P 468 %c0 = icmp eq i64 %l1, 0 469 br i1 %c0, label %cond2, label %cond0 470 471cond0: 472 br i1 %c1, label %cond1, label %end 473 474cond1: 475 br i1 %c2, label %cond2, label %end 476 477cond2: 478 %l2 = load i64, i64* %P 479 call void @fn2(i64 %l2) 480 %c3 = icmp eq i64 %l2, 0 481 br i1 %c3, label %cond3, label %end 482 483cond3: 484 call void @fn3(i64 %l2) 485 br label %end 486 487end: 488 ret i32 0 489} 490 491declare void @fn2(i64) 492declare void @fn3(i64) 493 494 495; Make sure we phi-translate and make the partially redundant load in 496; merge fully redudant and then we can jump-thread the block with the 497; store. 498; 499; CHECK-LABEL: define i32 @phi_translate_partial_redundant_loads(i32 %0, i32* %1, i32* %2 500; CHECK: merge.thread: 501; CHECK: store 502; CHECK: br label %left_x 503; 504; CHECK: left_x: 505; CHECK-NEXT: ret i32 20 506define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32*) { 507 %cmp0 = icmp ne i32 %0, 0 508 br i1 %cmp0, label %left, label %right 509 510left: 511 store i32 1, i32* %1, align 4 512 br label %merge 513 514right: 515 br label %merge 516 517merge: 518 %phiptr = phi i32* [ %1, %left ], [ %2, %right ] 519 %newload = load i32, i32* %phiptr, align 4 520 %cmp1 = icmp slt i32 %newload, 5 521 br i1 %cmp1, label %left_x, label %right_x 522 523left_x: 524 ret i32 20 525 526right_x: 527 ret i32 10 528} 529 530; CHECK: ![[RANGE4]] = !{i32 0, i32 1} 531 532!0 = !{!3, !3, i64 0} 533!1 = !{!"omnipotent char", !2} 534!2 = !{!"Simple C/C++ TBAA"} 535!3 = !{!"int", !1} 536!4 = !{ i32 0, i32 1 } 537!5 = !{ i32 8, i32 10 } 538!6 = !{!6} 539!7 = !{!7, !6} 540!8 = !{!8, !6} 541!9 = !{!7} 542!10 = !{!8} 543