; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s

; Big-endian ("E") datalayout; DSE decisions below depend only on sizes/offsets.
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

declare i8* @calloc(i64, i64)
declare void @memset_pattern16(i8*, i8*, i64)

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)

; **** Noop load->store tests **************************************************

; We CAN optimize volatile loads: the volatile load stays, but the plain store
; of the just-loaded value back to the same address is a no-op and is removed.
define void @test_load_volatile(i32* %Q) {
; CHECK-LABEL: @test_load_volatile(
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %a = load volatile i32, i32* %Q
  store i32 %a, i32* %Q
  ret void
}

; We can NOT optimize volatile stores: both the load and the volatile store
; must be preserved.
define void @test_store_volatile(i32* %Q) {
; CHECK-LABEL: @test_store_volatile(
; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store volatile i32 [[A]], i32* [[Q]], align 4
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store volatile i32 %a, i32* %Q
  ret void
}

; PR2599 - load -> store to same address.
; The store of %temp5 writes back the value just loaded from %temp4, so it is
; a no-op and is removed; the store of the negated %temp17 must stay.
define void @test12({ i32, i32 }* %x) nounwind {
; CHECK-LABEL: @test12(
; CHECK-NEXT:    [[TEMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1
; CHECK-NEXT:    [[TEMP8:%.*]] = load i32, i32* [[TEMP7]], align 4
; CHECK-NEXT:    [[TEMP17:%.*]] = sub i32 0, [[TEMP8]]
; CHECK-NEXT:    store i32 [[TEMP17]], i32* [[TEMP7]], align 4
; CHECK-NEXT:    ret void
;
  %temp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
  %temp5 = load i32, i32* %temp4, align 4
  %temp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
  %temp8 = load i32, i32* %temp7, align 4
  %temp17 = sub i32 0, %temp8
  store i32 %temp5, i32* %temp4, align 4
  store i32 %temp17, i32* %temp7, align 4
  ret void
}

; Remove redundant store if loaded value is in another block.
define i32 @test26(i1 %c, i32* %p) {
; CHECK-LABEL: @test26(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  store i32 %v, i32* %p, align 4
  br label %bb3
bb3:
  ret i32 0
}

; Remove redundant store if loaded value is in another block.
define i32 @test27(i1 %c, i32* %p) {
; CHECK-LABEL: @test27(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

; Remove redundant store if loaded value is in another block inside a loop.
; No other write to %p happens between the entry-block load and the store in
; the loop, so the store is a no-op on every iteration and can be removed.
define i32 @test31(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test31(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb2:
  ret i32 0
}

; Don't remove "redundant" store if %p is possibly stored to.
define i32 @test46(i1 %c, i32* %p, i32* %p2, i32 %i) {
; CHECK-LABEL: @test46(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    store i32 0, i32* [[P2:%.*]], align 4
; CHECK-NEXT:    br i1 [[C]], label [[BB3:%.*]], label [[BB1]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb2:
  store i32 0, i32* %p2, align 4
  br i1 %c, label %bb3, label %bb1
bb3:
  ret i32 0
}

declare void @unknown_func()

; Remove redundant store, which is in the same loop as the load.
define i32 @test33(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test33(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB3:%.*]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %bb1
bb1:
  %v = load i32, i32* %p, align 4
  br label %bb2
bb2:
  store i32 %v, i32* %p, align 4
  ; Might read and overwrite value at %p, but doesn't matter.
  call void @unknown_func()
  br i1 %c, label %bb1, label %bb3
bb3:
  ret i32 0
}

; Note: the callee's name below deliberately carries the "unkown" typo; it is
; the identifier the test declares and calls, so it must stay as-is.
declare void @unkown_write(i32*)

; We can't remove the "noop" store around an unknown write.
define void @test43(i32* %Q) {
; CHECK-LABEL: @test43(
; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    call void @unkown_write(i32* [[Q]])
; CHECK-NEXT:    store i32 [[A]], i32* [[Q]], align 4
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  call void @unkown_write(i32* %Q)
  store i32 %a, i32* %Q
  ret void
}

; We CAN remove it when the unknown write comes AFTER.
define void @test44(i32* %Q) {
; CHECK-LABEL: @test44(
; CHECK-NEXT:    call void @unkown_write(i32* [[Q:%.*]])
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store i32 %a, i32* %Q
  call void @unkown_write(i32* %Q)
  ret void
}

; The store of 10 is dead (immediately overwritten), which leaves a plain
; load/store-same-value pair; everything can be removed.
define void @test45(i32* %Q) {
; CHECK-LABEL: @test45(
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store i32 10, i32* %Q
  store i32 %a, i32* %Q
  ret void
}

; The store of %v in bb1 must be kept: on the path through bb0, %p holds 0,
; not the value loaded in the entry block.
define i32 @test48(i1 %c, i32* %p) {
; CHECK-LABEL: @test48(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB0:%.*]], label [[BB0_0:%.*]]
; CHECK:       bb0:
; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
; CHECK-NEXT:    br i1 [[C]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb0.0:
; CHECK-NEXT:    br label [[BB1]]
; CHECK:       bb1:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    br i1 [[C]], label [[BB2]], label [[BB0]]
; CHECK:       bb2:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb0, label %bb0.0

bb0:
  store i32 0, i32* %p
  br i1 %c, label %bb1, label %bb2

bb0.0:
  br label %bb1

bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb2, label %bb0
bb2:
  ret i32 0
}

; Both stores only ever write the value loaded from %p in the entry block, so
; they are no-ops on every path and can both be removed.
define i32 @test47(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test47(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br i1 [[C]], label [[BB3:%.*]], label [[BB1]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb2:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb3, label %bb1
bb3:
  ret i32 0
}

; Test case from PR47887.
define void @test_noalias_store_between_load_and_store(i32* noalias %x, i32* noalias %y) {
; CHECK-LABEL: @test_noalias_store_between_load_and_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 0, i32* [[Y:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %lv = load i32, i32* %x, align 4
  store i32 0, i32* %y, align 4
  store i32 %lv, i32* %x, align 4
  ret void
}

; Test case from PR47887. Currently we eliminate the dead `store i32 %inc, i32* %x`,
; but not the no-op `store i32 %lv, i32* %x`. That is because no-op stores are
; eliminated before dead stores for the same def.
; NOTE(review): the autogenerated CHECK lines below show BOTH stores to %x
; removed, so this comment appears stale relative to the checks — confirm.
define void @test_noalias_store_between_load_and_store_elimin_order(i32* noalias %x, i32* noalias %y) {
; CHECK-LABEL: @test_noalias_store_between_load_and_store_elimin_order(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 0, i32* [[Y:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %lv = load i32, i32* %x, align 4
  %inc = add nsw i32 %lv, 1
  store i32 %inc, i32* %x, align 4
  store i32 0, i32* %y, align 4
  store i32 %lv, i32* %x, align 4
  ret void
}

; PR50143
; With inaccessiblememonly on the call, the allocation is not known to be
; zero-initialized from DSE's view, so the store of 0 must be kept.
define i8* @store_zero_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 1, i64 10) #[[ATTR6:[0-9]+]]
; CHECK-NEXT:    store i8 0, i8* [[CALL]], align 1
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 1, i64 10) inaccessiblememonly
  store i8 0, i8* %call
  ret i8* %call
}

; calloc returns zero-filled memory, so a full-size zero memset is dead.
define i8* @zero_memset_after_calloc() {
; CHECK-LABEL: @zero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

; A volatile memset must not be removed, even though it writes zeros over
; calloc'ed memory.
define i8* @volatile_zero_memset_after_calloc() {
; CHECK-LABEL: @volatile_zero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 true)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 true)
  ret i8* %call
}

; The store of %v is overwritten by the full-size zero memset, and that memset
; is in turn redundant with calloc's zero fill, so both are removed.
define i8* @zero_memset_and_store_after_calloc(i8 %v) {
; CHECK-LABEL: @zero_memset_and_store_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  store i8 %v, i8* %call
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

; Even a partial zero memset over calloc'ed memory is redundant.
define i8* @partial_zero_memset_after_calloc() {
; CHECK-LABEL: @partial_zero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
  ret i8* %call
}

; The store at offset 30 is outside the 20-byte memset, so it must be kept;
; the partial zero memset is still redundant with calloc.
define i8* @partial_zero_memset_and_store_after_calloc(i8 %v) {
; CHECK-LABEL: @partial_zero_memset_and_store_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 30
; CHECK-NEXT:    store i8 [[V:%.*]], i8* [[GEP]], align 1
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  %gep = getelementptr inbounds i8, i8* %call, i64 30
  store i8 %v, i8* %gep
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
  ret i8* %call
}

; A full-size zero memset covers any in-bounds dynamic index, so both the
; store and the memset are removed.
define i8* @zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64 %idx) {
; CHECK-LABEL: @zero_memset_and_store_with_dyn_index_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  %gep = getelementptr inbounds i8, i8* %call, i64 %idx
  store i8 %v, i8* %gep
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

; With a dynamic index and only a partial memset, neither the store nor the
; memset can be proven dead, so both stay.
define i8* @partial_zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64 %idx) {
; CHECK-LABEL: @partial_zero_memset_and_store_with_dyn_index_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[IDX:%.*]]
; CHECK-NEXT:    store i8 [[V:%.*]], i8* [[GEP]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 20, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  %gep = getelementptr inbounds i8, i8* %call, i64 %idx
  store i8 %v, i8* %gep
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
  ret i8* %call
}

; As above for the plain store: inaccessiblememonly hides calloc's zero fill,
; so the zero memset must be kept.
define i8* @zero_memset_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4) #[[ATTR6]]
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4) inaccessiblememonly
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

; A non-zero memset is not redundant with calloc and must be kept.
define i8* @cst_nonzero_memset_after_calloc() {
; CHECK-LABEL: @cst_nonzero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 1, i64 40000, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 1, i64 40000, i1 false)
  ret i8* %call
}

; Same with a non-constant fill value: it cannot be proven zero, so the memset
; must be kept.
define i8* @nonzero_memset_after_calloc(i8 %v) {
; CHECK-LABEL: @nonzero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 [[V:%.*]], i64 40000, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 %v, i64 40000, i1 false)
  ret i8* %call
}

; PR11896
; The first memset is dead, because calloc provides zero-filled memory.
; TODO: This could be replaced with a call to malloc + memset_pattern16.
; The zero memset is redundant with calloc's zero fill and is removed; the
; memset_pattern16 call writes a (non-zero) pattern and must be kept.
define i8* @memset_pattern16_after_calloc(i8* %pat) {
; CHECK-LABEL: @memset_pattern16_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @memset_pattern16(i8* [[CALL]], i8* [[PAT:%.*]], i64 40000)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4) #1
  call void @llvm.memset.p0i8.i64(i8* align 4 %call, i8 0, i64 40000, i1 false)
  call void @memset_pattern16(i8* %call, i8* %pat, i64 40000) #1
  ret i8* %call
}