; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -basic-aa -dse -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

; Sanity tests for atomic stores.
; Note that essentially every transformation DSE does is legal on atomic ops;
; some transformations are just not allowed across release-acquire pairs.

@x = common global i32 0, align 4
@y = common global i32 0, align 4

declare void @randomop(i32*)

; DSE across unordered store (allowed)
define void @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    store atomic i32 0, i32* @y unordered, align 4
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    ret void
;
  store i32 0, i32* @x
  store atomic i32 0, i32* @y unordered, align 4
  store i32 1, i32* @x
  ret void
}

; DSE removes an unordered store (allowed)
define void @test4() {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 0, i32* @x unordered, align 4
  store i32 1, i32* @x
  ret void
}

; DSE unordered store overwriting non-atomic store (allowed)
define void @test5() {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    store atomic i32 1, i32* @x unordered, align 4
; CHECK-NEXT:    ret void
;
  store i32 0, i32* @x
  store atomic i32 1, i32* @x unordered, align 4
  ret void
}

; DSE no-op unordered atomic store (allowed)
define void @test6() {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    ret void
;
  %x = load atomic i32, i32* @x unordered, align 4
  store atomic i32 %x, i32* @x unordered, align 4
  ret void
}
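
; A hedged companion to @test6 (hypothetical test; checks written by hand
; rather than autogenerated): at monotonic ordering the store no longer
; satisfies isUnordered(), so DSE is expected to leave the no-op store alone.
define void @test6_monotonic() {
; CHECK-LABEL: @test6_monotonic(
; CHECK: load atomic i32, i32* @x monotonic
; CHECK: store atomic i32 {{%.*}}, i32* @x monotonic
  %x = load atomic i32, i32* @x monotonic, align 4
  store atomic i32 %x, i32* @x monotonic, align 4
  ret void
}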

; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
; to reason about atomic operations).
define void @test7() {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store atomic i32 0, i32* [[A]] seq_cst, align 4
; CHECK-NEXT:    ret void
;
  %a = alloca i32
  store atomic i32 0, i32* %a seq_cst, align 4
  ret void
}

; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
; to reason about atomic operations).
define i32 @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT:    call void @randomop(i32* [[A]])
; CHECK-NEXT:    store i32 0, i32* [[A]], align 4
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* @x seq_cst, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %a = alloca i32
  call void @randomop(i32* %a)
  store i32 0, i32* %a, align 4
  %x = load atomic i32, i32* @x seq_cst, align 4
  ret i32 %x
}

; DSE across monotonic store (allowed as long as the eliminated store is unordered)
define void @test10() {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    store atomic i32 42, i32* @y monotonic, align 4
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    ret void
;
  store i32 0, i32* @x
  store atomic i32 42, i32* @y monotonic, align 4
  store i32 1, i32* @x
  ret void
}
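
; For reference (informal note, not a test): "unordered" above means
; StoreInst::isUnordered() - a non-volatile store whose ordering is either
; non-atomic or unordered. That is why the plain stores to @x in @test10
; qualify, while the monotonic stores in @test11 and @test12 below do not.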

; DSE across monotonic load (forbidden since the eliminated store is
; monotonic, not unordered)
define i32 @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    store atomic i32 0, i32* @x monotonic, align 4
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* @y monotonic, align 4
; CHECK-NEXT:    store atomic i32 1, i32* @x monotonic, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  store atomic i32 0, i32* @x monotonic, align 4
  %x = load atomic i32, i32* @y monotonic, align 4
  store atomic i32 1, i32* @x monotonic, align 4
  ret i32 %x
}

; DSE across monotonic store (forbidden since the eliminated store is
; monotonic, not unordered)
define void @test12() {
; CHECK-LABEL: @test12(
; CHECK-NEXT:    store atomic i32 0, i32* @x monotonic, align 4
; CHECK-NEXT:    store atomic i32 42, i32* @y monotonic, align 4
; CHECK-NEXT:    store atomic i32 1, i32* @x monotonic, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 0, i32* @x monotonic, align 4
  store atomic i32 42, i32* @y monotonic, align 4
  store atomic i32 1, i32* @x monotonic, align 4
  ret void
}

; But DSE is not allowed across a release-acquire pair.
define i32 @test15() {
; CHECK-LABEL: @test15(
; CHECK-NEXT:    store i32 0, i32* @x, align 4
; CHECK-NEXT:    store atomic i32 0, i32* @y release, align 4
; CHECK-NEXT:    [[X:%.*]] = load atomic i32, i32* @y acquire, align 4
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  store i32 0, i32* @x
  store atomic i32 0, i32* @y release, align 4
  %x = load atomic i32, i32* @y acquire, align 4
  store i32 1, i32* @x
  ret i32 %x
}
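
; Informal illustration of why @test15 must keep both stores to @x
; (hypothetical helper, no checks; it exists only to make the happens-before
; reasoning concrete): if a second thread runs the code below and its release
; store to @y is the one @test15's acquire load observes, then this thread's
; load of @x is ordered after @test15's 'store i32 0' and before its
; 'store i32 1', so it is entitled to see 0.
define i32 @test15_observer() {
entry:
  %y = load atomic i32, i32* @y acquire, align 4
  %seen = icmp eq i32 %y, 0
  br i1 %seen, label %observed, label %done

observed:
  ; Must read the 0 written by @test15's first store.
  %x = load i32, i32* @x, align 4
  store atomic i32 2, i32* @y release, align 4
  br label %done

done:
  %r = phi i32 [ %x, %observed ], [ -1, %entry ]
  ret i32 %r
}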

@z = common global i64 0, align 4
@a = common global i64 0, align 4

; Be conservative, do not kill the regular store.
define i64 @test_atomicrmw_0() {
; CHECK-LABEL: @test_atomicrmw_0(
; CHECK-NEXT:    store i64 1, i64* @z, align 8
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add i64* @z, i64 -1 monotonic
; CHECK-NEXT:    ret i64 [[RES]]
;
  store i64 1, i64* @z
  %res = atomicrmw add i64* @z, i64 -1 monotonic
  ret i64 %res
}

; Be conservative, do not kill the regular store.
define i64 @test_atomicrmw_1() {
; CHECK-LABEL: @test_atomicrmw_1(
; CHECK-NEXT:    store i64 1, i64* @z, align 8
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add i64* @z, i64 -1 acq_rel
; CHECK-NEXT:    ret i64 [[RES]]
;
  store i64 1, i64* @z
  %res = atomicrmw add i64* @z, i64 -1 acq_rel
  ret i64 %res
}

; Monotonic atomicrmw should not block eliminating non-aliasing stores.
define i64 @test_atomicrmw_2() {
; CHECK-LABEL: @test_atomicrmw_2(
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add i64* @a, i64 -1 monotonic
; CHECK-NEXT:    store i64 2, i64* @z, align 8
; CHECK-NEXT:    ret i64 [[RES]]
;
  store i64 1, i64* @z
  %res = atomicrmw add i64* @a, i64 -1 monotonic
  store i64 2, i64* @z
  ret i64 %res
}

; Be conservative, do not eliminate stores across atomic operations stronger
; than monotonic.
define i64 @test_atomicrmw_3() {
; CHECK-LABEL: @test_atomicrmw_3(
; CHECK-NEXT:    store i64 1, i64* @z, align 8
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add i64* @a, i64 -1 release
; CHECK-NEXT:    store i64 2, i64* @z, align 8
; CHECK-NEXT:    ret i64 [[RES]]
;
  store i64 1, i64* @z
  %res = atomicrmw add i64* @a, i64 -1 release
  store i64 2, i64* @z
  ret i64 %res
}

; Be conservative, do not eliminate may-alias stores.
define i64 @test_atomicrmw_4(i64* %ptr) {
; CHECK-LABEL: @test_atomicrmw_4(
; CHECK-NEXT:    store i64 1, i64* @z, align 8
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add i64* [[PTR:%.*]], i64 -1 monotonic
; CHECK-NEXT:    store i64 2, i64* @z, align 8
; CHECK-NEXT:    ret i64 [[RES]]
;
  store i64 1, i64* @z
  %res = atomicrmw add i64* %ptr, i64 -1 monotonic
  store i64 2, i64* @z
  ret i64 %res
}

; Be conservative, do not eliminate aliasing stores.
define i64 @test_atomicrmw_5() {
; CHECK-LABEL: @test_atomicrmw_5(
; CHECK-NEXT:    store i64 1, i64* @z, align 8
; CHECK-NEXT:    [[RES:%.*]] = atomicrmw add i64* @z, i64 -1 monotonic
; CHECK-NEXT:    store i64 2, i64* @z, align 8
; CHECK-NEXT:    ret i64 [[RES]]
;
  store i64 1, i64* @z
  %res = atomicrmw add i64* @z, i64 -1 monotonic
  store i64 2, i64* @z
  ret i64 %res
}

; Be conservative, do not eliminate stores across a non-monotonic cmpxchg.
define { i32, i1 } @test_cmpxchg_1() {
; CHECK-LABEL: @test_cmpxchg_1(
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    [[RET:%.*]] = cmpxchg volatile i32* @x, i32 10, i32 20 seq_cst monotonic
; CHECK-NEXT:    store i32 2, i32* @x, align 4
; CHECK-NEXT:    ret { i32, i1 } [[RET]]
;
  store i32 1, i32* @x
  %ret = cmpxchg volatile i32* @x, i32 10, i32 20 seq_cst monotonic
  store i32 2, i32* @x
  ret { i32, i1 } %ret
}

; Monotonic cmpxchg should not block DSE for non-aliasing stores.
define { i32, i1 } @test_cmpxchg_2() {
; CHECK-LABEL: @test_cmpxchg_2(
; CHECK-NEXT:    [[RET:%.*]] = cmpxchg volatile i32* @y, i32 10, i32 20 monotonic monotonic
; CHECK-NEXT:    store i32 2, i32* @x, align 4
; CHECK-NEXT:    ret { i32, i1 } [[RET]]
;
  store i32 1, i32* @x
  %ret = cmpxchg volatile i32* @y, i32 10, i32 20 monotonic monotonic
  store i32 2, i32* @x
  ret { i32, i1 } %ret
}

; Be conservative, do not eliminate stores across a non-monotonic cmpxchg.
define { i32, i1 } @test_cmpxchg_3() {
; CHECK-LABEL: @test_cmpxchg_3(
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    [[RET:%.*]] = cmpxchg volatile i32* @y, i32 10, i32 20 seq_cst seq_cst
; CHECK-NEXT:    store i32 2, i32* @x, align 4
; CHECK-NEXT:    ret { i32, i1 } [[RET]]
;
  store i32 1, i32* @x
  %ret = cmpxchg volatile i32* @y, i32 10, i32 20 seq_cst seq_cst
  store i32 2, i32* @x
  ret { i32, i1 } %ret
}

; Be conservative, do not eliminate may-alias stores.
define { i32, i1 } @test_cmpxchg_4(i32* %ptr) {
; CHECK-LABEL: @test_cmpxchg_4(
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    [[RET:%.*]] = cmpxchg volatile i32* [[PTR:%.*]], i32 10, i32 20 monotonic monotonic
; CHECK-NEXT:    store i32 2, i32* @x, align 4
; CHECK-NEXT:    ret { i32, i1 } [[RET]]
;
  store i32 1, i32* @x
  %ret = cmpxchg volatile i32* %ptr, i32 10, i32 20 monotonic monotonic
  store i32 2, i32* @x
  ret { i32, i1 } %ret
}

; Be conservative, do not eliminate aliasing stores.
define { i32, i1 } @test_cmpxchg_5() {
; CHECK-LABEL: @test_cmpxchg_5(
; CHECK-NEXT:    store i32 1, i32* @x, align 4
; CHECK-NEXT:    [[RET:%.*]] = cmpxchg volatile i32* @x, i32 10, i32 20 monotonic monotonic
; CHECK-NEXT:    store i32 2, i32* @x, align 4
; CHECK-NEXT:    ret { i32, i1 } [[RET]]
;
  store i32 1, i32* @x
  %ret = cmpxchg volatile i32* @x, i32 10, i32 20 monotonic monotonic
  store i32 2, i32* @x
  ret { i32, i1 } %ret
}

; **** Noop load->store tests **************************************************

; We can optimize unordered atomic loads or stores.
define void @test_load_atomic(i32* %Q) {
; CHECK-LABEL: @test_load_atomic(
; CHECK-NEXT:    ret void
;
  %a = load atomic i32, i32* %Q unordered, align 4
  store atomic i32 %a, i32* %Q unordered, align 4
  ret void
}

; We can optimize unordered atomic loads or stores.
define void @test_store_atomic(i32* %Q) {
; CHECK-LABEL: @test_store_atomic(
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store atomic i32 %a, i32* %Q unordered, align 4
  ret void
}

; We can NOT optimize release atomic loads or stores.
define void @test_store_atomic_release(i32* %Q) {
; CHECK-LABEL: @test_store_atomic_release(
; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store atomic i32 [[A]], i32* [[Q]] release, align 4
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store atomic i32 %a, i32* %Q release, align 4
  ret void
}
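
; A hedged companion sketch (hypothetical test; checks written by hand rather
; than autogenerated): a volatile store is never removable, so the no-op
; pattern must also survive at unordered ordering once the store is volatile.
define void @test_store_atomic_volatile(i32* %Q) {
; CHECK-LABEL: @test_store_atomic_volatile(
; CHECK: store atomic volatile i32
  %a = load i32, i32* %Q
  store atomic volatile i32 %a, i32* %Q unordered, align 4
  ret void
}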