1 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s 2 3 // Test that we are generating atomicrmw instructions, rather than 4 // compare-exchange loops for common atomic ops. This makes a big difference 5 // on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for 6 // the load and then another ll/sc in the loop, expanding to about 30 7 // instructions when it should be only 4. It has a smaller, but still 8 // noticeable, impact on platforms like x86 and RISC-V, where there are atomic 9 // RMW instructions. 10 // 11 // We currently emit cmpxchg loops for most operations on _Bools, because 12 // they're sufficiently rare that it's not worth making sure that the semantics 13 // are correct. 14 15 // CHECK: @testStructGlobal = global {{.*}} { i16 1, i16 2, i16 3, i16 4 } 16 // CHECK: @testPromotedStructGlobal = global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer } 17 18 typedef int __attribute__((vector_size(16))) vector; 19 20 _Atomic(_Bool) b; 21 _Atomic(int) i; 22 _Atomic(long long) l; 23 _Atomic(short) s; 24 _Atomic(char*) p; 25 _Atomic(float) f; 26 _Atomic(vector) v; 27 28 // CHECK: testinc 29 void testinc(void) 30 { 31 // Special case for suffix bool++, sets to true and returns the old value. 32 // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst 33 b++; 34 // CHECK: atomicrmw add i32* @i, i32 1 seq_cst 35 i++; 36 // CHECK: atomicrmw add i64* @l, i64 1 seq_cst 37 l++; 38 // CHECK: atomicrmw add i16* @s, i16 1 seq_cst 39 s++; 40 // Prefix increment 41 // Special case for bool: set to true and return true 42 // CHECK: store atomic i8 1, i8* @b seq_cst, align 1 43 ++b; 44 // Currently, we have no variant of atomicrmw that returns the new value, so 45 // we have to generate an atomic add, which returns the old value, and then a 46 // non-atomic add. 47 // CHECK: atomicrmw add i32* @i, i32 1 seq_cst 48 // CHECK: add i32 49 ++i; 50 // CHECK: atomicrmw add i64* @l, i64 1 seq_cst 51 // CHECK: add i64 52 ++l; 53 // CHECK: atomicrmw add i16* @s, i16 1 seq_cst 54 // CHECK: add i16 55 ++s; 56 } 57 // CHECK: testdec 58 void testdec(void) 59 { 60 // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b 61 b--; 62 // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst 63 i--; 64 // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst 65 l--; 66 // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst 67 s--; 68 // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b 69 --b; 70 // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst 71 // CHECK: sub i32 72 --i; 73 // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst 74 // CHECK: sub i64 75 --l; 76 // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst 77 // CHECK: sub i16 78 --s; 79 } 80 // CHECK: testaddeq 81 void testaddeq(void) 82 { 83 // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b 84 // CHECK: atomicrmw add i32* @i, i32 42 seq_cst 85 // CHECK: atomicrmw add i64* @l, i64 42 seq_cst 86 // CHECK: atomicrmw add i16* @s, i16 42 seq_cst 87 b += 42; 88 i += 42; 89 l += 42; 90 s += 42; 91 } 92 // CHECK: testsubeq 93 void testsubeq(void) 94 { 95 // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b 96 // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst 97 // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst 98 // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst 99 b -= 42; 100 i -= 42; 101 l -= 42; 102 s -= 42; 103 } 104 // CHECK: testxoreq 105 void testxoreq(void) 106 { 107 // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b 108 // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst 109 // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst 110 // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst 111 b ^= 42; 112 i ^= 42; 113 l ^= 42; 114 s ^= 42; 115 } 116 // CHECK: testoreq 117 void testoreq(void) 118 { 119 // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b 120 // CHECK: atomicrmw or i32* @i, i32 42 seq_cst 121 // CHECK: atomicrmw or i64* @l, i64 42 seq_cst 122 // CHECK: atomicrmw or i16* @s, i16 42 seq_cst 123 b |= 42; 124 i |= 42; 125 l |= 42; 126 s |= 42; 127 } 128 // CHECK: testandeq 129 void testandeq(void) 130 { 131 // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b 132 // CHECK: atomicrmw and i32* @i, i32 42 seq_cst 133 // CHECK: atomicrmw and i64* @l, i64 42 seq_cst 134 // CHECK: atomicrmw and i16* @s, i16 42 seq_cst 135 b &= 42; 136 i &= 42; 137 l &= 42; 138 s &= 42; 139 } 140 141 // CHECK-LABEL: define arm_aapcscc void @testFloat(float* 142 void testFloat(_Atomic(float) *fp) { 143 // CHECK: [[FP:%.*]] = alloca float* 144 // CHECK-NEXT: [[X:%.*]] = alloca float 145 // CHECK-NEXT: [[F:%.*]] = alloca float 146 // CHECK-NEXT: [[TMP0:%.*]] = alloca float 147 // CHECK-NEXT: [[TMP1:%.*]] = alloca float 148 // CHECK-NEXT: store float* {{%.*}}, float** [[FP]] 149 150 // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] 151 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4 152 __c11_atomic_init(fp, 1.0f); 153 154 // CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4 155 _Atomic(float) x = 2.0f; 156 157 // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] 158 // CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8* 159 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8* 160 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5) 161 // CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4 162 // CHECK-NEXT: store float [[T3]], float* [[F]] 163 float f = *fp; 164 165 // CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4 166 // CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4 167 // CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4 168 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8* 169 // CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8* 170 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5) 171 *fp = f; 172 173 // CHECK-NEXT: ret void 174 } 175 176 // CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]* 177 void testComplexFloat(_Atomic(_Complex float) *fp) { 178 // CHECK: [[FP:%.*]] = alloca [[CF]]*, align 4 179 // CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8 180 // CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4 181 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8 182 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8 183 // CHECK-NEXT: store [[CF]]* 184 185 // CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]] 186 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0 187 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1 188 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]] 189 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]] 190 __c11_atomic_init(fp, 1.0f); 191 192 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0 193 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1 194 // CHECK-NEXT: store float 2.000000e+00, float* [[T0]] 195 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]] 196 _Atomic(_Complex float) x = 2.0f; 197 198 // CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]] 199 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8* 200 // CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8* 201 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 202 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0 203 // CHECK-NEXT: [[R:%.*]] = load float* [[T0]] 204 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1 205 // CHECK-NEXT: [[I:%.*]] = load float* [[T0]] 206 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 207 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 208 // CHECK-NEXT: store float [[R]], float* [[T0]] 209 // CHECK-NEXT: store float [[I]], float* [[T1]] 210 _Complex float f = *fp; 211 212 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 213 // CHECK-NEXT: [[R:%.*]] = load float* [[T0]] 214 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 215 // CHECK-NEXT: [[I:%.*]] = load float* [[T0]] 216 // CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4 217 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0 218 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1 219 // CHECK-NEXT: store float [[R]], float* [[T0]] 220 // CHECK-NEXT: store float [[I]], float* [[T1]] 221 // CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8* 222 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8* 223 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5) 224 *fp = f; 225 226 // CHECK-NEXT: ret void 227 } 228 229 typedef struct { short x, y, z, w; } S; 230 _Atomic S testStructGlobal = (S){1, 2, 3, 4}; 231 // CHECK: define arm_aapcscc void @testStruct([[S:.*]]* 232 void testStruct(_Atomic(S) *fp) { 233 // CHECK: [[FP:%.*]] = alloca [[S]]*, align 4 234 // CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8 235 // CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2 236 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8 237 // CHECK-NEXT: store [[S]]* 238 239 // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]] 240 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0 241 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2 242 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1 243 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2 244 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2 245 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2 246 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3 247 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2 248 __c11_atomic_init(fp, (S){1,2,3,4}); 249 250 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0 251 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2 252 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1 253 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2 254 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2 255 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2 256 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3 257 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2 258 _Atomic(S) x = (S){1,2,3,4}; 259 260 // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] 261 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8* 262 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* 263 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 264 S f = *fp; 265 266 // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] 267 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8* 268 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* 269 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false) 270 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8* 271 // CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8* 272 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5) 273 *fp = f; 274 275 // CHECK-NEXT: ret void 276 } 277 278 typedef struct { short x, y, z; } PS; 279 _Atomic PS testPromotedStructGlobal = (PS){1, 2, 3}; 280 // CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]* 281 void testPromotedStruct(_Atomic(PS) *fp) { 282 // CHECK: [[FP:%.*]] = alloca [[APS]]*, align 4 283 // CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8 284 // CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2 285 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8 286 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8 287 // CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 288 // CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2 289 // CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8 290 // CHECK-NEXT: store [[APS]]* 291 292 // CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]] 293 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8* 294 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) 295 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0 296 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 297 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2 298 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 299 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2 300 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 301 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2 302 __c11_atomic_init(fp, (PS){1,2,3}); 303 304 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* 305 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false) 306 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0 307 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 308 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2 309 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 310 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2 311 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 312 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2 313 _Atomic(PS) x = (PS){1,2,3}; 314 315 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] 316 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8* 317 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8* 318 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 319 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0 320 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8* 321 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8* 322 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) 323 PS f = *fp; 324 325 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] 326 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8* 327 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false) 328 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0 329 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* 330 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* 331 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false) 332 // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8* 333 // CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8* 334 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5) 335 *fp = f; 336 337 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4 338 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8* 339 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8* 340 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 341 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0 342 // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8* 343 // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8* 344 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) 345 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0 346 // CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2 347 // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32 348 // CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4 349 int a = ((PS)*fp).x; 350 351 // CHECK-NEXT: ret void 352 } 353 354 // CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]* 355 356 // FIXME: none of these look right, but we can leave the "test" here 357 // to make sure they at least don't crash. 358 void testPromotedStructOps(_Atomic(PS) *p) { 359 PS a = __c11_atomic_load(p, 5); 360 __c11_atomic_store(p, a, 5); 361 PS b = __c11_atomic_exchange(p, a, 5); 362 _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5); 363 v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5); 364 } 365