1 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s
2 
3 // Test that we are generating atomicrmw instructions, rather than
4 // compare-exchange loops for common atomic ops.  This makes a big difference
5 // on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
6 // the load and then another ll/sc in the loop, expanding to about 30
7 // instructions when it should be only 4.  It has a smaller, but still
8 // noticeable, impact on platforms like x86 and RISC-V, where there are atomic
9 // RMW instructions.
10 //
11 // We currently emit cmpxchg loops for most operations on _Bools, because
12 // they're sufficiently rare that it's not worth making sure that the semantics
13 // are correct.
14 
15 // CHECK: @testStructGlobal = global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
16 // CHECK: @testPromotedStructGlobal = global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
17 
// Vector-of-int type occupying 16 bytes.
18 typedef int __attribute__((vector_size(16))) vector;
19 
// Atomic globals for the operator tests below.  Within this file only b, i,
// l and s are referenced by name; every later use of p, f or v is a parameter
// or local variable that shadows the global.
20 _Atomic(_Bool) b;
21 _Atomic(int) i;
22 _Atomic(long long) l;
23 _Atomic(short) s;
24 _Atomic(char*) p;
25 _Atomic(float) f;
26 _Atomic(vector) v;
27 
28 // CHECK: testinc
// Applies postfix and then prefix ++ to the atomic globals b, i, l and s.
// The expectations are matched in order, so the statements must stay in the
// same sequence as the directives that precede them.
29 void testinc(void)
30 {
31   // Special case for postfix bool++, sets to true and returns the old value.
32   // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
33   b++;
34   // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
35   i++;
36   // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
37   l++;
38   // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
39   s++;
40   // Prefix increment
41   // Special case for bool: set to true and return true
42   // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
43   ++b;
44   // Currently, we have no variant of atomicrmw that returns the new value, so
45   // we have to generate an atomic add, which returns the old value, and then a
46   // non-atomic add.
47   // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
48   // CHECK: add i32
49   ++i;
50   // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
51   // CHECK: add i64
52   ++l;
53   // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
54   // CHECK: add i16
55   ++s;
56 }
57 // CHECK: testdec
// Applies postfix and then prefix -- to the atomic globals b, i, l and s.
// For the _Bool global the expectation is a call to the
// __atomic_compare_exchange library routine; for the integer globals it is an
// atomicrmw sub, followed by a plain sub in the prefix forms.
58 void testdec(void)
59 {
60   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
61   b--;
62   // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
63   i--;
64   // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
65   l--;
66   // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
67   s--;
    // Prefix decrement
68   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
69   --b;
70   // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
71   // CHECK: sub i32
72   --i;
73   // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
74   // CHECK: sub i64
75   --l;
76   // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
77   // CHECK: sub i16
78   --s;
79 }
80 // CHECK: testaddeq
// Applies += 42 to b, i, l and s.  The four expectations are listed ahead of
// the statements and are matched in the same b, i, l, s order: a
// compare-exchange library call for the _Bool, atomicrmw add for the rest.
81 void testaddeq(void)
82 {
83   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
84   // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
85   // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
86   // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
87   b += 42;
88   i += 42;
89   l += 42;
90   s += 42;
91 }
92 // CHECK: testsubeq
// Applies -= 42 to b, i, l and s.  The four expectations are listed ahead of
// the statements and are matched in the same b, i, l, s order: a
// compare-exchange library call for the _Bool, atomicrmw sub for the rest.
93 void testsubeq(void)
94 {
95   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
96   // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
97   // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
98   // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
99   b -= 42;
100   i -= 42;
101   l -= 42;
102   s -= 42;
103 }
104 // CHECK: testxoreq
// Applies ^= 42 to b, i, l and s.  The four expectations are listed ahead of
// the statements and are matched in the same b, i, l, s order: a
// compare-exchange library call for the _Bool, atomicrmw xor for the rest.
105 void testxoreq(void)
106 {
107   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
108   // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
109   // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
110   // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
111   b ^= 42;
112   i ^= 42;
113   l ^= 42;
114   s ^= 42;
115 }
116 // CHECK: testoreq
// Applies |= 42 to b, i, l and s.  The four expectations are listed ahead of
// the statements and are matched in the same b, i, l, s order: a
// compare-exchange library call for the _Bool, atomicrmw or for the rest.
117 void testoreq(void)
118 {
119   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
120   // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
121   // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
122   // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
123   b |= 42;
124   i |= 42;
125   l |= 42;
126   s |= 42;
127 }
128 // CHECK: testandeq
// Applies &= 42 to b, i, l and s.  The four expectations are listed ahead of
// the statements and are matched in the same b, i, l, s order: a
// compare-exchange library call for the _Bool, atomicrmw and for the rest.
129 void testandeq(void)
130 {
131   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
132   // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
133   // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
134   // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
135   b &= 42;
136   i &= 42;
137   l &= 42;
138   s &= 42;
139 }
140 
141 // CHECK-LABEL: define arm_aapcscc void @testFloat(float*
// Initialises, reads and writes an _Atomic(float) through the pointer fp.
// The directives pin the whole function body instruction by instruction, so
// the statements below must keep their current order.
142 void testFloat(_Atomic(float) *fp) {
// Stack slots, in order: the fp parameter, x, f, then two temporaries used by
// the atomic load and the atomic store respectively.
143 // CHECK:      [[FP:%.*]] = alloca float*
144 // CHECK-NEXT: [[X:%.*]] = alloca float
145 // CHECK-NEXT: [[F:%.*]] = alloca float
146 // CHECK-NEXT: [[TMP0:%.*]] = alloca float
147 // CHECK-NEXT: [[TMP1:%.*]] = alloca float
148 // CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
149 
// Initialisation through the pointer is expected to be an ordinary store.
150 // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
151 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
152   __c11_atomic_init(fp, 1.0f);
153 
// Initialising a local atomic is likewise an ordinary store.
154 // CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
155   _Atomic(float) x = 2.0f;
156 
// The read is expected to call __atomic_load (size 4, ordering argument 5)
// into TMP0, whose value is then copied into f.
157 // CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
158 // CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
159 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
160 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
161 // CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4
162 // CHECK-NEXT: store float [[T3]], float* [[F]]
163   float f = *fp;
164 
// The write is expected to spill f to TMP1 and hand that to __atomic_store
// (size 4, ordering argument 5).
165 // CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
166 // CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
167 // CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
168 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
169 // CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
170 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5)
171   *fp = f;
172 
173 // CHECK-NEXT: ret void
174 }
175 
176 // CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
// Initialises, reads and writes an _Atomic(_Complex float) through fp.  The
// directives pin the whole function body instruction by instruction, so the
// statements below must keep their current order.
177 void testComplexFloat(_Atomic(_Complex float) *fp) {
// The atomic objects (x and both temporaries) are expected at align 8, while
// the plain _Complex float f is expected at align 4.
178 // CHECK:      [[FP:%.*]] = alloca [[CF]]*, align 4
179 // CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
180 // CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
181 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
182 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
183 // CHECK-NEXT: store [[CF]]*
184 
// The scalar initialiser becomes real part 1.0, imaginary part 0.0, written
// with two ordinary stores.
185 // CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
186 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
187 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
188 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
189 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
190   __c11_atomic_init(fp, 1.0f);
191 
// Same shape for the local atomic: real part 2.0, imaginary part 0.0.
192 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
193 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
194 // CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
195 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
196   _Atomic(_Complex float) x = 2.0f;
197 
// The read is expected to call __atomic_load (size 8, ordering argument 5)
// into TMP0, then copy the two components into f one at a time.
198 // CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
199 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
200 // CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
201 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
202 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
203 // CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
204 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
205 // CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
206 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
207 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
208 // CHECK-NEXT: store float [[R]], float* [[T0]]
209 // CHECK-NEXT: store float [[I]], float* [[T1]]
210   _Complex float f = *fp;
211 
// The write is expected to copy both components of f into TMP1 and pass that
// to __atomic_store (size 8, ordering argument 5).
212 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
213 // CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
214 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
215 // CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
216 // CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
217 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
218 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
219 // CHECK-NEXT: store float [[R]], float* [[T0]]
220 // CHECK-NEXT: store float [[I]], float* [[T1]]
221 // CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
222 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8*
223 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5)
224   *fp = f;
225 
226 // CHECK-NEXT: ret void
227 }
228 
// Struct of four shorts.  The expectations in this file treat it as an 8-byte
// object (size argument 8 in the atomic library calls).
229 typedef struct { short x, y, z, w; } S;
// Atomic global whose constant initialiser is matched by the
// @testStructGlobal expectation near the top of the file.
230 _Atomic S testStructGlobal = (S){1, 2, 3, 4};
231 // CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
// Initialises, reads and writes an _Atomic(S) through fp.  The directives pin
// the whole function body instruction by instruction, so the statements below
// must keep their current order.
232 void testStruct(_Atomic(S) *fp) {
// The atomic objects (x and the temporary) are expected at align 8, while the
// plain S f is expected at align 2.
233 // CHECK:      [[FP:%.*]] = alloca [[S]]*, align 4
234 // CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
235 // CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
236 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
237 // CHECK-NEXT: store [[S]]*
238 
// Initialisation through the pointer is expected as four ordinary field
// stores.
239 // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
240 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
241 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
242 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
243 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
244 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
245 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
246 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
247 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
248   __c11_atomic_init(fp, (S){1,2,3,4});
249 
// The local atomic is initialised the same way, field by field.
250 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
251 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
252 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
253 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
254 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
255 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
256 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
257 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
258   _Atomic(S) x = (S){1,2,3,4};
259 
// The read is expected to call __atomic_load (size 8, ordering argument 5)
// with f itself as the destination, with no intermediate temporary.
260 // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
261 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
262 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
263 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
264   S f = *fp;
265 
// The write is expected to memcpy f into the align-8 temporary first and then
// pass that temporary to __atomic_store (size 8, ordering argument 5).
266 // CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
267 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
268 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
269 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
270 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
271 // CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
272 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
273   *fp = f;
274 
275 // CHECK-NEXT: ret void
276 }
277 
// Struct of three shorts.  The expectations in this file copy it as 6 bytes,
// while its atomic form is handled as 8 bytes with a [2 x i8] tail.
278 typedef struct { short x, y, z; } PS;
// Atomic global whose constant initialiser, including the zeroed tail, is
// matched by the @testPromotedStructGlobal expectation near the top of the
// file.
279 _Atomic PS testPromotedStructGlobal = (PS){1, 2, 3};
280 // CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
// Initialises, reads, writes and member-accesses an _Atomic(PS) through fp.
// In the directives, APS names the atomic struct type and PS the plain struct
// type nested inside it.  The whole function body is pinned instruction by
// instruction, so the statements below must keep their current order.
281 void testPromotedStruct(_Atomic(PS) *fp) {
// Stack slots, in order: fp, x, f, two atomic temporaries, a, then a plain PS
// temporary and a third atomic temporary used by the member access at the end.
282 // CHECK:      [[FP:%.*]] = alloca [[APS]]*, align 4
283 // CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
284 // CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
285 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
286 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
287 // CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
288 // CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
289 // CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
290 // CHECK-NEXT: store [[APS]]*
291 
// Initialisation through the pointer is expected to zero all 8 bytes with a
// memset and then store the three fields.  Note that this memset is expected
// in its i64-length form, unlike the i32-length memsets further down.
292 // CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
293 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
294 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
295 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
296 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
297 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
298 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
299 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
300 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
301 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
302   __c11_atomic_init(fp, (PS){1,2,3});
303 
// The local atomic follows the same zero-then-store pattern.
304 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
305 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
306 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
307 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
308 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
309 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
310 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
311 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
312 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
313   _Atomic(PS) x = (PS){1,2,3};
314 
// The read is expected to call __atomic_load (size 8, ordering argument 5)
// into TMP0 and then memcpy only the 6-byte PS payload into f.
315 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
316 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
317 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
318 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
319 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
320 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
321 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
322 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
323   PS f = *fp;
324 
// The write is expected to zero TMP1, memcpy the 6-byte payload of f into it,
// and pass TMP1 to __atomic_store (size 8, ordering argument 5).
325 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
326 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
327 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
328 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
329 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
330 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
331 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
332 // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
333 // CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
334 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
335   *fp = f;
336 
// Member access on the cast value is expected to load atomically into TMP3,
// copy the payload into the plain temporary TMP2, read field 0 and
// sign-extend it to i32 for a.
337 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4
338 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
339 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
340 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
341 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0
342 // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
343 // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
344 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
345 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0
346 // CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2
347 // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
348 // CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
349   int a = ((PS)*fp).x;
350 
351 // CHECK-NEXT: ret void
352 }
353 
354 // CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
355 
356 // FIXME: none of these look right, but we can leave the "test" here
357 // to make sure they at least don't crash.
// Runs the explicit __c11_atomic_* builtins (load, store, exchange, strong and
// weak compare-exchange) on an _Atomic(PS).  Only the function's definition
// line is matched by a directive; the body has no IR expectations of its own.
// Every ordering argument is the literal 5.  The parameter p and the locals b
// and v shadow the file-scope globals of the same names.
358 void testPromotedStructOps(_Atomic(PS) *p) {
359   PS a = __c11_atomic_load(p, 5);
360   __c11_atomic_store(p, a, 5);
361   PS b = __c11_atomic_exchange(p, a, 5);
    // b is the expected-value operand for both compare-exchange forms.
362   _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
363   v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
364 }
365