/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
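
/*
 * Usage sketch (illustrative only, not taken from this excerpt): a caller
 * wraps a statement, typically a compound statement, so that it executes
 * with interrupts masked, e.g.
 *
 *	WITHOUT_INTERRUPTS({
 *		old = *mem;
 *		*mem = old + val;
 *	});
 */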

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#else
static inline void
do_sync(void)
{

	dmb();
}
#endif


#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma	redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma	redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */


/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
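
/*
 * For example, a uint8_t at address 0x1003 and a uint16_t at address
 * 0x1002 both live in the aligned word at 0x1000 (0x1003 & ~3 == 0x1000).
 */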

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
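
/*
 * For example, a uint16_t whose address satisfies (addr & 3) == 2 is
 * stored by put_2() into v8[2] and v8[3] of the containing word and read
 * back by get_2() from those same bytes, preserving native byte order.
 */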

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
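
/*
 * Illustration of the masks constructed below, assuming a little-endian
 * configuration and an 8-bit value at byte offset 1 of its word:
 *
 *	posmask = 0x0000ff00	(selects the byte holding the value)
 *	negmask = 0xffff00ff	(selects the surrounding bytes)
 *
 * The masks are built through put_N()/get_N(), so the same code also
 * works on big-endian configurations.
 */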

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
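
/*
 * For example, __sync_lock_test_and_set_1(&flag, 1) atomically stores 1
 * into the byte and returns its previous contents; as noted above, this
 * implementation performs a full barrier rather than only an acquire
 * barrier.
 */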

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)			\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
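
/*
 * The return value is the contents of *mem before the operation; a
 * caller determines success by comparing it against "expected".
 */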

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
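
/*
 * The "idempotence" argument above selects the padding placed in the
 * bytes that share the word with the operand: "and" pads with one bits
 * (x & 1 == x for each bit) while "orr" and "eor" pad with zero bits
 * (x | 0 == x, x ^ 0 == x), so the neighbouring bytes are left unchanged
 * by the 32-bit operation.
 */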

/*
 * 32-bit routines.
 */
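
/*
 * These widths match the ldrex/strex access size, so a plain retry loop
 * suffices and no masking is needed.
 */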

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* __SYNC_ATOMICS || EMIT_SYNC_ATOMICS */