/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
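
/*
 * Illustrative use only (not one of the interfaces defined in this
 * file): on a non-SMP kernel a read-modify-write sequence can be made
 * atomic with respect to interrupts by wrapping it in
 * WITHOUT_INTERRUPTS(), e.g.
 *
 *	WITHOUT_INTERRUPTS({
 *		old = *mem;
 *		*mem = old + val;
 *	});
 *
 * where "old", "mem" and "val" are hypothetical local variables.
 */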

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#else
static inline void
do_sync(void)
{

	dmb();
}
#endif
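
/*
 * Sketch of the common pattern used by the routines below (for
 * illustration only; see the actual definitions for the operand
 * details):
 *
 *	do_sync();			// dmb, or a plain compiler
 *					// barrier on UP kernels
 *	do {
 *		old = load-exclusive(mem);
 *		new = f(old, val);
 *	} while (store-exclusive(mem, new) != 0);
 *	return (old);
 */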


#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
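
/*
 * Worked example (illustrative): for a uint16_t stored at address
 * 0x1002, round_to_word() masks off the low two bits and returns
 * 0x1000, the address of the aligned 32-bit word containing it.
 */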

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
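
/*
 * Worked example (illustrative): continuing the 0x1002 case above,
 * put_2() computes offset 2 and copies the two bytes of the value into
 * r->v8[2] and r->v8[3], i.e. the half of the 32-bit word that holds
 * the atomic variable; get_2() reassembles the value from those same
 * two bytes.
 */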

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

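/*
 * Sketch of the technique used below (illustration only): the aligned
 * 32-bit word is rewritten as
 *
 *	new32 = (old32 & negmask) | val32;
 *
 * where val32 holds the 8/16-bit operand shifted into place and
 * negmask is all ones except at that location, so the neighbouring
 * bytes within the word are left unchanged.
 */
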
#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
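
/*
 * Usage example (illustrative only): __sync_lock_test_and_set_1(p, v)
 * atomically stores the byte v at p and returns the byte previously
 * stored there. Per the barrier comment at the top of this file it
 * acts as a full barrier rather than acquire-only, which is what lets
 * <stdatomic.h>'s atomic_exchange_explicit() on 8-bit objects rely on
 * it.
 */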

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* __SYNC_ATOMICS */