/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
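
/*
 * For illustration only, a minimal sketch of how the macro can be
 * used: on a uniprocessor kernel, a read-modify-write becomes atomic
 * simply by running with interrupts off.  The function below is
 * hypothetical, not part of this file:
 *
 *	uint32_t
 *	example_fetch_and_add(uint32_t *mem, uint32_t val)
 *	{
 *		uint32_t old;
 *
 *		WITHOUT_INTERRUPTS({
 *			old = *mem;
 *			*mem = old + val;
 *		});
 *		return (old);
 *	}
 */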

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */
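
/*
 * For example (a sketch; the exact builtin chosen is up to the
 * compiler), an exchange such as
 *
 *	_Atomic uint32_t a;
 *	atomic_exchange_explicit(&a, 1, memory_order_seq_cst);
 *
 * may be lowered to a call to __sync_lock_test_and_set_4(), so having
 * that function issue a full barrier keeps the stronger memory orders
 * correct as well.
 */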

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#else
static inline void
do_sync(void)
{

	dmb();
}
#endif


#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma	redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma	redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */


/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
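
/*
 * Example (illustrative): for a uint16_t at address 0x1002,
 * round_to_word() returns 0x1000, the start of the enclosing 32-bit
 * word that ldrex/strex can operate on.
 */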

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
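
/*
 * Example (illustrative): for a uint8_t at address 0x1003 the offset
 * within the word is 3, so put_1()/get_1() access r->v8[3], the same
 * byte a plain byte load or store at that address would touch.
 * Because put_2()/get_2() copy byte-by-byte, the in-memory layout is
 * reproduced exactly, regardless of endianness.
 */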

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
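
/*
 * In C terms, the ldrex/strex loop above is roughly equivalent to the
 * following (non-atomic) sketch, where the store is retried until no
 * other CPU has written the word in between:
 *
 *	do {
 *		old = *mem32;
 *		new = (old & negmask) | val32;
 *	} while (conditional store of new to *mem32 failed);
 */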

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
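
/*
 * Sketch of the loop above: the old word is compared against the
 * expected bytes only within the field of interest; if they match,
 * that field is replaced while the surrounding bytes are preserved:
 *
 *	old = *mem32;
 *	if ((old & posmask.v32) == expected32.v32)
 *		*mem32 = (old & negmask) | desired32.v32;
 */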

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
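
/*
 * Unlike the bitwise operations below, add and sub are computed on the
 * whole 32-bit word, so a carry or borrow may leak out of the 8- or
 * 16-bit field.  The "and %2, %5" step masks the result back to the
 * field, and the adjacent bytes are restored from the old value before
 * the store.
 */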

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
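
/*
 * The idempotence trick above: padding val32 with 0xff bytes for "and"
 * and with 0x00 bytes for "orr"/"eor" makes the operation a no-op on
 * the bytes outside the field (x & 0xff == x, x | 0x00 == x,
 * x ^ 0x00 == x), so no separate masking step is needed in the loop.
 */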

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
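
/*
 * Illustrative caller (hypothetical, not part of this file): code
 * compiled with the old __sync API, e.g.
 *
 *	uint32_t counter;
 *	uint32_t old = __sync_fetch_and_add(&counter, 1);
 *
 * is lowered by the compiler to a call to __sync_fetch_and_add_4(),
 * which resolves to the function above via the __strong_reference
 * aliases below (or the redefine_extname pragmas above, with clang).
 */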

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* __SYNC_ATOMICS */