/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
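
/*
 * A rough usage sketch (illustrative only): the statement block passed to
 * WITHOUT_INTERRUPTS() runs with interrupts masked, which is enough to make
 * a load/modify/store sequence appear atomic on a uniprocessor kernel:
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem += val;
 *	});
 *
 * This is exactly the pattern the EMIT_*_N() macros below wrap.
 */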

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

	dmb();
}
#endif
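
/*
 * Note that the __sync_* routines further down call do_sync() before their
 * load/store sequences; on ARMv6+ this emits a DMB, giving them the full
 * barrier semantics described above, while the uniprocessor kernel variant
 * only needs a compiler barrier because no other CPU can observe a
 * different ordering.
 */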

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N
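
/*
 * For reference, EMIT_ALL_OPS_N(4, uint32_t) above expands to definitions
 * such as:
 *
 *	uint32_t
 *	__atomic_exchange_4(uint32_t *mem, uint32_t val, int model __unused)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			*mem = val;
 *		});
 *		return (ret);
 *	}
 */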

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the sequence from the beginning if the thread is
 * interrupted while executing it.
 */

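/*
 * Sketch of the mechanism (the precise kernel-side handling lives in the
 * ARM machine-dependent code): each sequence below first publishes its own
 * start address at ARM_RAS_START and its end address at ARM_RAS_START + 4.
 * If the thread is interrupted while its program counter lies inside that
 * window, the kernel resumes it at the published start address, so the
 * load/modify/store runs again from the top instead of completing with
 * stale data.  The trailing stores reset the window to a value the kernel
 * treats as "no sequence active".
 */
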
#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)		\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, new, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (new)			\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or,  "orr", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch,  "orr", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma	redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma	redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
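
/*
 * For example, given a uint16_t at address 0x1006, round_to_word() returns
 * (uint32_t *)0x1004, the aligned word that contains it.
 */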

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
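
/*
 * Because reg_t overlays v8[] on v32, v8[offset] aliases the same byte lane
 * that the 8-bit or 16-bit variable occupies within its containing aligned
 * word, so this placement works regardless of byte order.
 */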

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
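/*
 * Sketched in C, the routines below all follow the same shape (with
 * strex() as a purely hypothetical stand-in for the store-exclusive
 * instruction, returning 0 on success):
 *
 *	do {
 *		old = *mem32;
 *		new = (old & negmask) | val32;
 *	} while (strex(mem32, new) != 0);
 *
 * where val32 holds the new subword already placed in the right byte lanes
 * and negmask clears exactly those lanes in the old word.
 */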

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
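
/*
 * A note on the "idempotence" argument used above: the byte lanes outside
 * the 8-bit or 16-bit target must pass through the operation unchanged, so
 * for AND the unused lanes of val32 are filled with all-one bits and for OR
 * and XOR with all-zero bits, both of which leave those lanes untouched.
 */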

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
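/*
 * The *_c functions above are exported under the canonical __sync_* names
 * in two ways: clang picks them up through the "#pragma redefine_extname"
 * directives near the top of this file, while GCC gets the strong aliases
 * below.
 */
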
#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the sequence from the beginning if the thread is
 * interrupted while executing it.
 */
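/*
 * These are the pre-ARMv6 userspace flavours of the old __sync_* API; they
 * publish the same ARM_RAS_START window described for the __atomic_*
 * routines earlier in this file, just without the C11 memory model
 * arguments.
 */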

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */