/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
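
/*
 * For example (illustrative only), a read-modify-write that must not be
 * torn by an interrupt is written as:
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem += val;
 *	});
 */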

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
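
/*
 * Note: on ARMv7 do_sync() issues a DMB instruction; on ARMv6 it issues
 * the equivalent CP15 c7, c10, 5 Data Memory Barrier operation. On a
 * non-SMP kernel a compiler barrier is sufficient, as the only other
 * observers are interrupt handlers running on the same CPU.
 */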

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
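
/*
 * For illustration: when the compiler cannot expand an atomic operation
 * inline on this target, a call such as
 *
 *	atomic_fetch_add_explicit(&counter, 1, memory_order_relaxed);
 *
 * on a 32-bit object (counter is a hypothetical atomic_uint) is lowered
 * to a call to __atomic_fetch_add_4(), which the macros above provide.
 */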

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. If the thread is
 * interrupted partway through such a sequence, the kernel restarts the
 * sequence from the beginning before resuming it.
 */
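
/*
 * Sketch of the mechanism used below: the address of the start of the
 * sequence is stored at ARM_RAS_START and the address of its end at
 * ARM_RAS_START + 4. If the thread is interrupted while its program
 * counter lies inside that range, the kernel rewinds the program
 * counter to the start before resuming the thread; the teardown stores
 * (0 and 0xffffffff) mark the sequence as no longer active.
 */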

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS)

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
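
/*
 * For example, an 8-bit variable at address 0x20003 and a 16-bit
 * variable at address 0x20002 both live inside the 32-bit word at
 * address 0x20000, which is what this function returns.
 */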

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words, at the offset corresponding to the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
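
/*
 * Note that these helpers are endianness-agnostic: reg_t overlays the
 * 32-bit word in memory order, so indexing v8[] by the variable's byte
 * offset within its word places the value at the same memory byte that
 * a 32-bit load or store of the containing word would access.
 */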

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by all targeted CPUs, so
 * we implement them with masking and merging on top of the 32-bit
 * LDREX/STREX instructions.
 */
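
/*
 * In outline, every LDREX/STREX loop below updates the variable's bytes
 * while leaving the other bytes of the containing word untouched,
 * either by masking and merging (sketch only):
 *
 *	do {
 *		old = *mem32;
 *		new = (old & negmask) | newbytes;
 *	} while (the STREX of new back to *mem32 fails);
 *
 * or, for the bitwise operations, by padding val32 with bits for which
 * the operation is the identity (see the idempotence argument).
 */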

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */
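
/*
 * These map directly onto single-word LDREX/STREX loops. For
 * illustration: when the compiler does not expand the builtin inline,
 * __sync_fetch_and_add(&x, 1) on a uint32_t x (hypothetical) is lowered
 * to a call to __sync_fetch_and_add_4(&x, 1), defined below.
 */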

uint32_t
__sync_lock_test_and_set_4(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4(uint32_t *mem, uint32_t val)				\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. If the thread is
 * interrupted partway through such a sequence, the kernel restarts the
 * sequence from the beginning before resuming it.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */