xref: /freebsd/sys/arm/arm/stdatomic.c (revision a0ee8cc6)
/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/acle-compat.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
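
/*
 * A minimal sketch of how WITHOUT_INTERRUPTS() is meant to be used: the
 * statement block passed in runs with interrupts masked, which is what
 * makes these fallback "atomics" safe on a uniprocessor kernel.  The
 * STDATOMIC_EXAMPLE guard is hypothetical and only keeps the sketch out
 * of real builds.
 */
#if defined(STDATOMIC_EXAMPLE) && defined(_KERNEL) && !defined(SMP)
static uint32_t
example_fetch_add(uint32_t *mem, uint32_t val)
{
	uint32_t ret;

	WITHOUT_INTERRUPTS({
		ret = *mem;		/* Read the old value... */
		*mem += val;		/* ...and update it in place. */
	});
	return (ret);
}
#endif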

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

	dmb();
}
#endif
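
/*
 * Note: on a uniprocessor kernel the barrier above only needs to be a
 * compiler barrier, since there is no other agent to order against.  On
 * ARMv6 and later, dmb() emits a hardware data memory barrier (a DMB
 * instruction, or its CP15 equivalent on older ARMv6 parts).
 */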

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N
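
/*
 * Sketch of the consumer side: on ARMv4/v5, where the compiler has no
 * ldrex/strex to inline, a <stdatomic.h> operation such as the one below
 * is lowered to an out-of-line call (here __atomic_fetch_add_4), which is
 * exactly what the macros above provide.  The STDATOMIC_EXAMPLE guard is
 * hypothetical and keeps the sketch out of real builds.
 */
#ifdef STDATOMIC_EXAMPLE
static uint32_t
example_counter_bump(_Atomic(uint32_t) *counter)
{

	return (atomic_fetch_add_explicit(counter, 1, memory_order_relaxed));
}
#endif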

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
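
/*
 * Slightly expanded (informal) description of the mechanism used below:
 * each sequence first stores its own start and end addresses into the two
 * words at ARM_RAS_START.  If the thread is interrupted or preempted while
 * its program counter lies inside that window, the kernel moves the
 * program counter back to the recorded start before resuming the thread,
 * so the whole load/modify/store sequence runs again from the top.  The
 * teardown code then writes back the "no sequence active" sentinel values.
 */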

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)		\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, new, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (new)			\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or,  "orr", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch,  "orr", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif
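
/*
 * The "_c" suffix dance above is only needed because clang does not let a
 * translation unit define functions under the names of its own builtins:
 * the C implementations below are therefore named __sync_*_c and renamed
 * to the real symbols via #pragma redefine_extname.  GCC builds skip the
 * pragmas and instead emit strong aliases (__strong_reference) near the
 * end of each section.
 */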

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
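
/*
 * Example: a uint8_t at address 0x20003 is held in the 32-bit word at
 * 0x20000 (byte offset 3), which is what clearing the two low address
 * bits yields.
 */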

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
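
/*
 * A worked example of the masking, assuming a little-endian CPU and an
 * 8-bit variable at byte offset 1 of its containing word: posmask becomes
 * 0x0000ff00 (ones where the variable lives) and negmask 0xffff00ff, so
 * and'ing the loaded word with negmask clears the old byte and or'ing in
 * the new value (already placed at the right offset by put_N()) merges it
 * back without disturbing the three neighbouring bytes.
 */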

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */
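
/*
 * The 32-bit routines below are plain load-exclusive/store-exclusive retry
 * loops: ldrex marks the location for exclusive access, strex writes back
 * a status flag that is zero only if exclusive access was still held at
 * the time of the store, and any other status simply branches back to
 * reload and retry.
 */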

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */