/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
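
/*
 * For example (a sketch of how the macro is used by the operations
 * below), a read-modify-write becomes atomic with respect to
 * interrupts on a uniprocessor:
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem += val;
 *	});
 */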

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

	dmb();
}
#endif
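
/*
 * A note on dmb(): it comes from <machine/atomic.h>, not this file.
 * On ARMv7 it is expected to expand to the DMB instruction; on ARMv6,
 * typically to the equivalent CP15 barrier (mcr p15, 0, rN, c7, c10,
 * 5). This is an observation about FreeBSD's headers of this vintage,
 * not a guarantee.
 */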

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
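
/*
 * Concretely: ARM_RAS_START names two words of per-thread storage
 * holding the start and end address of the currently active sequence.
 * Each operation below registers itself on entry and clears the
 * window on exit by storing (0, 0xffffffff). A sketch of the
 * kernel-side check (assuming a zero start address means "no sequence
 * active"; the real logic lives in the kernel's interrupt return
 * path):
 *
 *	if (ras_start != 0 && pc >= ras_start && pc < ras_end)
 *		pc = ras_start;		(restart the sequence)
 */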

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)		\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, new, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (new)			\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or,  "orr", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch,  "orr", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef	EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma	redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma	redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma	redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma	redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma	redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma	redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma	redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma	redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma	redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma	redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma	redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma	redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma	redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma	redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma	redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma	redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma	redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma	redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma	redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
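
/*
 * For example, round_to_word((void *)0x1003) yields (uint32_t *)0x1000,
 * the word containing the byte at 0x1003.
 */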

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
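
/*
 * A worked example (little-endian, hypothetical values): exchanging
 * the byte at address 0x1001 with 0x5a gives offset 1, so
 * val32.v32 == 0x00005a00 and negmask.v32 == 0xffff00ff. The LL/SC
 * loop below then computes new = (old & negmask) | val32, replacing
 * only the targeted byte of the containing word.
 */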

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand   %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand   %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp   %2, %4\n"	/* Compare to expected value. */\
		"\tbne   2f\n"		/* Values are unequal. */	\
		"\tand   %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand   %2, %5\n"	/* Isolate the new value. */	\
		"\tand   %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr   %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */
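
/*
 * These are what the compiler's generic builtins resolve to when they
 * are not inlined; e.g. __sync_fetch_and_add(&counter, 1) on a
 * uint32_t typically lowers to a call to __sync_fetch_and_add_4,
 * implemented below as an LL/SC loop.
 */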

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp   %0, %3\n"	/* Compare to expected value. */
		"\tbne   2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp   %2, #0\n"	/* Did it succeed? */
		"\tbne   1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp   %3, #0\n"	/* Did it succeed? */		\
		"\tbne   1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef	EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%6]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%6, #4]\n"				\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp   %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%6]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%6, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr   %2, 1b\n"					\
		"\tstr   %2, [%5]\n"					\
		"\tadr   %2, 2f\n"					\
		"\tstr   %2, [%5, #4]\n"				\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op"  %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov   %2, #0x00000000\n"				\
		"\tstr   %2, [%5]\n"					\
		"\tmov   %2, #0xffffffff\n"				\
		"\tstr   %2, [%5, #4]\n"				\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */