/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_

#include <sys/atomic_common.h>

#if __ARM_ARCH >= 7
#define isb()	__asm __volatile("isb" : : : "memory")
#define dsb()	__asm __volatile("dsb" : : : "memory")
#define dmb()	__asm __volatile("dmb" : : : "memory")
#else
#define isb()	__asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()	__asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()	__asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#endif

#define mb()	dmb()
#define wmb()	dmb()
#define rmb()	dmb()
#define ARM_HAVE_ATOMIC64

#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	dmb();								\
	atomic_##NAME##_long(p, v);					\
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	dmb();								\
	atomic_##NAME##_##WIDTH(p, v);					\
}
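
/*
 * Illustrative expansion (not additional API): ATOMIC_ACQ_REL(add, 32),
 * used below, generates the pair
 *
 *	static __inline void
 *	atomic_add_acq_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		atomic_add_32(p, v);
 *		dmb();
 *	}
 *
 *	static __inline void
 *	atomic_add_rel_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		dmb();
 *		atomic_add_32(p, v);
 *	}
 *
 * That is, acquire variants place the barrier after the operation and
 * release variants place it before; this is all the ordering the
 * wrappers add on top of the plain atomics.
 */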

static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   add	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}
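
/*
 * Every read-modify-write primitive below follows the LDREX/STREX retry
 * loop seen above: STREX writes a nonzero status and stores nothing if
 * the exclusive monitor was disturbed after the LDREX, and the loop
 * retries until the store succeeds.  A minimal usage sketch (the
 * refcount variable is hypothetical, not part of this header):
 *
 *	static uint32_t refcount;
 *
 *	static void
 *	object_hold(void)
 *	{
 *		atomic_add_32(&refcount, 1);	// lock-free increment
 *	}
 */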

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[val]			\n"
	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

	atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   bic	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   bic	%Q[tmp], %Q[val]			\n"
	    "   bic	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

	atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)			\
{								\
	TYPE tmp;						\
								\
	__asm __volatile(					\
	    "1: ldrex" SUF "	%[tmp], [%[ptr]]	\n"	\
	    "   ldr" SUF "	%[ret], [%[oldv]]	\n"	\
	    "   teq		%[tmp], %[ret]		\n"	\
	    "   ittee		ne			\n"	\
	    "   str" SUF "ne	%[tmp], [%[oldv]]	\n"	\
	    "   movne		%[ret], #0		\n"	\
	    "   strex" SUF "eq	%[ret], %[newv], [%[ptr]]\n"	\
	    "   eorseq		%[ret], #1		\n"	\
	    "   beq		1b			\n"	\
	    : [ret] "=&r" (RET),				\
	      [tmp] "=&r" (tmp)					\
	    : [ptr] "r" (_ptr),					\
	      [oldv] "r" (_old),				\
	      [newv] "r" (_new)					\
	    : "cc", "memory");					\
}

#define ATOMIC_FCMPSET_CODE64(RET)				\
{								\
	uint64_t cmp, tmp;					\
								\
	__asm __volatile(					\
	    "1: ldrexd	%Q[tmp], %R[tmp], [%[ptr]]	\n"	\
	    "   ldrd	%Q[cmp], %R[cmp], [%[oldv]]	\n"	\
	    "   teq	%Q[tmp], %Q[cmp]		\n"	\
	    "   it	eq				\n"	\
	    "   teqeq	%R[tmp], %R[cmp]		\n"	\
	    "   ittee	ne				\n"	\
	    "   movne	%[ret], #0			\n"	\
	    "   strdne	%[tmp], [%[oldv]]		\n"	\
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]]\n"\
	    "   eorseq	%[ret], #1			\n"	\
	    "   beq	1b				\n"	\
	    : [ret] "=&r" (RET),				\
	      [cmp] "=&r" (cmp),				\
	      [tmp] "=&r" (tmp)					\
	    : [ptr] "r" (_ptr),					\
	      [oldv] "r" (_old),				\
	      [newv] "r" (_new)					\
	    : "cc", "memory");					\
}
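
/*
 * fcmpset semantics, as implemented above: compare *_ptr with *_old and,
 * if they match, store _new and return nonzero.  On a mismatch, return
 * zero and write the value actually observed back through _old, so the
 * caller can retry without an extra load.  A minimal update loop
 * (illustrative sketch; the flags variable is hypothetical):
 *
 *	static uint32_t flags;
 *
 *	static void
 *	flags_set(uint32_t bits)
 *	{
 *		uint32_t old;
 *
 *		old = flags;
 *		while (!atomic_fcmpset_32(&flags, &old, old | bits))
 *			;	// old now holds the observed value
 *	}
 */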

static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}
#define atomic_fcmpset_8 atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}
#define atomic_fcmpset_16 atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}

#define ATOMIC_CMPSET_CODE(RET, SUF)				\
{								\
	__asm __volatile(					\
	    "1: ldrex" SUF "	%[ret], [%[ptr]]	\n"	\
	    "   teq		%[ret], %[oldv]		\n"	\
	    "   itee		ne			\n"	\
	    "   movne		%[ret], #0		\n"	\
	    "   strex" SUF "eq	%[ret], %[newv], [%[ptr]]\n"	\
	    "   eorseq		%[ret], #1		\n"	\
	    "   beq		1b			\n"	\
	    : [ret] "=&r" (RET)					\
	    : [ptr] "r" (_ptr),					\
	      [oldv] "r" (_old),				\
	      [newv] "r" (_new)					\
	    : "cc", "memory");					\
}

#define ATOMIC_CMPSET_CODE64(RET)				\
{								\
	uint64_t tmp;						\
								\
	__asm __volatile(					\
	    "1: ldrexd	%Q[tmp], %R[tmp], [%[ptr]]	\n"	\
	    "   teq	%Q[tmp], %Q[oldv]		\n"	\
	    "   it	eq				\n"	\
	    "   teqeq	%R[tmp], %R[oldv]		\n"	\
	    "   itee	ne				\n"	\
	    "   movne	%[ret], #0			\n"	\
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]]\n"\
	    "   eorseq	%[ret], #1			\n"	\
	    "   beq	1b				\n"	\
	    : [ret] "=&r" (RET),				\
	      [tmp] "=&r" (tmp)					\
	    : [ptr] "r" (_ptr),					\
	      [oldv] "r" (_old),				\
	      [newv] "r" (_new)					\
	    : "cc", "memory");					\
}

static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}
#define atomic_cmpset_8 atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}
#define atomic_cmpset_16 atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}
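
/*
 * cmpset differs from fcmpset only in taking the expected value by value
 * and not reporting what was observed on failure; it returns nonzero iff
 * the store happened.  Illustrative try-lock sketch built on the acq/rel
 * variants (the lockword variable is hypothetical, not part of this
 * header):
 *
 *	static uint32_t lockword;	// 0 = free, 1 = held
 *
 *	static int
 *	try_lock(void)
 *	{
 *		return (atomic_cmpset_acq_32(&lockword, 0, 1));
 *	}
 *
 *	static void
 *	unlock(void)
 *	{
 *		atomic_store_rel_32(&lockword, 0);
 *	}
 */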

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   add	%1, %0, %4	\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}
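
/*
 * fetchadd returns the value the location held before the addition,
 * which makes it the natural primitive for handing out unique values or
 * tickets.  Illustrative sketch (next_id is hypothetical, not part of
 * this header):
 *
 *	static uint32_t next_id;
 *
 *	static uint32_t
 *	alloc_id(void)
 *	{
 *		return (atomic_fetchadd_32(&next_id, 1));
 *	}
 */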

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it to open
	 * state with CLREX because we don't actually need to store anything.
	 */
	__asm __volatile(
	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
	    "clrex					\n"
	    : [ret] "=&r" (ret)
	    : [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	dmb();
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   mov	%1, #0		\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   mov	%Q[tmp], #0				\n"
	    "   mov	%R[tmp], #0				\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

	return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   orr	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   orr	%Q[tmp], %Q[val]			\n"
	    "   orr	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

	atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   sub	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   subs	%Q[tmp], %Q[val]			\n"
	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

	atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [tmp] "=&r" (tmp),
	      [exf] "=&r" (exflag)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	dmb();
	*p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	dmb();
	atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	dmb();
	*p = v;
}

static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov	ip, #1				\n"
	    "   lsl	ip, ip, %[bit]			\n"
	    /* Done with %[bit] as input, reuse below as output. */
	    "1:						\n"
	    "   ldrex	%[oldv], [%[ptr]]		\n"
	    "   bic	%[newv], %[oldv], ip		\n"
	    "   strex	%[bit], %[newv], [%[ptr]]	\n"
	    "   teq	%[bit], #0			\n"
	    "   it	ne				\n"
	    "   bne	1b				\n"
	    "   ands	%[bit], %[oldv], ip		\n"
	    "   it	ne				\n"
	    "   movne	%[bit], #1			\n"
	    : [bit] "=&r" (result),
	      [oldv] "=&r" (oldv),
	      [newv] "=&r" (newv)
	    : [ptr] "r" (ptr),
	      "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}
#define atomic_testandclear_long atomic_testandclear_long

static __inline int
atomic_testandclear_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assumes little-endian byte order: atomic_testandclear_32() uses
	 * only the low 5 bits of v, so first select the 32-bit word that
	 * holds the requested bit.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandclear_32(p32, v));
}
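
/*
 * Worked example of the word selection above, assuming little-endian:
 * for v = 37, (v & 0x20) != 0 selects the high 32-bit word, and
 * atomic_testandclear_32() then operates on bit (37 & 0x1f) == 5 of
 * that word.
 */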

static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov	ip, #1				\n"
	    "   lsl	ip, ip, %[bit]			\n"
	    /* Done with %[bit] as input, reuse below as output. */
	    "1:						\n"
	    "   ldrex	%[oldv], [%[ptr]]		\n"
	    "   orr	%[newv], %[oldv], ip		\n"
	    "   strex	%[bit], %[newv], [%[ptr]]	\n"
	    "   teq	%[bit], #0			\n"
	    "   it	ne				\n"
	    "   bne	1b				\n"
	    "   ands	%[bit], %[oldv], ip		\n"
	    "   it	ne				\n"
	    "   movne	%[bit], #1			\n"
	    : [bit] "=&r" (result),
	      [oldv] "=&r" (oldv),
	      [newv] "=&r" (newv)
	    : [ptr] "r" (ptr),
	      "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}
#define atomic_testandset_long atomic_testandset_long

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assumes little-endian byte order: atomic_testandset_32() uses
	 * only the low 5 bits of v, so first select the 32-bit word that
	 * holds the requested bit.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandset_32(p32, v));
}

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t ret, exflag;

	__asm __volatile(
	    "1: ldrex	%[ret], [%[ptr]]		\n"
	    "   strex	%[exf], %[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0			\n"
	    "   it	ne				\n"
	    "   bne	1b				\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r" (v),
	      [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_32((volatile uint32_t *)p, v));
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t ret;
	uint32_t exflag;

	__asm __volatile(
	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r" (v),
	      [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}
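
/*
 * Since atomic_swap_32 returns the previous contents, exchanging in a 1
 * and checking the old value implements a simple test-and-set lock.
 * Illustrative sketch (lk is hypothetical; a real lock would also want
 * acquire ordering, shown here as an explicit dmb()):
 *
 *	static uint32_t lk;
 *
 *	static void
 *	spin_acquire(void)
 *	{
 *		while (atomic_swap_32(&lk, 1) != 0)
 *			;	// already held, keep spinning
 *		dmb();		// order the critical section after the swap
 *	}
 */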

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	dmb();
}
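
/*
 * All four fence flavors above are implemented with the same full dmb();
 * this file draws no distinction between acquire, release, and seq_cst
 * barriers.  Illustrative message-passing sketch (data and ready are
 * hypothetical, not part of this header):
 *
 *	static volatile uint32_t data, ready;
 *
 *	// producer:
 *	data = 42;
 *	atomic_thread_fence_rel();	// data write ordered before flag
 *	ready = 1;
 *
 *	// consumer:
 *	while (ready == 0)
 *		;
 *	atomic_thread_fence_acq();	// flag read ordered before data read
 *	// data now reads as 42
 */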

#define atomic_add_ptr			atomic_add_32
#define atomic_add_acq_ptr		atomic_add_acq_32
#define atomic_add_rel_ptr		atomic_add_rel_32
#define atomic_subtract_ptr		atomic_subtract_32
#define atomic_subtract_acq_ptr		atomic_subtract_acq_32
#define atomic_subtract_rel_ptr		atomic_subtract_rel_32
#define atomic_clear_ptr		atomic_clear_32
#define atomic_clear_acq_ptr		atomic_clear_acq_32
#define atomic_clear_rel_ptr		atomic_clear_rel_32
#define atomic_set_ptr			atomic_set_32
#define atomic_set_acq_ptr		atomic_set_acq_32
#define atomic_set_rel_ptr		atomic_set_rel_32
#define atomic_fcmpset_ptr		atomic_fcmpset_32
#define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_fetchadd_ptr		atomic_fetchadd_32
#define atomic_readandclear_ptr		atomic_readandclear_32
#define atomic_load_acq_ptr		atomic_load_acq_32
#define atomic_store_rel_ptr		atomic_store_rel_32
#define atomic_swap_ptr			atomic_swap_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_fcmpset_int		atomic_fcmpset_32
#define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32
#define atomic_swap_int			atomic_swap_32

/*
 * For:
 *  - atomic_load_acq_8
 *  - atomic_load_acq_16
 *  - atomic_testandset_acq_long
 */
#include <sys/_atomic_subword.h>

#endif /* _MACHINE_ATOMIC_H_ */