/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $
 */
#ifndef _CPU_ATOMIC_H_
#define _CPU_ATOMIC_H_

#include <sys/types.h>
#include <sys/atomic_common.h>

/*
 * Various simple arithmetic on memory which is atomic in the presence
 * of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
 *
 * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
 * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 */
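
/*
 * Illustrative sketch (not part of this header): the locked variants
 * generated below are safe against other CPUs, while the _nonlocked
 * variants are only safe against local interrupts.  A hypothetical
 * per-device flag word might be manipulated like this:
 */
#if 0	/* example only */
static volatile u_int example_flags;

static void
example_usage(void)
{
	atomic_set_int(&example_flags, 0x0001);		/* SMP-safe OR */
	atomic_clear_int(&example_flags, 0x0001);	/* SMP-safe AND-NOT */
	atomic_add_int(&example_flags, 1);		/* SMP-safe increment */
}
#endif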

/*
 * locked bus cycle
 * lock elision (backwards compatible)
 */
#define MPLOCKED	"lock ; "
#define XACQUIRE	"repne; "	/* lock elision */
#define XRELEASE	"repe; "	/* lock elision */

/*
 * The assembly is volatilized to demark potential before-and-after side
 * effects if an interrupt or SMP collision were to occur.  The primary
 * atomic instructions are MP safe, the nonlocked instructions are
 * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an
 * atomic instruction).
 *
 * +m - memory is read and written (=m - memory is only written)
 * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg)
 *	(Note: byte instructions only work on %ax,%bx,%cx, or %dx).  iq
 *	is good enough for our needs so don't get fancy.
 * r  - any register.
 *
 * NOTE: 64-bit immediate values are not supported for most x86-64
 *	 instructions so we have to use "r".
 */

/* egcs 1.1.2+ version */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
			 : "+m" (*p)			\
			 : CONS (V));			\
}							\
static __inline void					\
atomic_##NAME##_##TYPE##_xacquire(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(XACQUIRE MPLOCKED OP		\
			 : "+m" (*p)			\
			 : CONS (V));			\
}							\
static __inline void					\
atomic_##NAME##_##TYPE##_xrelease(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(XRELEASE MPLOCKED OP		\
			 : "+m" (*p)			\
			 : CONS (V));			\
}							\
static __inline void					\
atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(OP				\
			 : "+m" (*p)			\
			 : CONS (V));			\
}

/* egcs 1.1.2+ version */
ATOMIC_ASM(set,	     char, "orb %b1,%0",  "iq", v)
ATOMIC_ASM(clear,    char, "andb %b1,%0", "iq", ~v)
ATOMIC_ASM(add,	     char, "addb %b1,%0", "iq", v)
ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v)

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "iq", v)
ATOMIC_ASM(clear,    short, "andw %w1,%0", "iq", ~v)
ATOMIC_ASM(add,	     short, "addw %w1,%0", "iq", v)
ATOMIC_ASM(subtract, short, "subw %w1,%0", "iq", v)

ATOMIC_ASM(set,	     int, "orl %1,%0",  "iq", v)
ATOMIC_ASM(clear,    int, "andl %1,%0", "iq", ~v)
ATOMIC_ASM(add,	     int, "addl %1,%0", "iq", v)
ATOMIC_ASM(subtract, int, "subl %1,%0", "iq", v)

ATOMIC_ASM(set,	     long, "orq %1,%0",  "r", v)
ATOMIC_ASM(clear,    long, "andq %1,%0", "r", ~v)
ATOMIC_ASM(add,	     long, "addq %1,%0", "r", v)
ATOMIC_ASM(subtract, long, "subq %1,%0", "r", v)
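
/*
 * For illustration, ATOMIC_ASM(set, int, "orl %1,%0", "iq", v) above
 * expands (in its plain form) to roughly:
 *
 *	static __inline void
 *	atomic_set_int(volatile u_int *p, u_int v)
 *	{
 *		__asm __volatile("lock ; orl %1,%0" : "+m" (*p) : "iq" (v));
 *	}
 *
 * plus the _xacquire, _xrelease, and _nonlocked variants, which differ
 * only in the instruction prefixes prepended to the opcode string.
 */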

static __inline u_long
atomic_readandclear_long(volatile u_long *addr)
{
	u_long res;

	res = 0;
	__asm __volatile(
	    "	xchgq %1,%0 ;	"
	    "#	atomic_readandclear_long"
	    : "+r" (res),		/* 0 */
	      "=m" (*addr)		/* 1 */
	    : "m" (*addr));

	return (res);
}

static __inline u_int
atomic_readandclear_int(volatile u_int *addr)
{
	u_int res;

	res = 0;
	__asm __volatile(
	    "	xchgl %1,%0 ;	"
	    "#	atomic_readandclear_int"
	    : "+r" (res),		/* 0 */
	      "=m" (*addr)		/* 1 */
	    : "m" (*addr));

	return (res);
}
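
/*
 * Illustrative sketch (example only): atomic_readandclear_int() takes
 * ownership of a pending-work bitmask in one atomic step, e.g. in a
 * hypothetical soft-interrupt dispatcher.  example_run() is a made-up
 * handler; __builtin_ctz() assumes a GCC/Clang builtin.
 */
#if 0	/* example only */
static void example_run(int bit);	/* hypothetical per-bit handler */
static volatile u_int example_pending;

static void
example_dispatch(void)
{
	u_int mask;

	/* Atomically fetch all pending bits and reset them to zero. */
	mask = atomic_readandclear_int(&example_pending);
	while (mask != 0) {
		example_run(__builtin_ctz(mask));	/* lowest set bit */
		mask &= mask - 1;			/* clear that bit */
	}
}
#endif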

/*
 * atomic_poll_acquire_int(P)	Returns non-zero on success, 0 if the lock
 *				has already been acquired.
 * atomic_poll_release_int(P)
 *
 * These are used for IPIQ interlocks between CPUs.
 * Both the acquisition and release must be cache-synchronizing instructions.
 */

static __inline int
atomic_swap_int(volatile int *addr, int value)
{
	__asm __volatile("xchgl %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

static __inline long
atomic_swap_long(volatile long *addr, long value)
{
	__asm __volatile("xchgq %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

static __inline void *
atomic_swap_ptr(volatile void **addr, void *value)
{
	__asm __volatile("xchgq %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

static __inline int
atomic_poll_acquire_int(volatile u_int *p)
{
	u_int data;

	__asm __volatile(MPLOCKED "btsl $0,%0; setnc %%al; andl $255,%%eax"
	    : "+m" (*p), "=a" (data));
	return (data);
}

static __inline void
atomic_poll_release_int(volatile u_int *p)
{
	__asm __volatile(MPLOCKED "btrl $0,%0" : "+m" (*p));
}
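
/*
 * Illustrative sketch (example only): the poll functions form a simple
 * one-bit, non-blocking interlock.  A hypothetical IPI sender might use
 * them like this; the actual IPIQ code lives elsewhere in the kernel.
 */
#if 0	/* example only */
static volatile u_int example_ipi_lock;

static void
example_send_ipi(void)
{
	if (atomic_poll_acquire_int(&example_ipi_lock)) {
		/* We own bit 0; safe to issue the IPI. */
		/* ... send IPI ... */
		atomic_poll_release_int(&example_ipi_lock);
	} else {
		/* Interlock already held; caller must retry or defer. */
	}
}
#endif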

/*
 * These functions operate on a 32 bit interrupt interlock which is defined
 * as follows:
 *
 *	bit 0-29	interrupt handler wait counter
 *	bit 30		interrupt handler disabled bit
 *	bit 31		interrupt handler currently running bit (1 = run)
 *
 * atomic_intr_cond_test(P)	Determine if the interlock is in an
 *				acquired state.  Returns 0 if it is not
 *				acquired, non-zero if it is. (not MPLOCKed)
 *
 * atomic_intr_cond_try(P)	Attempt to set bit 31 to acquire the
 *				interlock.  If we are unable to set bit 31
 *				we return 1, otherwise we return 0.
 *
 * atomic_intr_cond_enter(P, func, arg)
 *				Attempt to set bit 31 to acquire the
 *				interlock.  If we are unable to set bit 31,
 *				the wait counter is incremented and func(arg)
 *				is called in a loop until we are able to set
 *				bit 31.  Once we set bit 31, the wait counter
 *				is decremented.
 *
 * atomic_intr_cond_exit(P, func, arg)
 *				Clear bit 31.  If the wait counter is still
 *				non-zero call func(arg) once.
 *
 * atomic_intr_handler_disable(P)
 *				Set bit 30, indicating that the interrupt
 *				handler has been disabled.  Must be called
 *				after the hardware is disabled.
 *
 *				Returns bit 31 indicating whether a serialized
 *				accessor is active (typically the interrupt
 *				handler is running).  0 == not active,
 *				non-zero == active.
 *
 * atomic_intr_handler_enable(P)
 *				Clear bit 30, indicating that the interrupt
 *				handler has been enabled.  Must be called
 *				before the hardware is actually enabled.
 *
 * atomic_intr_handler_is_enabled(P)
 *				Returns bit 30; 0 indicates that the handler
 *				is enabled, non-zero indicates that it is
 *				disabled.  The request counter portion of
 *				the field is ignored. (not MPLOCKed)
 *
 * atomic_intr_cond_inc(P)	Increment the wait counter by 1.
 * atomic_intr_cond_dec(P)	Decrement the wait counter by 1.
 */

static __inline void
atomic_intr_init(__atomic_intr_t *p)
{
	*p = 0;
}

static __inline int
atomic_intr_handler_disable(__atomic_intr_t *p)
{
	int data;

	__asm __volatile(MPLOCKED "orl $0x40000000,%1; movl %1,%%eax; " \
			 "andl $0x80000000,%%eax" \
			 : "=a"(data) , "+m"(*p));
	return (data);
}

static __inline void
atomic_intr_handler_enable(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p));
}

static __inline int
atomic_intr_handler_is_enabled(__atomic_intr_t *p)
{
	int data;

	__asm __volatile("movl %1,%%eax; andl $0x40000000,%%eax" \
			 : "=a"(data) : "m"(*p));
	return (data);
}

static __inline void
atomic_intr_cond_inc(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "incl %0" : "+m" (*p));
}

static __inline void
atomic_intr_cond_dec(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "decl %0" : "+m" (*p));
}

static __inline void
atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg)
{
	__asm __volatile(MPLOCKED "btsl $31,%0; jnc 3f; " \
			 MPLOCKED "incl %0; " \
			 "1: ;" \
			 MPLOCKED "btsl $31,%0; jnc 2f; " \
			 "movq %2,%%rdi; call *%1; " \
			 "jmp 1b; " \
			 "2: ;" \
			 MPLOCKED "decl %0; " \
			 "3: ;" \
			 : "+m" (*p) \
			 : "r"(func), "m"(arg) \
			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
	/* YYY the function call may clobber even more registers? */
}

/*
 * Attempt to enter the interrupt condition variable.  Returns zero on
 * success, 1 on failure.
 */
static __inline int
atomic_intr_cond_try(__atomic_intr_t *p)
{
	int ret;

	__asm __volatile("subl %%eax,%%eax; " \
			 MPLOCKED "btsl $31,%0; jnc 2f; " \
			 "movl $1,%%eax;" \
			 "2: ;" \
			 : "+m" (*p), "=&a"(ret)
			 : : "cx", "dx");
	return (ret);
}

static __inline int
atomic_intr_cond_test(__atomic_intr_t *p)
{
	return ((int)(*p & 0x80000000));
}

static __inline void
atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg)
{
	__asm __volatile(MPLOCKED "btrl $31,%0; " \
			 "testl $0x3FFFFFFF,%0; jz 1f; " \
			 "movq %2,%%rdi; call *%1; " \
			 "1: ;" \
			 : "+m" (*p) \
			 : "r"(func), "m"(arg) \
			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
	/* YYY the function call may clobber even more registers? */
}
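
/*
 * Illustrative sketch (example only): a hypothetical interrupt
 * serializer built on the interlock above.  example_block() stands in
 * for a function that sleeps or spins while waiting; example_unblock()
 * stands in for the matching wakeup.  Both names are made up here.
 */
#if 0	/* example only */
static void example_block(void *arg);	/* hypothetical wait function */
static void example_unblock(void *arg);	/* hypothetical wakeup function */
static __atomic_intr_t example_intr_lock;

static void
example_serialized_handler(void *arg)
{
	/* Loop via example_block() until bit 31 is acquired. */
	atomic_intr_cond_enter(&example_intr_lock, example_block, arg);
	/* ... critical section: runs exclusively here ... */
	/* Release bit 31; call example_unblock() once if waiters remain. */
	atomic_intr_cond_exit(&example_intr_lock, example_unblock, arg);
}
#endif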
371
372 /*
373 * Atomic compare and set
374 *
375 * if (*_dst == _old) *_dst = _new (all 32 bit words)
376 *
377 * Returns 0 on failure, non-zero on success. The inline is designed to
378 * allow the compiler to optimize the common case where the caller calls
379 * these functions from inside a conditional.
380 */
381
static __inline int
atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res);
}

static __inline int
atomic_cmpxchg_long_test(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;	/* full 64 bits: %rax must hold the comparand */

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
			 " setz %%al;"
			 " movsbq %%al,%%rax" \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res);
}

static __inline int
atomic_cmpset_short(volatile u_short *_dst, u_short _old, u_short _new)
{
	u_short res = _old;

	__asm __volatile(MPLOCKED "cmpxchgw %w2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_char(volatile u_char *_dst, u_char *_old, u_char _new)
{
	u_char res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgb %2,%0; " \
			 : "+m" (*_dst),	/* 0 */
			   "+a" (*_old)		/* 1 */
			 : "r" (_new)		/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_fcmpset_short(volatile u_short *_dst, u_short *_old, u_short _new)
{
	u_short res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgw %2,%0; " \
			 : "+m" (*_dst),	/* 0 */
			   "+a" (*_old)		/* 1 */
			 : "r" (_new)		/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_int(volatile u_int *_dst, u_int *_old, u_int _new)
{
	u_int res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%0; " \
			 : "+m" (*_dst),	/* 0 */
			   "+a" (*_old)		/* 1 */
			 : "r" (_new)		/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}
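
/*
 * Illustrative sketch (example only): the classic CAS update loop.
 * atomic_fcmpset_int() writes the observed value back into *_old on
 * failure, which avoids the re-read that the atomic_cmpset_int() form
 * needs on every failed attempt.
 */
#if 0	/* example only */
static void
example_set_flag(volatile u_int *p, u_int flag)
{
	u_int old;

	/* cmpset form: re-read *p on every failed attempt. */
	do {
		old = *p;
	} while (atomic_cmpset_int(p, old, old | flag) == 0);

	/* fcmpset form: 'old' is refreshed by the primitive itself. */
	old = *p;
	while (atomic_fcmpset_int(p, &old, old | flag) == 0)
		;	/* 'old' now holds the current value; just retry */
}
#endif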

static __inline int
atomic_cmpset_int_xacquire(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_int_xrelease(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(XRELEASE MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_long(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_dst, u_long *_old, u_long _new)
{
	u_long res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%0; " \
			 : "+m" (*_dst),	/* 0 */
			   "+a" (*_old)		/* 1 */
			 : "r" (_new)		/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_cmpset_long_xacquire(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgq %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_long_xrelease(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(XRELEASE MPLOCKED "cmpxchgq %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline void *
atomic_cas_ptr(volatile void *p, void *e, void *n)
{
	__asm __volatile(MPLOCKED " cmpxchgq %2, %1"
	    : "=a" (n), "=m" (*(volatile unsigned long *)p)
	    : "r" (n), "a" (e), "m" (*(volatile unsigned long *)p));

	return (n);
}

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *_p, u_int _v)
{
	__asm __volatile(MPLOCKED "xaddl %0,%1; " \
			 : "+r" (_v), "=m" (*_p) \
			 : "m" (*_p) \
			 : "memory");
	return (_v);
}

static __inline u_int
atomic_fetchadd_int_xacquire(volatile u_int *_p, u_int _v)
{
	__asm __volatile(XACQUIRE MPLOCKED "xaddl %0,%1; " \
			 : "+r" (_v), "=m" (*_p) \
			 : "m" (*_p) \
			 : "memory");
	return (_v);
}

static __inline u_int
atomic_fetchadd_int_xrelease(volatile u_int *_p, u_int _v)
{
	__asm __volatile(XRELEASE MPLOCKED "xaddl %0,%1; " \
			 : "+r" (_v), "=m" (*_p) \
			 : "m" (*_p) \
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *_p, u_long _v)
{
	__asm __volatile(MPLOCKED "xaddq %0,%1; " \
			 : "+r" (_v), "=m" (*_p) \
			 : "m" (*_p) \
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long_xacquire(volatile u_long *_p, u_long _v)
{
	__asm __volatile(XACQUIRE MPLOCKED "xaddq %0,%1; " \
			 : "+r" (_v), "=m" (*_p) \
			 : "m" (*_p) \
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long_xrelease(volatile u_long *_p, u_long _v)
{
	__asm __volatile(XRELEASE MPLOCKED "xaddq %0,%1; " \
			 : "+r" (_v), "=m" (*_p) \
			 : "m" (*_p) \
			 : "memory");
	return (_v);
}
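
/*
 * Illustrative sketch (example only): atomic_fetchadd_int() returns the
 * value *before* the addition, which makes it a natural fit for ticket
 * allocation:
 */
#if 0	/* example only */
static volatile u_int example_next_ticket;

static u_int
example_take_ticket(void)
{
	/* Each caller gets a unique, monotonically increasing ticket. */
	return (atomic_fetchadd_int(&example_next_ticket, 1));
}
#endif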

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	    "	" MPLOCKED "		"
	    "	btsl	%2,%1 ;		"
	    "	setc	%0 ;		"
	    "# atomic_testandset_int"
	    : "=q" (res),		/* 0 */
	      "+m" (*p)			/* 1 */
	    : "Ir" (v & 0x1f)		/* 2 */
	    : "cc");
	return (res);
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	    "	" MPLOCKED "		"
	    "	btsq	%2,%1 ;		"
	    "	setc	%0 ;		"
	    "# atomic_testandset_long"
	    : "=q" (res),		/* 0 */
	      "+m" (*p)			/* 1 */
	    : "Ir" (v & 0x3f)		/* 2 */
	    : "cc");
	return (res);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	    "	" MPLOCKED "		"
	    "	btrl	%2,%1 ;		"
	    "	setc	%0 ;		"
	    "# atomic_testandclear_int"
	    : "=q" (res),		/* 0 */
	      "+m" (*p)			/* 1 */
	    : "Ir" (v & 0x1f)		/* 2 */
	    : "cc");
	return (res);
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	    "	" MPLOCKED "		"
	    "	btrq	%2,%1 ;		"
	    "	setc	%0 ;		"
	    "# atomic_testandclear_long"
	    : "=q" (res),		/* 0 */
	      "+m" (*p)			/* 1 */
	    : "Ir" (v & 0x3f)		/* 2 */
	    : "cc");
	return (res);
}
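
/*
 * Illustrative sketch (example only): atomic_testandset_int() returns
 * the previous state of the bit, so it can serve as a tiny try-lock:
 */
#if 0	/* example only */
static volatile u_int example_bits;

static int
example_trylock(void)
{
	/* Non-zero on success: bit 0 was clear and is now set. */
	return (atomic_testandset_int(&example_bits, 0) == 0);
}

static void
example_unlock(void)
{
	atomic_testandclear_int(&example_bits, 0);
}
#endif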

#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE res; /* accumulator can be anything */	\
							\
	__asm __volatile(MPLOCKED LOP			\
			 : "=a" (res),	/* 0 */		\
			   "=m" (*p)	/* 1 */		\
			 : "m" (*p)	/* 2 */		\
			 : "memory");			\
							\
	return (res);					\
}							\
							\
/*							\
 * The XCHG instruction asserts LOCK automagically.	\
 */							\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(SOP				\
			 : "=m" (*p),	/* 0 */		\
			   "+r" (v)	/* 1 */		\
			 : "m" (*p));	/* 2 */		\
}							\
struct __hack

ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
ATOMIC_STORE_LOAD(short, "cmpxchgw %w0,%1", "xchgw %w1,%0");
ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1", "xchgl %1,%0");
ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1", "xchgq %1,%0");
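
/*
 * Illustrative sketch (example only): the acquire/release pair orders a
 * data write against its ready flag, the usual producer/consumer
 * hand-off.  The names are made up for illustration.
 */
#if 0	/* example only */
static u_int example_data;
static volatile u_int example_ready;

static void
example_produce(u_int v)
{
	example_data = v;
	atomic_store_rel_int(&example_ready, 1);	/* publish after data */
}

static int
example_consume(u_int *out)
{
	if (atomic_load_acq_int(&example_ready) == 0)
		return (0);
	*out = example_data;	/* load-acquire ordered this read */
	return (1);
}
#endif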

#undef ATOMIC_ASM
#undef ATOMIC_STORE_LOAD

/* Acquire and release variants are identical to the normal ones. */
#define atomic_set_acq_char		atomic_set_char
#define atomic_set_rel_char		atomic_set_char
#define atomic_clear_acq_char		atomic_clear_char
#define atomic_clear_rel_char		atomic_clear_char
#define atomic_add_acq_char		atomic_add_char
#define atomic_add_rel_char		atomic_add_char
#define atomic_subtract_acq_char	atomic_subtract_char
#define atomic_subtract_rel_char	atomic_subtract_char

#define atomic_set_acq_short		atomic_set_short
#define atomic_set_rel_short		atomic_set_short
#define atomic_clear_acq_short		atomic_clear_short
#define atomic_clear_rel_short		atomic_clear_short
#define atomic_add_acq_short		atomic_add_short
#define atomic_add_rel_short		atomic_add_short
#define atomic_subtract_acq_short	atomic_subtract_short
#define atomic_subtract_rel_short	atomic_subtract_short

#define atomic_set_acq_int		atomic_set_int
#define atomic_set_rel_int		atomic_set_int
#define atomic_clear_acq_int		atomic_clear_int
#define atomic_clear_rel_int		atomic_clear_int
#define atomic_add_acq_int		atomic_add_int
#define atomic_add_rel_int		atomic_add_int
#define atomic_subtract_acq_int		atomic_subtract_int
#define atomic_subtract_rel_int		atomic_subtract_int
#define atomic_cmpset_acq_int		atomic_cmpset_int
#define atomic_cmpset_rel_int		atomic_cmpset_int

#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long

/* cpumask_t is 64 bits on x86-64 */
#define atomic_set_cpumask		atomic_set_long
#define atomic_clear_cpumask		atomic_clear_long
#define atomic_cmpset_cpumask		atomic_cmpset_long
#define atomic_store_rel_cpumask	atomic_store_rel_long
#define atomic_load_acq_cpumask		atomic_load_acq_long

/* Operations on 8-bit bytes. */
#define atomic_set_8		atomic_set_char
#define atomic_set_acq_8	atomic_set_acq_char
#define atomic_set_rel_8	atomic_set_rel_char
#define atomic_clear_8		atomic_clear_char
#define atomic_clear_acq_8	atomic_clear_acq_char
#define atomic_clear_rel_8	atomic_clear_rel_char
#define atomic_add_8		atomic_add_char
#define atomic_add_acq_8	atomic_add_acq_char
#define atomic_add_rel_8	atomic_add_rel_char
#define atomic_subtract_8	atomic_subtract_char
#define atomic_subtract_acq_8	atomic_subtract_acq_char
#define atomic_subtract_rel_8	atomic_subtract_rel_char
#define atomic_load_acq_8	atomic_load_acq_char
#define atomic_store_rel_8	atomic_store_rel_char
#define atomic_fcmpset_8	atomic_fcmpset_char

/* Operations on 16-bit words. */
#define atomic_set_16		atomic_set_short
#define atomic_set_acq_16	atomic_set_acq_short
#define atomic_set_rel_16	atomic_set_rel_short
#define atomic_clear_16		atomic_clear_short
#define atomic_clear_acq_16	atomic_clear_acq_short
#define atomic_clear_rel_16	atomic_clear_rel_short
#define atomic_add_16		atomic_add_short
#define atomic_add_acq_16	atomic_add_acq_short
#define atomic_add_rel_16	atomic_add_rel_short
#define atomic_subtract_16	atomic_subtract_short
#define atomic_subtract_acq_16	atomic_subtract_acq_short
#define atomic_subtract_rel_16	atomic_subtract_rel_short
#define atomic_load_acq_16	atomic_load_acq_short
#define atomic_store_rel_16	atomic_store_rel_short
#define atomic_fcmpset_16	atomic_fcmpset_short

/* Operations on 32-bit double words. */
#define atomic_set_32		atomic_set_int
#define atomic_set_acq_32	atomic_set_acq_int
#define atomic_set_rel_32	atomic_set_rel_int
#define atomic_clear_32		atomic_clear_int
#define atomic_clear_acq_32	atomic_clear_acq_int
#define atomic_clear_rel_32	atomic_clear_rel_int
#define atomic_add_32		atomic_add_int
#define atomic_add_acq_32	atomic_add_acq_int
#define atomic_add_rel_32	atomic_add_rel_int
#define atomic_subtract_32	atomic_subtract_int
#define atomic_subtract_acq_32	atomic_subtract_acq_int
#define atomic_subtract_rel_32	atomic_subtract_rel_int
#define atomic_load_acq_32	atomic_load_acq_int
#define atomic_store_rel_32	atomic_store_rel_int
#define atomic_cmpset_32	atomic_cmpset_int
#define atomic_fcmpset_32	atomic_fcmpset_int
#define atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define atomic_readandclear_32	atomic_readandclear_int
#define atomic_fetchadd_32	atomic_fetchadd_int

/* Operations on 64-bit quad words. */
#define atomic_load_acq_64	atomic_load_acq_long
#define atomic_store_rel_64	atomic_store_rel_long
#define atomic_swap_64		atomic_swap_long
#define atomic_fetchadd_64	atomic_fetchadd_long
#define atomic_add_64		atomic_add_long
#define atomic_cmpset_64	atomic_cmpset_long
#define atomic_fcmpset_64	atomic_fcmpset_long
#define atomic_set_64		atomic_set_long
#define atomic_clear_64		atomic_clear_long

/* Operations on pointers. */
#define atomic_set_ptr(p, v) \
	atomic_set_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_acq_ptr(p, v) \
	atomic_set_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_rel_ptr(p, v) \
	atomic_set_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_ptr(p, v) \
	atomic_clear_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_ptr(p, v) \
	atomic_add_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_acq_ptr(p, v) \
	atomic_add_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_rel_ptr(p, v) \
	atomic_add_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_ptr(p, v) \
	atomic_subtract_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_load_acq_ptr(p) \
	atomic_load_acq_long((volatile u_long *)(p))
#define atomic_store_rel_ptr(p, v) \
	atomic_store_rel_long((volatile u_long *)(p), (v))
#define atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_long((volatile u_long *)(dst), (u_long)(old), \
	    (u_long)(new))
#define atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_long((volatile u_long *)(dst), (u_long *)(old), \
	    (u_long)(new))
#define atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_long((volatile u_long *)(dst), (u_long)(old), \
	    (u_long)(new))
#define atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_long((volatile u_long *)(dst), (u_long)(old), \
	    (u_long)(new))
#define atomic_readandclear_ptr(p) \
	atomic_readandclear_long((volatile u_long *)(p))

#endif /* ! _CPU_ATOMIC_H_ */