/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $
 */
#ifndef _CPU_ATOMIC_H_
#define _CPU_ATOMIC_H_

#ifndef _SYS_TYPES_H_
#include <sys/types.h>
#endif

/*
 * Various simple arithmetic on memory which is atomic in the presence
 * of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char*)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char*)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char*)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char*)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short*)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short*)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short*)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short*)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int*)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int*)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int*)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int*)(P) -= (V))
 *
 * atomic_set_long(P, V)	(*(u_long*)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long*)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long*)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long*)(P) -= (V))
 * atomic_readandclear_long(P)	(return (*(u_long*)(P)); *(u_long*)(P) = 0;)
 * atomic_readandclear_int(P)	(return (*(u_int*)(P)); *(u_int*)(P) = 0;)
 */
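
/*
 * Illustrative example (a sketch, not part of this header): an MP-safe
 * event counter and flag word built on the primitives documented above.
 *
 *	static u_int event_count;
 *	static u_int event_flags;
 *
 *	static void
 *	event_record(u_int flag)
 *	{
 *		atomic_add_int(&event_count, 1);
 *		atomic_set_int(&event_flags, flag);
 *	}
 */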

/*
 * locked bus cycle
 * lock elision (backwards compatible): XACQUIRE and XRELEASE are the
 * REPNE/REPE prefixes, which pre-TSX processors simply ignore on LOCKed
 * instructions, so elision-annotated code still runs correctly on
 * older cpus.
 */
#define MPLOCKED	"lock ; "
#define XACQUIRE	"repne; "	/* lock elision */
#define XRELEASE	"repe; "	/* lock elision */

/*
 * The assembly is volatilized to demarcate potential before-and-after side
 * effects if an interrupt or SMP collision were to occur.  The primary
 * atomic instructions are MP safe; the nonlocked instructions are
 * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an
 * atomic instruction).
 *
 * +m - memory is read and written (=m - memory is only written)
 * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg)
 *	(Note: byte instructions only work on %ax,%bx,%cx, or %dx).  iq
 *	is good enough for our needs so don't get fancy.
 * r  - any register.
 *
 * NOTE: 64-bit immediate values are not supported for most x86-64
 *	 instructions so we have to use "r".
 */

/* egcs 1.1.2+ version */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
			 : "+m" (*p)			\
			 : CONS (V));			\
}							\
static __inline void					\
atomic_##NAME##_##TYPE##_xacquire(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(XACQUIRE MPLOCKED OP		\
			 : "+m" (*p)			\
			 : CONS (V));			\
}							\
static __inline void					\
atomic_##NAME##_##TYPE##_xrelease(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(XRELEASE MPLOCKED OP		\
			 : "+m" (*p)			\
			 : CONS (V));			\
}							\
static __inline void					\
atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(OP				\
			 : "+m" (*p)			\
			 : CONS (V));			\
}

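/*
 * For reference, a sketch of what one instantiation generates.
 * ATOMIC_ASM(add, int, "addl %1,%0", "iq", v) expands to four inlines,
 * the first roughly equivalent to:
 *
 *	static __inline void
 *	atomic_add_int(volatile u_int *p, u_int v)
 *	{
 *		__asm __volatile("lock ; addl %1,%0" : "+m" (*p) : "iq" (v));
 *	}
 *
 * plus _xacquire, _xrelease, and _nonlocked variants which prepend the
 * elision prefixes or omit the lock prefix, respectively.
 */
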
/* egcs 1.1.2+ version */
ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",   v)
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq",   ~v)
ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",   v)
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",   v)

ATOMIC_ASM(set,	     short, "orw %w1,%0",  "iq",   v)
ATOMIC_ASM(clear,    short, "andw %w1,%0", "iq",  ~v)
ATOMIC_ASM(add,	     short, "addw %w1,%0", "iq",   v)
ATOMIC_ASM(subtract, short, "subw %w1,%0", "iq",   v)

ATOMIC_ASM(set,	     int,   "orl %1,%0",  "iq",   v)
ATOMIC_ASM(clear,    int,   "andl %1,%0", "iq",  ~v)
ATOMIC_ASM(add,	     int,   "addl %1,%0", "iq",   v)
ATOMIC_ASM(subtract, int,   "subl %1,%0", "iq",   v)

ATOMIC_ASM(set,	     long,  "orq %1,%0",  "r",   v)
ATOMIC_ASM(clear,    long,  "andq %1,%0", "r",  ~v)
ATOMIC_ASM(add,	     long,  "addq %1,%0", "r",   v)
ATOMIC_ASM(subtract, long,  "subq %1,%0", "r",   v)

static __inline u_long
atomic_readandclear_long(volatile u_long *addr)
{
	u_long res;

	res = 0;
	__asm __volatile(
	"	xchgq	%1,%0 ;		"
	"# atomic_readandclear_long"
	: "+r" (res),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (res);
}

static __inline u_int
atomic_readandclear_int(volatile u_int *addr)
{
	u_int res;

	res = 0;
	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_readandclear_int"
	: "+r" (res),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (res);
}

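/*
 * Illustrative sketch (not part of this header): atomically drain a
 * word of pending-event bits so each event is observed exactly once,
 * even against concurrent setters.
 *
 *	u_int pending = atomic_readandclear_int(&pending_events);
 *	while (pending) {
 *		(process the lowest set bit)
 *		pending &= pending - 1;
 *	}
 */
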
/*
 * atomic_poll_acquire_int(P)	Returns non-zero on success, 0 if the lock
 *				has already been acquired.
 * atomic_poll_release_int(P)
 *
 * These support the NDIS driver and are also used for IPIQ interlocks
 * between cpus.  Both the acquisition and release must be
 * cache-synchronizing instructions.
 */

static __inline int
atomic_swap_int(volatile int *addr, int value)
{
	__asm __volatile("xchgl %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

static __inline long
atomic_swap_long(volatile long *addr, long value)
{
	__asm __volatile("xchgq %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

static __inline void *
atomic_swap_ptr(volatile void **addr, void *value)
{
	__asm __volatile("xchgq %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

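/*
 * Illustrative sketch (hypothetical 'struct job' producer/consumer):
 * atomic_swap_ptr() can detach an entire singly-linked batch from a
 * shared head pointer in one step, after which the batch can be walked
 * without further locking.
 *
 *	struct job *batch;
 *
 *	batch = atomic_swap_ptr((volatile void **)&job_head, NULL);
 */
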
static __inline int
atomic_poll_acquire_int(volatile u_int *p)
{
	u_int data;

	__asm __volatile(MPLOCKED "btsl $0,%0; setnc %%al; andl $255,%%eax"
			 : "+m" (*p), "=a" (data));
	return(data);
}

static __inline void
atomic_poll_release_int(volatile u_int *p)
{
	__asm __volatile(MPLOCKED "btrl $0,%0" : "+m" (*p));
}

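/*
 * Illustrative sketch: a minimal polled interlock.  Spin until the low
 * bit is acquired, do the work, then release.  (cpu_pause() is assumed
 * here; any brief backoff will do.)
 *
 *	while (atomic_poll_acquire_int(&interlock) == 0)
 *		cpu_pause();
 *	(critical section)
 *	atomic_poll_release_int(&interlock);
 */
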
/*
 * These functions operate on a 32 bit interrupt interlock which is defined
 * as follows:
 *
 *	bit 0-29	interrupt handler wait counter
 *	bit 30		interrupt handler disabled bit
 *	bit 31		interrupt handler currently running bit (1 = run)
 *
 * atomic_intr_cond_test(P)	Determine if the interlock is in an
 *				acquired state.  Returns 0 if it is not
 *				acquired, non-zero if it is. (not MPLOCKed)
 *
 * atomic_intr_cond_try(P)	Attempt to set bit 31 to acquire the
 *				interlock.  If we are unable to set bit 31
 *				we return 1, otherwise we return 0.
 *
 * atomic_intr_cond_enter(P, func, arg)
 *				Attempt to set bit 31 to acquire the
 *				interlock.  If we are unable to set bit 31,
 *				the wait counter is incremented and func(arg)
 *				is called in a loop until we are able to set
 *				bit 31.  Once we set bit 31, the wait counter
 *				is decremented.
 *
 * atomic_intr_cond_exit(P, func, arg)
 *				Clear bit 31.  If the wait counter is still
 *				non-zero call func(arg) once.
 *
 * atomic_intr_handler_disable(P)
 *				Set bit 30, indicating that the interrupt
 *				handler has been disabled.  Must be called
 *				after the hardware is disabled.
 *
 *				Returns bit 31 indicating whether a serialized
 *				accessor is active (typically the interrupt
 *				handler is running).  0 == not active,
 *				non-zero == active.
 *
 * atomic_intr_handler_enable(P)
 *				Clear bit 30, indicating that the interrupt
 *				handler has been enabled.  Must be called
 *				before the hardware is actually enabled.
 *
 * atomic_intr_handler_is_enabled(P)
 *				Returns bit 30, 0 indicates that the handler
 *				is enabled, non-zero indicates that it is
 *				disabled.  The wait counter portion of
 *				the field is ignored. (not MPLOCKed)
 *
 * atomic_intr_cond_inc(P)	Increment the wait counter by 1.
 * atomic_intr_cond_dec(P)	Decrement the wait counter by 1.
 */

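/*
 * Illustrative sketch of the enter/exit pairing, assuming hypothetical
 * driver helpers that block and wake waiters:
 *
 *	atomic_intr_cond_enter(&sc->intr_lock, driver_wait, sc);
 *	(serialized work, e.g. running the interrupt handler)
 *	atomic_intr_cond_exit(&sc->intr_lock, driver_wakeup, sc);
 *
 * driver_wait() is called in a loop while another cpu holds bit 31;
 * driver_wakeup() is called once on exit if waiters remain counted.
 */
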
static __inline void
atomic_intr_init(__atomic_intr_t *p)
{
	*p = 0;
}

static __inline int
atomic_intr_handler_disable(__atomic_intr_t *p)
{
	int data;

	__asm __volatile(MPLOCKED "orl $0x40000000,%1; movl %1,%%eax; " \
				  "andl $0x80000000,%%eax" \
				  : "=a"(data), "+m"(*p));
	return(data);
}

static __inline void
atomic_intr_handler_enable(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p));
}

static __inline int
atomic_intr_handler_is_enabled(__atomic_intr_t *p)
{
	int data;

	__asm __volatile("movl %1,%%eax; andl $0x40000000,%%eax" \
			 : "=a"(data) : "m"(*p));
	return(data);
}

static __inline void
atomic_intr_cond_inc(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "incl %0" : "+m" (*p));
}

static __inline void
atomic_intr_cond_dec(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "decl %0" : "+m" (*p));
}

static __inline void
atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg)
{
	__asm __volatile(MPLOCKED "btsl $31,%0; jnc 3f; " \
			 MPLOCKED "incl %0; " \
			 "1: ;" \
			 MPLOCKED "btsl $31,%0; jnc 2f; " \
			 "movq %2,%%rdi; call *%1; " \
			 "jmp 1b; " \
			 "2: ;" \
			 MPLOCKED "decl %0; " \
			 "3: ;" \
			 : "+m" (*p) \
			 : "r"(func), "m"(arg) \
			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
		/* YYY the function call may clobber even more registers? */
}

/*
 * Attempt to enter the interrupt condition variable.  Returns zero on
 * success, 1 on failure.
 */
static __inline int
atomic_intr_cond_try(__atomic_intr_t *p)
{
	int ret;

	__asm __volatile("subl %%eax,%%eax; "			\
			 MPLOCKED "btsl $31,%0; jnc 2f; "	\
			 "movl $1,%%eax;"			\
			 "2: ;"
			 : "+m" (*p), "=&a"(ret)
			 : : "cx", "dx");
	return (ret);
}

static __inline int
atomic_intr_cond_test(__atomic_intr_t *p)
{
	return((int)(*p & 0x80000000));
}

static __inline void
atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg)
{
	__asm __volatile(MPLOCKED "btrl $31,%0; " \
			 "testl $0x3FFFFFFF,%0; jz 1f; " \
			 "movq %2,%%rdi; call *%1; " \
			 "1: ;" \
			 : "+m" (*p) \
			 : "r"(func), "m"(arg) \
			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
		/* YYY the function call may clobber even more registers? */
}

/*
 * Atomic compare and set
 *
 *	if (*_dst == _old) *_dst = _new (for the operand size in question)
 *
 * The cmpset and fcmpset variants return 0 on failure and non-zero on
 * success; atomic_cmpxchg_int instead returns the previous value of
 * *_dst.  The inlines are designed to allow the compiler to optimize
 * the common case where the caller calls these functions from inside
 * a conditional.
 */

static __inline int
atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res);
}

static __inline int
atomic_cmpxchg_long_test(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;	/* u_long: cmpxchgq compares all of %rax */

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
				  " setz %%al;"
				  " movsbq %%al,%%rax" \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res);
}

static __inline int
atomic_cmpset_short(volatile u_short *_dst, u_short _old, u_short _new)
{
	u_short res = _old;

	__asm __volatile(MPLOCKED "cmpxchgw %w2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_char(volatile u_char *_dst, u_char *_old, u_char _new)
{
	u_char res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgb %2,%0; " \
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_fcmpset_short(volatile u_short *_dst, u_short *_old, u_short _new)
{
	u_short res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgw %2,%0; " \
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_int(volatile u_int *_dst, u_int *_old, u_int _new)
{
	u_int res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%0; " \
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

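/*
 * Illustrative sketch: the canonical fcmpset update loop.  On failure
 * the current value of *p has already been reloaded into 'old' by the
 * cmpxchg, so the loop need not re-read memory itself (the advantage
 * of fcmpset over cmpset).  SOME_FLAG is a placeholder.
 *
 *	u_int old, new;
 *
 *	old = *p;
 *	do {
 *		new = old | SOME_FLAG;
 *	} while (atomic_fcmpset_int(p, &old, new) == 0);
 */
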
static __inline int
atomic_cmpset_int_xacquire(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_int_xrelease(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(XRELEASE MPLOCKED "cmpxchgl %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_long(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_dst, u_long *_old, u_long _new)
{
	u_long res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%0; " \
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_cmpset_long_xacquire(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgq %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_long_xrelease(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(XRELEASE MPLOCKED "cmpxchgq %2,%1; " \
			 : "+a" (res), "=m" (*_dst) \
			 : "r" (_new), "m" (*_dst) \
			 : "memory");
	return (res == _old);
}

static inline void *
atomic_cas_ptr(volatile void *p, void *e, void *n)
{
	__asm volatile(MPLOCKED " cmpxchgq %2, %1"
	    : "=a" (n), "=m" (*(volatile unsigned long *)p)
	    : "r" (n), "a" (e), "m" (*(volatile unsigned long *)p));

	return (n);
}

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *_p, u_int _v)
{
	__asm __volatile(MPLOCKED "xaddl %0,%1; " \
			 : "+r" (_v), "=m" (*_p)	\
			 : "m" (*_p)		\
			 : "memory");
	return (_v);
}

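/*
 * Illustrative sketch: fetchadd makes a natural unique-id or ticket
 * allocator, since every caller observes a distinct previous value.
 *
 *	static u_int next_id;
 *
 *	u_int my_id = atomic_fetchadd_int(&next_id, 1);
 */
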
static __inline u_int
atomic_fetchadd_int_xacquire(volatile u_int *_p, u_int _v)
{
	__asm __volatile(XACQUIRE MPLOCKED "xaddl %0,%1; " \
			 : "+r" (_v), "=m" (*_p)	\
			 : "m" (*_p)		\
			 : "memory");
	return (_v);
}

static __inline u_int
atomic_fetchadd_int_xrelease(volatile u_int *_p, u_int _v)
{
	__asm __volatile(XRELEASE MPLOCKED "xaddl %0,%1; " \
			 : "+r" (_v), "=m" (*_p)	\
			 : "m" (*_p)		\
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *_p, u_long _v)
{
	__asm __volatile(MPLOCKED "xaddq %0,%1; " \
			 : "+r" (_v), "=m" (*_p)	\
			 : "m" (*_p)		\
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long_xacquire(volatile u_long *_p, u_long _v)
{
	__asm __volatile(XACQUIRE MPLOCKED "xaddq %0,%1; " \
			 : "+r" (_v), "=m" (*_p)	\
			 : "m" (*_p)		\
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long_xrelease(volatile u_long *_p, u_long _v)
{
	__asm __volatile(XRELEASE MPLOCKED "xaddq %0,%1; " \
			 : "+r" (_v), "=m" (*_p)	\
			 : "m" (*_p)		\
			 : "memory");
	return (_v);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}

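/*
 * Illustrative sketch: atomic_testandset_int() as a one-bit try-lock.
 * A zero return means the bit was previously clear and is now owned by
 * the caller.  MYLOCK_BIT is a placeholder.
 *
 *	if (atomic_testandset_int(&lockword, MYLOCK_BIT) == 0) {
 *		(got the lock, do work)
 *		atomic_clear_int(&lockword, 1 << MYLOCK_BIT);
 *	}
 */
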
#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE res; /* accumulator can be anything */	\
							\
	__asm __volatile(MPLOCKED LOP			\
	: "=a" (res),			/* 0 */		\
	  "=m" (*p)			/* 1 */		\
	: "m" (*p)			/* 2 */		\
	: "memory");					\
							\
	return (res);					\
}							\
							\
/*							\
 * The XCHG instruction asserts LOCK automagically.	\
 */							\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(SOP				\
	: "=m" (*p),			/* 0 */		\
	  "+r" (v)			/* 1 */		\
	: "m" (*p));			/* 2 */		\
}							\
struct __hack

ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0");
ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
ATOMIC_STORE_LOAD(int,  "cmpxchgl %0,%1",  "xchgl %1,%0");
ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1",  "xchgq %1,%0");

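/*
 * Illustrative sketch: publish data with store_rel and consume it with
 * load_acq so the ready flag can never be observed before the payload.
 *
 *	producer:	data = compute();
 *			atomic_store_rel_int(&ready, 1);
 *
 *	consumer:	if (atomic_load_acq_int(&ready))
 *				use(data);
 */
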
#undef ATOMIC_ASM
#undef ATOMIC_STORE_LOAD

/* Acquire and release variants are identical to the normal ones. */
#define	atomic_set_acq_char		atomic_set_char
#define	atomic_set_rel_char		atomic_set_char
#define	atomic_clear_acq_char		atomic_clear_char
#define	atomic_clear_rel_char		atomic_clear_char
#define	atomic_add_acq_char		atomic_add_char
#define	atomic_add_rel_char		atomic_add_char
#define	atomic_subtract_acq_char	atomic_subtract_char
#define	atomic_subtract_rel_char	atomic_subtract_char

#define	atomic_set_acq_short		atomic_set_short
#define	atomic_set_rel_short		atomic_set_short
#define	atomic_clear_acq_short		atomic_clear_short
#define	atomic_clear_rel_short		atomic_clear_short
#define	atomic_add_acq_short		atomic_add_short
#define	atomic_add_rel_short		atomic_add_short
#define	atomic_subtract_acq_short	atomic_subtract_short
#define	atomic_subtract_rel_short	atomic_subtract_short

#define	atomic_set_acq_int		atomic_set_int
#define	atomic_set_rel_int		atomic_set_int
#define	atomic_clear_acq_int		atomic_clear_int
#define	atomic_clear_rel_int		atomic_clear_int
#define	atomic_add_acq_int		atomic_add_int
#define	atomic_add_rel_int		atomic_add_int
#define	atomic_subtract_acq_int		atomic_subtract_int
#define	atomic_subtract_rel_int		atomic_subtract_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int

#define	atomic_set_acq_long		atomic_set_long
#define	atomic_set_rel_long		atomic_set_long
#define	atomic_clear_acq_long		atomic_clear_long
#define	atomic_clear_rel_long		atomic_clear_long
#define	atomic_add_acq_long		atomic_add_long
#define	atomic_add_rel_long		atomic_add_long
#define	atomic_subtract_acq_long	atomic_subtract_long
#define	atomic_subtract_rel_long	atomic_subtract_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long

/* cpumask_t is 64-bits on x86-64 */
#define	atomic_set_cpumask		atomic_set_long
#define	atomic_clear_cpumask		atomic_clear_long
#define	atomic_cmpset_cpumask		atomic_cmpset_long
#define	atomic_store_rel_cpumask	atomic_store_rel_long
#define	atomic_load_acq_cpumask		atomic_load_acq_long

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char
#define	atomic_fcmpset_8	atomic_fcmpset_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short
#define	atomic_fcmpset_16	atomic_fcmpset_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_fcmpset_32	atomic_fcmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int

/* Operations on 64-bit quad words. */
#define	atomic_load_acq_64	atomic_load_acq_long
#define	atomic_store_rel_64	atomic_store_rel_long
#define	atomic_swap_64		atomic_swap_long
#define	atomic_fetchadd_64	atomic_fetchadd_long
#define	atomic_add_64		atomic_add_long
#define	atomic_cmpset_64	atomic_cmpset_long
#define	atomic_fcmpset_64	atomic_fcmpset_long
#define	atomic_set_64		atomic_set_long
#define	atomic_clear_64		atomic_clear_long

/* Operations on pointers. */
#define atomic_set_ptr(p, v) \
	atomic_set_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_acq_ptr(p, v) \
	atomic_set_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_rel_ptr(p, v) \
	atomic_set_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_ptr(p, v) \
	atomic_clear_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_ptr(p, v) \
	atomic_add_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_acq_ptr(p, v) \
	atomic_add_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_rel_ptr(p, v) \
	atomic_add_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_ptr(p, v) \
	atomic_subtract_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_load_acq_ptr(p) \
	atomic_load_acq_long((volatile u_long *)(p))
#define atomic_store_rel_ptr(p, v) \
	atomic_store_rel_long((volatile u_long *)(p), (v))
#define atomic_cmpset_ptr(dst, old, new)				\
	atomic_cmpset_long((volatile u_long *)(dst), (u_long)(old),	\
				(u_long)(new))
#define atomic_fcmpset_ptr(dst, old, new)				\
	atomic_fcmpset_long((volatile u_long *)(dst), (u_long *)(old),	\
				(u_long)(new))
#define atomic_cmpset_acq_ptr(dst, old, new)				\
	atomic_cmpset_acq_long((volatile u_long *)(dst), (u_long)(old), \
				(u_long)(new))
#define atomic_cmpset_rel_ptr(dst, old, new)				\
	atomic_cmpset_rel_long((volatile u_long *)(dst), (u_long)(old), \
				(u_long)(new))
#define atomic_readandclear_ptr(p)					\
	atomic_readandclear_long((volatile u_long *)(p))

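/*
 * Illustrative sketch: a lock-free singly-linked push built on the
 * pointer cmpset wrapper ('struct node' is hypothetical).
 *
 *	void
 *	push(struct node **head, struct node *n)
 *	{
 *		do {
 *			n->next = *head;
 *		} while (atomic_cmpset_ptr(head, n->next, n) == 0);
 *	}
 */
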
#endif /* ! _CPU_ATOMIC_H_ */