xref: /dragonfly/sys/cpu/x86_64/include/atomic.h (revision e6e77800)
1 /*-
2  * Copyright (c) 1998 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $
27  */
28 #ifndef _CPU_ATOMIC_H_
29 #define _CPU_ATOMIC_H_
30 
31 #ifndef _SYS_TYPES_H_
32 #include <sys/types.h>
33 #endif
34 
35 /*
36  * Various simple arithmetic on memory which is atomic in the presence
37  * of interrupts and multiple processors.
38  *
39  * atomic_set_char(P, V)	(*(u_char*)(P) |= (V))
40  * atomic_clear_char(P, V)	(*(u_char*)(P) &= ~(V))
41  * atomic_add_char(P, V)	(*(u_char*)(P) += (V))
42  * atomic_subtract_char(P, V)	(*(u_char*)(P) -= (V))
43  *
44  * atomic_set_short(P, V)	(*(u_short*)(P) |= (V))
45  * atomic_clear_short(P, V)	(*(u_short*)(P) &= ~(V))
46  * atomic_add_short(P, V)	(*(u_short*)(P) += (V))
47  * atomic_subtract_short(P, V)	(*(u_short*)(P) -= (V))
48  *
49  * atomic_set_int(P, V)		(*(u_int*)(P) |= (V))
50  * atomic_clear_int(P, V)	(*(u_int*)(P) &= ~(V))
51  * atomic_add_int(P, V)		(*(u_int*)(P) += (V))
52  * atomic_subtract_int(P, V)	(*(u_int*)(P) -= (V))
53  *
54  * atomic_set_long(P, V)	(*(u_long*)(P) |= (V))
55  * atomic_clear_long(P, V)	(*(u_long*)(P) &= ~(V))
56  * atomic_add_long(P, V)	(*(u_long*)(P) += (V))
57  * atomic_subtract_long(P, V)	(*(u_long*)(P) -= (V))
58  * atomic_readandclear_long(P)	(return (*(u_long*)(P)); *(u_long*)(P) = 0;)
59  * atomic_readandclear_int(P)	(return (*(u_int*)(P)); *(u_int*)(P) = 0;)
60  */
61 
62 /*
63  * The above functions are expanded inline in the statically-linked
64  * kernel and lock prefixes are generated.
65  *
66  * Kernel modules call real functions which are built into the kernel.
67  */
68 #if defined(KLD_MODULE)
/*
 * Kernel-module flavor of ATOMIC_ASM: declares, but does not define, the
 * four variants of atomic_<NAME>_<TYPE> (locked, _nonlocked, _xacquire and
 * _xrelease).  OP, CONS and V are accepted only so that both flavors of
 * the macro can be invoked identically; they are unused here.
 *
 * The final declaration deliberately carries no line continuation, so the
 * macro body does not depend on whatever line happens to follow it.
 */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
	extern void atomic_##NAME##_##TYPE		\
		(volatile u_##TYPE *p, u_##TYPE v);	\
	extern void atomic_##NAME##_##TYPE##_nonlocked	\
		(volatile u_##TYPE *p, u_##TYPE v);	\
	extern void atomic_##NAME##_##TYPE##_xacquire	\
		(volatile u_##TYPE *p, u_##TYPE v);	\
	extern void atomic_##NAME##_##TYPE##_xrelease	\
		(volatile u_##TYPE *p, u_##TYPE v);
78 
/*
 * Bit test-and-set / test-and-clear entry points.  In this (KLD_MODULE)
 * configuration only the prototypes are visible.
 */
extern int atomic_testandset_int(volatile u_int *p, u_int v);
extern int atomic_testandset_long(volatile u_long *p, u_long v);
extern int atomic_testandclear_int(volatile u_int *p, u_int v);
extern int atomic_testandclear_long(volatile u_long *p, u_long v);
83 
84 #else /* !KLD_MODULE */
85 
/*
 * Instruction prefixes that get pasted in front of the atomic opcodes:
 *
 *	MPLOCKED	locked bus cycle
 *	XACQUIRE	lock elision (spelled as a plain repne prefix)
 *	XRELEASE	lock elision (spelled as a plain repe prefix)
 *
 * The lock elision prefixes are backwards compatible.
 */
#define MPLOCKED	"lock ; "
#define XACQUIRE	"repne; "
#define XRELEASE	"repe; "
93 
94 /*
95  * The assembly is volatilized to demark potential before-and-after side
96  * effects if an interrupt or SMP collision were to occur.  The primary
97  * atomic instructions are MP safe, the nonlocked instructions are
98  * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an
99  * atomic instruction).
100  *
101  * +m - memory is read and written (=m - memory is only written)
102  * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg)
103  *	(Note: byte instructions only work on %ax,%bx,%cx, or %dx).  iq
104  *	is good enough for our needs so don't get fancy.
105  * r  - any register.
106  *
107  * NOTE: 64-bit immediate values are not supported for most x86-64
108  *	 instructions so we have to use "r".
109  */
110 
/*
 * Inline flavor of ATOMIC_ASM.  Expands to four static inline functions,
 * each applying the single instruction OP to *p:
 *
 *	atomic_<NAME>_<TYPE>		MPLOCKED OP
 *	atomic_<NAME>_<TYPE>_xacquire	XACQUIRE MPLOCKED OP
 *	atomic_<NAME>_<TYPE>_xrelease	XRELEASE MPLOCKED OP
 *	atomic_<NAME>_<TYPE>_nonlocked	OP alone (no lock prefix)
 *
 * CONS is the asm constraint for the source operand and V is the
 * expression, written in terms of the parameter 'v', supplied for it.
 */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)				\
static __inline void							\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile(MPLOCKED OP : "+m" (*p) : CONS (V));		\
}									\
static __inline void							\
atomic_##NAME##_##TYPE##_xacquire(volatile u_##TYPE *p, u_##TYPE v)	\
{									\
	__asm __volatile(XACQUIRE MPLOCKED OP : "+m" (*p) : CONS (V));	\
}									\
static __inline void							\
atomic_##NAME##_##TYPE##_xrelease(volatile u_##TYPE *p, u_##TYPE v)	\
{									\
	__asm __volatile(XRELEASE MPLOCKED OP : "+m" (*p) : CONS (V));	\
}									\
static __inline void							\
atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)	\
{									\
	__asm __volatile(OP : "+m" (*p) : CONS (V));			\
}
141 
142 #endif /* KLD_MODULE */
143 
144 /* egcs 1.1.2+ version */
145 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",   v)
146 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq",   ~v)
147 ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",   v)
148 ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",   v)
149 
150 ATOMIC_ASM(set,	     short, "orw %w1,%0",  "iq",   v)
151 ATOMIC_ASM(clear,    short, "andw %w1,%0", "iq",  ~v)
152 ATOMIC_ASM(add,	     short, "addw %w1,%0", "iq",   v)
153 ATOMIC_ASM(subtract, short, "subw %w1,%0", "iq",   v)
154 
155 ATOMIC_ASM(set,	     int,   "orl %1,%0",  "iq",   v)
156 ATOMIC_ASM(clear,    int,   "andl %1,%0", "iq",  ~v)
157 ATOMIC_ASM(add,	     int,   "addl %1,%0", "iq",   v)
158 ATOMIC_ASM(subtract, int,   "subl %1,%0", "iq",   v)
159 
160 ATOMIC_ASM(set,	     long,  "orq %1,%0",  "r",   v)
161 ATOMIC_ASM(clear,    long,  "andq %1,%0", "r",  ~v)
162 ATOMIC_ASM(add,	     long,  "addq %1,%0", "r",   v)
163 ATOMIC_ASM(subtract, long,  "subq %1,%0", "r",   v)
164 
165 #if defined(KLD_MODULE)
166 
/* Read-and-clear entry points; only the prototypes are visible here. */
extern u_long atomic_readandclear_long(volatile u_long *addr);
extern u_int atomic_readandclear_int(volatile u_int *addr);
169 
170 #else /* !KLD_MODULE */
171 
/*
 * Exchange the u_long at *addr with zero and return the value that was
 * stored there before the exchange.
 */
static __inline u_long
atomic_readandclear_long(volatile u_long *addr)
{
	u_long prev = 0;	/* zero goes in, old contents come out */

	__asm __volatile(
	"	xchgq	%1,%0 ;		"
	"# atomic_readandclear_long"
	: "+r" (prev),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (prev);
}
187 
/*
 * Exchange the u_int at *addr with zero and return the value that was
 * stored there before the exchange.
 */
static __inline u_int
atomic_readandclear_int(volatile u_int *addr)
{
	u_int prev = 0;		/* zero goes in, old contents come out */

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_readandclear_int"
	: "+r" (prev),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (prev);
}
203 
204 #endif /* KLD_MODULE */
205 
206 /*
207  * atomic_poll_acquire_int(P)	Returns non-zero on success, 0 if the lock
208  *				has already been acquired.
209  * atomic_poll_release_int(P)
210  *
211  * These support the NDIS driver and are also used for IPIQ interlocks
212  * between cpus.  Both the acquisition and release must be
213  * cache-synchronizing instructions.
214  */
215 
216 #if defined(KLD_MODULE)
217 
/* Swap and poll-lock entry points; only the prototypes are visible here. */
extern int	 atomic_swap_int(volatile int *addr, int value);
extern long	 atomic_swap_long(volatile long *addr, long value);
extern void	*atomic_swap_ptr(volatile void **addr, void *value);
extern int	 atomic_poll_acquire_int(volatile u_int *p);
extern void	 atomic_poll_release_int(volatile u_int *p);
223 
224 #else
225 
/*
 * Exchange 'value' with the int at *addr and return what *addr held
 * before the exchange.
 */
static __inline int
atomic_swap_int(volatile int *addr, int value)
{
	int prev;

	__asm __volatile("xchgl %0, %1"
			 : "=r" (prev), "=m" (*addr)
			 : "0" (value)
			 : "memory");
	return (prev);
}
233 
/*
 * Exchange 'value' with the long at *addr and return what *addr held
 * before the exchange.
 */
static __inline long
atomic_swap_long(volatile long *addr, long value)
{
	long prev;

	__asm __volatile("xchgq %0, %1"
			 : "=r" (prev), "=m" (*addr)
			 : "0" (value)
			 : "memory");
	return (prev);
}
241 
/*
 * Exchange 'value' with the pointer at *addr and return the pointer that
 * *addr held before the exchange.
 */
static __inline void *
atomic_swap_ptr(volatile void **addr, void *value)
{
	void *prev;

	__asm __volatile("xchgq %0, %1"
			 : "=r" (prev), "=m" (*addr)
			 : "0" (value)
			 : "memory");
	return (prev);
}
249 
250 static __inline int
251 atomic_poll_acquire_int(volatile u_int *p)
252 {
253 	u_int data;
254 
255 	__asm __volatile(MPLOCKED "btsl $0,%0; setnc %%al; andl $255,%%eax" : "+m" (*p), "=a" (data));
256 	return(data);
257 }
258 
259 static __inline void
260 atomic_poll_release_int(volatile u_int *p)
261 {
262 	__asm __volatile(MPLOCKED "btrl $0,%0" : "+m" (*p));
263 }
264 
265 #endif
266 
267 /*
268  * These functions operate on a 32 bit interrupt interlock which is defined
269  * as follows:
270  *
271  *	bit 0-29	interrupt handler wait counter
272  *	bit 30		interrupt handler disabled bit
273  *	bit 31		interrupt handler currently running bit (1 = run)
274  *
275  * atomic_intr_cond_test(P)	Determine if the interlock is in an
276  *				acquired state.  Returns 0 if it is not
277  *				acquired, non-zero if it is. (not MPLOCKed)
278  *
279  * atomic_intr_cond_try(P) 	Attempt to set bit 31 to acquire the
280  *				interlock.  If we are unable to set bit 31
281  *				we return 1, otherwise we return 0.
282  *
283  * atomic_intr_cond_enter(P, func, arg)
284  *				Attempt to set bit 31 to acquire the
285  *				interlock.  If we are unable to set bit 31,
286  *				the wait counter is incremented and func(arg)
287  *				is called in a loop until we are able to set
288  *				bit 31.  Once we set bit 31, wait counter
289  *				is decremented.
290  *
291  * atomic_intr_cond_exit(P, func, arg)
292  *				Clear bit 31.  If the wait counter is still
293  *				non-zero call func(arg) once.
294  *
295  * atomic_intr_handler_disable(P)
296  *				Set bit 30, indicating that the interrupt
297  *				handler has been disabled.  Must be called
298  *				after the hardware is disabled.
299  *
300  *				Returns bit 31 indicating whether a serialized
301  *				accessor is active (typically the interrupt
302  *				handler is running).  0 == not active,
303  *				non-zero == active.
304  *
305  * atomic_intr_handler_enable(P)
306  *				Clear bit 30, indicating that the interrupt
307  *				handler has been enabled.  Must be called
308  *				before the hardware is actually enabled.
309  *
310  * atomic_intr_handler_is_enabled(P)
311  *				Returns bit 30, 0 indicates that the handler
312  *				is enabled, non-zero indicates that it is
313  *				disabled.  The request counter portion of
314  *				the field is ignored. (not MPLOCKed)
315  *
316  * atomic_intr_cond_inc(P)	Increment wait counter by 1.
317  * atomic_intr_cond_dec(P)	Decrement wait counter by 1.
318  */
319 
320 #if defined(KLD_MODULE)
321 
322 void atomic_intr_init(__atomic_intr_t *p);
323 int atomic_intr_handler_disable(__atomic_intr_t *p);
324 void atomic_intr_handler_enable(__atomic_intr_t *p);
325 int atomic_intr_handler_is_enabled(__atomic_intr_t *p);
326 int atomic_intr_cond_test(__atomic_intr_t *p);
327 int atomic_intr_cond_try(__atomic_intr_t *p);
328 void atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg);
329 void atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg);
330 void atomic_intr_cond_inc(__atomic_intr_t *p);
331 void atomic_intr_cond_dec(__atomic_intr_t *p);
332 
333 #else
334 
335 static __inline void
336 atomic_intr_init(__atomic_intr_t *p)
337 {
338 	*p = 0;
339 }
340 
341 static __inline int
342 atomic_intr_handler_disable(__atomic_intr_t *p)
343 {
344 	int data;
345 
346 	__asm __volatile(MPLOCKED "orl $0x40000000,%1; movl %1,%%eax; " \
347 				  "andl $0x80000000,%%eax" \
348 				  : "=a"(data) , "+m"(*p));
349 	return(data);
350 }
351 
352 static __inline void
353 atomic_intr_handler_enable(__atomic_intr_t *p)
354 {
355 	__asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p));
356 }
357 
358 static __inline int
359 atomic_intr_handler_is_enabled(__atomic_intr_t *p)
360 {
361 	int data;
362 
363 	__asm __volatile("movl %1,%%eax; andl $0x40000000,%%eax" \
364 			 : "=a"(data) : "m"(*p));
365 	return(data);
366 }
367 
368 static __inline void
369 atomic_intr_cond_inc(__atomic_intr_t *p)
370 {
371 	__asm __volatile(MPLOCKED "incl %0" : "+m" (*p));
372 }
373 
374 static __inline void
375 atomic_intr_cond_dec(__atomic_intr_t *p)
376 {
377 	__asm __volatile(MPLOCKED "decl %0" : "+m" (*p));
378 }
379 
380 static __inline void
381 atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg)
382 {
383 	__asm __volatile(MPLOCKED "btsl $31,%0; jnc 3f; " \
384 			 MPLOCKED "incl %0; " \
385 			 "1: ;" \
386 			 MPLOCKED "btsl $31,%0; jnc 2f; " \
387 			 "movq %2,%%rdi; call *%1; " \
388 			 "jmp 1b; " \
389 			 "2: ;" \
390 			 MPLOCKED "decl %0; " \
391 			 "3: ;" \
392 			 : "+m" (*p) \
393 			 : "r"(func), "m"(arg) \
394 			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
395 		/* YYY the function call may clobber even more registers? */
396 }
397 
398 /*
399  * Attempt to enter the interrupt condition variable.  Returns zero on
400  * success, 1 on failure.
401  */
402 static __inline int
403 atomic_intr_cond_try(__atomic_intr_t *p)
404 {
405 	int ret;
406 
407 	__asm __volatile("subl %%eax,%%eax; "			\
408 			 MPLOCKED "btsl $31,%0; jnc 2f; "	\
409 			 "movl $1,%%eax;"			\
410 			 "2: ;"
411 			 : "+m" (*p), "=&a"(ret)
412                          : : "cx", "dx");
413 	return (ret);
414 }
415 
416 
417 static __inline int
418 atomic_intr_cond_test(__atomic_intr_t *p)
419 {
420 	return((int)(*p & 0x80000000));
421 }
422 
423 static __inline void
424 atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg)
425 {
426 	__asm __volatile(MPLOCKED "btrl $31,%0; " \
427 			 "testl $0x3FFFFFFF,%0; jz 1f; " \
428 			 "movq %2,%%rdi; call *%1; " \
429 			 "1: ;" \
430 			 : "+m" (*p) \
431 			 : "r"(func), "m"(arg) \
432 			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
433 		/* YYY the function call may clobber even more registers? */
434 }
435 
436 #endif
437 
438 /*
439  * Atomic compare and set
440  *
441  * if (*_dst == _old) *_dst = _new (16, 32 and 64 bit variants)
442  *
443  * atomic_cmpxchg_int() returns the previous value of *_dst.  The cmpset,
444  * fcmpset and cmpxchg_long_test functions return 0 on failure and non-zero
445  * on success; being inline, a caller's test of the result is optimized well.
446  */
447 #if defined(KLD_MODULE)
448 
/* Compare-and-exchange family; only the prototypes are visible here. */
extern int	atomic_cmpxchg_int(volatile u_int *_dst,
			u_int _old, u_int _new);
extern int	atomic_cmpxchg_long_test(volatile u_long *_dst,
			u_long _old, u_long _new);

extern int	atomic_cmpset_short(volatile u_short *_dst,
			u_short _old, u_short _new);
extern int	atomic_cmpset_int(volatile u_int *_dst,
			u_int _old, u_int _new);
extern int	atomic_cmpset_int_xacquire(volatile u_int *_dst,
			u_int _old, u_int _new);
extern int	atomic_cmpset_int_xrelease(volatile u_int *_dst,
			u_int _old, u_int _new);
extern int	atomic_cmpset_long(volatile u_long *_dst,
			u_long _exp, u_long _src);
extern int	atomic_cmpset_long_xacquire(volatile u_long *_dst,
			u_long _exp, u_long _src);
extern int	atomic_cmpset_long_xrelease(volatile u_long *_dst,
			u_long _exp, u_long _src);

/* The fcmpset forms take the expected value by pointer. */
extern int	atomic_fcmpset_short(volatile u_short *_dst,
			u_short *_old, u_short _new);
extern int	atomic_fcmpset_int(volatile u_int *_dst,
			u_int *_old, u_int _new);
extern int	atomic_fcmpset_long(volatile u_long *_dst,
			u_long *_exp, u_long _src);

/* Fetch-and-add family. */
extern u_int	atomic_fetchadd_int(volatile u_int *_p, u_int _v);
extern u_int	atomic_fetchadd_int_xacquire(volatile u_int *_p, u_int _v);
extern u_int	atomic_fetchadd_int_xrelease(volatile u_int *_p, u_int _v);
extern u_long	atomic_fetchadd_long(volatile u_long *_p, u_long _v);
extern u_long	atomic_fetchadd_long_xacquire(volatile u_long *_p, u_long _v);
extern u_long	atomic_fetchadd_long_xrelease(volatile u_long *_p, u_long _v);
478 
479 #else
480 
481 static __inline int
482 atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new)
483 {
484 	u_int res = _old;
485 
486 	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
487 			 : "+a" (res), "=m" (*_dst) \
488 			 : "r" (_new), "m" (*_dst) \
489 			 : "memory");
490 	return (res);
491 }
492 
/*
 * Locked 64-bit compare-and-exchange: if *_dst equals _old, store _new.
 * Returns 1 when the exchange happened and 0 when it did not.
 *
 * The accumulator operand must be a full u_long: cmpxchgq compares all
 * 64 bits of %rax with *_dst, so seeding it from a 32-bit temporary
 * would drop the upper half of _old.
 */
static __inline int
atomic_cmpxchg_long_test(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
				  " setz %%al;"		/* ZF = exchanged */
				  " movsbq %%al,%%rax"	/* widen to 0 or 1 */
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory", "cc");
	return ((int)res);
}
506 
507 static __inline int
508 atomic_cmpset_short(volatile u_short *_dst, u_short _old, u_short _new)
509 {
510 	u_short res = _old;
511 
512 	__asm __volatile(MPLOCKED "cmpxchgw %w2,%1; " \
513 			 : "+a" (res), "=m" (*_dst) \
514 			 : "r" (_new), "m" (*_dst) \
515 			 : "memory");
516 	return (res == _old);
517 }
518 
519 static __inline int
520 atomic_fcmpset_short(volatile u_short *_dst, u_short *_old, u_short _new)
521 {
522 	u_short res = *_old;
523 
524 	__asm __volatile(MPLOCKED "cmpxchgw %2,%0; " \
525 			 : "+m" (*_dst),		/* 0 */
526 			   "+a" (*_old)			/* 1 */
527 			 : "r" (_new)			/* 2 */
528 			 : "memory", "cc");
529 	return (res == *_old);
530 }
531 
532 static __inline int
533 atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new)
534 {
535 	u_int res = _old;
536 
537 	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; " \
538 			 : "+a" (res), "=m" (*_dst) \
539 			 : "r" (_new), "m" (*_dst) \
540 			 : "memory");
541 	return (res == _old);
542 }
543 
544 static __inline int
545 atomic_fcmpset_int(volatile u_int *_dst, u_int *_old, u_int _new)
546 {
547 	u_int res = *_old;
548 
549 	__asm __volatile(MPLOCKED "cmpxchgl %2,%0; " \
550 			 : "+m" (*_dst),		/* 0 */
551 			   "+a" (*_old)			/* 1 */
552 			 : "r" (_new)			/* 2 */
553 			 : "memory", "cc");
554 	return (res == *_old);
555 }
556 
557 static __inline int
558 atomic_cmpset_int_xacquire(volatile u_int *_dst, u_int _old, u_int _new)
559 {
560 	u_int res = _old;
561 
562 	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgl %2,%1; " \
563 			 : "+a" (res), "=m" (*_dst) \
564 			 : "r" (_new), "m" (*_dst) \
565 			 : "memory");
566 	return (res == _old);
567 }
568 
569 static __inline int
570 atomic_cmpset_int_xrelease(volatile u_int *_dst, u_int _old, u_int _new)
571 {
572 	u_int res = _old;
573 
574 	__asm __volatile(XRELEASE MPLOCKED "cmpxchgl %2,%1; " \
575 			 : "+a" (res), "=m" (*_dst) \
576 			 : "r" (_new), "m" (*_dst) \
577 			 : "memory");
578 	return (res == _old);
579 }
580 
581 static __inline int
582 atomic_cmpset_long(volatile u_long *_dst, u_long _old, u_long _new)
583 {
584 	u_long res = _old;
585 
586 	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; " \
587 			 : "+a" (res), "=m" (*_dst) \
588 			 : "r" (_new), "m" (*_dst) \
589 			 : "memory");
590 	return (res == _old);
591 }
592 
593 static __inline int
594 atomic_fcmpset_long(volatile u_long *_dst, u_long *_old, u_long _new)
595 {
596 	u_long res = *_old;
597 
598 	__asm __volatile(MPLOCKED "cmpxchgq %2,%0; " \
599 			 : "+m" (*_dst),		/* 0 */
600 			   "+a" (*_old)			/* 1 */
601 			 : "r" (_new)			/* 2 */
602 			 : "memory", "cc");
603 	return (res == *_old);
604 }
605 
606 static __inline int
607 atomic_cmpset_long_xacquire(volatile u_long *_dst, u_long _old, u_long _new)
608 {
609 	u_long res = _old;
610 
611 	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgq %2,%1; " \
612 			 : "+a" (res), "=m" (*_dst) \
613 			 : "r" (_new), "m" (*_dst) \
614 			 : "memory");
615 	return (res == _old);
616 }
617 
618 static __inline int
619 atomic_cmpset_long_xrelease(volatile u_long *_dst, u_long _old, u_long _new)
620 {
621 	u_long res = _old;
622 
623 	__asm __volatile(XRELEASE MPLOCKED "cmpxchgq %2,%1; " \
624 			 : "+a" (res), "=m" (*_dst) \
625 			 : "r" (_new), "m" (*_dst) \
626 			 : "memory");
627 	return (res == _old);
628 }
629 
630 /*
631  * Atomically add the value of v to the integer pointed to by p and return
632  * the previous value of *p.
633  */
634 static __inline u_int
635 atomic_fetchadd_int(volatile u_int *_p, u_int _v)
636 {
637 	__asm __volatile(MPLOCKED "xaddl %0,%1; " \
638 			 : "+r" (_v), "=m" (*_p)	\
639 			 : "m" (*_p)		\
640 			 : "memory");
641 	return (_v);
642 }
643 
644 static __inline u_int
645 atomic_fetchadd_int_xacquire(volatile u_int *_p, u_int _v)
646 {
647 	__asm __volatile(XACQUIRE MPLOCKED "xaddl %0,%1; " \
648 			 : "+r" (_v), "=m" (*_p)	\
649 			 : "m" (*_p)		\
650 			 : "memory");
651 	return (_v);
652 }
653 
654 static __inline u_int
655 atomic_fetchadd_int_xrelease(volatile u_int *_p, u_int _v)
656 {
657 	__asm __volatile(XRELEASE MPLOCKED "xaddl %0,%1; " \
658 			 : "+r" (_v), "=m" (*_p)	\
659 			 : "m" (*_p)		\
660 			 : "memory");
661 	return (_v);
662 }
663 
664 static __inline u_long
665 atomic_fetchadd_long(volatile u_long *_p, u_long _v)
666 {
667 	__asm __volatile(MPLOCKED "xaddq %0,%1; " \
668 			 : "+r" (_v), "=m" (*_p)	\
669 			 : "m" (*_p)		\
670 			 : "memory");
671 	return (_v);
672 }
673 
674 static __inline u_long
675 atomic_fetchadd_long_xacquire(volatile u_long *_p, u_long _v)
676 {
677 	__asm __volatile(XACQUIRE MPLOCKED "xaddq %0,%1; " \
678 			 : "+r" (_v), "=m" (*_p)	\
679 			 : "m" (*_p)		\
680 			 : "memory");
681 	return (_v);
682 }
683 
684 static __inline u_long
685 atomic_fetchadd_long_xrelease(volatile u_long *_p, u_long _v)
686 {
687 	__asm __volatile(XRELEASE MPLOCKED "xaddq %0,%1; " \
688 			 : "+r" (_v), "=m" (*_p)	\
689 			 : "m" (*_p)		\
690 			 : "memory");
691 	return (_v);
692 }
693 
694 static __inline int
695 atomic_testandset_int(volatile u_int *p, u_int v)
696 {
697 	u_char res;
698 
699 	__asm __volatile(
700 	"	" MPLOCKED "		"
701 	"	btsl	%2,%1 ;		"
702 	"	setc	%0 ;		"
703 	"# atomic_testandset_int"
704 	: "=q" (res),			/* 0 */
705 	  "+m" (*p)			/* 1 */
706 	: "Ir" (v & 0x1f)		/* 2 */
707 	: "cc");
708 	return (res);
709 }
710 
/*
 * Set bit (v & 0x3f) of the u_long at *p with a locked bts.  Returns the
 * previous state of that bit: 1 if it was already set, 0 if it was clear.
 *
 * The bit index uses the "J" constraint (constant 0..63) rather than "I"
 * (constant 0..31), so any constant 64-bit index can be an immediate.
 */
static __inline int
atomic_testandset_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Jr" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}
727 
728 static __inline int
729 atomic_testandclear_int(volatile u_int *p, u_int v)
730 {
731 	u_char res;
732 
733 	__asm __volatile(
734 	"	" MPLOCKED "		"
735 	"	btrl	%2,%1 ;		"
736 	"	setc	%0 ;		"
737 	"# atomic_testandclear_int"
738 	: "=q" (res),			/* 0 */
739 	  "+m" (*p)			/* 1 */
740 	: "Ir" (v & 0x1f)		/* 2 */
741 	: "cc");
742 	return (res);
743 }
744 
/*
 * Clear bit (v & 0x3f) of the u_long at *p with a locked btr.  Returns
 * the previous state of that bit: 1 if it was set, 0 if it was already
 * clear.
 *
 * The bit index uses the "J" constraint (constant 0..63) rather than "I"
 * (constant 0..31), so any constant 64-bit index can be an immediate.
 */
static __inline int
atomic_testandclear_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Jr" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}
761 
762 #endif	/* KLD_MODULE */
763 
764 #if defined(KLD_MODULE)
765 
/*
 * Kernel-module flavor of ATOMIC_STORE_LOAD: declares, but does not
 * define, atomic_load_acq_<TYPE> and atomic_store_rel_<TYPE>.  LOP and
 * SOP are unused here.
 */
#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)				\
extern u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);		\
extern void	atomic_store_rel_##TYPE(volatile u_##TYPE *p,		\
			u_##TYPE v);
769 
770 #else /* !KLD_MODULE */
771 
/*
 * Inline flavor of ATOMIC_STORE_LOAD.  Expands to two static inline
 * functions for u_<TYPE>:
 *
 * atomic_load_acq_<TYPE>(p)
 *	Runs LOP (a cmpxchg of the accumulator against *p) under a lock
 *	prefix.  The accumulator's starting value is irrelevant: cmpxchg
 *	either finds it equal to *p or loads *p into it, so it ends up
 *	holding *p either way.  That value is returned.
 *
 * atomic_store_rel_<TYPE>(p, v)
 *	Runs SOP (an xchg of v with *p), which stores v into *p.
 *
 * Both asm statements carry a "memory" clobber so the compiler cannot
 * move other memory accesses across them; without it on the store side
 * the release store would order nothing at the compiler level.
 *
 * The trailing 'struct __hack' absorbs the semicolon written after each
 * instantiation.
 */
#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)			\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
{								\
	u_##TYPE res; /* accumulator can be anything */		\
								\
	__asm __volatile(MPLOCKED LOP				\
	: "=a" (res),			/* 0 */			\
	  "=m" (*p)			/* 1 */			\
	: "m" (*p)			/* 2 */			\
	: "memory");						\
								\
	return (res);						\
}								\
								\
/*								\
 * The XCHG instruction asserts LOCK automagically.		\
 */								\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
	__asm __volatile(SOP					\
	: "=m" (*p),			/* 0 */			\
	  "+r" (v)			/* 1 */			\
	: "m" (*p)			/* 2 */			\
	: "memory");						\
}								\
struct __hack
799 
800 #endif /* !KLD_MODULE */
801 
802 ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0");
803 ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
804 ATOMIC_STORE_LOAD(int,  "cmpxchgl %0,%1",  "xchgl %1,%0");
805 ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1",  "xchgq %1,%0");
806 
807 #undef ATOMIC_ASM
808 #undef ATOMIC_STORE_LOAD
809 
/*
 * Acquire and release variants are identical to the normal ones: every
 * _acq_/_rel_ name below is a plain alias for the unsuffixed operation.
 */

/* char */
#define	atomic_set_acq_char		atomic_set_char
#define	atomic_set_rel_char		atomic_set_char
#define	atomic_clear_acq_char		atomic_clear_char
#define	atomic_clear_rel_char		atomic_clear_char
#define	atomic_add_acq_char		atomic_add_char
#define	atomic_add_rel_char		atomic_add_char
#define	atomic_subtract_acq_char	atomic_subtract_char
#define	atomic_subtract_rel_char	atomic_subtract_char

/* short */
#define	atomic_set_acq_short		atomic_set_short
#define	atomic_set_rel_short		atomic_set_short
#define	atomic_clear_acq_short		atomic_clear_short
#define	atomic_clear_rel_short		atomic_clear_short
#define	atomic_add_acq_short		atomic_add_short
#define	atomic_add_rel_short		atomic_add_short
#define	atomic_subtract_acq_short	atomic_subtract_short
#define	atomic_subtract_rel_short	atomic_subtract_short

/* int */
#define	atomic_set_acq_int		atomic_set_int
#define	atomic_set_rel_int		atomic_set_int
#define	atomic_clear_acq_int		atomic_clear_int
#define	atomic_clear_rel_int		atomic_clear_int
#define	atomic_add_acq_int		atomic_add_int
#define	atomic_add_rel_int		atomic_add_int
#define	atomic_subtract_acq_int		atomic_subtract_int
#define	atomic_subtract_rel_int		atomic_subtract_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int

/* long */
#define	atomic_set_acq_long		atomic_set_long
#define	atomic_set_rel_long		atomic_set_long
#define	atomic_clear_acq_long		atomic_clear_long
#define	atomic_clear_rel_long		atomic_clear_long
#define	atomic_add_acq_long		atomic_add_long
#define	atomic_add_rel_long		atomic_add_long
#define	atomic_subtract_acq_long	atomic_subtract_long
#define	atomic_subtract_rel_long	atomic_subtract_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
850 
/* cpumask_t is 64-bits on x86-64, so cpumask operations map to long. */
#define	atomic_set_cpumask		atomic_set_long
#define	atomic_clear_cpumask		atomic_clear_long
#define	atomic_cmpset_cpumask		atomic_cmpset_long
#define	atomic_store_rel_cpumask	atomic_store_rel_long
#define	atomic_load_acq_cpumask		atomic_load_acq_long
857 
/* Operations on 8-bit bytes (aliases for the char functions). */
#define	atomic_set_8			atomic_set_char
#define	atomic_set_acq_8		atomic_set_acq_char
#define	atomic_set_rel_8		atomic_set_rel_char
#define	atomic_clear_8			atomic_clear_char
#define	atomic_clear_acq_8		atomic_clear_acq_char
#define	atomic_clear_rel_8		atomic_clear_rel_char
#define	atomic_add_8			atomic_add_char
#define	atomic_add_acq_8		atomic_add_acq_char
#define	atomic_add_rel_8		atomic_add_rel_char
#define	atomic_subtract_8		atomic_subtract_char
#define	atomic_subtract_acq_8		atomic_subtract_acq_char
#define	atomic_subtract_rel_8		atomic_subtract_rel_char
#define	atomic_load_acq_8		atomic_load_acq_char
#define	atomic_store_rel_8		atomic_store_rel_char
873 
/* Operations on 16-bit words (aliases for the short functions). */
#define	atomic_set_16			atomic_set_short
#define	atomic_set_acq_16		atomic_set_acq_short
#define	atomic_set_rel_16		atomic_set_rel_short
#define	atomic_clear_16			atomic_clear_short
#define	atomic_clear_acq_16		atomic_clear_acq_short
#define	atomic_clear_rel_16		atomic_clear_rel_short
#define	atomic_add_16			atomic_add_short
#define	atomic_add_acq_16		atomic_add_acq_short
#define	atomic_add_rel_16		atomic_add_rel_short
#define	atomic_subtract_16		atomic_subtract_short
#define	atomic_subtract_acq_16		atomic_subtract_acq_short
#define	atomic_subtract_rel_16		atomic_subtract_rel_short
#define	atomic_load_acq_16		atomic_load_acq_short
#define	atomic_store_rel_16		atomic_store_rel_short
889 
/* Operations on 32-bit double words (aliases for the int functions). */
#define	atomic_set_32			atomic_set_int
#define	atomic_set_acq_32		atomic_set_acq_int
#define	atomic_set_rel_32		atomic_set_rel_int
#define	atomic_clear_32			atomic_clear_int
#define	atomic_clear_acq_32		atomic_clear_acq_int
#define	atomic_clear_rel_32		atomic_clear_rel_int
#define	atomic_add_32			atomic_add_int
#define	atomic_add_acq_32		atomic_add_acq_int
#define	atomic_add_rel_32		atomic_add_rel_int
#define	atomic_subtract_32		atomic_subtract_int
#define	atomic_subtract_acq_32		atomic_subtract_acq_int
#define	atomic_subtract_rel_32		atomic_subtract_rel_int
#define	atomic_load_acq_32		atomic_load_acq_int
#define	atomic_store_rel_32		atomic_store_rel_int
#define	atomic_cmpset_32		atomic_cmpset_int
#define	atomic_fcmpset_32		atomic_fcmpset_int
#define	atomic_cmpset_acq_32		atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32		atomic_cmpset_rel_int
#define	atomic_readandclear_32		atomic_readandclear_int
#define	atomic_fetchadd_32		atomic_fetchadd_int
911 
/* Operations on 64-bit quad words (aliases for the long functions). */
#define	atomic_load_acq_64		atomic_load_acq_long
#define	atomic_store_rel_64		atomic_store_rel_long
#define	atomic_swap_64			atomic_swap_long
#define	atomic_fetchadd_64		atomic_fetchadd_long
#define	atomic_add_64			atomic_add_long
#define	atomic_cmpset_64		atomic_cmpset_long
#define	atomic_fcmpset_64		atomic_fcmpset_long
#define	atomic_set_64			atomic_set_long
#define	atomic_clear_64			atomic_clear_long
922 
/*
 * Operations on pointers.
 *
 * Each macro forwards to the corresponding u_long function, casting the
 * target to (volatile u_long *) and every value argument to u_long, so
 * callers may pass pointers or integers alike.  atomic_load_acq_ptr()
 * and atomic_readandclear_ptr() yield the u_long returned by the
 * underlying function.
 */
#define atomic_set_ptr(p, v) \
	atomic_set_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_acq_ptr(p, v) \
	atomic_set_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_set_rel_ptr(p, v) \
	atomic_set_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_ptr(p, v) \
	atomic_clear_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_ptr(p, v) \
	atomic_add_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_acq_ptr(p, v) \
	atomic_add_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_add_rel_ptr(p, v) \
	atomic_add_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_ptr(p, v) \
	atomic_subtract_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_long((volatile u_long *)(p), (u_long)(v))
#define atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_load_acq_ptr(p) \
	atomic_load_acq_long((volatile u_long *)(p))
/* The value is cast like everywhere else so a pointer can be stored. */
#define atomic_store_rel_ptr(p, v) \
	atomic_store_rel_long((volatile u_long *)(p), (u_long)(v))
#define atomic_cmpset_ptr(dst, old, new) 				\
	atomic_cmpset_long((volatile u_long *)(dst), (u_long)(old),	\
				(u_long)(new))
#define atomic_fcmpset_ptr(dst, old, new) 				\
	atomic_fcmpset_long((volatile u_long *)(dst), (u_long *)(old),	\
				(u_long)(new))
#define atomic_cmpset_acq_ptr(dst, old, new)				\
	atomic_cmpset_acq_long((volatile u_long *)(dst), (u_long)(old), \
				(u_long)(new))
#define atomic_cmpset_rel_ptr(dst, old, new)				\
	atomic_cmpset_rel_long((volatile u_long *)(dst), (u_long)(old), \
				(u_long)(new))
#define atomic_readandclear_ptr(p)					\
	atomic_readandclear_long((volatile u_long *)(p))
966 
967 #endif /* ! _CPU_ATOMIC_H_ */
968