xref: /dragonfly/sys/cpu/x86_64/include/atomic.h (revision 02318f07)
1 /*-
2  * Copyright (c) 1998 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $
27  */
28 #ifndef _CPU_ATOMIC_H_
29 #define _CPU_ATOMIC_H_
30 
31 #ifndef _SYS_TYPES_H_
32 #include <sys/types.h>
33 #endif
34 
35 /*
36  * Various simple arithmetic on memory which is atomic in the presence
37  * of interrupts and multiple processors.
38  *
39  * atomic_set_char(P, V)	(*(u_char*)(P) |= (V))
40  * atomic_clear_char(P, V)	(*(u_char*)(P) &= ~(V))
41  * atomic_add_char(P, V)	(*(u_char*)(P) += (V))
42  * atomic_subtract_char(P, V)	(*(u_char*)(P) -= (V))
43  *
44  * atomic_set_short(P, V)	(*(u_short*)(P) |= (V))
45  * atomic_clear_short(P, V)	(*(u_short*)(P) &= ~(V))
46  * atomic_add_short(P, V)	(*(u_short*)(P) += (V))
47  * atomic_subtract_short(P, V)	(*(u_short*)(P) -= (V))
48  *
49  * atomic_set_int(P, V)		(*(u_int*)(P) |= (V))
50  * atomic_clear_int(P, V)	(*(u_int*)(P) &= ~(V))
51  * atomic_add_int(P, V)		(*(u_int*)(P) += (V))
52  * atomic_subtract_int(P, V)	(*(u_int*)(P) -= (V))
53  *
54  * atomic_set_long(P, V)	(*(u_long*)(P) |= (V))
55  * atomic_clear_long(P, V)	(*(u_long*)(P) &= ~(V))
56  * atomic_add_long(P, V)	(*(u_long*)(P) += (V))
57  * atomic_subtract_long(P, V)	(*(u_long*)(P) -= (V))
58  * atomic_readandclear_long(P)	(return (*(u_long*)(P)); *(u_long*)(P) = 0;)
59  * atomic_readandclear_int(P)	(return (*(u_int*)(P)); *(u_int*)(P) = 0;)
60  */
61 
62 /*
63  * The above functions are expanded inline in the statically-linked
64  * kernel and lock prefixes are generated.
65  *
66  * Kernel modules call real functions which are built into the kernel.
67  */
68 #if defined(KLD_MODULE)
/*
 * Kernel-module flavour of ATOMIC_ASM: expands to extern prototypes for
 * atomic_<NAME>_<TYPE>() and atomic_<NAME>_<TYPE>_nonlocked() only.
 * OP, CONS and V are accepted so the invocations are shared with the
 * inline flavour, but they are not used in this expansion.
 */
69 #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
70 	extern void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \
71 	extern void atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v);
72 
/*
 * Bit test-and-set / test-and-clear primitives.  Under KLD_MODULE only
 * these prototypes are visible; p is the target word and v the bit index.
 */
73 int	atomic_testandset_int(volatile u_int *p, u_int v);
74 int	atomic_testandset_long(volatile u_long *p, u_long v);
75 int	atomic_testandclear_int(volatile u_int *p, u_int v);
76 int	atomic_testandclear_long(volatile u_long *p, u_long v);
77 
78 #else /* !KLD_MODULE */
/* Instruction prefix string pasted in front of the MP-safe asm templates. */
79 #define MPLOCKED	"lock ; "
80 
81 /*
82  * The assembly is volatilized to demark potential before-and-after side
83  * effects if an interrupt or SMP collision were to occur.  The primary
84  * atomic instructions are MP safe, the nonlocked instructions are
85  * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an
86  * atomic instruction).
87  *
88  * +m - memory is read and written (=m - memory is only written)
89  * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg)
90  *	(Note: byte instructions only work on %ax,%bx,%cx, or %dx).  iq
91  *	is good enough for our needs so don't get fancy.
92  * r  - any register.
93  *
94  * NOTE: 64-bit immediate values are not supported for most x86-64
95  *	 instructions so we have to use "r".
96  */
97 
98 /* egcs 1.1.2+ version */
/*
 * Inline flavour of ATOMIC_ASM.  Each invocation generates two functions:
 *
 *   atomic_<NAME>_<TYPE>(p, v)            - OP issued with the MPLOCKED prefix
 *   atomic_<NAME>_<TYPE>_nonlocked(p, v)  - the same OP without the prefix
 *
 * OP is the instruction template (%0 = *p, %1 = the value operand),
 * CONS is the constraint string for the value operand and V is the
 * expression, written in terms of the parameter v, that is passed to it.
 */
99 #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
100 static __inline void					\
101 atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
102 {							\
103 	__asm __volatile(MPLOCKED OP			\
104 			 : "+m" (*p)			\
105 			 : CONS (V)); 			\
106 }							\
107 static __inline void					\
108 atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)\
109 {							\
110 	__asm __volatile(OP				\
111 			 : "+m" (*p)			\
112 			 : CONS (V)); 			\
113 }
114 
115 #endif /* KLD_MODULE */
116 
117 /* egcs 1.1.2+ version */
/*
 * Instantiate set (or), clear (and with ~v), add and subtract for the
 * char, short, int and long widths.  The char/short/int forms use the
 * "iq" constraint; the long forms use "r" (see the note above about
 * 64-bit immediates).
 */
118 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",   v)
119 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq",   ~v)
120 ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",   v)
121 ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",   v)
122 
123 ATOMIC_ASM(set,	     short, "orw %w1,%0",  "iq",   v)
124 ATOMIC_ASM(clear,    short, "andw %w1,%0", "iq",  ~v)
125 ATOMIC_ASM(add,	     short, "addw %w1,%0", "iq",   v)
126 ATOMIC_ASM(subtract, short, "subw %w1,%0", "iq",   v)
127 
128 ATOMIC_ASM(set,	     int,   "orl %1,%0",  "iq",   v)
129 ATOMIC_ASM(clear,    int,   "andl %1,%0", "iq",  ~v)
130 ATOMIC_ASM(add,	     int,   "addl %1,%0", "iq",   v)
131 ATOMIC_ASM(subtract, int,   "subl %1,%0", "iq",   v)
132 
133 ATOMIC_ASM(set,	     long,  "orq %1,%0",  "r",   v)
134 ATOMIC_ASM(clear,    long,  "andq %1,%0", "r",  ~v)
135 ATOMIC_ASM(add,	     long,  "addq %1,%0", "r",   v)
136 ATOMIC_ASM(subtract, long,  "subq %1,%0", "r",   v)
137 
138 #if defined(KLD_MODULE)
139 
/* Read-and-clear primitives; prototypes only when built as a kernel module. */
140 u_long	atomic_readandclear_long(volatile u_long *addr);
141 u_int	atomic_readandclear_int(volatile u_int *addr);
142 
143 #else /* !KLD_MODULE */
144 
/*
 * Exchange *addr with zero in a single xchgq and return the value that
 * *addr held before the exchange.
 */
static __inline u_long
atomic_readandclear_long(volatile u_long *addr)
{
	u_long prev = 0;		/* zero goes in, old value comes out */

	__asm __volatile(
	"	xchgq	%1,%0 ;		"
	"# atomic_readandclear_long"
	: "+r" (prev),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (prev);
}
160 
/*
 * Exchange *addr with zero in a single xchgl and return the value that
 * *addr held before the exchange.
 */
static __inline u_int
atomic_readandclear_int(volatile u_int *addr)
{
	u_int prev = 0;			/* zero goes in, old value comes out */

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_readandclear_int"
	: "+r" (prev),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (prev);
}
176 
177 #endif /* KLD_MODULE */
178 
179 /*
180  * atomic_poll_acquire_int(P)	Returns non-zero on success, 0 if the lock
181  *				has already been acquired.
182  * atomic_poll_release_int(P)
183  *
184  * These support the NDIS driver and are also used for IPIQ interlocks
185  * between cpus.  Both the acquisition and release must be
186  * cache-synchronizing instructions.
187  */
188 
189 #if defined(KLD_MODULE)
190 
/*
 * Swap and poll-interlock primitives; kernel modules link against the
 * out-of-line copies declared here.
 */
191 extern int atomic_swap_int(volatile int *addr, int value);
192 extern long atomic_swap_long(volatile long *addr, long value);
193 extern void *atomic_swap_ptr(volatile void **addr, void *value);
194 extern int atomic_poll_acquire_int(volatile u_int *p);
195 extern void atomic_poll_release_int(volatile u_int *p);
196 
197 #else
198 
/*
 * Store value into *addr with xchgl and return what *addr held before.
 */
static __inline int
atomic_swap_int(volatile int *addr, int value)
{
	int prev = value;

	__asm __volatile("xchgl %0, %1"
			 : "+r" (prev), "+m" (*addr)
			 :
			 : "memory");
	return (prev);
}
206 
/*
 * Store value into *addr with xchgq and return what *addr held before.
 */
static __inline long
atomic_swap_long(volatile long *addr, long value)
{
	long prev = value;

	__asm __volatile("xchgq %0, %1"
			 : "+r" (prev), "+m" (*addr)
			 :
			 : "memory");
	return (prev);
}
214 
/*
 * Store the pointer value into *addr with xchgq and return the pointer
 * that *addr held before.
 */
static __inline void *
atomic_swap_ptr(volatile void **addr, void *value)
{
	void *prev = value;

	__asm __volatile("xchgq %0, %1"
			 : "+r" (prev), "+m" (*addr)
			 :
			 : "memory");
	return (prev);
}
222 
/*
 * Try to take the poll interlock by setting bit 0 of *p with a locked
 * btsl.  setnc turns "bit was previously clear" into 1, so the result is
 * non-zero when the caller acquired the interlock and 0 when bit 0 was
 * already set.
 */
static __inline int
atomic_poll_acquire_int(volatile u_int *p)
{
	u_int acquired;

	__asm __volatile(MPLOCKED "btsl $0,%0; "
				  "setnc %%al; "
				  "andl $255,%%eax"
			 : "+m" (*p), "=a" (acquired));
	return (acquired);
}
231 
/*
 * Drop the poll interlock by clearing bit 0 of *p with a locked btrl.
 * The other bits of *p are left untouched.
 */
static __inline void
atomic_poll_release_int(volatile u_int *p)
{
	__asm __volatile(MPLOCKED "btrl $0,%0"
			 : "+m" (*p));
}
237 
238 #endif
239 
240 /*
241  * These functions operate on a 32 bit interrupt interlock which is defined
242  * as follows:
243  *
244  *	bit 0-29	interrupt handler wait counter
245  *	bit 30		interrupt handler disabled bit
246  *	bit 31		interrupt handler currently running bit (1 = run)
247  *
248  * atomic_intr_cond_test(P)	Determine if the interlock is in an
249  *				acquired state.  Returns 0 if it is not
250  *				acquired, non-zero if it is. (not MPLOCKed)
251  *
252  * atomic_intr_cond_try(P) 	Attempt to set bit 31 to acquire the
253  *				interlock.  If we are unable to set bit 31
254  *				we return 1, otherwise we return 0.
255  *
256  * atomic_intr_cond_enter(P, func, arg)
257  *				Attempt to set bit 31 to acquire the
258  *				interlock.  If we are unable to set bit 31,
259  *				the wait counter is incremented and func(arg)
260  *				is called in a loop until we are able to set
261  *				bit 31.  Once we set bit 31, the wait counter
262  *				is decremented.
263  *
264  * atomic_intr_cond_exit(P, func, arg)
265  *				Clear bit 31.  If the wait counter is still
266  *				non-zero call func(arg) once.
267  *
268  * atomic_intr_handler_disable(P)
269  *				Set bit 30, indicating that the interrupt
270  *				handler has been disabled.  Must be called
271  *				after the hardware is disabled.
272  *
273  *				Returns bit 31 indicating whether a serialized
274  *				accessor is active (typically the interrupt
275  *				handler is running).  0 == not active,
276  *				non-zero == active.
277  *
278  * atomic_intr_handler_enable(P)
279  *				Clear bit 30, indicating that the interrupt
280  *				handler has been enabled.  Must be called
281  *				before the hardware is actually enabled.
282  *
283  * atomic_intr_handler_is_enabled(P)
284  *				Returns bit 30, 0 indicates that the handler
285  *				is enabled, non-zero indicates that it is
286  *				disabled.  The request counter portion of
287  *				the field is ignored. (not MPLOCKed)
288  *
289  * atomic_intr_cond_inc(P)	Increment wait counter by 1.
290  * atomic_intr_cond_dec(P)	Decrement wait counter by 1.
291  */
292 
293 #if defined(KLD_MODULE)
294 
/*
 * Interrupt-interlock API as seen by kernel modules: prototypes only,
 * matching the inline versions used when KLD_MODULE is not defined.
 */
295 void atomic_intr_init(__atomic_intr_t *p);
296 int atomic_intr_handler_disable(__atomic_intr_t *p);
297 void atomic_intr_handler_enable(__atomic_intr_t *p);
298 int atomic_intr_handler_is_enabled(__atomic_intr_t *p);
299 int atomic_intr_cond_test(__atomic_intr_t *p);
300 int atomic_intr_cond_try(__atomic_intr_t *p);
301 void atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg);
302 void atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg);
303 void atomic_intr_cond_inc(__atomic_intr_t *p);
304 void atomic_intr_cond_dec(__atomic_intr_t *p);
305 
306 #else
307 
308 static __inline void
309 atomic_intr_init(__atomic_intr_t *p)
310 {
311 	*p = 0;
312 }
313 
314 static __inline int
315 atomic_intr_handler_disable(__atomic_intr_t *p)
316 {
317 	int data;
318 
319 	__asm __volatile(MPLOCKED "orl $0x40000000,%1; movl %1,%%eax; " \
320 				  "andl $0x80000000,%%eax" \
321 				  : "=a"(data) , "+m"(*p));
322 	return(data);
323 }
324 
325 static __inline void
326 atomic_intr_handler_enable(__atomic_intr_t *p)
327 {
328 	__asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p));
329 }
330 
331 static __inline int
332 atomic_intr_handler_is_enabled(__atomic_intr_t *p)
333 {
334 	int data;
335 
336 	__asm __volatile("movl %1,%%eax; andl $0x40000000,%%eax" \
337 			 : "=a"(data) : "m"(*p));
338 	return(data);
339 }
340 
341 static __inline void
342 atomic_intr_cond_inc(__atomic_intr_t *p)
343 {
344 	__asm __volatile(MPLOCKED "incl %0" : "+m" (*p));
345 }
346 
347 static __inline void
348 atomic_intr_cond_dec(__atomic_intr_t *p)
349 {
350 	__asm __volatile(MPLOCKED "decl %0" : "+m" (*p));
351 }
352 
/*
 * Acquire the interlock, calling func(arg) while it is contended.
 *
 * Flow of the asm below:
 *   - locked btsl on bit 31; if the bit was clear (carry clear) jump to
 *     label 3 and return with the interlock held.
 *   - otherwise bump the word with a locked incl, then loop at label 1:
 *     retry the locked btsl; on failure load arg into %rdi and call func
 *     through the register holding it, then retry.
 *   - once the btsl succeeds (label 2) undo the increment with a locked
 *     decl.
 *
 * The clobber list names the registers the asm declares as destroyed by
 * the call; see the YYY note about whether that list is complete.
 */
353 static __inline void
354 atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg)
355 {
356 	__asm __volatile(MPLOCKED "btsl $31,%0; jnc 3f; " \
357 			 MPLOCKED "incl %0; " \
358 			 "1: ;" \
359 			 MPLOCKED "btsl $31,%0; jnc 2f; " \
360 			 "movq %2,%%rdi; call *%1; " \
361 			 "jmp 1b; " \
362 			 "2: ;" \
363 			 MPLOCKED "decl %0; " \
364 			 "3: ;" \
365 			 : "+m" (*p) \
366 			 : "r"(func), "m"(arg) \
367 			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
368 		/* YYY the function call may clobber even more registers? */
369 }
370 
371 /*
372  * Attempt to enter the interrupt condition variable.  Returns zero on
373  * success, 1 on failure.
374  */
375 static __inline int
376 atomic_intr_cond_try(__atomic_intr_t *p)
377 {
378 	int ret;
379 
380 	__asm __volatile("subl %%eax,%%eax; "			\
381 			 MPLOCKED "btsl $31,%0; jnc 2f; "	\
382 			 "movl $1,%%eax;"			\
383 			 "2: ;"
384 			 : "+m" (*p), "=&a"(ret)
385                          : : "cx", "dx");
386 	return (ret);
387 }
388 
389 
390 static __inline int
391 atomic_intr_cond_test(__atomic_intr_t *p)
392 {
393 	return((int)(*p & 0x80000000));
394 }
395 
/*
 * Release the interlock and notify a waiter if there is one.
 *
 * A locked btrl clears bit 31.  The low 30 bits (0x3FFFFFFF) are then
 * tested without a lock prefix; if any are set, arg is loaded into %rdi
 * and func is called once through the register holding it.
 *
 * The clobber list names the registers the asm declares as destroyed by
 * the call; see the YYY note about whether that list is complete.
 */
396 static __inline void
397 atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg)
398 {
399 	__asm __volatile(MPLOCKED "btrl $31,%0; " \
400 			 "testl $0x3FFFFFFF,%0; jz 1f; " \
401 			 "movq %2,%%rdi; call *%1; " \
402 			 "1: ;" \
403 			 : "+m" (*p) \
404 			 : "r"(func), "m"(arg) \
405 			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
406 		/* YYY the function call may clobber even more registers? */
407 }
408 
409 #endif
410 
411 /*
412  * Atomic compare and set
413  *
414  * if (*_dst == _old) *_dst = _new (16, 32 and 64 bit variants)
415  *
416  * The cmpset and _test forms return 0 on failure and non-zero on success;
417  * atomic_cmpxchg_int returns the prior *_dst.  The inlines let the compiler
418  * optimize the common case of a call made from inside a conditional.
419  */
420 #if defined(KLD_MODULE)
421 
/*
 * Compare-and-exchange and fetch-and-add primitives; prototypes only
 * when built as a kernel module.
 */
422 extern int atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new);
423 extern int atomic_cmpxchg_long_test(volatile u_long *_dst, u_long _old, u_long _new);
424 extern int atomic_cmpset_short(volatile u_short *_dst,
425 	u_short _old, u_short _new);
426 extern int atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new);
427 extern int atomic_cmpset_long(volatile u_long *_dst, u_long _exp, u_long _src);
428 extern u_int atomic_fetchadd_int(volatile u_int *_p, u_int _v);
429 extern u_long atomic_fetchadd_long(volatile u_long *_p, u_long _v);
430 
431 #else
432 
/*
 * Locked 32-bit compare-and-exchange: if *_dst equals _old it is replaced
 * by _new.  Unlike the cmpset family this returns the value *_dst held
 * before the instruction (equal to _old exactly when the exchange
 * happened), converted to int.
 */
static __inline int
atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int prev = _old;	/* comparand in, previous value out */

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; "
			 : "+a" (prev), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return ((int)prev);
}
444 
/*
 * Locked 64-bit compare-and-exchange returning a success flag.
 *
 * If *_dst equals _old it is replaced by _new.  setz captures the result
 * of the comparison, so the function returns 1 when the exchange took
 * place and 0 when it did not.
 *
 * The comparand must live in a full 64-bit accumulator: holding it in a
 * 32-bit temporary truncates _old, which makes cmpxchgq compare against
 * the wrong value whenever _old or *_dst has any of the upper 32 bits set.
 */
static __inline int
atomic_cmpxchg_long_test(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;	/* %rax: comparand in, 0/1 flag out */

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
				  " setz %%al;"
				  " movsbq %%al,%%rax"
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory", "cc");
	return ((int)res);
}
458 
/*
 * Locked 16-bit compare-and-set: if *_dst equals _old it is replaced by
 * _new.  Returns non-zero when the exchange happened, 0 otherwise.
 */
static __inline int
atomic_cmpset_short(volatile u_short *_dst, u_short _old, u_short _new)
{
	u_short prev = _old;	/* comparand in, previous value out */
	int swapped;

	__asm __volatile(MPLOCKED "cmpxchgw %w2,%1; "
			 : "+a" (prev), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	swapped = (prev == _old);
	return (swapped);
}
470 
/*
 * Locked 32-bit compare-and-set: if *_dst equals _old it is replaced by
 * _new.  Returns non-zero when the exchange happened, 0 otherwise.
 */
static __inline int
atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int prev = _old;	/* comparand in, previous value out */
	int swapped;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; "
			 : "+a" (prev), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	swapped = (prev == _old);
	return (swapped);
}
482 
/*
 * Locked 64-bit compare-and-set: if *_dst equals _old it is replaced by
 * _new.  Returns non-zero when the exchange happened, 0 otherwise.
 */
static __inline int
atomic_cmpset_long(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long prev = _old;	/* comparand in, previous value out */
	int swapped;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
			 : "+a" (prev), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	swapped = (prev == _old);
	return (swapped);
}
494 
495 /*
496  * Atomically add the value of v to the integer pointed to by p and return
497  * the previous value of *p.
498  */
static __inline u_int
atomic_fetchadd_int(volatile u_int *_p, u_int _v)
{
	u_int prev = _v;	/* addend in, previous *_p out (xadd) */

	__asm __volatile(MPLOCKED "xaddl %0,%1; "
			 : "+r" (prev), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (prev);
}
508 
/*
 * Atomically add _v to the u_long pointed to by _p (locked xaddq) and
 * return the value *_p held before the addition.
 */
static __inline u_long
atomic_fetchadd_long(volatile u_long *_p, u_long _v)
{
	u_long prev = _v;	/* addend in, previous *_p out (xadd) */

	__asm __volatile(MPLOCKED "xaddq %0,%1; "
			 : "+r" (prev), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (prev);
}
518 
/*
 * Atomically set bit (v & 0x1f) of *p with a locked btsl.  Returns the
 * previous state of that bit: 1 if it was already set, 0 if it was clear.
 */
static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char was_set;		/* carry flag captured by setc */

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (was_set),		/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (was_set);
}
535 
/*
 * Atomically set bit (v & 0x3f) of the 64-bit word *p with a locked btsq.
 * Returns the previous state of that bit: 1 if it was already set, 0 if
 * it was clear.
 *
 * The bit index may be 0..63, so the immediate alternative uses the "J"
 * constraint (0..63) rather than "I" (0..31); indexes that are not
 * compile-time constants are passed in a register.
 */
static __inline int
atomic_testandset_long(volatile u_long *p, u_long v)
{
	u_char res;		/* carry flag captured by setc */

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Jr" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}
552 
/*
 * Atomically clear bit (v & 0x1f) of *p with a locked btrl.  Returns the
 * previous state of that bit: 1 if it was set, 0 if it was already clear.
 */
static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char was_set;		/* carry flag captured by setc */

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_int"
	: "=q" (was_set),		/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (was_set);
}
569 
/*
 * Atomically clear bit (v & 0x3f) of the 64-bit word *p with a locked
 * btrq.  Returns the previous state of that bit: 1 if it was set, 0 if
 * it was already clear.
 *
 * The bit index may be 0..63, so the immediate alternative uses the "J"
 * constraint (0..63) rather than "I" (0..31); indexes that are not
 * compile-time constants are passed in a register.
 */
static __inline int
atomic_testandclear_long(volatile u_long *p, u_long v)
{
	u_char res;		/* carry flag captured by setc */

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Jr" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}
586 
587 #endif	/* KLD_MODULE */
588 
589 #if defined(KLD_MODULE)
590 
/*
 * Kernel-module flavour of ATOMIC_STORE_LOAD: expands to extern
 * prototypes for atomic_load_acq_<TYPE>() and atomic_store_rel_<TYPE>().
 * LOP and SOP are not used in this expansion.
 */
591 #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)			\
592 extern u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
593 extern void	atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v);
594 
595 #else /* !KLD_MODULE */
596 
/*
 * Inline flavour of ATOMIC_STORE_LOAD.  Each invocation generates:
 *
 *   atomic_load_acq_<TYPE>(p)     - runs LOP with the MPLOCKED prefix and
 *                                   returns the accumulator ("=a") result
 *   atomic_store_rel_<TYPE>(p, v) - runs SOP, with no explicit prefix, to
 *                                   place v in *p
 *
 * Both asm statements list *p as an output and as an input.  The macro
 * ends in "struct __hack" so that the semicolon written after each
 * invocation completes a declaration.
 */
597 #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
598 static __inline u_##TYPE				\
599 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
600 {							\
601 	u_##TYPE res; /* accumulator can be anything */	\
602 							\
603 	__asm __volatile(MPLOCKED LOP			\
604 	: "=a" (res),			/* 0 */		\
605 	  "=m" (*p)			/* 1 */		\
606 	: "m" (*p)			/* 2 */		\
607 	: "memory");					\
608 							\
609 	return (res);					\
610 }							\
611 							\
612 /*							\
613  * The XCHG instruction asserts LOCK automagically.	\
614  */							\
615 static __inline void					\
616 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
617 {							\
618 	__asm __volatile(SOP				\
619 	: "=m" (*p),			/* 0 */		\
620 	  "+r" (v)			/* 1 */		\
621 	: "m" (*p));			/* 2 */		\
622 }							\
623 struct __hack
624 
625 #endif /* !KLD_MODULE */
626 
/*
 * Instantiate the load_acq/store_rel pair for char, short, int and long.
 * The load template is a cmpxchg of the accumulator against *p and the
 * store template is an xchg of the value register with *p.  Both
 * generator macros are private to this header and are undefined afterwards.
 */
627 ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0");
628 ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
629 ATOMIC_STORE_LOAD(int,  "cmpxchgl %0,%1",  "xchgl %1,%0");
630 ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1",  "xchgq %1,%0");
631 
632 #undef ATOMIC_ASM
633 #undef ATOMIC_STORE_LOAD
634 
635 /* Acquire and release variants are identical to the normal ones. */
/*
 * Every _acq_/_rel_ name below is a plain alias for the unqualified
 * operation of the same width; no additional code is generated.
 */
636 #define	atomic_set_acq_char		atomic_set_char
637 #define	atomic_set_rel_char		atomic_set_char
638 #define	atomic_clear_acq_char		atomic_clear_char
639 #define	atomic_clear_rel_char		atomic_clear_char
640 #define	atomic_add_acq_char		atomic_add_char
641 #define	atomic_add_rel_char		atomic_add_char
642 #define	atomic_subtract_acq_char	atomic_subtract_char
643 #define	atomic_subtract_rel_char	atomic_subtract_char
644 
645 #define	atomic_set_acq_short		atomic_set_short
646 #define	atomic_set_rel_short		atomic_set_short
647 #define	atomic_clear_acq_short		atomic_clear_short
648 #define	atomic_clear_rel_short		atomic_clear_short
649 #define	atomic_add_acq_short		atomic_add_short
650 #define	atomic_add_rel_short		atomic_add_short
651 #define	atomic_subtract_acq_short	atomic_subtract_short
652 #define	atomic_subtract_rel_short	atomic_subtract_short
653 
654 #define	atomic_set_acq_int		atomic_set_int
655 #define	atomic_set_rel_int		atomic_set_int
656 #define	atomic_clear_acq_int		atomic_clear_int
657 #define	atomic_clear_rel_int		atomic_clear_int
658 #define	atomic_add_acq_int		atomic_add_int
659 #define	atomic_add_rel_int		atomic_add_int
660 #define	atomic_subtract_acq_int		atomic_subtract_int
661 #define	atomic_subtract_rel_int		atomic_subtract_int
662 #define	atomic_cmpset_acq_int		atomic_cmpset_int
663 #define	atomic_cmpset_rel_int		atomic_cmpset_int
664 
665 #define	atomic_set_acq_long		atomic_set_long
666 #define	atomic_set_rel_long		atomic_set_long
667 #define	atomic_clear_acq_long		atomic_clear_long
668 #define	atomic_clear_rel_long		atomic_clear_long
669 #define	atomic_add_acq_long		atomic_add_long
670 #define	atomic_add_rel_long		atomic_add_long
671 #define	atomic_subtract_acq_long	atomic_subtract_long
672 #define	atomic_subtract_rel_long	atomic_subtract_long
673 #define	atomic_cmpset_acq_long		atomic_cmpset_long
674 #define	atomic_cmpset_rel_long		atomic_cmpset_long
675 
676 /* cpumask_t is 64-bits on x86-64 */
/* The cpumask operations are therefore aliases for the u_long ones. */
677 #define atomic_set_cpumask		atomic_set_long
678 #define atomic_clear_cpumask		atomic_clear_long
679 #define atomic_cmpset_cpumask		atomic_cmpset_long
680 #define atomic_store_rel_cpumask	atomic_store_rel_long
681 #define atomic_load_acq_cpumask		atomic_load_acq_long
682 
683 /* Operations on 8-bit bytes. */
/* Width-named aliases that forward to the u_char operations. */
684 #define	atomic_set_8		atomic_set_char
685 #define	atomic_set_acq_8	atomic_set_acq_char
686 #define	atomic_set_rel_8	atomic_set_rel_char
687 #define	atomic_clear_8		atomic_clear_char
688 #define	atomic_clear_acq_8	atomic_clear_acq_char
689 #define	atomic_clear_rel_8	atomic_clear_rel_char
690 #define	atomic_add_8		atomic_add_char
691 #define	atomic_add_acq_8	atomic_add_acq_char
692 #define	atomic_add_rel_8	atomic_add_rel_char
693 #define	atomic_subtract_8	atomic_subtract_char
694 #define	atomic_subtract_acq_8	atomic_subtract_acq_char
695 #define	atomic_subtract_rel_8	atomic_subtract_rel_char
696 #define	atomic_load_acq_8	atomic_load_acq_char
697 #define	atomic_store_rel_8	atomic_store_rel_char
698 
699 /* Operations on 16-bit words. */
/* Width-named aliases that forward to the u_short operations. */
700 #define	atomic_set_16		atomic_set_short
701 #define	atomic_set_acq_16	atomic_set_acq_short
702 #define	atomic_set_rel_16	atomic_set_rel_short
703 #define	atomic_clear_16		atomic_clear_short
704 #define	atomic_clear_acq_16	atomic_clear_acq_short
705 #define	atomic_clear_rel_16	atomic_clear_rel_short
706 #define	atomic_add_16		atomic_add_short
707 #define	atomic_add_acq_16	atomic_add_acq_short
708 #define	atomic_add_rel_16	atomic_add_rel_short
709 #define	atomic_subtract_16	atomic_subtract_short
710 #define	atomic_subtract_acq_16	atomic_subtract_acq_short
711 #define	atomic_subtract_rel_16	atomic_subtract_rel_short
712 #define	atomic_load_acq_16	atomic_load_acq_short
713 #define	atomic_store_rel_16	atomic_store_rel_short
714 
715 /* Operations on 32-bit double words. */
/* Width-named aliases that forward to the u_int operations. */
716 #define	atomic_set_32		atomic_set_int
717 #define	atomic_set_acq_32	atomic_set_acq_int
718 #define	atomic_set_rel_32	atomic_set_rel_int
719 #define	atomic_clear_32		atomic_clear_int
720 #define	atomic_clear_acq_32	atomic_clear_acq_int
721 #define	atomic_clear_rel_32	atomic_clear_rel_int
722 #define	atomic_add_32		atomic_add_int
723 #define	atomic_add_acq_32	atomic_add_acq_int
724 #define	atomic_add_rel_32	atomic_add_rel_int
725 #define	atomic_subtract_32	atomic_subtract_int
726 #define	atomic_subtract_acq_32	atomic_subtract_acq_int
727 #define	atomic_subtract_rel_32	atomic_subtract_rel_int
728 #define	atomic_load_acq_32	atomic_load_acq_int
729 #define	atomic_store_rel_32	atomic_store_rel_int
730 #define	atomic_cmpset_32	atomic_cmpset_int
731 #define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
732 #define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
733 #define	atomic_readandclear_32	atomic_readandclear_int
734 #define	atomic_fetchadd_32	atomic_fetchadd_int
735 
736 /* Operations on 64-bit quad words. */
/* Width-named aliases that forward to the u_long / long operations. */
737 #define	atomic_load_acq_64	atomic_load_acq_long
738 #define	atomic_store_rel_64	atomic_store_rel_long
739 #define	atomic_swap_64		atomic_swap_long
740 #define	atomic_fetchadd_64	atomic_fetchadd_long
741 #define atomic_cmpset_64	atomic_cmpset_long
742 #define atomic_set_64		atomic_set_long
743 #define atomic_clear_64		atomic_clear_long
744 
745 /* Operations on pointers. */
/*
 * Each macro casts its pointer argument to (volatile u_long *) and its
 * value argument, where it has one, to u_long, then forwards to the
 * matching _long operation.
 */
746 #define atomic_set_ptr(p, v) \
747 	atomic_set_long((volatile u_long *)(p), (u_long)(v))
748 #define atomic_set_acq_ptr(p, v) \
749 	atomic_set_acq_long((volatile u_long *)(p), (u_long)(v))
750 #define atomic_set_rel_ptr(p, v) \
751 	atomic_set_rel_long((volatile u_long *)(p), (u_long)(v))
752 #define atomic_clear_ptr(p, v) \
753 	atomic_clear_long((volatile u_long *)(p), (u_long)(v))
754 #define atomic_clear_acq_ptr(p, v) \
755 	atomic_clear_acq_long((volatile u_long *)(p), (u_long)(v))
756 #define atomic_clear_rel_ptr(p, v) \
757 	atomic_clear_rel_long((volatile u_long *)(p), (u_long)(v))
758 #define atomic_add_ptr(p, v) \
759 	atomic_add_long((volatile u_long *)(p), (u_long)(v))
760 #define atomic_add_acq_ptr(p, v) \
761 	atomic_add_acq_long((volatile u_long *)(p), (u_long)(v))
762 #define atomic_add_rel_ptr(p, v) \
763 	atomic_add_rel_long((volatile u_long *)(p), (u_long)(v))
764 #define atomic_subtract_ptr(p, v) \
765 	atomic_subtract_long((volatile u_long *)(p), (u_long)(v))
766 #define atomic_subtract_acq_ptr(p, v) \
767 	atomic_subtract_acq_long((volatile u_long *)(p), (u_long)(v))
768 #define atomic_subtract_rel_ptr(p, v) \
769 	atomic_subtract_rel_long((volatile u_long *)(p), (u_long)(v))
770 #define atomic_load_acq_ptr(p) \
771 	atomic_load_acq_long((volatile u_long *)(p))
/*
 * Pointer-sized store: forwards to atomic_store_rel_long().  The value is
 * cast to u_long so that a pointer can be passed for v without an
 * integer-conversion diagnostic.
 */
#define atomic_store_rel_ptr(p, v) \
	atomic_store_rel_long((volatile u_long *)(p), (u_long)(v))
/*
 * Pointer-sized compare-and-set and read-and-clear: the destination is
 * cast to (volatile u_long *) and the old/new values to u_long before
 * forwarding to the matching _long operation.
 */
774 #define atomic_cmpset_ptr(dst, old, new) 				\
775 	atomic_cmpset_long((volatile u_long *)(dst), (u_long)(old),	\
776 				(u_long)(new))
777 #define atomic_cmpset_acq_ptr(dst, old, new)				\
778 	atomic_cmpset_acq_long((volatile u_long *)(dst), (u_long)(old), \
779 				(u_long)(new))
780 #define atomic_cmpset_rel_ptr(dst, old, new)				\
781 	atomic_cmpset_rel_long((volatile u_long *)(dst), (u_long)(old), \
782 				(u_long)(new))
783 #define atomic_readandclear_ptr(p)					\
784 	atomic_readandclear_long((volatile u_long *)(p))
785 
786 #endif /* ! _CPU_ATOMIC_H_ */
787