/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/i386/include/atomic.h,v 1.9.2.1 2000/07/07 00:38:47 obrien Exp $
 */
#ifndef _CPU_ATOMIC_H_
#define _CPU_ATOMIC_H_

#ifndef _SYS_TYPES_H_
#include <sys/types.h>
#endif

/*
 * Various simple arithmetic on memory which is atomic in the presence
 * of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char*)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char*)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char*)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char*)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short*)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short*)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short*)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short*)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int*)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int*)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int*)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int*)(P) -= (V))
 *
 * atomic_set_long(P, V)	(*(u_long*)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long*)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long*)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long*)(P) -= (V))
 * atomic_readandclear_long(P)	(return (*(u_long*)(P)); *(u_long*)(P) = 0;)
 * atomic_readandclear_int(P)	(return (*(u_int*)(P)); *(u_int*)(P) = 0;)
 */

/*
 * The above functions are expanded inline in the statically-linked
 * kernel and lock prefixes are generated.
 *
 * Kernel modules call real functions which are built into the kernel.
 */
#if defined(KLD_MODULE)

/*
 * Module build: ATOMIC_ASM only declares the four variants of each
 * operation (locked, _nonlocked, _xacquire, _xrelease).  OP, CONS and V
 * are unused here.
 */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
	extern void atomic_##NAME##_##TYPE			\
		(volatile u_##TYPE *p, u_##TYPE v);		\
	extern void atomic_##NAME##_##TYPE##_nonlocked		\
		(volatile u_##TYPE *p, u_##TYPE v);		\
	extern void atomic_##NAME##_##TYPE##_xacquire		\
		(volatile u_##TYPE *p, u_##TYPE v);		\
	extern void atomic_##NAME##_##TYPE##_xrelease		\
		(volatile u_##TYPE *p, u_##TYPE v);

int atomic_testandset_int(volatile u_int *p, u_int v);
int atomic_testandset_long(volatile u_long *p, u_long v);
int atomic_testandclear_int(volatile u_int *p, u_int v);
int atomic_testandclear_long(volatile u_long *p, u_long v);

#else /* !KLD_MODULE */

/*
 * locked bus cycle
 * lock elision (backwards compatible)
 */
#define MPLOCKED	"lock ; "
#define XACQUIRE	"repne; "	/* lock elision */
#define XRELEASE	"repe; "	/* lock elision */

/*
 * The assembly is volatilized to demark potential before-and-after side
 * effects if an interrupt or SMP collision were to occur.  The primary
 * atomic instructions are MP safe, the nonlocked instructions are
 * local-interrupt-safe (so we don't depend on C 'X |= Y' generating an
 * atomic instruction).
 *
 * +m - memory is read and written (=m - memory is only written)
 * iq - integer constant or %ax/%bx/%cx/%dx (ir = int constant or any reg)
 *	(Note: byte instructions only work on %ax,%bx,%cx, or %dx).  iq
 *	is good enough for our needs so don't get fancy.
 * r  - any register.
 *
 * NOTE: 64-bit immediate values are not supported for most x86-64
 *	 instructions so we have to use "r".
 */

/*
 * egcs 1.1.2+ version
 *
 * Generates four inline functions per (NAME, TYPE) pair, all applying
 * the instruction OP to *p with operand V (constraint CONS):
 *
 *	atomic_NAME_TYPE		lock prefix
 *	atomic_NAME_TYPE_xacquire	XACQUIRE + lock prefix
 *	atomic_NAME_TYPE_xrelease	XRELEASE + lock prefix
 *	atomic_NAME_TYPE_nonlocked	no prefix
 */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
static __inline void						\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{								\
	__asm __volatile(MPLOCKED OP				\
			 : "+m" (*p)				\
			 : CONS (V));				\
}								\
static __inline void						\
atomic_##NAME##_##TYPE##_xacquire(volatile u_##TYPE *p, u_##TYPE v)\
{								\
	__asm __volatile(XACQUIRE MPLOCKED OP			\
			 : "+m" (*p)				\
			 : CONS (V));				\
}								\
static __inline void						\
atomic_##NAME##_##TYPE##_xrelease(volatile u_##TYPE *p, u_##TYPE v)\
{								\
	__asm __volatile(XRELEASE MPLOCKED OP			\
			 : "+m" (*p)				\
			 : CONS (V));				\
}								\
static __inline void						\
atomic_##NAME##_##TYPE##_nonlocked(volatile u_##TYPE *p, u_##TYPE v)\
{								\
	__asm __volatile(OP					\
			 : "+m" (*p)				\
			 : CONS (V));				\
}

#endif /* KLD_MODULE */

/* egcs 1.1.2+ version */
ATOMIC_ASM(set,	     char,  "orb %b1,%0",   "iq",   v)
ATOMIC_ASM(clear,    char,  "andb %b1,%0",  "iq",  ~v)
ATOMIC_ASM(add,	     char,  "addb %b1,%0",  "iq",   v)
ATOMIC_ASM(subtract, char,  "subb %b1,%0",  "iq",   v)

ATOMIC_ASM(set,	     short, "orw %w1,%0",   "iq",   v)
ATOMIC_ASM(clear,    short, "andw %w1,%0",  "iq",  ~v)
ATOMIC_ASM(add,	     short, "addw %w1,%0",  "iq",   v)
ATOMIC_ASM(subtract, short, "subw %w1,%0",  "iq",   v)

ATOMIC_ASM(set,	     int,   "orl %1,%0",   "iq",   v)
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "iq",  ~v)
ATOMIC_ASM(add,	     int,   "addl %1,%0",  "iq",   v)
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "iq",   v)

ATOMIC_ASM(set,	     long,  "orq %1,%0",   "r",   v)
ATOMIC_ASM(clear,    long,  "andq %1,%0",  "r",  ~v)
ATOMIC_ASM(add,	     long,  "addq %1,%0",  "r",   v)
ATOMIC_ASM(subtract, long,  "subq %1,%0",  "r",   v)

#if defined(KLD_MODULE)

u_long	atomic_readandclear_long(volatile u_long *addr);
u_int	atomic_readandclear_int(volatile u_int *addr);

#else /* !KLD_MODULE */

/*
 * Exchange *addr with zero and return the value that was stored there.
 */
static __inline u_long
atomic_readandclear_long(volatile u_long *addr)
{
	u_long res;

	res = 0;
	__asm __volatile(
	"	xchgq	%1,%0 ;		"
	"# atomic_readandclear_long"
	: "+r" (res),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (res);
}

/*
 * 32-bit variant of atomic_readandclear_long().
 */
static __inline u_int
atomic_readandclear_int(volatile u_int *addr)
{
	u_int res;

	res = 0;
	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_readandclear_int"
	: "+r" (res),			/* 0 */
	  "=m" (*addr)			/* 1 */
	: "m" (*addr));

	return (res);
}

#endif /* KLD_MODULE */

/*
 * atomic_poll_acquire_int(P)	Returns non-zero on success, 0 if the lock
 *				has already been acquired.
 * atomic_poll_release_int(P)
 *
 * These support the NDIS driver and are also used for IPIQ interlocks
 * between cpus.  Both the acquisition and release must be
 * cache-synchronizing instructions.
 */

#if defined(KLD_MODULE)

extern int atomic_swap_int(volatile int *addr, int value);
extern long atomic_swap_long(volatile long *addr, long value);
extern void *atomic_swap_ptr(volatile void **addr, void *value);
extern int atomic_poll_acquire_int(volatile u_int *p);
extern void atomic_poll_release_int(volatile u_int *p);

#else

/*
 * atomic_swap_{int,long,ptr}: store value into *addr with xchg and
 * return the previous contents of *addr.
 */
static __inline int
atomic_swap_int(volatile int *addr, int value)
{
	__asm __volatile("xchgl %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

static __inline long
atomic_swap_long(volatile long *addr, long value)
{
	__asm __volatile("xchgq %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

static __inline void *
atomic_swap_ptr(volatile void **addr, void *value)
{
	__asm __volatile("xchgq %0, %1" :
	    "=r" (value), "=m" (*addr) : "0" (value) : "memory");
	return (value);
}

/*
 * Locked test-and-set of bit 0 of *p.  The result is 1 when the bit was
 * previously clear (setnc) and 0 when it was already set.
 */
static __inline int
atomic_poll_acquire_int(volatile u_int *p)
{
	u_int data;

	__asm __volatile(MPLOCKED "btsl $0,%0; setnc %%al; andl $255,%%eax" : "+m" (*p), "=a" (data));
	return(data);
}

/*
 * Locked clear of bit 0 of *p.
 */
static __inline void
atomic_poll_release_int(volatile u_int *p)
{
	__asm __volatile(MPLOCKED "btrl $0,%0" : "+m" (*p));
}

#endif

/*
 * These functions operate on a 32 bit interrupt interlock which is defined
 * as follows:
 *
 *	bit 0-29	interrupt handler wait counter
 *	bit 30		interrupt handler disabled bit
 *	bit 31		interrupt handler currently running bit (1 = run)
 *
 * atomic_intr_cond_test(P)	Determine if the interlock is in an
 *				acquired state.  Returns 0 if it is not
 *				acquired, non-zero if it is. (not MPLOCKed)
 *
 * atomic_intr_cond_try(P)	Attempt to set bit 31 to acquire the
 *				interlock.  If we are unable to set bit 31
 *				we return 1, otherwise we return 0.
 *
 * atomic_intr_cond_enter(P, func, arg)
 *				Attempt to set bit 31 to acquire the
 *				interlock.  If we are unable to set bit 31,
 *				the wait counter is incremented and func(arg)
 *				is called in a loop until we are able to set
 *				bit 31.  Once we set bit 31, wait counter
 *				is decremented.
 *
 * atomic_intr_cond_exit(P, func, arg)
 *				Clear bit 31.  If the wait counter is still
 *				non-zero call func(arg) once.
 *
 * atomic_intr_handler_disable(P)
 *				Set bit 30, indicating that the interrupt
 *				handler has been disabled.  Must be called
 *				after the hardware is disabled.
 *
 *				Returns bit 31 indicating whether a serialized
 *				accessor is active (typically the interrupt
 *				handler is running).  0 == not active,
 *				non-zero == active.
 *
 * atomic_intr_handler_enable(P)
 *				Clear bit 30, indicating that the interrupt
 *				handler has been enabled.  Must be called
 *				before the hardware is actually enabled.
 *
 * atomic_intr_handler_is_enabled(P)
 *				Returns bit 30, 0 indicates that the handler
 *				is enabled, non-zero indicates that it is
 *				disabled.  The request counter portion of
 *				the field is ignored. (not MPLOCKed)
 *
 * atomic_intr_cond_inc(P)	Increment wait counter by 1.
 * atomic_intr_cond_dec(P)	Decrement wait counter by 1.
 */

#if defined(KLD_MODULE)

void atomic_intr_init(__atomic_intr_t *p);
int atomic_intr_handler_disable(__atomic_intr_t *p);
void atomic_intr_handler_enable(__atomic_intr_t *p);
int atomic_intr_handler_is_enabled(__atomic_intr_t *p);
int atomic_intr_cond_test(__atomic_intr_t *p);
int atomic_intr_cond_try(__atomic_intr_t *p);
void atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg);
void atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg);
void atomic_intr_cond_inc(__atomic_intr_t *p);
void atomic_intr_cond_dec(__atomic_intr_t *p);

#else

/* Reset the interlock word to zero (plain store). */
static __inline void
atomic_intr_init(__atomic_intr_t *p)
{
	*p = 0;
}

/*
 * The OR that sets bit 30 is locked; the read-back of bit 31 is a
 * separate, unlocked load that follows it.
 */
static __inline int
atomic_intr_handler_disable(__atomic_intr_t *p)
{
	int data;

	__asm __volatile(MPLOCKED "orl $0x40000000,%1; movl %1,%%eax; "
			 "andl $0x80000000,%%eax"
			 : "=a"(data) , "+m"(*p));
	return(data);
}

static __inline void
atomic_intr_handler_enable(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "andl $0xBFFFFFFF,%0" : "+m" (*p));
}

static __inline int
atomic_intr_handler_is_enabled(__atomic_intr_t *p)
{
	int data;

	__asm __volatile("movl %1,%%eax; andl $0x40000000,%%eax"
			 : "=a"(data) : "m"(*p));
	return(data);
}

static __inline void
atomic_intr_cond_inc(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "incl %0" : "+m" (*p));
}

static __inline void
atomic_intr_cond_dec(__atomic_intr_t *p)
{
	__asm __volatile(MPLOCKED "decl %0" : "+m" (*p));
}

/*
 * Label 3 is the uncontended exit; labels 1/2 bracket the wait loop
 * that calls func(arg) between retries.
 */
static __inline void
atomic_intr_cond_enter(__atomic_intr_t *p, void (*func)(void *), void *arg)
{
	__asm __volatile(MPLOCKED "btsl $31,%0; jnc 3f; "
			 MPLOCKED "incl %0; "
			 "1: ;"
			 MPLOCKED "btsl $31,%0; jnc 2f; "
			 "movq %2,%%rdi; call *%1; "
			 "jmp 1b; "
			 "2: ;"
			 MPLOCKED "decl %0; "
			 "3: ;"
			 : "+m" (*p)
			 : "r"(func), "m"(arg)
			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
		/* YYY the function call may clobber even more registers? */
}

/*
 * Attempt to enter the interrupt condition variable.  Returns zero on
 * success, 1 on failure.
 */
static __inline int
atomic_intr_cond_try(__atomic_intr_t *p)
{
	int ret;

	__asm __volatile("subl %%eax,%%eax; "
			 MPLOCKED "btsl $31,%0; jnc 2f; "
			 "movl $1,%%eax;"
			 "2: ;"
			 : "+m" (*p), "=&a"(ret)
			 : : "cx", "dx");
	return (ret);
}


static __inline int
atomic_intr_cond_test(__atomic_intr_t *p)
{
	return((int)(*p & 0x80000000));
}

static __inline void
atomic_intr_cond_exit(__atomic_intr_t *p, void (*func)(void *), void *arg)
{
	__asm __volatile(MPLOCKED "btrl $31,%0; "
			 "testl $0x3FFFFFFF,%0; jz 1f; "
			 "movq %2,%%rdi; call *%1; "
			 "1: ;"
			 : "+m" (*p)
			 : "r"(func), "m"(arg)
			 : "ax", "cx", "dx", "rsi", "rdi", "r8", "r9", "r10", "r11");
		/* YYY the function call may clobber even more registers? */
}

#endif

/*
 * Atomic compare and set
 *
 * if (*_dst == _old) *_dst = _new (all 32 bit words)
 *
 * Returns 0 on failure, non-zero on success.  The inline is designed to
 * allow the compiler to optimize the common case where the caller calls
 * these functions from inside a conditional.
 */
#if defined(KLD_MODULE)

extern int atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new);
extern int atomic_cmpxchg_long_test(volatile u_long *_dst,
			u_long _old, u_long _new);
extern int atomic_cmpset_short(volatile u_short *_dst,
			u_short _old, u_short _new);
extern int atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new);
extern int atomic_cmpset_int_xacquire(volatile u_int *_dst,
			u_int _old, u_int _new);
extern int atomic_cmpset_int_xrelease(volatile u_int *_dst,
			u_int _old, u_int _new);
extern int atomic_cmpset_long(volatile u_long *_dst, u_long _old, u_long _new);
extern int atomic_cmpset_long_xacquire(volatile u_long *_dst,
			u_long _old, u_long _new);
extern int atomic_cmpset_long_xrelease(volatile u_long *_dst,
			u_long _old, u_long _new);

extern int atomic_fcmpset_char(volatile u_char *_dst,
			u_char *_old, u_char _new);
extern int atomic_fcmpset_short(volatile u_short *_dst,
			u_short *_old, u_short _new);
extern int atomic_fcmpset_int(volatile u_int *_dst,
			u_int *_old, u_int _new);
extern int atomic_fcmpset_long(volatile u_long *_dst,
			u_long *_old, u_long _new);

extern u_int atomic_fetchadd_int(volatile u_int *_p, u_int _v);
extern u_int atomic_fetchadd_int_xacquire(volatile u_int *_p, u_int _v);
extern u_int atomic_fetchadd_int_xrelease(volatile u_int *_p, u_int _v);
extern u_long atomic_fetchadd_long(volatile u_long *_p, u_long _v);
extern u_long atomic_fetchadd_long_xacquire(volatile u_long *_p, u_long _v);
extern u_long atomic_fetchadd_long_xrelease(volatile u_long *_p, u_long _v);

#else

/*
 * Unlike the cmpset functions this returns the accumulator left behind
 * by cmpxchg, i.e. the value found in *_dst (equal to _old when the
 * exchange took place).
 */
static __inline int
atomic_cmpxchg_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res);
}

/*
 * 64-bit compare-and-exchange returning 1 if the exchange took place
 * and 0 otherwise (setz/movsbq convert ZF into the result).
 *
 * The accumulator must be a full u_long: cmpxchgq compares all of %rax
 * against *_dst, so a 32-bit temporary would truncate _old.
 */
static __inline int
atomic_cmpxchg_long_test(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
			 "    setz    %%al;"
			 "    movsbq	%%al,%%rax"
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return ((int)res);
}

static __inline int
atomic_cmpset_short(volatile u_short *_dst, u_short _old, u_short _new)
{
	u_short res = _old;

	__asm __volatile(MPLOCKED "cmpxchgw %w2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res == _old);
}

/*
 * atomic_fcmpset_*: like atomic_cmpset_* but the expected value is
 * passed by reference.  *_old is bound to the accumulator, so when the
 * compare fails it is overwritten with the value found in *_dst.
 * Returns non-zero on success, 0 on failure.
 */
static __inline int
atomic_fcmpset_char(volatile u_char *_dst, u_char *_old, u_char _new)
{
	u_char res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgb %2,%0; "
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_fcmpset_short(volatile u_short *_dst, u_short *_old, u_short _new)
{
	u_short res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgw %2,%0; "
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_cmpset_int(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_int(volatile u_int *_dst, u_int *_old, u_int _new)
{
	u_int res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgl %2,%0; "
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_cmpset_int_xacquire(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgl %2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_int_xrelease(volatile u_int *_dst, u_int _old, u_int _new)
{
	u_int res = _old;

	__asm __volatile(XRELEASE MPLOCKED "cmpxchgl %2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_long(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_dst, u_long *_old, u_long _new)
{
	u_long res = *_old;

	__asm __volatile(MPLOCKED "cmpxchgq %2,%0; "
			 : "+m" (*_dst),		/* 0 */
			   "+a" (*_old)			/* 1 */
			 : "r" (_new)			/* 2 */
			 : "memory", "cc");
	return (res == *_old);
}

static __inline int
atomic_cmpset_long_xacquire(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(XACQUIRE MPLOCKED "cmpxchgq %2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res == _old);
}

static __inline int
atomic_cmpset_long_xrelease(volatile u_long *_dst, u_long _old, u_long _new)
{
	u_long res = _old;

	__asm __volatile(XRELEASE MPLOCKED "cmpxchgq %2,%1; "
			 : "+a" (res), "=m" (*_dst)
			 : "r" (_new), "m" (*_dst)
			 : "memory");
	return (res == _old);
}

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *_p, u_int _v)
{
	__asm __volatile(MPLOCKED "xaddl %0,%1; "
			 : "+r" (_v), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (_v);
}

static __inline u_int
atomic_fetchadd_int_xacquire(volatile u_int *_p, u_int _v)
{
	__asm __volatile(XACQUIRE MPLOCKED "xaddl %0,%1; "
			 : "+r" (_v), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (_v);
}

static __inline u_int
atomic_fetchadd_int_xrelease(volatile u_int *_p, u_int _v)
{
	__asm __volatile(XRELEASE MPLOCKED "xaddl %0,%1; "
			 : "+r" (_v), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *_p, u_long _v)
{
	__asm __volatile(MPLOCKED "xaddq %0,%1; "
			 : "+r" (_v), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long_xacquire(volatile u_long *_p, u_long _v)
{
	__asm __volatile(XACQUIRE MPLOCKED "xaddq %0,%1; "
			 : "+r" (_v), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (_v);
}

static __inline u_long
atomic_fetchadd_long_xrelease(volatile u_long *_p, u_long _v)
{
	__asm __volatile(XRELEASE MPLOCKED "xaddq %0,%1; "
			 : "+r" (_v), "=m" (*_p)
			 : "m" (*_p)
			 : "memory");
	return (_v);
}

/*
 * atomic_testand{set,clear}_{int,long}: locked bts/btr on bit
 * (v & 0x1f) for int or (v & 0x3f) for long.  The return value is the
 * carry flag, i.e. the state of the bit before the operation.
 */
static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btsq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandset_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrl	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_int"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x1f)		/* 2 */
	: "cc");
	return (res);
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_long v)
{
	u_char res;

	__asm __volatile(
	"	" MPLOCKED "		"
	"	btrq	%2,%1 ;		"
	"	setc	%0 ;		"
	"# atomic_testandclear_long"
	: "=q" (res),			/* 0 */
	  "+m" (*p)			/* 1 */
	: "Ir" (v & 0x3f)		/* 2 */
	: "cc");
	return (res);
}

#endif	/* KLD_MODULE */

/*
 * ATOMIC_STORE_LOAD(TYPE, LOP, SOP) provides atomic_load_acq_TYPE() and
 * atomic_store_rel_TYPE().  Modules only get prototypes; the kernel
 * proper gets inline versions built from the load instruction LOP
 * (a locked cmpxchg) and the store instruction SOP (an xchg).
 */
#if defined(KLD_MODULE)

#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)				\
	extern u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
	extern void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v);

#else /* !KLD_MODULE */

#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE res; /* accumulator can be anything */	\
							\
	__asm __volatile(MPLOCKED LOP			\
	: "=a" (res),			/* 0 */		\
	  "=m" (*p)			/* 1 */		\
	: "m" (*p)			/* 2 */		\
	: "memory");					\
							\
	return (res);					\
}							\
							\
/*							\
 * The XCHG instruction asserts LOCK automagically.	\
 */							\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(SOP				\
	: "=m" (*p),			/* 0 */		\
	  "+r" (v)			/* 1 */		\
	: "m" (*p));			/* 2 */		\
}							\
struct __hack

#endif /* !KLD_MODULE */

ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1",  "xchgq %1,%0");

#undef ATOMIC_ASM
#undef ATOMIC_STORE_LOAD

/* Acquire and release variants are identical to the normal ones. */
#define	atomic_set_acq_char		atomic_set_char
#define	atomic_set_rel_char		atomic_set_char
#define	atomic_clear_acq_char		atomic_clear_char
#define	atomic_clear_rel_char		atomic_clear_char
#define	atomic_add_acq_char		atomic_add_char
#define	atomic_add_rel_char		atomic_add_char
#define	atomic_subtract_acq_char	atomic_subtract_char
#define	atomic_subtract_rel_char	atomic_subtract_char

#define	atomic_set_acq_short		atomic_set_short
#define	atomic_set_rel_short		atomic_set_short
#define	atomic_clear_acq_short		atomic_clear_short
#define	atomic_clear_rel_short		atomic_clear_short
#define	atomic_add_acq_short		atomic_add_short
#define	atomic_add_rel_short		atomic_add_short
#define	atomic_subtract_acq_short	atomic_subtract_short
#define	atomic_subtract_rel_short	atomic_subtract_short

#define	atomic_set_acq_int		atomic_set_int
#define	atomic_set_rel_int		atomic_set_int
#define	atomic_clear_acq_int		atomic_clear_int
#define	atomic_clear_rel_int		atomic_clear_int
#define	atomic_add_acq_int		atomic_add_int
#define	atomic_add_rel_int		atomic_add_int
#define	atomic_subtract_acq_int		atomic_subtract_int
#define	atomic_subtract_rel_int		atomic_subtract_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int

#define	atomic_set_acq_long		atomic_set_long
#define	atomic_set_rel_long		atomic_set_long
#define	atomic_clear_acq_long		atomic_clear_long
#define	atomic_clear_rel_long		atomic_clear_long
#define	atomic_add_acq_long		atomic_add_long
#define	atomic_add_rel_long		atomic_add_long
#define	atomic_subtract_acq_long	atomic_subtract_long
#define	atomic_subtract_rel_long	atomic_subtract_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long

/* cpumask_t is 64-bits on x86-64 */
#define atomic_set_cpumask		atomic_set_long
#define atomic_clear_cpumask		atomic_clear_long
#define atomic_cmpset_cpumask		atomic_cmpset_long
#define atomic_store_rel_cpumask	atomic_store_rel_long
#define atomic_load_acq_cpumask		atomic_load_acq_long

/* Operations on 8-bit bytes. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char
#define	atomic_fcmpset_8	atomic_fcmpset_char

/* Operations on 16-bit words. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short
#define	atomic_fcmpset_16	atomic_fcmpset_short

/* Operations on 32-bit double words. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_fcmpset_32	atomic_fcmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int

/* Operations on 64-bit quad words. */
#define	atomic_load_acq_64	atomic_load_acq_long
#define	atomic_store_rel_64	atomic_store_rel_long
#define	atomic_swap_64		atomic_swap_long
#define	atomic_fetchadd_64	atomic_fetchadd_long
#define	atomic_add_64		atomic_add_long
#define	atomic_cmpset_64	atomic_cmpset_long
#define	atomic_fcmpset_64	atomic_fcmpset_long
#define	atomic_set_64		atomic_set_long
#define	atomic_clear_64		atomic_clear_long

/*
 * Operations on pointers.  Each macro forwards to the u_long variant,
 * casting the target to (volatile u_long *) and any value to (u_long).
 */
#define	atomic_set_ptr(p, v) \
	atomic_set_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_long((volatile u_long *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_long((volatile u_long *)(p), (u_long)(v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_long((volatile u_long *)(dst), (u_long)(old), \
				(u_long)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_long((volatile u_long *)(dst), (u_long *)(old), \
				(u_long)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_long((volatile u_long *)(dst), (u_long)(old), \
				(u_long)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_long((volatile u_long *)(dst), (u_long)(old), \
				(u_long)(new))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_long((volatile u_long *)(p))

#endif /* ! _CPU_ATOMIC_H_ */