/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {	\
	register_t regs;	\
	\
	regs = intr_disable();	\
	do s while (0);	\
	intr_restore(regs);	\
} while (0)
#endif /* _KERNEL && !SMP */
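
/*
 * For illustration, a use of the macro above such as
 *
 *	WITHOUT_INTERRUPTS({
 *		*mem = val;
 *	});
 *
 * expands to roughly
 *
 *	do {
 *		register_t regs;
 *
 *		regs = intr_disable();
 *		do { *mem = val; } while (0);
 *		intr_restore(regs);
 *	} while (0);
 *
 * so the statement block runs with interrupts disabled, which is all
 * that is needed for atomicity on a uniprocessor.
 */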

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define	__sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)	\
uintN_t	\
__atomic_load_##N(uintN_t *mem, int model __unused)	\
{	\
	uintN_t ret;	\
	\
	WITHOUT_INTERRUPTS({	\
		ret = *mem;	\
	});	\
	return (ret);	\
}

#define	EMIT_STORE_N(N, uintN_t)	\
void	\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{	\
	\
	WITHOUT_INTERRUPTS({	\
		*mem = val;	\
	});	\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)	\
_Bool	\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,	\
    uintN_t desired, int success __unused, int failure __unused)	\
{	\
	_Bool ret;	\
	\
	WITHOUT_INTERRUPTS({	\
		if (*mem == *expected) {	\
			*mem = desired;	\
			ret = 1;	\
		} else {	\
			*expected = *mem;	\
			ret = 0;	\
		}	\
	});	\
	return (ret);	\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)	\
uintN_t	\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{	\
	uintN_t ret;	\
	\
	WITHOUT_INTERRUPTS({	\
		ret = *mem;	\
		*mem op val;	\
	});	\
	return (ret);	\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)	\
EMIT_LOAD_N(N, uintN_t)	\
EMIT_STORE_N(N, uintN_t)	\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)	\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)	\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)	\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)	\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)	\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)	\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
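
/*
 * As an example of the machinery above, EMIT_ALL_OPS_N(4, uint32_t)
 * generates (among others) this function:
 *
 *	uint32_t
 *	__atomic_fetch_add_4(uint32_t *mem, uint32_t val, int model __unused)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			*mem += val;
 *		});
 *		return (ret);
 *	}
 */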

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOAD_N(N, uintN_t)	\
uintN_t	\
__atomic_load_##N(uintN_t *mem, int model __unused)	\
{	\
	\
	return (*mem);	\
}

#define	EMIT_STORE_N(N, uintN_t)	\
void	\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{	\
	\
	*mem = val;	\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)	\
uintN_t	\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{	\
	uint32_t old, temp, ras_start;	\
	\
	ras_start = ARM_RAS_START;	\
	__asm volatile (	\
		/* Set up Restartable Atomic Sequence. */	\
		"1:"	\
		"\tadr %2, 1b\n"	\
		"\tstr %2, [%5]\n"	\
		"\tadr %2, 2f\n"	\
		"\tstr %2, [%5, #4]\n"	\
	\
		"\t"ldr" %0, %4\n"	/* Load old value. */	\
		"\t"str" %3, %1\n"	/* Store new value. */	\
	\
		/* Tear down Restartable Atomic Sequence. */	\
		"2:"	\
		"\tmov %2, #0x00000000\n"	\
		"\tstr %2, [%5]\n"	\
		"\tmov %2, #0xffffffff\n"	\
		"\tstr %2, [%5, #4]\n"	\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)	\
		: "r" (val), "m" (*mem), "r" (ras_start));	\
	return (old);	\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)	\
_Bool	\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,	\
    uintN_t desired, int success __unused, int failure __unused)	\
{	\
	uint32_t expected, old, temp, ras_start;	\
	\
	expected = *pexpected;	\
	ras_start = ARM_RAS_START;	\
	__asm volatile (	\
		/* Set up Restartable Atomic Sequence. */	\
		"1:"	\
		"\tadr %2, 1b\n"	\
		"\tstr %2, [%6]\n"	\
		"\tadr %2, 2f\n"	\
		"\tstr %2, [%6, #4]\n"	\
	\
		"\t"ldr" %0, %5\n"	/* Load old value. */	\
		"\tcmp %0, %3\n"	/* Compare to expected value. */ \
		"\t"streq" %4, %1\n"	/* Store new value. */	\
	\
		/* Tear down Restartable Atomic Sequence. */	\
		"2:"	\
		"\tmov %2, #0x00000000\n"	\
		"\tstr %2, [%6]\n"	\
		"\tmov %2, #0xffffffff\n"	\
		"\tstr %2, [%6, #4]\n"	\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)	\
		: "r" (expected), "r" (desired), "m" (*mem),	\
		    "r" (ras_start));	\
	if (old == expected) {	\
		return (1);	\
	} else {	\
		*pexpected = old;	\
		return (0);	\
	}	\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)	\
uintN_t	\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{	\
	uint32_t old, temp, ras_start;	\
	\
	ras_start = ARM_RAS_START;	\
	__asm volatile (	\
		/* Set up Restartable Atomic Sequence. */	\
		"1:"	\
		"\tadr %2, 1b\n"	\
		"\tstr %2, [%5]\n"	\
		"\tadr %2, 2f\n"	\
		"\tstr %2, [%5, #4]\n"	\
	\
		"\t"ldr" %0, %4\n"	/* Load old value. */	\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */	\
	\
		/* Tear down Restartable Atomic Sequence. */	\
		"2:"	\
		"\tmov %2, #0x00000000\n"	\
		"\tstr %2, [%5]\n"	\
		"\tmov %2, #0xffffffff\n"	\
		"\tstr %2, [%5, #4]\n"	\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)	\
		: "r" (val), "m" (*mem), "r" (ras_start));	\
	return (old);	\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)	\
EMIT_LOAD_N(N, uintN_t)	\
EMIT_STORE_N(N, uintN_t)	\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)	\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)	\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")	\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")	\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")	\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")	\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
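
/*
 * A rough sketch of the protocol behind these sequences (the kernel
 * half lives in the machine-dependent trap and context-switch code, so
 * the pseudocode below is illustrative only): the first four
 * instructions publish the start and end address of the sequence
 * through ARM_RAS_START, and the trailing stores reset that window to
 * values the kernel treats as inactive.  Conceptually, before
 * resuming an interrupted thread the kernel does something like
 *
 *	if (pc >= ras_start && pc < ras_end)
 *		pc = ras_start;
 *
 * so a sequence preempted between its load and its store is restarted
 * from the top rather than completing with a stale value.
 */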

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS)

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
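
/*
 * For example, for a uint8_t atomic living at address 0x20003 (an
 * illustrative address), round_to_word() yields 0x20000 and
 * put_1()/get_1() access v8[3]: the byte of the loaded word that
 * occupies the same memory location as the variable itself.  Because
 * reg_t overlays the byte array on the 32-bit word, this holds
 * regardless of endianness.
 */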

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)	\
uintN_t	\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)	\
{	\
	uint32_t *mem32;	\
	reg_t val32, negmask, old;	\
	uint32_t temp1, temp2;	\
	\
	mem32 = round_to_word(mem);	\
	val32.v32 = 0x00000000;	\
	put_##N(&val32, mem, val);	\
	negmask.v32 = 0xffffffff;	\
	put_##N(&negmask, mem, 0);	\
	\
	do_sync();	\
	__asm volatile (	\
		"1:"	\
		"\tldrex %0, %6\n"	/* Load old value. */	\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */	\
		"\tcmp %3, #0\n"	/* Did it succeed? */	\
		"\tbne 1b\n"	/* Spin if failed. */	\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)	\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));	\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)	\
uintN_t	\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)	\
{	\
	uint32_t *mem32;	\
	reg_t expected32, desired32, posmask, old;	\
	uint32_t negmask, temp1, temp2;	\
	\
	mem32 = round_to_word(mem);	\
	expected32.v32 = 0x00000000;	\
	put_##N(&expected32, mem, expected);	\
	desired32.v32 = 0x00000000;	\
	put_##N(&desired32, mem, desired);	\
	posmask.v32 = 0x00000000;	\
	put_##N(&posmask, mem, ~0);	\
	negmask = ~posmask.v32;	\
	\
	do_sync();	\
	__asm volatile (	\
		"1:"	\
		"\tldrex %0, %8\n"	/* Load old value. */	\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */ \
		"\tbne 2f\n"	/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */	\
		"\tcmp %3, #0\n"	/* Did it succeed? */	\
		"\tbne 1b\n"	/* Spin if failed. */	\
		"2:"	\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)	\
		: "r" (expected32.v32), "r" (desired32.v32),	\
		    "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));	\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)	\
uintN_t	\
__sync_##name##_##N(uintN_t *mem, uintN_t val)	\
{	\
	uint32_t *mem32;	\
	reg_t val32, posmask, old;	\
	uint32_t negmask, temp1, temp2;	\
	\
	mem32 = round_to_word(mem);	\
	val32.v32 = 0x00000000;	\
	put_##N(&val32, mem, val);	\
	posmask.v32 = 0x00000000;	\
	put_##N(&posmask, mem, ~0);	\
	negmask = ~posmask.v32;	\
	\
	do_sync();	\
	__asm volatile (	\
		"1:"	\
		"\tldrex %0, %7\n"	/* Load old value. */	\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */	\
		"\tcmp %3, #0\n"	/* Did it succeed? */	\
		"\tbne 1b\n"	/* Spin if failed. */	\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)	\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		    "m" (*mem32));	\
	return (get_##N(&old, mem));	\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t	\
__sync_##name##_##N(uintN_t *mem, uintN_t val)	\
{	\
	uint32_t *mem32;	\
	reg_t val32, old;	\
	uint32_t temp1, temp2;	\
	\
	mem32 = round_to_word(mem);	\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;	\
	put_##N(&val32, mem, val);	\
	\
	do_sync();	\
	__asm volatile (	\
		"1:"	\
		"\tldrex %0, %5\n"	/* Load old value. */	\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */	\
		"\tcmp %3, #0\n"	/* Did it succeed? */	\
		"\tbne 1b\n"	/* Spin if failed. */	\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		    "=&r" (temp2)	\
		: "r" (val32.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));	\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
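
/*
 * The idempotence argument above picks the identity value for the
 * bytes outside the target lane: the bitwise instruction operates on
 * the whole 32-bit word, so the other bytes must come out unchanged.
 * For "and" that means padding val32 with one bits (x & 1 == x, hence
 * 0xffffffff); for "orr" and "eor" it means padding with zero bits
 * (x | 0 == x, x ^ 0 == x).  E.g. a 1-byte fetch_and_and leaves val32
 * holding the operand in the target byte and 0xff in the remaining
 * three, so the AND only affects the byte being operated on.
 */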

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"	/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"	/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"	/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)	\
uint32_t	\
__sync_##name##_4(uint32_t *mem, uint32_t val)	\
{	\
	uint32_t old, temp1, temp2;	\
	\
	do_sync();	\
	__asm volatile (	\
		"1:"	\
		"\tldrex %0, %5\n"	/* Load old value. */	\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */	\
		"\tcmp %3, #0\n"	/* Did it succeed? */	\
		"\tbne 1b\n"	/* Spin if failed. */	\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),	\
		    "=&r" (temp2)	\
		: "r" (val), "m" (*mem));	\
	return (old);	\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
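
/*
 * All of the loops above rely on ARM's exclusive load/store pair:
 * ldrex marks the address for exclusive access, and strex performs the
 * store only if nothing cleared the exclusive monitor in the meantime,
 * writing 0 to its status register on success and 1 on failure.
 * Looping until the status register reads 0 therefore yields an atomic
 * read-modify-write.
 */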

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)	\
uintN_t	\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)	\
{	\
	uintN_t ret;	\
	\
	WITHOUT_INTERRUPTS({	\
		ret = *mem;	\
		if (*mem == expected)	\
			*mem = desired;	\
	});	\
	return (ret);	\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)	\
uintN_t	\
__sync_##name##_##N(uintN_t *mem, uintN_t val)	\
{	\
	uintN_t ret;	\
	\
	WITHOUT_INTERRUPTS({	\
		ret = *mem;	\
		*mem op val;	\
	});	\
	return (ret);	\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)	\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)	\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)	\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)	\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)	\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)	\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)	\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)	\
uintN_t	\
__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val)	\
{	\
	uint32_t old, temp, ras_start;	\
	\
	ras_start = ARM_RAS_START;	\
	__asm volatile (	\
		/* Set up Restartable Atomic Sequence. */	\
		"1:"	\
		"\tadr %2, 1b\n"	\
		"\tstr %2, [%5]\n"	\
		"\tadr %2, 2f\n"	\
		"\tstr %2, [%5, #4]\n"	\
	\
		"\t"ldr" %0, %4\n"	/* Load old value. */	\
		"\t"str" %3, %1\n"	/* Store new value. */	\
	\
		/* Tear down Restartable Atomic Sequence. */	\
		"2:"	\
		"\tmov %2, #0x00000000\n"	\
		"\tstr %2, [%5]\n"	\
		"\tmov %2, #0xffffffff\n"	\
		"\tstr %2, [%5, #4]\n"	\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)	\
		: "r" (val), "m" (*mem), "r" (ras_start));	\
	return (old);	\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)	\
uintN_t	\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)	\
{	\
	uint32_t old, temp, ras_start;	\
	\
	ras_start = ARM_RAS_START;	\
	__asm volatile (	\
		/* Set up Restartable Atomic Sequence. */	\
		"1:"	\
		"\tadr %2, 1b\n"	\
		"\tstr %2, [%6]\n"	\
		"\tadr %2, 2f\n"	\
		"\tstr %2, [%6, #4]\n"	\
	\
		"\t"ldr" %0, %5\n"	/* Load old value. */	\
		"\tcmp %0, %3\n"	/* Compare to expected value. */ \
		"\t"streq" %4, %1\n"	/* Store new value. */	\
	\
		/* Tear down Restartable Atomic Sequence. */	\
		"2:"	\
		"\tmov %2, #0x00000000\n"	\
		"\tstr %2, [%6]\n"	\
		"\tmov %2, #0xffffffff\n"	\
		"\tstr %2, [%6, #4]\n"	\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)	\
		: "r" (expected), "r" (desired), "m" (*mem),	\
		    "r" (ras_start));	\
	return (old);	\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)	\
uintN_t	\
__sync_##name##_##N(uintN_t *mem, uintN_t val)	\
{	\
	uint32_t old, temp, ras_start;	\
	\
	ras_start = ARM_RAS_START;	\
	__asm volatile (	\
		/* Set up Restartable Atomic Sequence. */	\
		"1:"	\
		"\tadr %2, 1b\n"	\
		"\tstr %2, [%5]\n"	\
		"\tadr %2, 2f\n"	\
		"\tstr %2, [%5, #4]\n"	\
	\
		"\t"ldr" %0, %4\n"	/* Load old value. */	\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */	\
	\
		/* Tear down Restartable Atomic Sequence. */	\
		"2:"	\
		"\tmov %2, #0x00000000\n"	\
		"\tstr %2, [%5]\n"	\
		"\tmov %2, #0xffffffff\n"	\
		"\tstr %2, [%5, #4]\n"	\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)	\
		: "r" (val), "m" (*mem), "r" (ras_start));	\
	return (old);	\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)	\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)	\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)	\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")	\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")	\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")	\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")	\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */
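
/*
 * Illustrative consumer of this file (not part of it): on a target
 * without native atomics, compiling
 *
 *	#include <stdatomic.h>
 *
 *	static atomic_uint counter;
 *
 *	void
 *	bump(void)
 *	{
 *		atomic_fetch_add_explicit(&counter, 1,
 *		    memory_order_relaxed);
 *	}
 *
 * emits a call to __atomic_fetch_add_4() when <sys/stdatomic.h>
 * selects the C11 __atomic_* API, or to __sync_fetch_and_add_4() when
 * it falls back to the old __sync_* API, both of which are provided
 * above.
 */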