/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

	__asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

	__asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
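/*
 * Illustrative sketch (hypothetical caller code; kept under #if 0 and
 * not compiled as part of this file): the full barrier chosen above is
 * what lets a plain <stdatomic.h> exchange double as a lock acquire.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static _Atomic(uint32_t) example_lock;	/* hypothetical example symbol */

static void
example_acquire(void)
{

	/* Typically lowered to __sync_lock_test_and_set_4() or
	 * __atomic_exchange_4(), both implemented in this file. */
	while (atomic_exchange_explicit(&example_lock, 1,
	    memory_order_acquire) != 0)
		;
}

static void
example_release(void)
{

	atomic_store_explicit(&example_lock, 0, memory_order_release);
}
#endif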
#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* These systems should be supported by the compiler. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
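/*
 * How the sequences below work (derived from the assembly itself):
 * each one first publishes its start and end address through the two
 * words at ARM_RAS_START.  If the thread is interrupted while its
 * program counter lies inside that window, the kernel rewinds the
 * program counter to the start label, so the load/modify/store always
 * completes as a unit.  Storing 0 and 0xffffffff afterwards marks the
 * window as empty again.
 */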
#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}
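/*
 * Semantically (a non-atomic sketch), __atomic_compare_exchange_N()
 * above behaves like:
 *
 *	if (*mem == *pexpected) {
 *		*mem = desired;
 *		return (1);
 *	} else {
 *		*pexpected = *mem;
 *		return (0);
 *	}
 *
 * with the load/compare/store made uninterruptible by the RAS window.
 */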
#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)			\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")			\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
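/*
 * Worked example (hypothetical addresses): for a uint16_t at address
 * 0x1002, round_to_word() yields 0x1000, and the put_2()/get_2()
 * helpers below address byte lanes 2 and 3 of that word.  Every
 * 8/16-bit operation in this section is therefore performed as a
 * read-modify-write of the aligned 32-bit word containing the
 * variable.
 */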
/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
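/*
 * Concrete example of the masking above (little-endian, hypothetical
 * values): exchanging a uint8_t in byte lane 1 with val = 0x5a gives
 * val32.v32 = 0x00005a00 and negmask.v32 = 0xffff00ff, so each retry
 * computes (old & negmask) | val32, replacing only that byte while
 * the ldrex/strex pair guarantees the whole word is updated
 * atomically.
 */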
#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
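/*
 * The idempotence argument above selects the padding for the byte
 * lanes that do not belong to the variable: AND pads with 0xff bytes
 * (x & 0xff == x) while OR and XOR pad with 0x00 bytes (x | 0 == x,
 * x ^ 0 == x), so the unrelated bytes in the word pass through the
 * operation unchanged and no separate masking step is needed.
 */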
/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */
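/*
 * For illustration, EMIT_VAL_COMPARE_AND_SWAP_N(4, uint32_t) below
 * expands to roughly:
 *
 *	uint32_t
 *	__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
 *	    uint32_t desired)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			if (*mem == expected)
 *				*mem = desired;
 *		});
 *		return (ret);
 *	}
 *
 * which is atomic on a uniprocessor because nothing else can run
 * between intr_disable() and intr_restore().
 */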
#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")
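/*
 * The conditional-store spelling passed to EMIT_ALL_OPS_N() below
 * differs per assembler: clang's integrated assembler expects ARM
 * unified syntax, where the condition follows the size suffix
 * ("strbeq"), while the GNU assembler in the traditional divided
 * syntax expects the condition first ("streqb").
 */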
#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */