/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Out-of-line helpers backing the compiler's C11 __atomic_* and legacy
 * __sync_* atomic builtins on ARM, for configurations where the
 * compiler does not expand them inline.  Three strategies are used:
 *  - uniprocessor kernel: disable interrupts around a plain C sequence;
 *  - uniprocessor userspace: Restartable Atomic Sequences (RAS);
 *  - ARMv6+: ldrex/strex retry loops.
 */

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
/* Uniprocessor kernel: a compiler barrier is all that is needed. */
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
/* ARMv6+: issue a full data memory barrier. */
static inline void
do_sync(void)
{

	dmb();
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 *
 * Note: all implementations below ignore the memory-order argument
 * ("model"/"success"/"failure"); do_sync()/WITHOUT_INTERRUPTS provide
 * full-barrier semantics regardless.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define	__sync_synchronize __sync_synchronize_ext
#endif

/* No-op: the pre-v6 configurations handled here are uniprocessor. */
void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			/* Failed: report the observed value. */	\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 *
 * The sequence publishes its start address at ARM_RAS_START and its
 * end address at ARM_RAS_START + 4, and clears them (to 0 and
 * 0xffffffff respectively) once the store has been performed.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

/* "ret" selects which value to return: "old" (fetch_op) or "new" (op_fetch). */
#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)		\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, new, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (new)			\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch, "orr", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

/*
 * Clang cannot reimplement builtins directly, so our implementations
 * use a "_c" suffix and are mapped onto the builtin names here (GCC
 * uses __strong_reference() below instead).
 */
#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		/* NOTE(review): binds the reg_t union "old" to "r"	\
		 * (siblings use old.v32); works since it is 4 bytes,	\
		 * but worth confirming intent. */			\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

/*
 * Bitwise ops need no masking: the bytes outside the atomic variable
 * are made no-ops by "idempotence" (all-ones for and, zeros for or/xor).
 */
#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

/* For GCC, alias the builtin names to the "_c" implementations. */
#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 *
 * See the matching __atomic_* RAS implementations above for the
 * ARM_RAS_START / ARM_RAS_START + 4 publication protocol.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

/* Clang and old GCC spell the conditional byte/halfword store differently. */
#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

/* For GCC, alias the builtin names to the "_c" implementations. */
#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */