/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/acle-compat.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
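
/*
 * Illustrative sketch, not part of the original file: wrapping a
 * read-modify-write in WITHOUT_INTERRUPTS() expands to roughly the
 * code below, so the sequence cannot be preempted on a uniprocessor
 * kernel.  Guarded by #if 0; for exposition only.
 */
#if 0
static uint32_t example_counter;

static void
example_increment(void)
{
	register_t regs;

	regs = intr_disable();			/* Mask interrupts. */
	do { example_counter++; } while (0);	/* The statement block. */
	intr_restore(regs);			/* Restore previous state. */
}
#endif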

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

	dmb();
}
#endif
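
/*
 * Illustrative sketch, not part of the original file: on ARMv6+,
 * do_sync() emits a Data Memory Barrier, so a publication pattern such
 * as the one below makes the payload visible before the flag.  On a UP
 * kernel the compiler barrier alone suffices.  Guarded by #if 0; for
 * exposition only.
 */
#if 0
static void
example_publish(volatile uint32_t *data, volatile uint32_t *flag)
{

	*data = 42;	/* Payload store. */
	do_sync();	/* Order the payload before the flag. */
	*flag = 1;	/* Publication store. */
}
#endif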

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N
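
/*
 * Illustrative sketch, not part of the original file: for instance,
 * EMIT_FETCH_OP_N(4, uint32_t, fetch_add, +=) above expands to roughly
 * the function below.  Guarded by #if 0; for exposition only.
 */
#if 0
uint32_t
__atomic_fetch_add_4(uint32_t *mem, uint32_t val, int model __unused)
{
	uint32_t ret;

	WITHOUT_INTERRUPTS({
		ret = *mem;
		*mem += val;
	});
	return (ret);
}
#endif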

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)		\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, new, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (new)			\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch, "orr", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N
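
/*
 * Illustrative sketch, not part of the original file: the kernel's side
 * of the Restartable Atomic Sequence behaves roughly as below.  When
 * returning to an interrupted thread, if the saved program counter lies
 * inside the window that userspace published at ARM_RAS_START (start
 * address) and ARM_RAS_START + 4 (end address), execution resumes at
 * the start of the sequence.  Names and layout here are schematic only;
 * guarded by #if 0.
 */
#if 0
static void
example_ras_restart(struct trapframe *tf)
{
	uint32_t start, end;

	start = *(uint32_t *)ARM_RAS_START;	/* The "1:" label. */
	end = *(uint32_t *)(ARM_RAS_START + 4);	/* The "2:" label. */
	if (tf->tf_pc >= start && tf->tf_pc < end)
		tf->tf_pc = start;		/* Restart the sequence. */
}
#endif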

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t v8[4];
	uint32_t v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
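
/*
 * Illustrative sketch, not part of the original file: for a uint8_t at
 * address 0x1003, round_to_word() clears the low two address bits to
 * yield the containing word at 0x1000, and put_1()/get_1() then operate
 * on byte 3 of the reg_t.  Because v8[] is indexed by the address
 * offset, this is independent of byte order.  Guarded by #if 0.
 */
#if 0
static void
example_offsets(void)
{
	uint8_t *p = (uint8_t *)0x1003;
	uint32_t *word = round_to_word(p);	/* (uint32_t *)0x1000 */
	reg_t r;

	r.v32 = 0x00000000;
	put_1(&r, p, 0xff);	/* Sets r.v8[3] = 0xff. */
	(void)word;
}
#endif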

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
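
/*
 * Worked example, not part of the original file: for a uint16_t at byte
 * offset 2 of its word on a little-endian CPU, the masks built above
 * via put_N() come out as
 *
 *	posmask.v32 == 0xffff0000	(bits belonging to the variable)
 *	negmask     == 0x0000ffff	(surrounding bits to preserve)
 *
 * so the "and"/"orr" pairs in the assembly splice the new halfword into
 * the old word without disturbing its neighbour.
 */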

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
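
/*
 * Illustrative sketch, not part of the original file: a typical consumer
 * of these primitives is a test-and-set spinlock.  The builtin returns
 * the previous value, so seeing 0 means the lock was free and is now
 * held.  Guarded by #if 0; for exposition only.
 */
#if 0
static uint32_t example_lock;

static void
example_acquire(void)
{

	while (__sync_lock_test_and_set(&example_lock, 1) != 0)
		;			/* Spin until we observe 0. */
}

static void
example_release(void)
{

	__sync_lock_release(&example_lock);	/* Store 0 with release. */
}
#endif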

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N
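
/*
 * Illustrative sketch, not part of the original file: callers typically
 * wrap __sync_val_compare_and_swap in a loop, retrying until the value
 * the update was based on is still current, e.g. an atomic "store
 * maximum".  Guarded by #if 0; for exposition only.
 */
#if 0
static void
example_atomic_max(uint32_t *mem, uint32_t val)
{
	uint32_t old;

	do {
		old = *mem;
		if (old >= val)
			return;		/* Already at least val. */
	} while (__sync_val_compare_and_swap_4(mem, old, val) != old);
}
#endif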

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (expected), "r" (desired), "m" (*mem),		\
		  "r" (ras_start));					\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
		: "=&r" (old), "=m" (*mem), "=&r" (temp)		\
		: "r" (val), "m" (*mem), "r" (ras_start));		\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */