/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

	dmb();
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */
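
/*
 * Illustrative sketch (not compiled here): EMIT_LOAD_N(4, uint32_t)
 * below expands to roughly the following, with WITHOUT_INTERRUPTS()
 * providing the atomicity:
 *
 *	uint32_t
 *	__atomic_load_4(uint32_t *mem, int model __unused)
 *	{
 *		uint32_t ret;
 *		register_t regs;
 *
 *		regs = intr_disable();
 *		ret = *mem;
 *		intr_restore(regs);
 *		return (ret);
 *	}
 */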

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
	});								\
	return (ret);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	WITHOUT_INTERRUPTS({						\
		*mem = val;						\
	});								\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t)				\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	_Bool ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		if (*mem == *expected) {				\
			*mem = desired;					\
			ret = 1;					\
		} else {						\
			*expected = *mem;				\
			ret = 0;					\
		}							\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, name, op)				\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)					\
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)				\
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
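
/*
 * Sketch of the mechanism (the restart logic itself lives in the
 * kernel, not in this file): each sequence below begins by storing
 * the addresses of its start label (1:) and end label (2:) in the
 * two words at ARM_RAS_START. If the thread is interrupted while its
 * program counter lies inside that window, the kernel moves the PC
 * back to the start label, conceptually:
 *
 *	if (pc_inside_ras_window(td))	// hypothetical helper
 *		pc = ras_start;		// redo the whole sequence
 *
 * The teardown stores { 0x00000000, 0xffffffff }, which the kernel
 * recognizes as "no sequence active".
 */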

#define	EMIT_LOAD_N(N, uintN_t)						\
uintN_t									\
__atomic_load_##N(uintN_t *mem, int model __unused)			\
{									\
									\
	return (*mem);							\
}

#define	EMIT_STORE_N(N, uintN_t)					\
void									\
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
									\
	*mem = val;							\
}

#define	EMIT_EXCHANGE_N(N, uintN_t, ldr, str)				\
uintN_t									\
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
	: "=&r" (old), "=m" (*mem), "=&r" (temp)			\
	: "r" (val), "m" (*mem), "r" (ras_start));			\
	return (old);							\
}

#define	EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)			\
_Bool									\
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,		\
    uintN_t desired, int success __unused, int failure __unused)	\
{									\
	uint32_t expected, old, temp, ras_start;			\
									\
	expected = *pexpected;						\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
	: "=&r" (old), "=m" (*mem), "=&r" (temp)			\
	: "r" (expected), "r" (desired), "m" (*mem),			\
	  "r" (ras_start));						\
	if (old == expected) {						\
		return (1);						\
	} else {							\
		*pexpected = old;					\
		return (0);						\
	}								\
}

#define	EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)		\
uintN_t									\
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)	\
{									\
	uint32_t old, new, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
	: "=&r" (old), "=m" (*mem), "=&r" (new)				\
	: "r" (val), "m" (*mem), "r" (ras_start));			\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOAD_N(N, uintN_t)							\
EMIT_STORE_N(N, uintN_t)						\
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)					\
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)				\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch, "orr", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)		\
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */
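
/*
 * Illustrative mapping (an assumption about compiler lowering, not
 * code from this file): on ARMv5 the compiler cannot inline these
 * atomics, so a C11 call such as
 *
 *	atomic_fetch_add_explicit(&counter, 1, memory_order_relaxed);
 *
 * is emitted as a call to __atomic_fetch_add_4(), one of the
 * functions generated above.
 */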

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
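
/*
 * Worked example (hypothetical address): for a uint8_t at address
 * 0x1003, round_to_word() masks off the low bits and yields the
 * containing word at 0x1000; put_1()/get_1() then operate on byte
 * (0x1003 & 3) == 3 of that word. Using byte offsets instead of
 * shifts keeps the helpers correct on both little- and big-endian
 * ARM. The masking below is needed because plain ARMv6 only provides
 * word-sized ldrex/strex; the byte and halfword forms (ldrexb and
 * ldrexh) only appeared in ARMv6K.
 */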

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
	: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),		\
	  "=&r" (temp2)							\
	: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));		\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
	: "=&r" (old), "=m" (*mem32), "=&r" (temp1),			\
	  "=&r" (temp2)							\
	: "r" (expected32.v32), "r" (desired32.v32),			\
	  "r" (posmask.v32), "r" (negmask), "m" (*mem32));		\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
	: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),		\
	  "=&r" (temp2)							\
	: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),		\
	  "m" (*mem32));						\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
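
/*
 * Mask example (hypothetical address, little-endian ARM): for a
 * uint8_t at address 0x1000, i.e. offset 0 within its word, the
 * macros above compute
 *
 *	posmask.v32 = 0x000000ff;	(selects the variable's byte)
 *	negmask     = 0xffffff00;	(selects the padding bytes)
 *
 * so and'ing with posmask isolates the new byte, while and'ing with
 * negmask preserves the neighbouring bytes sharing the 32-bit word.
 * The bitwise routines below skip this masking by padding val32 with
 * the operation's identity element instead: 0xff bytes for "and"
 * (idempotence = 1), 0x00 bytes for "orr"/"eor" (idempotence = 0).
 */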

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
	: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),		\
	  "=&r" (temp2)							\
	: "r" (val32.v32), "m" (*mem32));				\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
	: "=&r" (old), "=m" (*mem), "=&r" (temp)
	: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
	: "=&r" (old), "=m" (*mem), "=&r" (temp)
	: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}
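
/*
 * The retry loops above and below rely on the ARMv6 exclusive
 * monitor: ldrex marks the address for exclusive access, and strex
 * writes its status operand as 0 on success or 1 if exclusivity was
 * lost in the meantime, in which case the cmp/bne pair runs the
 * sequence again. The caller-visible result is one atomic update.
 */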

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
	: "=&r" (old), "=m" (*mem), "=&r" (temp1),			\
	  "=&r" (temp2)							\
	: "r" (val), "m" (*mem));					\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */
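
/*
 * Interface note: unlike the C11 __atomic_compare_exchange_N() above,
 * which returns a _Bool and updates *expected on failure, the old
 * __sync_val_compare_and_swap_N() returns the previous value of *mem,
 * so callers test for success themselves (illustrative caller):
 *
 *	if (__sync_val_compare_and_swap_4(&word, 0, 1) == 0)
 *		;	// the swap took place
 */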

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,		\
    uintN_t desired)							\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		if (*mem == expected)					\
			*mem = desired;					\
	});								\
	return (ret);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)			\
uintN_t									\
__sync_##name##_##N(uintN_t *mem, uintN_t val)				\
{									\
	uintN_t ret;							\
									\
	WITHOUT_INTERRUPTS({						\
		ret = *mem;						\
		*mem op val;						\
	});								\
	return (ret);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t)					\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)					\
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)			\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"str" %3, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
	: "=&r" (old), "=m" (*mem), "=&r" (temp)			\
	: "r" (val), "m" (*mem), "r" (ras_start));			\
	return (old);							\
}

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)		\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%6]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%6, #4]\n"					\
									\
		"\t"ldr" %0, %5\n"	/* Load old value. */		\
		"\tcmp %0, %3\n"	/* Compare to expected value. */\
		"\t"streq" %4, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%6]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%6, #4]\n"					\
	: "=&r" (old), "=m" (*mem), "=&r" (temp)			\
	: "r" (expected), "r" (desired), "m" (*mem),			\
	  "r" (ras_start));						\
	return (old);							\
}

#define	EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t old, temp, ras_start;					\
									\
	ras_start = ARM_RAS_START;					\
	__asm volatile (						\
		/* Set up Restartable Atomic Sequence. */		\
		"1:"							\
		"\tadr %2, 1b\n"					\
		"\tstr %2, [%5]\n"					\
		"\tadr %2, 2f\n"					\
		"\tstr %2, [%5, #4]\n"					\
									\
		"\t"ldr" %0, %4\n"	/* Load old value. */		\
		"\t"op" %2, %0, %3\n"	/* Calculate new value. */	\
		"\t"str" %2, %1\n"	/* Store new value. */		\
									\
		/* Tear down Restartable Atomic Sequence. */		\
		"2:"							\
		"\tmov %2, #0x00000000\n"				\
		"\tstr %2, [%5]\n"					\
		"\tmov %2, #0xffffffff\n"				\
		"\tstr %2, [%5, #4]\n"					\
	: "=&r" (old), "=m" (*mem), "=&r" (temp)			\
	: "r" (val), "m" (*mem), "r" (ras_start));			\
	return (old);							\
}

#define	EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)			\
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)				\
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)			\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")		\
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")
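
/*
 * The conditional store mnemonics differ by assembler dialect (an
 * assumption based on the two spellings used below): clang's
 * integrated assembler expects unified (UAL) syntax, where the
 * condition follows the size suffix ("strbeq"), while the divided
 * syntax traditionally accepted by GNU as puts the condition first
 * ("streqb").
 */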

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */