1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/stdatomic.h> 34 #include <sys/types.h> 35 36 #include <machine/atomic.h> 37 #include <machine/cpufunc.h> 38 #include <machine/sysarch.h> 39 40 /* 41 * Executing statements with interrupts disabled. 42 */ 43 44 #if defined(_KERNEL) && !defined(SMP) 45 #define WITHOUT_INTERRUPTS(s) do { \ 46 register_t regs; \ 47 \ 48 regs = intr_disable(); \ 49 do s while (0); \ 50 intr_restore(regs); \ 51 } while (0) 52 #endif /* _KERNEL && !SMP */ 53 54 /* 55 * Memory barriers. 56 * 57 * It turns out __sync_synchronize() does not emit any code when used 58 * with GCC 4.2. Implement our own version that does work reliably. 59 * 60 * Although __sync_lock_test_and_set() should only perform an acquire 61 * barrier, make it do a full barrier like the other functions. This 62 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably. 63 */ 64 65 #if defined(_KERNEL) && !defined(SMP) 66 static inline void 67 do_sync(void) 68 { 69 70 __asm volatile ("" : : : "memory"); 71 } 72 #else 73 static inline void 74 do_sync(void) 75 { 76 77 dmb(); 78 } 79 #endif 80 81 82 #if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS) 83 84 #ifdef __clang__ 85 #pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1 86 #pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2 87 #pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4 88 #pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1 89 #pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2 90 #pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4 91 #pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1 92 #pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2 93 #pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4 94 #pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1 95 #pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2 96 #pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4 97 #pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1 98 #pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2 99 #pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4 100 #pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1 101 #pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2 102 #pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4 103 #pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1 104 #pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2 105 #pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4 106 #endif 107 108 /* 109 * Old __sync_* API. 110 */ 111 112 113 /* Implementations for old GCC versions, lacking support for atomics. */ 114 115 typedef union { 116 uint8_t v8[4]; 117 uint32_t v32; 118 } reg_t; 119 120 /* 121 * Given a memory address pointing to an 8-bit or 16-bit integer, return 122 * the address of the 32-bit word containing it. 123 */ 124 125 static inline uint32_t * 126 round_to_word(void *ptr) 127 { 128 129 return ((uint32_t *)((intptr_t)ptr & ~3)); 130 } 131 132 /* 133 * Utility functions for loading and storing 8-bit and 16-bit integers 134 * in 32-bit words at an offset corresponding with the location of the 135 * atomic variable. 136 */ 137 138 static inline void 139 put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val) 140 { 141 size_t offset; 142 143 offset = (intptr_t)offset_ptr & 3; 144 r->v8[offset] = val; 145 } 146 147 static inline uint8_t 148 get_1(const reg_t *r, const uint8_t *offset_ptr) 149 { 150 size_t offset; 151 152 offset = (intptr_t)offset_ptr & 3; 153 return (r->v8[offset]); 154 } 155 156 static inline void 157 put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val) 158 { 159 size_t offset; 160 union { 161 uint16_t in; 162 uint8_t out[2]; 163 } bytes; 164 165 offset = (intptr_t)offset_ptr & 3; 166 bytes.in = val; 167 r->v8[offset] = bytes.out[0]; 168 r->v8[offset + 1] = bytes.out[1]; 169 } 170 171 static inline uint16_t 172 get_2(const reg_t *r, const uint16_t *offset_ptr) 173 { 174 size_t offset; 175 union { 176 uint8_t in[2]; 177 uint16_t out; 178 } bytes; 179 180 offset = (intptr_t)offset_ptr & 3; 181 bytes.in[0] = r->v8[offset]; 182 bytes.in[1] = r->v8[offset + 1]; 183 return (bytes.out); 184 } 185 186 /* 187 * 8-bit and 16-bit routines. 188 * 189 * These operations are not natively supported by the CPU, so we use 190 * some shifting and bitmasking on top of the 32-bit instructions. 191 */ 192 193 #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \ 194 uintN_t \ 195 __sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ 196 { \ 197 uint32_t *mem32; \ 198 reg_t val32, negmask, old; \ 199 uint32_t temp1, temp2; \ 200 \ 201 mem32 = round_to_word(mem); \ 202 val32.v32 = 0x00000000; \ 203 put_##N(&val32, mem, val); \ 204 negmask.v32 = 0xffffffff; \ 205 put_##N(&negmask, mem, 0); \ 206 \ 207 do_sync(); \ 208 __asm volatile ( \ 209 "1:" \ 210 "\tldrex %0, %6\n" /* Load old value. */ \ 211 "\tand %2, %5, %0\n" /* Remove the old value. */ \ 212 "\torr %2, %2, %4\n" /* Put in the new value. */ \ 213 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 214 "\tcmp %3, #0\n" /* Did it succeed? */ \ 215 "\tbne 1b\n" /* Spin if failed. */ \ 216 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 217 "=&r" (temp2) \ 218 : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \ 219 return (get_##N(&old, mem)); \ 220 } 221 222 EMIT_LOCK_TEST_AND_SET_N(1, uint8_t) 223 EMIT_LOCK_TEST_AND_SET_N(2, uint16_t) 224 225 #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 226 uintN_t \ 227 __sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ 228 uintN_t desired) \ 229 { \ 230 uint32_t *mem32; \ 231 reg_t expected32, desired32, posmask, old; \ 232 uint32_t negmask, temp1, temp2; \ 233 \ 234 mem32 = round_to_word(mem); \ 235 expected32.v32 = 0x00000000; \ 236 put_##N(&expected32, mem, expected); \ 237 desired32.v32 = 0x00000000; \ 238 put_##N(&desired32, mem, desired); \ 239 posmask.v32 = 0x00000000; \ 240 put_##N(&posmask, mem, ~0); \ 241 negmask = ~posmask.v32; \ 242 \ 243 do_sync(); \ 244 __asm volatile ( \ 245 "1:" \ 246 "\tldrex %0, %8\n" /* Load old value. */ \ 247 "\tand %2, %6, %0\n" /* Isolate the old value. */ \ 248 "\tcmp %2, %4\n" /* Compare to expected value. */\ 249 "\tbne 2f\n" /* Values are unequal. */ \ 250 "\tand %2, %7, %0\n" /* Remove the old value. */ \ 251 "\torr %2, %5\n" /* Put in the new value. */ \ 252 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 253 "\tcmp %3, #0\n" /* Did it succeed? */ \ 254 "\tbne 1b\n" /* Spin if failed. */ \ 255 "2:" \ 256 : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \ 257 "=&r" (temp2) \ 258 : "r" (expected32.v32), "r" (desired32.v32), \ 259 "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \ 260 return (get_##N(&old, mem)); \ 261 } 262 263 EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t) 264 EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t) 265 266 #define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \ 267 uintN_t \ 268 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ 269 { \ 270 uint32_t *mem32; \ 271 reg_t val32, posmask, old; \ 272 uint32_t negmask, temp1, temp2; \ 273 \ 274 mem32 = round_to_word(mem); \ 275 val32.v32 = 0x00000000; \ 276 put_##N(&val32, mem, val); \ 277 posmask.v32 = 0x00000000; \ 278 put_##N(&posmask, mem, ~0); \ 279 negmask = ~posmask.v32; \ 280 \ 281 do_sync(); \ 282 __asm volatile ( \ 283 "1:" \ 284 "\tldrex %0, %7\n" /* Load old value. */ \ 285 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 286 "\tand %2, %5\n" /* Isolate the new value. */ \ 287 "\tand %3, %6, %0\n" /* Remove the old value. */ \ 288 "\torr %2, %2, %3\n" /* Put in the new value. */ \ 289 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 290 "\tcmp %3, #0\n" /* Did it succeed? */ \ 291 "\tbne 1b\n" /* Spin if failed. */ \ 292 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 293 "=&r" (temp2) \ 294 : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \ 295 "m" (*mem32)); \ 296 return (get_##N(&old, mem)); \ 297 } 298 299 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add") 300 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub") 301 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add") 302 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub") 303 304 #define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \ 305 uintN_t \ 306 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ 307 { \ 308 uint32_t *mem32; \ 309 reg_t val32, old; \ 310 uint32_t temp1, temp2; \ 311 \ 312 mem32 = round_to_word(mem); \ 313 val32.v32 = idempotence ? 0xffffffff : 0x00000000; \ 314 put_##N(&val32, mem, val); \ 315 \ 316 do_sync(); \ 317 __asm volatile ( \ 318 "1:" \ 319 "\tldrex %0, %5\n" /* Load old value. */ \ 320 "\t"op" %2, %4, %0\n" /* Calculate new value. */ \ 321 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 322 "\tcmp %3, #0\n" /* Did it succeed? */ \ 323 "\tbne 1b\n" /* Spin if failed. */ \ 324 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 325 "=&r" (temp2) \ 326 : "r" (val32.v32), "m" (*mem32)); \ 327 return (get_##N(&old, mem)); \ 328 } 329 330 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1) 331 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0) 332 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0) 333 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1) 334 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0) 335 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0) 336 337 /* 338 * 32-bit routines. 339 */ 340 341 uint32_t 342 __sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val) 343 { 344 uint32_t old, temp; 345 346 do_sync(); 347 __asm volatile ( 348 "1:" 349 "\tldrex %0, %4\n" /* Load old value. */ 350 "\tstrex %2, %3, %1\n" /* Attempt to store. */ 351 "\tcmp %2, #0\n" /* Did it succeed? */ 352 "\tbne 1b\n" /* Spin if failed. */ 353 : "=&r" (old), "=m" (*mem), "=&r" (temp) 354 : "r" (val), "m" (*mem)); 355 return (old); 356 } 357 358 uint32_t 359 __sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected, 360 uint32_t desired) 361 { 362 uint32_t old, temp; 363 364 do_sync(); 365 __asm volatile ( 366 "1:" 367 "\tldrex %0, %5\n" /* Load old value. */ 368 "\tcmp %0, %3\n" /* Compare to expected value. */ 369 "\tbne 2f\n" /* Values are unequal. */ 370 "\tstrex %2, %4, %1\n" /* Attempt to store. */ 371 "\tcmp %2, #0\n" /* Did it succeed? */ 372 "\tbne 1b\n" /* Spin if failed. */ 373 "2:" 374 : "=&r" (old), "=m" (*mem), "=&r" (temp) 375 : "r" (expected), "r" (desired), "m" (*mem)); 376 return (old); 377 } 378 379 #define EMIT_FETCH_AND_OP_4(name, op) \ 380 uint32_t \ 381 __sync_##name##_4##_c(uint32_t *mem, uint32_t val) \ 382 { \ 383 uint32_t old, temp1, temp2; \ 384 \ 385 do_sync(); \ 386 __asm volatile ( \ 387 "1:" \ 388 "\tldrex %0, %5\n" /* Load old value. */ \ 389 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 390 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 391 "\tcmp %3, #0\n" /* Did it succeed? */ \ 392 "\tbne 1b\n" /* Spin if failed. */ \ 393 : "=&r" (old), "=m" (*mem), "=&r" (temp1), \ 394 "=&r" (temp2) \ 395 : "r" (val), "m" (*mem)); \ 396 return (old); \ 397 } 398 399 EMIT_FETCH_AND_OP_4(fetch_and_add, "add") 400 EMIT_FETCH_AND_OP_4(fetch_and_and, "and") 401 EMIT_FETCH_AND_OP_4(fetch_and_or, "orr") 402 EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub") 403 EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor") 404 405 #ifndef __clang__ 406 __strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1); 407 __strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2); 408 __strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4); 409 __strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1); 410 __strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2); 411 __strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4); 412 __strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1); 413 __strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2); 414 __strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4); 415 __strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1); 416 __strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2); 417 __strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4); 418 __strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1); 419 __strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2); 420 __strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4); 421 __strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1); 422 __strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2); 423 __strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4); 424 __strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1); 425 __strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2); 426 __strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4); 427 #endif 428 429 #endif /* __SYNC_ATOMICS */ 430