1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/stdatomic.h> 31 #include <sys/types.h> 32 33 #include <machine/atomic.h> 34 #include <machine/cpufunc.h> 35 #include <machine/sysarch.h> 36 37 /* 38 * Executing statements with interrupts disabled. 39 */ 40 41 #if defined(_KERNEL) && !defined(SMP) 42 #define WITHOUT_INTERRUPTS(s) do { \ 43 register_t regs; \ 44 \ 45 regs = intr_disable(); \ 46 do s while (0); \ 47 intr_restore(regs); \ 48 } while (0) 49 #endif /* _KERNEL && !SMP */ 50 51 /* 52 * Memory barriers. 53 * 54 * It turns out __sync_synchronize() does not emit any code when used 55 * with GCC 4.2. Implement our own version that does work reliably. 56 * 57 * Although __sync_lock_test_and_set() should only perform an acquire 58 * barrier, make it do a full barrier like the other functions. This 59 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably. 60 */ 61 62 #if defined(_KERNEL) && !defined(SMP) 63 static inline void 64 do_sync(void) 65 { 66 67 __asm volatile ("" : : : "memory"); 68 } 69 #else 70 static inline void 71 do_sync(void) 72 { 73 74 dmb(); 75 } 76 #endif 77 78 79 #if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS) 80 81 #ifdef __clang__ 82 #pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1 83 #pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2 84 #pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4 85 #pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1 86 #pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2 87 #pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4 88 #pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1 89 #pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2 90 #pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4 91 #pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1 92 #pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2 93 #pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4 94 #pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1 95 #pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2 96 #pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4 97 #pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1 98 #pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2 99 #pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4 100 #pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1 101 #pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2 102 #pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4 103 #endif 104 105 /* 106 * Old __sync_* API. 107 */ 108 109 110 /* Implementations for old GCC versions, lacking support for atomics. */ 111 112 typedef union { 113 uint8_t v8[4]; 114 uint32_t v32; 115 } reg_t; 116 117 /* 118 * Given a memory address pointing to an 8-bit or 16-bit integer, return 119 * the address of the 32-bit word containing it. 120 */ 121 122 static inline uint32_t * 123 round_to_word(void *ptr) 124 { 125 126 return ((uint32_t *)((intptr_t)ptr & ~3)); 127 } 128 129 /* 130 * Utility functions for loading and storing 8-bit and 16-bit integers 131 * in 32-bit words at an offset corresponding with the location of the 132 * atomic variable. 133 */ 134 135 static inline void 136 put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val) 137 { 138 size_t offset; 139 140 offset = (intptr_t)offset_ptr & 3; 141 r->v8[offset] = val; 142 } 143 144 static inline uint8_t 145 get_1(const reg_t *r, const uint8_t *offset_ptr) 146 { 147 size_t offset; 148 149 offset = (intptr_t)offset_ptr & 3; 150 return (r->v8[offset]); 151 } 152 153 static inline void 154 put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val) 155 { 156 size_t offset; 157 union { 158 uint16_t in; 159 uint8_t out[2]; 160 } bytes; 161 162 offset = (intptr_t)offset_ptr & 3; 163 bytes.in = val; 164 r->v8[offset] = bytes.out[0]; 165 r->v8[offset + 1] = bytes.out[1]; 166 } 167 168 static inline uint16_t 169 get_2(const reg_t *r, const uint16_t *offset_ptr) 170 { 171 size_t offset; 172 union { 173 uint8_t in[2]; 174 uint16_t out; 175 } bytes; 176 177 offset = (intptr_t)offset_ptr & 3; 178 bytes.in[0] = r->v8[offset]; 179 bytes.in[1] = r->v8[offset + 1]; 180 return (bytes.out); 181 } 182 183 /* 184 * 8-bit and 16-bit routines. 185 * 186 * These operations are not natively supported by the CPU, so we use 187 * some shifting and bitmasking on top of the 32-bit instructions. 188 */ 189 190 #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \ 191 uintN_t \ 192 __sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ 193 { \ 194 uint32_t *mem32; \ 195 reg_t val32, negmask, old; \ 196 uint32_t temp1, temp2; \ 197 \ 198 mem32 = round_to_word(mem); \ 199 val32.v32 = 0x00000000; \ 200 put_##N(&val32, mem, val); \ 201 negmask.v32 = 0xffffffff; \ 202 put_##N(&negmask, mem, 0); \ 203 \ 204 do_sync(); \ 205 __asm volatile ( \ 206 "1:" \ 207 "\tldrex %0, %6\n" /* Load old value. */ \ 208 "\tand %2, %5, %0\n" /* Remove the old value. */ \ 209 "\torr %2, %2, %4\n" /* Put in the new value. */ \ 210 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 211 "\tcmp %3, #0\n" /* Did it succeed? */ \ 212 "\tbne 1b\n" /* Spin if failed. */ \ 213 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 214 "=&r" (temp2) \ 215 : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \ 216 return (get_##N(&old, mem)); \ 217 } 218 219 EMIT_LOCK_TEST_AND_SET_N(1, uint8_t) 220 EMIT_LOCK_TEST_AND_SET_N(2, uint16_t) 221 222 #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ 223 uintN_t \ 224 __sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ 225 uintN_t desired) \ 226 { \ 227 uint32_t *mem32; \ 228 reg_t expected32, desired32, posmask, old; \ 229 uint32_t negmask, temp1, temp2; \ 230 \ 231 mem32 = round_to_word(mem); \ 232 expected32.v32 = 0x00000000; \ 233 put_##N(&expected32, mem, expected); \ 234 desired32.v32 = 0x00000000; \ 235 put_##N(&desired32, mem, desired); \ 236 posmask.v32 = 0x00000000; \ 237 put_##N(&posmask, mem, ~0); \ 238 negmask = ~posmask.v32; \ 239 \ 240 do_sync(); \ 241 __asm volatile ( \ 242 "1:" \ 243 "\tldrex %0, %8\n" /* Load old value. */ \ 244 "\tand %2, %6, %0\n" /* Isolate the old value. */ \ 245 "\tcmp %2, %4\n" /* Compare to expected value. */\ 246 "\tbne 2f\n" /* Values are unequal. */ \ 247 "\tand %2, %7, %0\n" /* Remove the old value. */ \ 248 "\torr %2, %5\n" /* Put in the new value. */ \ 249 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 250 "\tcmp %3, #0\n" /* Did it succeed? */ \ 251 "\tbne 1b\n" /* Spin if failed. */ \ 252 "2:" \ 253 : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \ 254 "=&r" (temp2) \ 255 : "r" (expected32.v32), "r" (desired32.v32), \ 256 "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \ 257 return (get_##N(&old, mem)); \ 258 } 259 260 EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t) 261 EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t) 262 263 #define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \ 264 uintN_t \ 265 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ 266 { \ 267 uint32_t *mem32; \ 268 reg_t val32, posmask, old; \ 269 uint32_t negmask, temp1, temp2; \ 270 \ 271 mem32 = round_to_word(mem); \ 272 val32.v32 = 0x00000000; \ 273 put_##N(&val32, mem, val); \ 274 posmask.v32 = 0x00000000; \ 275 put_##N(&posmask, mem, ~0); \ 276 negmask = ~posmask.v32; \ 277 \ 278 do_sync(); \ 279 __asm volatile ( \ 280 "1:" \ 281 "\tldrex %0, %7\n" /* Load old value. */ \ 282 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 283 "\tand %2, %5\n" /* Isolate the new value. */ \ 284 "\tand %3, %6, %0\n" /* Remove the old value. */ \ 285 "\torr %2, %2, %3\n" /* Put in the new value. */ \ 286 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 287 "\tcmp %3, #0\n" /* Did it succeed? */ \ 288 "\tbne 1b\n" /* Spin if failed. */ \ 289 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 290 "=&r" (temp2) \ 291 : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \ 292 "m" (*mem32)); \ 293 return (get_##N(&old, mem)); \ 294 } 295 296 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add") 297 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub") 298 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add") 299 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub") 300 301 #define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \ 302 uintN_t \ 303 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ 304 { \ 305 uint32_t *mem32; \ 306 reg_t val32, old; \ 307 uint32_t temp1, temp2; \ 308 \ 309 mem32 = round_to_word(mem); \ 310 val32.v32 = idempotence ? 0xffffffff : 0x00000000; \ 311 put_##N(&val32, mem, val); \ 312 \ 313 do_sync(); \ 314 __asm volatile ( \ 315 "1:" \ 316 "\tldrex %0, %5\n" /* Load old value. */ \ 317 "\t"op" %2, %4, %0\n" /* Calculate new value. */ \ 318 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 319 "\tcmp %3, #0\n" /* Did it succeed? */ \ 320 "\tbne 1b\n" /* Spin if failed. */ \ 321 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ 322 "=&r" (temp2) \ 323 : "r" (val32.v32), "m" (*mem32)); \ 324 return (get_##N(&old, mem)); \ 325 } 326 327 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1) 328 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0) 329 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0) 330 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1) 331 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0) 332 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0) 333 334 /* 335 * 32-bit routines. 336 */ 337 338 uint32_t 339 __sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val) 340 { 341 uint32_t old, temp; 342 343 do_sync(); 344 __asm volatile ( 345 "1:" 346 "\tldrex %0, %4\n" /* Load old value. */ 347 "\tstrex %2, %3, %1\n" /* Attempt to store. */ 348 "\tcmp %2, #0\n" /* Did it succeed? */ 349 "\tbne 1b\n" /* Spin if failed. */ 350 : "=&r" (old), "=m" (*mem), "=&r" (temp) 351 : "r" (val), "m" (*mem)); 352 return (old); 353 } 354 355 uint32_t 356 __sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected, 357 uint32_t desired) 358 { 359 uint32_t old, temp; 360 361 do_sync(); 362 __asm volatile ( 363 "1:" 364 "\tldrex %0, %5\n" /* Load old value. */ 365 "\tcmp %0, %3\n" /* Compare to expected value. */ 366 "\tbne 2f\n" /* Values are unequal. */ 367 "\tstrex %2, %4, %1\n" /* Attempt to store. */ 368 "\tcmp %2, #0\n" /* Did it succeed? */ 369 "\tbne 1b\n" /* Spin if failed. */ 370 "2:" 371 : "=&r" (old), "=m" (*mem), "=&r" (temp) 372 : "r" (expected), "r" (desired), "m" (*mem)); 373 return (old); 374 } 375 376 #define EMIT_FETCH_AND_OP_4(name, op) \ 377 uint32_t \ 378 __sync_##name##_4##_c(uint32_t *mem, uint32_t val) \ 379 { \ 380 uint32_t old, temp1, temp2; \ 381 \ 382 do_sync(); \ 383 __asm volatile ( \ 384 "1:" \ 385 "\tldrex %0, %5\n" /* Load old value. */ \ 386 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ 387 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ 388 "\tcmp %3, #0\n" /* Did it succeed? */ \ 389 "\tbne 1b\n" /* Spin if failed. */ \ 390 : "=&r" (old), "=m" (*mem), "=&r" (temp1), \ 391 "=&r" (temp2) \ 392 : "r" (val), "m" (*mem)); \ 393 return (old); \ 394 } 395 396 EMIT_FETCH_AND_OP_4(fetch_and_add, "add") 397 EMIT_FETCH_AND_OP_4(fetch_and_and, "and") 398 EMIT_FETCH_AND_OP_4(fetch_and_or, "orr") 399 EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub") 400 EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor") 401 402 #ifndef __clang__ 403 __strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1); 404 __strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2); 405 __strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4); 406 __strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1); 407 __strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2); 408 __strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4); 409 __strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1); 410 __strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2); 411 __strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4); 412 __strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1); 413 __strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2); 414 __strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4); 415 __strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1); 416 __strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2); 417 __strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4); 418 __strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1); 419 __strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2); 420 __strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4); 421 __strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1); 422 __strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2); 423 __strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4); 424 #endif 425 426 #endif /* __SYNC_ATOMICS */ 427