/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
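
/*
 * Usage sketch (illustrative only; "v" and "inc" are hypothetical
 * variables): on a uniprocessor kernel the macro above turns a plain
 * read-modify-write sequence into one that is atomic with respect to
 * interrupts, which is all that is needed when no other CPU exists:
 *
 *	uint32_t old;
 *
 *	WITHOUT_INTERRUPTS({
 *		old = v;
 *		v = old + inc;
 *	});
 *
 * The "do s while (0)" in the expansion lets the caller pass either a
 * single statement or a braced block.
 */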

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#else
static inline void
do_sync(void)
{

	dmb();
}
#endif

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

/*
 * The implementations below carry a "_c" suffix and are bound to the
 * real __sync_* names afterwards: Clang maps them with the
 * redefine_extname pragmas here, while GCC builds rely on the
 * __strong_reference aliases at the bottom of this file.
 */
#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
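
/*
 * For example: a uint8_t at address 0x20003 is byte 3 of the 32-bit
 * word at 0x20000, and a uint16_t at address 0x20002 occupies bytes 2
 * and 3 of that word; round_to_word() recovers the word address by
 * clearing the two low address bits (0x20003 & ~3 == 0x20000).  The
 * ldrex/strex pairs used below can then operate on the whole word.
 */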

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define	EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)				\
uintN_t									\
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)		\
{									\
	uint32_t *mem32;						\
	reg_t val32, negmask, old;					\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	negmask.v32 = 0xffffffff;					\
	put_##N(&negmask, mem, 0);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %6\n"	/* Load old value. */		\
		"\tand %2, %5, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %4\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
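
/*
 * Worked example (illustrative, assuming a little-endian layout): to
 * exchange in val = 0x5a for a uint8_t that lives at offset 1 within
 * its word, the macro above builds
 *
 *	val32.v32   = 0x00005a00	(new value placed in byte 1)
 *	negmask.v32 = 0xffff00ff	(clears byte 1 only)
 *
 * so every loop iteration stores (old & negmask) | val32, replacing
 * byte 1 while carrying the other three bytes through unchanged;
 * get_1() then extracts the previous contents of byte 1 as the return
 * value.
 */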

#define	EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)				\
uintN_t									\
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,	\
    uintN_t desired)							\
{									\
	uint32_t *mem32;						\
	reg_t expected32, desired32, posmask, old;			\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	expected32.v32 = 0x00000000;					\
	put_##N(&expected32, mem, expected);				\
	desired32.v32 = 0x00000000;					\
	put_##N(&desired32, mem, desired);				\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %8\n"	/* Load old value. */		\
		"\tand %2, %6, %0\n"	/* Isolate the old value. */	\
		"\tcmp %2, %4\n"	/* Compare to expected value. */\
		"\tbne 2f\n"		/* Values are unequal. */	\
		"\tand %2, %7, %0\n"	/* Remove the old value. */	\
		"\torr %2, %5\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		"2:"							\
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (expected32.v32), "r" (desired32.v32),		\
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32));	\
	return (get_##N(&old, mem));					\
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define	EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)		\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, posmask, old;					\
	uint32_t negmask, temp1, temp2;					\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = 0x00000000;						\
	put_##N(&val32, mem, val);					\
	posmask.v32 = 0x00000000;					\
	put_##N(&posmask, mem, ~0);					\
	negmask = ~posmask.v32;						\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %7\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tand %2, %5\n"	/* Isolate the new value. */	\
		"\tand %3, %6, %0\n"	/* Remove the old value. */	\
		"\torr %2, %2, %3\n"	/* Put in the new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask),	\
		  "m" (*mem32));					\
	return (get_##N(&old, mem));					\
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define	EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)	\
uintN_t									\
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)			\
{									\
	uint32_t *mem32;						\
	reg_t val32, old;						\
	uint32_t temp1, temp2;						\
									\
	mem32 = round_to_word(mem);					\
	val32.v32 = idempotence ? 0xffffffff : 0x00000000;		\
	put_##N(&val32, mem, val);					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),	\
		  "=&r" (temp2)						\
		: "r" (val32.v32), "m" (*mem32));			\
	return (get_##N(&old, mem));					\
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
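
/*
 * The "idempotence" argument above selects the identity element of the
 * operation as the fill pattern for the bytes outside the target lane:
 * all ones for AND (x & 1...1 == x) and all zeroes for OR/XOR
 * (x | 0 == x, x ^ 0 == x).  A single 32-bit instruction can therefore
 * update the 8- or 16-bit lane while leaving the neighbouring bytes
 * untouched.  The arithmetic variants cannot use this trick, because
 * carries and borrows propagate across byte boundaries; that is why
 * they mask the result with posmask/negmask instead.
 */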

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}

#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* __SYNC_ATOMICS */
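
/*
 * Usage sketch (illustrative only, not part of this file's interface):
 * callers never invoke the "_c" functions directly.  On a toolchain
 * that does not expand the __sync_* builtins inline, code such as
 *
 *	uint16_t count, old;
 *	...
 *	old = __sync_fetch_and_add(&count, 1);
 *
 * is lowered to a call to __sync_fetch_and_add_2(), which the
 * redefine_extname pragmas (Clang) or __strong_reference aliases (GCC)
 * above bind to __sync_fetch_and_add_2_c().
 */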