1 /* 2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Jeffrey M. Hsu. and Matthew Dillon 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of The DragonFly Project nor the names of its 16 * contributors may be used to endorse or promote products derived 17 * from this software without specific, prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.16 2008/09/11 01:11:42 y0netan1 Exp $ 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/types.h> 38 #include <sys/kernel.h> 39 #include <sys/sysctl.h> 40 #ifdef INVARIANTS 41 #include <sys/proc.h> 42 #endif 43 #include <sys/priv.h> 44 #include <machine/atomic.h> 45 #include <machine/cpufunc.h> 46 #include <machine/specialreg.h> 47 #include <machine/clock.h> 48 #include <sys/spinlock.h> 49 #include <sys/spinlock2.h> 50 #include <sys/ktr.h> 51 52 #define BACKOFF_INITIAL 1 53 #define BACKOFF_LIMIT 256 54 55 #ifdef SMP 56 57 /* 58 * Kernal Trace 59 */ 60 #if !defined(KTR_SPIN_CONTENTION) 61 #define KTR_SPIN_CONTENTION KTR_ALL 62 #endif 63 #define SPIN_STRING "spin=%p type=%c" 64 #define SPIN_ARG_SIZE (sizeof(void *) + sizeof(int)) 65 66 KTR_INFO_MASTER(spin); 67 KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE); 68 KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE); 69 KTR_INFO(KTR_SPIN_CONTENTION, spin, backoff, 2, 70 "spin=%p bo1=%d thr=%p bo=%d", 71 ((2 * sizeof(void *)) + (2 * sizeof(int)))); 72 KTR_INFO(KTR_SPIN_CONTENTION, spin, bofail, 3, SPIN_STRING, SPIN_ARG_SIZE); 73 74 #define logspin(name, mtx, type) \ 75 KTR_LOG(spin_ ## name, mtx, type) 76 77 #define logspin_backoff(mtx, bo1, thr, bo) \ 78 KTR_LOG(spin_backoff, mtx, bo1, thr, bo) 79 80 #ifdef INVARIANTS 81 static int spin_lock_test_mode; 82 #endif 83 84 static int64_t spinlocks_contested1; 85 SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD, 86 &spinlocks_contested1, 0, ""); 87 88 static int64_t spinlocks_contested2; 89 SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD, 90 &spinlocks_contested2, 0, ""); 91 92 static int spinlocks_backoff_limit = BACKOFF_LIMIT; 93 SYSCTL_INT(_debug, OID_AUTO, spinlocks_bolim, CTLFLAG_RW, 94 &spinlocks_backoff_limit, 0, ""); 95 96 struct exponential_backoff { 97 int backoff; 98 int nsec; 99 struct spinlock *mtx; 100 sysclock_t base; 101 }; 102 static int exponential_backoff(struct exponential_backoff *bo); 103 104 static __inline 105 void 106 exponential_init(struct exponential_backoff *bo, struct spinlock *mtx) 107 { 108 bo->backoff = BACKOFF_INITIAL; 109 bo->nsec = 0; 110 bo->mtx = mtx; 111 } 112 113 /* 114 * We were either contested due to another exclusive lock holder, 115 * or due to the presence of shared locks. We have to undo the mess 116 * we created by returning the shared locks. 117 * 118 * If there was another exclusive lock holder only the exclusive bit 119 * in value will be the only bit set. We don't have to do anything since 120 * restoration does not involve any work. 121 * 122 * Otherwise we successfully obtained the exclusive bit. Attempt to 123 * clear the shared bits. If we are able to clear the shared bits 124 * we win. Otherwise we lose and we have to restore the shared bits 125 * we couldn't clear (and also clear our exclusive bit). 126 */ 127 int 128 spin_trylock_wr_contested(globaldata_t gd, struct spinlock *mtx, int value) 129 { 130 int bit; 131 132 ++spinlocks_contested1; 133 if ((value & SPINLOCK_EXCLUSIVE) == 0) { 134 while (value) { 135 bit = bsfl(value); 136 if (globaldata_find(bit)->gd_spinlock_rd == mtx) { 137 atomic_swap_int(&mtx->lock, value); 138 --gd->gd_spinlocks_wr; 139 return (FALSE); 140 } 141 value &= ~(1 << bit); 142 } 143 return (TRUE); 144 } 145 --gd->gd_spinlocks_wr; 146 return (FALSE); 147 } 148 149 /* 150 * We were either contested due to another exclusive lock holder, 151 * or due to the presence of shared locks 152 * 153 * NOTE: If value indicates an exclusively held mutex, no shared bits 154 * would have been set and we can throw away value. 155 */ 156 void 157 spin_lock_wr_contested(struct spinlock *mtx, int value) 158 { 159 struct exponential_backoff backoff; 160 globaldata_t gd = mycpu; 161 int bit; 162 int mask; 163 164 /* 165 * Wait until we can gain exclusive access vs another exclusive 166 * holder. 167 */ 168 exponential_init(&backoff, mtx); 169 ++spinlocks_contested1; 170 logspin(beg, mtx, 'w'); 171 172 while (value & SPINLOCK_EXCLUSIVE) { 173 value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE); 174 if (exponential_backoff(&backoff)) { 175 value &= ~SPINLOCK_EXCLUSIVE; 176 break; 177 } 178 } 179 180 /* 181 * Kill the cached shared bit for our own cpu. This is the most 182 * common case and there's no sense wasting cpu on it. Since 183 * spinlocks aren't recursive, we can't own a shared ref on the 184 * spinlock while trying to get an exclusive one. 185 * 186 * If multiple bits are set do not stall on any single cpu. Check 187 * all cpus that have the cache bit set, then loop and check again, 188 * until we've cleaned all the bits. 189 */ 190 value &= ~gd->gd_cpumask; 191 192 while ((mask = value) != 0) { 193 while (mask) { 194 bit = bsfl(value); 195 if (globaldata_find(bit)->gd_spinlock_rd != mtx) { 196 value &= ~(1 << bit); 197 } else if (exponential_backoff(&backoff)) { 198 value = 0; 199 break; 200 } 201 mask &= ~(1 << bit); 202 } 203 } 204 logspin(end, mtx, 'w'); 205 } 206 207 /* 208 * The cache bit wasn't set for our cpu. Loop until we can set the bit. 209 * As with the spin_lock_rd() inline we need a memory fence after setting 210 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for 211 * that field to clear. 212 */ 213 void 214 spin_lock_rd_contested(struct spinlock *mtx) 215 { 216 struct exponential_backoff backoff; 217 globaldata_t gd = mycpu; 218 int value = mtx->lock; 219 220 /* 221 * Shortcut the op if we can just set the cache bit. This case 222 * occurs when the last lock was an exclusive lock. 223 */ 224 while ((value & SPINLOCK_EXCLUSIVE) == 0) { 225 if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask)) 226 return; 227 value = mtx->lock; 228 } 229 230 exponential_init(&backoff, mtx); 231 ++spinlocks_contested1; 232 233 logspin(beg, mtx, 'r'); 234 235 while ((value & gd->gd_cpumask) == 0) { 236 if (value & SPINLOCK_EXCLUSIVE) { 237 gd->gd_spinlock_rd = NULL; 238 if (exponential_backoff(&backoff)) { 239 gd->gd_spinlock_rd = mtx; 240 break; 241 } 242 gd->gd_spinlock_rd = mtx; 243 cpu_mfence(); 244 } else { 245 if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask)) 246 break; 247 } 248 value = mtx->lock; 249 } 250 logspin(end, mtx, 'r'); 251 } 252 253 /* 254 * Handle exponential backoff and indefinite waits. 255 * 256 * If the system is handling a panic we hand the spinlock over to the caller 257 * after 1 second. After 10 seconds we attempt to print a debugger 258 * backtrace. We also run pending interrupts in order to allow a console 259 * break into DDB. 260 */ 261 static 262 int 263 exponential_backoff(struct exponential_backoff *bo) 264 { 265 sysclock_t count; 266 int backoff; 267 268 #ifdef _RDTSC_SUPPORTED_ 269 if (cpu_feature & CPUID_TSC) { 270 backoff = 271 (((u_long)rdtsc() ^ (((u_long)curthread) >> 5)) & 272 (bo->backoff - 1)) + BACKOFF_INITIAL; 273 } else 274 #endif 275 backoff = bo->backoff; 276 logspin_backoff(bo->mtx, bo->backoff, curthread, backoff); 277 278 /* 279 * Quick backoff 280 */ 281 for (; backoff; --backoff) 282 cpu_pause(); 283 if (bo->backoff < spinlocks_backoff_limit) { 284 bo->backoff <<= 1; 285 return (FALSE); 286 } else { 287 bo->backoff = BACKOFF_INITIAL; 288 } 289 290 logspin(bofail, bo->mtx, 'u'); 291 292 /* 293 * Indefinite 294 */ 295 ++spinlocks_contested2; 296 cpu_spinlock_contested(); 297 if (bo->nsec == 0) { 298 bo->base = sys_cputimer->count(); 299 bo->nsec = 1; 300 } 301 302 count = sys_cputimer->count(); 303 if (count - bo->base > sys_cputimer->freq) { 304 kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx); 305 if (panicstr) 306 return (TRUE); 307 #if defined(INVARIANTS) 308 if (spin_lock_test_mode) { 309 print_backtrace(); 310 return (TRUE); 311 } 312 #endif 313 ++bo->nsec; 314 #if defined(INVARIANTS) 315 if (bo->nsec == 11) 316 print_backtrace(); 317 #endif 318 if (bo->nsec == 60) 319 panic("spin_lock: %p, indefinite wait!\n", bo->mtx); 320 splz(); 321 bo->base = count; 322 } 323 return (FALSE); 324 } 325 326 /* 327 * If INVARIANTS is enabled various spinlock timing tests can be run 328 * by setting debug.spin_lock_test: 329 * 330 * 1 Test the indefinite wait code 331 * 2 Time the best-case exclusive lock overhead (spin_test_count) 332 * 3 Time the best-case shared lock overhead (spin_test_count) 333 */ 334 335 #ifdef INVARIANTS 336 337 static int spin_test_count = 10000000; 338 SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0, ""); 339 340 static int 341 sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS) 342 { 343 struct spinlock mtx; 344 int error; 345 int value = 0; 346 int i; 347 348 if ((error = priv_check(curthread, PRIV_ROOT)) != 0) 349 return (error); 350 if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0) 351 return (error); 352 353 /* 354 * Indefinite wait test 355 */ 356 if (value == 1) { 357 spin_init(&mtx); 358 spin_lock_wr(&mtx); /* force an indefinite wait */ 359 spin_lock_test_mode = 1; 360 spin_lock_wr(&mtx); 361 spin_unlock_wr(&mtx); /* Clean up the spinlock count */ 362 spin_unlock_wr(&mtx); 363 spin_lock_test_mode = 0; 364 } 365 366 /* 367 * Time best-case exclusive spinlocks 368 */ 369 if (value == 2) { 370 globaldata_t gd = mycpu; 371 372 spin_init(&mtx); 373 for (i = spin_test_count; i > 0; --i) { 374 spin_lock_wr_quick(gd, &mtx); 375 spin_unlock_wr_quick(gd, &mtx); 376 } 377 } 378 379 /* 380 * Time best-case shared spinlocks 381 */ 382 if (value == 3) { 383 globaldata_t gd = mycpu; 384 385 spin_init(&mtx); 386 for (i = spin_test_count; i > 0; --i) { 387 spin_lock_rd_quick(gd, &mtx); 388 spin_unlock_rd_quick(gd, &mtx); 389 } 390 } 391 return (0); 392 } 393 394 SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT, 395 0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code"); 396 397 #endif /* INVARIANTS */ 398 #endif /* SMP */ 399