/*
 * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu and Matthew Dillon
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.16 2008/09/11 01:11:42 y0netan1 Exp $
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#ifdef INVARIANTS
#include <sys/proc.h>
#endif
#include <sys/priv.h>
#include <ddb/ddb.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/clock.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/ktr.h>

#define	BACKOFF_INITIAL	1
#define	BACKOFF_LIMIT	256

#ifdef SMP

/*
 * Kernel Trace
 */
#if !defined(KTR_SPIN_CONTENTION)
#define	KTR_SPIN_CONTENTION	KTR_ALL
#endif
#define	SPIN_STRING	"spin=%p type=%c"
#define	SPIN_ARG_SIZE	(sizeof(void *) + sizeof(int))

KTR_INFO_MASTER(spin);
KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, backoff, 2,
	 "spin=%p bo1=%d thr=%p bo=%d",
	 ((2 * sizeof(void *)) + (2 * sizeof(int))));
KTR_INFO(KTR_SPIN_CONTENTION, spin, bofail, 3, SPIN_STRING, SPIN_ARG_SIZE);

#define	logspin(name, mtx, type)			\
	KTR_LOG(spin_ ## name, mtx, type)

#define	logspin_backoff(mtx, bo1, thr, bo)		\
	KTR_LOG(spin_backoff, mtx, bo1, thr, bo)

#ifdef INVARIANTS
static int spin_lock_test_mode;
#endif

static int64_t spinlocks_contested1;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD,
	    &spinlocks_contested1, 0, "");

static int64_t spinlocks_contested2;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD,
	    &spinlocks_contested2, 0, "");
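
/*
 * Rough sketch of how the backoff window grows under contention (see
 * exponential_backoff() below).  Each contested attempt spins for up to
 * bo->backoff cpu_pause() iterations (randomized via the TSC when it is
 * available), and the window doubles on each failure:
 *
 *	1, 2, 4, 8, ..., 256 (BACKOFF_LIMIT)
 *
 * Once the window reaches spinlocks_backoff_limit it resets to
 * BACKOFF_INITIAL and the indefinite-wait handling takes over.
 */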
static int spinlocks_backoff_limit = BACKOFF_LIMIT;
SYSCTL_INT(_debug, OID_AUTO, spinlocks_bolim, CTLFLAG_RW,
	   &spinlocks_backoff_limit, 0, "");

struct exponential_backoff {
	int backoff;
	int nsec;
	struct spinlock *mtx;
	sysclock_t base;
};
static int exponential_backoff(struct exponential_backoff *bo);

static __inline
void
exponential_init(struct exponential_backoff *bo, struct spinlock *mtx)
{
	bo->backoff = BACKOFF_INITIAL;
	bo->nsec = 0;
	bo->mtx = mtx;
}

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.  We have to undo the mess
 * we created by returning the shared locks.
 *
 * If there was another exclusive lock holder, the exclusive bit in
 * value will be the only bit set.  We don't have to do anything since
 * restoration does not involve any work.
 *
 * Otherwise we successfully obtained the exclusive bit.  Attempt to
 * clear the shared bits.  If we are able to clear the shared bits
 * we win.  Otherwise we lose and we have to restore the shared bits
 * we couldn't clear (and also clear our exclusive bit).
 */
int
spin_trylock_wr_contested(globaldata_t gd, struct spinlock *mtx, int value)
{
	int bit;

	++spinlocks_contested1;
	if ((value & SPINLOCK_EXCLUSIVE) == 0) {
		while (value) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd == mtx) {
				atomic_swap_int(&mtx->lock, value);
				--gd->gd_spinlocks_wr;
				return (FALSE);
			}
			value &= ~(1 << bit);
		}
		return (TRUE);
	}
	--gd->gd_spinlocks_wr;
	return (FALSE);
}

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.
 *
 * NOTE: If value indicates an exclusively held mutex, no shared bits
 *	 would have been set and we can throw away value.
 */
void
spin_lock_wr_contested(struct spinlock *mtx, int value)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int bit;
	int mask;

	/*
	 * Wait until we can gain exclusive access vs another exclusive
	 * holder.
	 */
	exponential_init(&backoff, mtx);
	++spinlocks_contested1;
	logspin(beg, mtx, 'w');

	while (value & SPINLOCK_EXCLUSIVE) {
		value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE);
		if (exponential_backoff(&backoff)) {
			value &= ~SPINLOCK_EXCLUSIVE;
			break;
		}
	}

	/*
	 * Kill the cached shared bit for our own cpu.  This is the most
	 * common case and there's no sense wasting cpu on it.  Since
	 * spinlocks aren't recursive, we can't own a shared ref on the
	 * spinlock while trying to get an exclusive one.
	 *
	 * If multiple bits are set do not stall on any single cpu.  Check
	 * all cpus that have the cache bit set, then loop and check again,
	 * until we've cleaned all the bits.
	 */
	value &= ~gd->gd_cpumask;

	while ((mask = value) != 0) {
		while (mask) {
			/*
			 * Scan mask rather than value so each pass visits
			 * every remaining cpu instead of stalling on the
			 * lowest still-set bit.
			 */
			bit = bsfl(mask);
			if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
				value &= ~(1 << bit);
			} else if (exponential_backoff(&backoff)) {
				value = 0;
				break;
			}
			mask &= ~(1 << bit);
		}
	}
	logspin(end, mtx, 'w');
}
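
/*
 * Illustrative trace of the cleanup loop above (a sketch, not compiled
 * code; the cpu numbers are hypothetical).  Suppose cpus 0 and 2 had
 * cached shared bits when we swapped in SPINLOCK_EXCLUSIVE, so
 * value == 0x5 after masking out our own cpu:
 *
 *	pass 1:	mask = 0x5.  cpu0 still has gd_spinlock_rd == mtx, so we
 *		back off and keep its bit; cpu2 does not, so its bit is
 *		cleared from value.  value = 0x1.
 *	pass 2:	mask = 0x1.  cpu0 has since released its shared ref, so
 *		value drops to 0 and the exclusive lock is fully
 *		interlocked against all readers.
 */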
/*
 * The cache bit wasn't set for our cpu.  Loop until we can set the bit.
 * As with the spin_lock_rd() inline we need a memory fence after setting
 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for
 * that field to clear.
 */
void
spin_lock_rd_contested(struct spinlock *mtx)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int value = mtx->lock;

	/*
	 * Shortcut the op if we can just set the cache bit.  This case
	 * occurs when the last lock was an exclusive lock.
	 */
	while ((value & SPINLOCK_EXCLUSIVE) == 0) {
		if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
			return;
		value = mtx->lock;
	}

	exponential_init(&backoff, mtx);
	++spinlocks_contested1;

	logspin(beg, mtx, 'r');

	while ((value & gd->gd_cpumask) == 0) {
		if (value & SPINLOCK_EXCLUSIVE) {
			gd->gd_spinlock_rd = NULL;
			if (exponential_backoff(&backoff)) {
				gd->gd_spinlock_rd = mtx;
				break;
			}
			gd->gd_spinlock_rd = mtx;
			cpu_mfence();
		} else {
			if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
				break;
		}
		value = mtx->lock;
	}
	logspin(end, mtx, 'r');
}
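
/*
 * Why the fence above matters (a sketch of the interlock; the
 * spin_lock_rd() inline follows the same protocol):
 *
 *	gd->gd_spinlock_rd = mtx;	advertise our shared intent
 *	cpu_mfence();			make the store globally visible
 *	value = mtx->lock;		... before re-sampling the lock
 *
 * Without the fence the store to gd_spinlock_rd could still be sitting
 * in our store buffer while an exclusive locker scans the per-cpu
 * gd_spinlock_rd fields, allowing it to conclude that no reader exists
 * and proceed concurrently with us.
 */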
/*
 * Handle exponential backoff and indefinite waits.
 *
 * If the system is handling a panic, we hand the spinlock over to the
 * caller after 1 second.  After 10 seconds we attempt to print a debugger
 * backtrace, and after 60 seconds we panic.  We also run pending interrupts
 * in order to allow a console break into DDB.
 */
static
int
exponential_backoff(struct exponential_backoff *bo)
{
	sysclock_t count;
	int backoff;

#ifdef _RDTSC_SUPPORTED_
	if (cpu_feature & CPUID_TSC) {
		backoff =
			(((u_long)rdtsc() ^ (((u_long)curthread) >> 5)) &
			 (bo->backoff - 1)) + BACKOFF_INITIAL;
	} else
#endif
		backoff = bo->backoff;
	logspin_backoff(bo->mtx, bo->backoff, curthread, backoff);

	/*
	 * Quick backoff
	 */
	for (; backoff; --backoff)
		cpu_pause();
	if (bo->backoff < spinlocks_backoff_limit) {
		bo->backoff <<= 1;
		return (FALSE);
	} else {
		bo->backoff = BACKOFF_INITIAL;
	}

	logspin(bofail, bo->mtx, 'u');

	/*
	 * Indefinite
	 */
	++spinlocks_contested2;
	cpu_spinlock_contested();
	if (bo->nsec == 0) {
		bo->base = sys_cputimer->count();
		bo->nsec = 1;
	}

	count = sys_cputimer->count();
	if (count - bo->base > sys_cputimer->freq) {
		kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx);
		if (panicstr)
			return (TRUE);
#if defined(INVARIANTS) && defined(DDB)
		if (spin_lock_test_mode) {
			db_print_backtrace();
			return (TRUE);
		}
#endif
		++bo->nsec;
#if defined(INVARIANTS) && defined(DDB)
		if (bo->nsec == 11)
			db_print_backtrace();
#endif
		if (bo->nsec == 60)
			panic("spin_lock: %p, indefinite wait!\n", bo->mtx);
		splz();
		bo->base = count;
	}
	return (FALSE);
}

/*
 * If INVARIANTS is enabled various spinlock timing tests can be run
 * by setting debug.spin_lock_test:
 *
 *	1	Test the indefinite wait code
 *	2	Time the best-case exclusive lock overhead (spin_test_count)
 *	3	Time the best-case shared lock overhead (spin_test_count)
 */

#ifdef INVARIANTS

static int spin_test_count = 10000000;
SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0, "");

static int
sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
{
	struct spinlock mtx;
	int error;
	int value = 0;
	int i;

	if ((error = priv_check(curthread, PRIV_ROOT)) != 0)
		return (error);
	if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
		return (error);

	/*
	 * Indefinite wait test
	 */
	if (value == 1) {
		spin_init(&mtx);
		spin_lock_wr(&mtx);	/* force an indefinite wait */
		spin_lock_test_mode = 1;
		spin_lock_wr(&mtx);
		spin_unlock_wr(&mtx);	/* Clean up the spinlock count */
		spin_unlock_wr(&mtx);
		spin_lock_test_mode = 0;
	}

	/*
	 * Time best-case exclusive spinlocks
	 */
	if (value == 2) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_wr_quick(gd, &mtx);
			spin_unlock_wr_quick(gd, &mtx);
		}
	}

	/*
	 * Time best-case shared spinlocks
	 */
	if (value == 3) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_rd_quick(gd, &mtx);
			spin_unlock_rd_quick(gd, &mtx);
		}
	}
	return (0);
}

SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
	    0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");

#endif	/* INVARIANTS */
#endif	/* SMP */
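
/*
 * Example of driving the INVARIANTS-only tests above from userland (a
 * sketch; the mode values are the ones documented above
 * sysctl_spin_lock_test):
 *
 *	sysctl debug.spin_test_count=1000000
 *	sysctl debug.spin_lock_test=2	# time best-case exclusive locks
 */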