1 /* $NetBSD: kern_turnstile.c,v 1.28 2009/11/18 12:26:22 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe and Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Turnstiles are described in detail in: 34 * 35 * Solaris Internals: Core Kernel Architecture, Jim Mauro and 36 * Richard McDougall. 37 * 38 * Turnstiles are kept in a hash table. There are likely to be many more 39 * synchronisation objects than there are threads. Since a thread can block 40 * on only one lock at a time, we only need one turnstile per thread, and 41 * so they are allocated at thread creation time. 42 * 43 * When a thread decides it needs to block on a lock, it looks up the 44 * active turnstile for that lock. If no active turnstile exists, then 45 * the process lends its turnstile to the lock. If there is already an 46 * active turnstile for the lock, the thread places its turnstile on a 47 * list of free turnstiles, and references the active one instead. 48 * 49 * The act of looking up the turnstile acquires an interlock on the sleep 50 * queue. If a thread decides it doesn't need to block after all, then this 51 * interlock must be released by explicitly aborting the turnstile 52 * operation. 53 * 54 * When a thread is awakened, it needs to get its turnstile back. If there 55 * are still other threads waiting in the active turnstile, the thread 56 * grabs a free turnstile off the free list. Otherwise, it can take back 57 * the active turnstile from the lock (thus deactivating the turnstile). 58 * 59 * Turnstiles are the place to do priority inheritence. 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.28 2009/11/18 12:26:22 yamt Exp $"); 64 65 #include <sys/param.h> 66 #include <sys/lockdebug.h> 67 #include <sys/pool.h> 68 #include <sys/proc.h> 69 #include <sys/sleepq.h> 70 #include <sys/systm.h> 71 72 #include <uvm/uvm_extern.h> 73 74 #define TS_HASH_SIZE 64 75 #define TS_HASH_MASK (TS_HASH_SIZE - 1) 76 #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) 77 78 tschain_t turnstile_tab[TS_HASH_SIZE]; 79 pool_cache_t turnstile_cache; 80 81 int turnstile_ctor(void *, void *, int); 82 83 extern turnstile_t turnstile0; 84 85 /* 86 * turnstile_init: 87 * 88 * Initialize the turnstile mechanism. 89 */ 90 void 91 turnstile_init(void) 92 { 93 tschain_t *tc; 94 int i; 95 96 for (i = 0; i < TS_HASH_SIZE; i++) { 97 tc = &turnstile_tab[i]; 98 LIST_INIT(&tc->tc_chain); 99 tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); 100 } 101 102 turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, 103 "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); 104 KASSERT(turnstile_cache != NULL); 105 106 (void)turnstile_ctor(NULL, &turnstile0, 0); 107 } 108 109 /* 110 * turnstile_ctor: 111 * 112 * Constructor for turnstiles. 113 */ 114 int 115 turnstile_ctor(void *arg, void *obj, int flags) 116 { 117 turnstile_t *ts = obj; 118 119 memset(ts, 0, sizeof(*ts)); 120 sleepq_init(&ts->ts_sleepq[TS_READER_Q]); 121 sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]); 122 return (0); 123 } 124 125 /* 126 * turnstile_remove: 127 * 128 * Remove an LWP from a turnstile sleep queue and wake it. 129 */ 130 static inline void 131 turnstile_remove(turnstile_t *ts, lwp_t *l, int q) 132 { 133 turnstile_t *nts; 134 135 KASSERT(l->l_ts == ts); 136 137 /* 138 * This process is no longer using the active turnstile. 139 * Find an inactive one on the free list to give to it. 140 */ 141 if ((nts = ts->ts_free) != NULL) { 142 KASSERT(TS_ALL_WAITERS(ts) > 1); 143 l->l_ts = nts; 144 ts->ts_free = nts->ts_free; 145 nts->ts_free = NULL; 146 } else { 147 /* 148 * If the free list is empty, this is the last 149 * waiter. 150 */ 151 KASSERT(TS_ALL_WAITERS(ts) == 1); 152 LIST_REMOVE(ts, ts_chain); 153 } 154 155 ts->ts_waiters[q]--; 156 sleepq_remove(&ts->ts_sleepq[q], l); 157 } 158 159 /* 160 * turnstile_lookup: 161 * 162 * Look up the turnstile for the specified lock. This acquires and 163 * holds the turnstile chain lock (sleep queue interlock). 164 */ 165 turnstile_t * 166 turnstile_lookup(wchan_t obj) 167 { 168 turnstile_t *ts; 169 tschain_t *tc; 170 171 tc = &turnstile_tab[TS_HASH(obj)]; 172 mutex_spin_enter(tc->tc_mutex); 173 174 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 175 if (ts->ts_obj == obj) 176 return (ts); 177 178 /* 179 * No turnstile yet for this lock. No problem, turnstile_block() 180 * handles this by fetching the turnstile from the blocking thread. 181 */ 182 return (NULL); 183 } 184 185 /* 186 * turnstile_exit: 187 * 188 * Abort a turnstile operation. 189 */ 190 void 191 turnstile_exit(wchan_t obj) 192 { 193 tschain_t *tc; 194 195 tc = &turnstile_tab[TS_HASH(obj)]; 196 mutex_spin_exit(tc->tc_mutex); 197 } 198 199 /* 200 * turnstile_block: 201 * 202 * Enter an object into the turnstile chain and prepare the current 203 * LWP for sleep. 204 */ 205 void 206 turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) 207 { 208 lwp_t *l; 209 lwp_t *cur; /* cached curlwp */ 210 lwp_t *owner; 211 turnstile_t *ots; 212 tschain_t *tc; 213 sleepq_t *sq; 214 pri_t prio, obase; 215 216 tc = &turnstile_tab[TS_HASH(obj)]; 217 l = cur = curlwp; 218 219 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 220 KASSERT(mutex_owned(tc->tc_mutex)); 221 KASSERT(l != NULL && l->l_ts != NULL); 222 223 if (ts == NULL) { 224 /* 225 * We are the first thread to wait for this object; 226 * lend our turnstile to it. 227 */ 228 ts = l->l_ts; 229 KASSERT(TS_ALL_WAITERS(ts) == 0); 230 KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) && 231 TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); 232 ts->ts_obj = obj; 233 ts->ts_inheritor = NULL; 234 LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); 235 } else { 236 /* 237 * Object already has a turnstile. Put our turnstile 238 * onto the free list, and reference the existing 239 * turnstile instead. 240 */ 241 ots = l->l_ts; 242 KASSERT(ots->ts_free == NULL); 243 ots->ts_free = ts->ts_free; 244 ts->ts_free = ots; 245 l->l_ts = ts; 246 247 KASSERT(ts->ts_obj == obj); 248 KASSERT(TS_ALL_WAITERS(ts) != 0); 249 KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) || 250 !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); 251 } 252 253 sq = &ts->ts_sleepq[q]; 254 ts->ts_waiters[q]++; 255 sleepq_enter(sq, l, tc->tc_mutex); 256 LOCKDEBUG_BARRIER(tc->tc_mutex, 1); 257 l->l_kpriority = true; 258 obase = l->l_kpribase; 259 if (obase < PRI_KTHREAD) 260 l->l_kpribase = PRI_KTHREAD; 261 sleepq_enqueue(sq, obj, "tstile", sobj); 262 263 /* 264 * Disable preemption across this entire block, as we may drop 265 * scheduler locks (allowing preemption), and would prefer not 266 * to be interrupted while in a state of flux. 267 */ 268 KPREEMPT_DISABLE(l); 269 270 /* 271 * Lend our priority to lwps on the blocking chain. 272 * 273 * NOTE: if you get a panic in this code block, it is likely that 274 * a lock has been destroyed or corrupted while still in use. Try 275 * compiling a kernel with LOCKDEBUG to pinpoint the problem. 276 */ 277 prio = lwp_eprio(l); 278 KASSERT(cur == l); 279 KASSERT(tc->tc_mutex == cur->l_mutex); 280 for (;;) { 281 bool dolock; 282 283 if (l->l_wchan == NULL) 284 break; 285 286 owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); 287 if (owner == NULL) 288 break; 289 290 /* The owner may have changed as we have dropped the tc lock */ 291 if (cur == owner) { 292 /* 293 * we own the lock: stop here, sleepq_block() 294 * should wake up immediatly 295 */ 296 break; 297 } 298 if (l->l_mutex != owner->l_mutex) 299 dolock = true; 300 else 301 dolock = false; 302 if (l == owner || (dolock && !lwp_trylock(owner))) { 303 /* 304 * restart from curlwp. 305 * Note that there may be a livelock here: 306 * the owner may try grabing cur's lock (which is 307 * the tc lock) while we're trying to grab 308 * the owner's lock. 309 */ 310 lwp_unlock(l); 311 l = cur; 312 lwp_lock(l); 313 prio = lwp_eprio(l); 314 continue; 315 } 316 if (prio <= lwp_eprio(owner)) { 317 if (dolock) 318 lwp_unlock(owner); 319 break; 320 } 321 ts = l->l_ts; 322 KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL); 323 if (ts->ts_inheritor == NULL) { 324 ts->ts_inheritor = owner; 325 ts->ts_eprio = prio; 326 SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain); 327 lwp_lendpri(owner, prio); 328 } else if (prio > ts->ts_eprio) { 329 ts->ts_eprio = prio; 330 lwp_lendpri(owner, prio); 331 } 332 if (dolock) 333 lwp_unlock(l); 334 l = owner; 335 } 336 LOCKDEBUG_BARRIER(l->l_mutex, 1); 337 if (cur->l_mutex != l->l_mutex) { 338 lwp_unlock(l); 339 lwp_lock(cur); 340 } 341 LOCKDEBUG_BARRIER(cur->l_mutex, 1); 342 343 sleepq_block(0, false); 344 cur->l_kpribase = obase; 345 KPREEMPT_ENABLE(cur); 346 } 347 348 /* 349 * turnstile_wakeup: 350 * 351 * Wake up the specified number of threads that are blocked 352 * in a turnstile. 353 */ 354 void 355 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) 356 { 357 sleepq_t *sq; 358 tschain_t *tc; 359 lwp_t *l; 360 361 tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; 362 sq = &ts->ts_sleepq[q]; 363 364 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 365 KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); 366 KASSERT(mutex_owned(tc->tc_mutex)); 367 KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); 368 369 /* 370 * restore inherited priority if necessary. 371 */ 372 373 if (ts->ts_inheritor != NULL) { 374 turnstile_t *iter; 375 turnstile_t *next; 376 turnstile_t *prev = NULL; 377 pri_t prio; 378 bool dolock; 379 380 ts->ts_inheritor = NULL; 381 l = curlwp; 382 383 dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; 384 if (dolock) { 385 lwp_lock(l); 386 } 387 388 /* 389 * the following loop does two things. 390 * 391 * - remove ts from the list. 392 * 393 * - from the rest of the list, find the highest priority. 394 */ 395 396 prio = -1; 397 KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); 398 for (iter = SLIST_FIRST(&l->l_pi_lenders); 399 iter != NULL; iter = next) { 400 KASSERT(lwp_eprio(l) >= ts->ts_eprio); 401 next = SLIST_NEXT(iter, ts_pichain); 402 if (iter == ts) { 403 if (prev == NULL) { 404 SLIST_REMOVE_HEAD(&l->l_pi_lenders, 405 ts_pichain); 406 } else { 407 SLIST_REMOVE_AFTER(prev, ts_pichain); 408 } 409 } else if (prio < iter->ts_eprio) { 410 prio = iter->ts_eprio; 411 } 412 prev = iter; 413 } 414 415 lwp_lendpri(l, prio); 416 417 if (dolock) { 418 lwp_unlock(l); 419 } 420 } 421 422 if (nl != NULL) { 423 #if defined(DEBUG) || defined(LOCKDEBUG) 424 TAILQ_FOREACH(l, sq, l_sleepchain) { 425 if (l == nl) 426 break; 427 } 428 if (l == NULL) 429 panic("turnstile_wakeup: nl not on sleepq"); 430 #endif 431 turnstile_remove(ts, nl, q); 432 } else { 433 while (count-- > 0) { 434 l = TAILQ_FIRST(sq); 435 KASSERT(l != NULL); 436 turnstile_remove(ts, l, q); 437 } 438 } 439 mutex_spin_exit(tc->tc_mutex); 440 } 441 442 /* 443 * turnstile_unsleep: 444 * 445 * Remove an LWP from the turnstile. This is called when the LWP has 446 * not been awoken normally but instead interrupted: for example, if it 447 * has received a signal. It's not a valid action for turnstiles, 448 * since LWPs blocking on a turnstile are not interruptable. 449 */ 450 void 451 turnstile_unsleep(lwp_t *l, bool cleanup) 452 { 453 454 lwp_unlock(l); 455 panic("turnstile_unsleep"); 456 } 457 458 /* 459 * turnstile_changepri: 460 * 461 * Adjust the priority of an LWP residing on a turnstile. 462 */ 463 void 464 turnstile_changepri(lwp_t *l, pri_t pri) 465 { 466 467 /* XXX priority inheritance */ 468 sleepq_changepri(l, pri); 469 } 470 471 #if defined(LOCKDEBUG) 472 /* 473 * turnstile_print: 474 * 475 * Given the address of a lock object, print the contents of a 476 * turnstile. 477 */ 478 void 479 turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) 480 { 481 turnstile_t *ts; 482 tschain_t *tc; 483 sleepq_t *rsq, *wsq; 484 lwp_t *l; 485 486 tc = &turnstile_tab[TS_HASH(obj)]; 487 488 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 489 if (ts->ts_obj == obj) 490 break; 491 492 (*pr)("Turnstile chain at %p.\n", tc); 493 if (ts == NULL) { 494 (*pr)("=> No active turnstile for this lock.\n"); 495 return; 496 } 497 498 rsq = &ts->ts_sleepq[TS_READER_Q]; 499 wsq = &ts->ts_sleepq[TS_WRITER_Q]; 500 501 (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq); 502 503 (*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q)); 504 TAILQ_FOREACH(l, rsq, l_sleepchain) { 505 (*pr)(" %p", l); 506 } 507 (*pr)("\n"); 508 509 (*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q)); 510 TAILQ_FOREACH(l, wsq, l_sleepchain) { 511 (*pr)(" %p", l); 512 } 513 (*pr)("\n"); 514 } 515 #endif /* LOCKDEBUG */ 516