/*	$OpenBSD: kern_rwlock.c,v 1.47 2021/02/08 08:18:45 mpi Exp $	*/

/*
 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
#include <sys/witness.h>

void	rw_do_exit(struct rwlock *, unsigned long);

/* XXX - temporary measure until proc0 is properly aligned */
#define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)

/*
 * Other OSes implement a more sophisticated mechanism to determine how long
 * the process attempting to acquire the lock should spin. We start with the
 * simplest approach: we make at most RW_SPINS attempts before giving up and
 * putting the process on the sleep queue.
 */
#define RW_SPINS	1000

#ifdef MULTIPROCESSOR
#define rw_cas(p, o, n)	(atomic_cas_ulong(p, o, n) != o)
#else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	if (*p != o)
		return (1);
	*p = n;

	return (0);
}
#endif

/*
 * Magic wand for lock operations. Every operation checks if certain
 * flags are set and if they aren't, it increments the lock with some
 * value (that might need some computing in a few cases). If the operation
 * fails, we need to set certain flags while waiting for the lock.
 *
 * RW_WRITE	The lock must be completely empty. We increment it with
 *		RWLOCK_WRLOCK and the proc pointer of the holder.
 *		Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
 * RW_READ	RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
 *		with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
 */
static const struct rwlock_op {
	unsigned long inc;
	unsigned long check;
	unsigned long wait_set;
	long proc_mult;
	int wait_prio;
} rw_ops[] = {
	{	/* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,
		PLOCK - 4
	},
	{	/* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK,
		RWLOCK_WAIT,
		0,
		PLOCK
	},
	{	/* Sparse Entry. */
		0,
	},
	{	/* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,
		0,
		-1,
		PLOCK
	},
};

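/*
 * Worked example of the table above (a sketch of what rw_enter() below
 * computes, not an additional interface): for RW_WRITE, check is ULONG_MAX,
 * so any non-zero owner word sends us to the slow path; on success the
 * owner word becomes
 *
 *	0 + RWLOCK_WRLOCK + RW_PROC(curproc) * 1
 *
 * i.e. the holder's proc pointer with the write bit set.  For RW_READ,
 * proc_mult is 0 and every reader simply adds RWLOCK_READ_INCR, so the
 * upper bits count the readers.  For RW_DOWNGRADE, the increment subtracts
 * the proc pointer and the write bit again and leaves a single read count
 * behind.
 */
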
void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & RWLOCK_WRLOCK) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}

void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}

void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_do_exit(rwl, 0);
}

void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_wrlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);

	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_do_exit(rwl, RWLOCK_WRLOCK);
}

#ifdef DIAGNOSTIC
/*
 * Put the diagnostic functions here to keep the main code free
 * from ifdef clutter.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}

#else
#define rw_enter_diag(r, f)
#endif

static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	(void)type;
	(void)lo_flags;
#endif
}

void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}

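/*
 * Minimal usage sketch of the fast paths above (the lock and names are
 * hypothetical, not part of this file; see rwlock(9) for the full API):
 *
 *	struct rwlock frob_lock;
 *
 *	rw_init(&frob_lock, "froblk");
 *
 *	rw_enter_read(&frob_lock);	-- shared access
 *	...
 *	rw_exit_read(&frob_lock);
 *
 *	rw_enter_write(&frob_lock);	-- exclusive access
 *	...
 *	rw_exit_write(&frob_lock);
 *
 * rw_init() is the wrapper around _rw_init_flags() provided by
 * <sys/rwlock.h>.
 */
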
int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
#ifdef MULTIPROCESSOR
	/*
	 * If the process holds the kernel lock, we want to give up the CPU
	 * as soon as possible so other processes waiting for the kernel lock
	 * can progress. Hence no spinning if we hold the kernel lock.
	 */
	unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
	int error, prio;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

#ifdef MULTIPROCESSOR
		/*
		 * It only makes sense to spin while the lock is held
		 * by a writer.
		 */
		if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
			spin--;
			CPU_BUSY_CYCLE();
			continue;
		}
#endif

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		prio = op->wait_prio;
		if (flags & RW_INTR)
			prio |= PCATCH;
		sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);

		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		error = sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) && (error != 0))
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If the old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, we
	 * downgraded a write lock and there may be read waiters; wake
	 * them so they can retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}

void
rw_exit(struct rwlock *rwl)
{
	unsigned long wrlock;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	membar_exit_before_atomic();
	rw_do_exit(rwl, wrlock);
}

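/*
 * Sketch of a write-to-read downgrade through the slow path above
 * (hypothetical lock name, for illustration only):
 *
 *	rw_enter_write(&frob_lock);
 *	... set the object up exclusively ...
 *	rw_enter(&frob_lock, RW_DOWNGRADE);
 *	... keep reading under a shared hold ...
 *	rw_exit_read(&frob_lock);
 *
 * The lock stays held across the downgrade and, as described above,
 * readers that queued behind the write lock are woken up to retry.
 */
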
/* membar_exit_before_atomic() has to precede call of this function. */
void
rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
{
	unsigned long owner, set;

	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
			    ~(RWLOCK_WAIT|RWLOCK_WRWANT);
	} while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}

int
rw_status(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (owner & RWLOCK_WRLOCK) {
		if (RW_PROC(curproc) == RW_PROC(owner))
			return RW_WRITE;
		else
			return RW_WRITE_OTHER;
	}
	if (owner)
		return RW_READ;
	return (0);
}

#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_XLOCKED);
#else
	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
		panic("%s: lock not held by this process", rwl->rwl_name);
#endif
}

void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_SLOCKED);
#else
	if (!RW_PROC(rwl->rwl_owner) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
#endif
}

void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_LOCKED);
#else
	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
#endif
}

void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_UNLOCKED);
#else
	if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
		panic("%s: lock held", rwl->rwl_name);
#endif
}
#endif

/* recursive rwlocks; */
void
_rrw_init_flags(struct rrwlock *rrwl, const char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}

int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int	rv;

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}

void
rrw_exit(struct rrwlock *rrwl)
{

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}

int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}

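/*
 * Recursive rwlock usage sketch (hypothetical lock, for illustration
 * only): a code path that may re-enter itself takes the same write lock
 * several times and unwinds it the same number of times:
 *
 *	rrw_enter(&frob_rrwl, RW_WRITE);
 *	rrw_enter(&frob_rrwl, RW_WRITE);	-- recursion, rrwl_wcnt is 2
 *	rrw_exit(&frob_rrwl);
 *	rrw_exit(&frob_rrwl);			-- last exit drops the lock
 *
 * Passing RW_RECURSEFAIL makes a recursive rrw_enter() fail with EDEADLK
 * instead.
 */
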
/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#define RWLOCK_OBJ_MAGIC	0x5aa3c85d
struct rwlock_obj {
	struct rwlock	ro_lock;
	u_int		ro_magic;
	u_int		ro_refcnt;
};

struct pool rwlock_obj_pool;

/*
 * rw_obj_init:
 *
 *	Initialize the rwlock object store.
 */
void
rw_obj_init(void)
{
	pool_init(&rwlock_obj_pool, sizeof(struct rwlock_obj), 0, IPL_MPFLOOR,
	    PR_WAITOK, "rwobjpl", NULL);
}

/*
 * rw_obj_alloc:
 *
 *	Allocate a single lock object.
 */
void
_rw_obj_alloc_flags(struct rwlock **lock, const char *name, int flags,
    struct lock_type *type)
{
	struct rwlock_obj *mo;

	mo = pool_get(&rwlock_obj_pool, PR_WAITOK);
	mo->ro_magic = RWLOCK_OBJ_MAGIC;
	_rw_init_flags(&mo->ro_lock, name, flags, type);
	mo->ro_refcnt = 1;

	*lock = &mo->ro_lock;
}

/*
 * rw_obj_hold:
 *
 *	Add a single reference to a lock object.  A reference to the object
 *	must already be held, and must be held across this call.
 */
void
rw_obj_hold(struct rwlock *lock)
{
	struct rwlock_obj *mo = (struct rwlock_obj *)lock;

	KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
	    "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
	    __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
	KASSERTMSG(mo->ro_refcnt > 0,
	    "%s: lock %p: mo->ro_refcnt (%#x) == 0",
	    __func__, mo, mo->ro_refcnt);

	atomic_inc_int(&mo->ro_refcnt);
}

588 */ 589 int 590 rw_obj_free(struct rwlock *lock) 591 { 592 struct rwlock_obj *mo = (struct rwlock_obj *)lock; 593 594 KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC, 595 "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)", 596 __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC); 597 KASSERTMSG(mo->ro_refcnt > 0, 598 "%s: lock %p: mo->ro_refcnt (%#x) == 0", 599 __func__, mo, mo->ro_refcnt); 600 601 if (atomic_dec_int_nv(&mo->ro_refcnt) > 0) { 602 return false; 603 } 604 #if notyet 605 WITNESS_DESTROY(&mo->ro_lock); 606 #endif 607 pool_put(&rwlock_obj_pool, mo); 608 return true; 609 } 610