/*
 * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
 * Copyright (c) 2006-2018 Matthew Dillon <dillon@backplane.com>.  All rights reserved.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 * $FreeBSD: src/sys/kern/kern_lockf.c,v 1.25 1999/11/16 16:28:56 phk Exp $
 */

#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/resourcevar.h>

#include <sys/lockf.h>
#include <machine/limits.h>	/* for LLONG_MAX */
#include <machine/stdarg.h>

#include <sys/spinlock2.h>

struct lf_pcpu {
	struct lockf_range	*free1;
	struct lockf_range	*free2;
} __cachealign;

static struct lf_pcpu	*lf_pcpu_array;

#ifdef LOCKF_DEBUG
int lf_print_ranges = 0;

static void	_lf_print_lock(const struct lockf *);
static void	_lf_printf(const char *, ...) __printflike(1, 2);

#define lf_print_lock(lock)	if (lf_print_ranges) _lf_print_lock(lock)
#define lf_printf(ctl, args...)	if (lf_print_ranges) _lf_printf(ctl, args)
#else
#define lf_print_lock(lock)
#define lf_printf(ctl, args...)
#endif

static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

static void	lf_wakeup(struct lockf *, off_t, off_t);
static struct lockf_range *lf_alloc_range(void);
static void	lf_create_range(struct lockf_range *, struct proc *, int, int,
				off_t, off_t);
static void	lf_insert(struct lockf_range_list *list,
				struct lockf_range *elm,
				struct lockf_range *insert_point);
static void	lf_destroy_range(struct lockf_range *);

static int	lf_setlock(struct lockf *, struct proc *, int, int,
				off_t, off_t);
static int	lf_getlock(struct flock *, struct lockf *, struct proc *,
				int, int, off_t, off_t);

static int	lf_count_change(struct proc *, int);

/*
 * Return TRUE (non-zero) if the type and posix flags match.
 */
static __inline
int
lf_match(struct lockf_range *range, int type, int flags)
{
	if (range->lf_type != type)
		return(0);
	if ((range->lf_flags ^ flags) & F_POSIX)
		return(0);
	return(1);
}

/*
 * Check whether range and [start, end] overlap.
 */
static __inline
int
lf_overlap(const struct lockf_range *range, off_t start, off_t end)
{
	if (range->lf_start >= start && range->lf_start <= end)
		return(1);
	else if (start >= range->lf_start && start <= range->lf_end)
		return(1);
	else
		return(0);
}


/*
 * Change the POSIX lock accounting for the given process.
 */
void
lf_count_adjust(struct proc *p, int increase)
{
	struct uidinfo *uip;
	struct uidcount *pup;
	int n;

	KKASSERT(p != NULL);

	uip = p->p_ucred->cr_uidinfo;
	pup = &uip->ui_pcpu[mycpuid];

	if (increase) {
		for (n = 0; n < ncpus; ++n)
			pup->pu_posixlocks += p->p_uidpcpu[n].pu_posixlocks;
	} else {
		for (n = 0; n < ncpus; ++n)
			pup->pu_posixlocks -= p->p_uidpcpu[n].pu_posixlocks;
	}

	if (pup->pu_posixlocks < -PUP_LIMIT ||
	    pup->pu_posixlocks > PUP_LIMIT) {
		atomic_add_int(&uip->ui_posixlocks, pup->pu_posixlocks);
		pup->pu_posixlocks = 0;
	}
}

static int
lf_count_change(struct proc *owner, int diff)
{
	struct uidinfo *uip;
	int max, ret;

	/* we might actually not have a process context */
	if (owner == NULL)
		return(0);

	uip = owner->p_ucred->cr_uidinfo;

	max = MIN(owner->p_rlimit[RLIMIT_POSIXLOCKS].rlim_cur,
		  maxposixlocksperuid);

	if (diff > 0 && owner->p_ucred->cr_uid != 0 && max != -1 &&
	    uip->ui_posixlocks >= max) {
		ret = 1;
	} else {
		struct uidcount *pup;
		int cpu = mycpuid;

		pup = &uip->ui_pcpu[cpu];
		pup->pu_posixlocks += diff;
		if (pup->pu_posixlocks < -PUP_LIMIT ||
		    pup->pu_posixlocks > PUP_LIMIT) {
			atomic_add_int(&uip->ui_posixlocks, pup->pu_posixlocks);
			pup->pu_posixlocks = 0;
		}
		owner->p_uidpcpu[cpu].pu_posixlocks += diff;
		ret = 0;
	}
	return ret;
}

/*
 * Advisory record locking support
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf *lock, u_quad_t size)
{
	struct flock *fl = ap->a_fl;
	struct proc *owner;
	off_t start, end;
	int type, flags, error;
	lwkt_token_t token;

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return(EINVAL);
	}

	flags = ap->a_flags;
	if (start < 0)
		return(EINVAL);
	if (fl->l_len == 0) {
		flags |= F_NOEND;
		end = LLONG_MAX;
	} else if (fl->l_len < 0) {
		return(EINVAL);
	} else {
		end = start + fl->l_len - 1;
		if (end < start)
			return(EINVAL);
	}
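
	/*
	 * For example (illustrative values): l_whence = SEEK_SET with
	 * l_start = 100 and l_len = 50 locks the byte range [100, 149].
	 * A request with l_len = 0 locks from l_start to the end of the
	 * file, represented here as [l_start, LLONG_MAX] with F_NOEND set.
	 */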

	type = fl->l_type;
	/*
	 * This isn't really correct for flock-style locks,
	 * but the current handling is somewhat broken anyway.
	 */
	owner = (struct proc *)ap->a_id;

	/*
	 * Do the requested operation.
	 */
	token = lwkt_getpooltoken(lock);

	if (lock->init_done == 0) {
		TAILQ_INIT(&lock->lf_range);
		TAILQ_INIT(&lock->lf_blocked);
		lock->init_done = 1;
	}

	switch(ap->a_op) {
	case F_SETLK:
		/*
		 * NOTE: It is possible for both lf_range and lf_blocked to
		 * be empty if we block and get woken up, but another process
		 * then gets in and issues an unlock.  So VMAYHAVELOCKS must
		 * be set after the lf_setlock() operation completes rather
		 * than before.
		 */
		error = lf_setlock(lock, owner, type, flags, start, end);
		if ((ap->a_vp->v_flag & VMAYHAVELOCKS) == 0)
			vsetflags(ap->a_vp, VMAYHAVELOCKS);
		break;

	case F_UNLCK:
		error = lf_setlock(lock, owner, type, flags, start, end);
#if 0
		/*
		 * XXX REMOVED. don't bother doing this in the critical path.
		 * close() overhead is minimal.
		 */
		if (TAILQ_EMPTY(&lock->lf_range) &&
		    TAILQ_EMPTY(&lock->lf_blocked)) {
			vclrflags(ap->a_vp, VMAYHAVELOCKS);
		}
#endif
		break;

	case F_GETLK:
		error = lf_getlock(fl, lock, owner, type, flags, start, end);
		break;

	default:
		error = EINVAL;
		break;
	}
	lwkt_reltoken(token);
	return(error);
}

static int
lf_setlock(struct lockf *lock, struct proc *owner, int type, int flags,
	   off_t start, off_t end)
{
	struct lockf_range *range;
	struct lockf_range *brange;
	struct lockf_range *next;
	struct lockf_range *first_match;
	struct lockf_range *last_match;
	struct lockf_range *insert_point;
	struct lockf_range *new_range1;
	struct lockf_range *new_range2;
	int wakeup_needed;
	int double_clip;
	int unlock_override;
	int error = 0;
	int count;
	struct lockf_range_list deadlist;

	new_range1 = NULL;
	new_range2 = NULL;
	count = 0;

restart:
	/*
	 * Preallocate two ranges so we don't have to worry about blocking
	 * in the middle of the lock code.
	 */
	if (new_range1 == NULL)
		new_range1 = lf_alloc_range();
	if (new_range2 == NULL)
		new_range2 = lf_alloc_range();
	first_match = NULL;
	last_match = NULL;
	insert_point = NULL;
	wakeup_needed = 0;

	lf_print_lock(lock);

	/*
	 * Locate the insertion point for the new lock (the first range
	 * with an lf_start >= start).
	 *
	 * Locate the first and last ranges owned by us that overlap
	 * the requested range.
	 */
	TAILQ_FOREACH(range, &lock->lf_range, lf_link) {
		if (insert_point == NULL && range->lf_start >= start)
			insert_point = range;

		/*
		 * Skip non-overlapping locks.  Locks are sorted by lf_start
		 * so we can terminate the search when lf_start exceeds the
		 * requested range (insert_point is still guaranteed to be
		 * set properly).
		 */
		if (range->lf_end < start)
			continue;
		if (range->lf_start > end) {
			range = NULL;
			break;
		}

		/*
		 * Overlapping lock.  Set first_match and last_match if we
		 * are the owner.
		 */
		if (range->lf_owner == owner) {
			if (first_match == NULL)
				first_match = range;
			last_match = range;
			continue;
		}

		/*
		 * If we aren't the owner check for a conflicting lock.  Only
		 * if not unlocking.
		 */
		if (type != F_UNLCK) {
			if (type == F_WRLCK || range->lf_type == F_WRLCK)
				break;
		}
	}
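
	/*
	 * For example (illustrative ranges): given existing ranges [0,9],
	 * [20,29] and [40,49], all owned by us, a request for [25,45]
	 * leaves the loop with insert_point = [40,49] (the first range
	 * whose lf_start >= 25), first_match = [20,29], last_match =
	 * [40,49], and range == NULL because no conflicting lock was found.
	 */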

	/*
	 * If a conflicting lock was observed, block or fail as appropriate.
	 * (this code is skipped when unlocking)
	 */
	if (range != NULL) {
		if ((flags & F_WAIT) == 0) {
			error = EAGAIN;
			goto do_cleanup;
		}

		/*
		 * We are blocked.  For POSIX locks we have to check
		 * for deadlocks and return with EDEADLK.  This is done
		 * by checking whether range->lf_owner is already
		 * blocked.
		 *
		 * Since flock-style locks cover the whole file, a
		 * deadlock between those is nearly impossible.
		 * This can only occur if a process tries to lock the
		 * same inode exclusively while holding a shared lock
		 * with another descriptor.
		 * XXX How can we cleanly detect this?
		 * XXX The current mixing of flock & fcntl/lockf is evil.
		 *
		 * Handle existing locks of flock-style like POSIX locks.
		 */
		if (flags & F_POSIX) {
			TAILQ_FOREACH(brange, &lock->lf_blocked, lf_link) {
				if (brange->lf_owner == range->lf_owner) {
					error = EDEADLK;
					goto do_cleanup;
				}
			}
		}

		/*
		 * For flock-style locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((flags & F_POSIX) == 0 && type == F_WRLCK)
			lf_setlock(lock, owner, F_UNLCK, 0, start, end);

		brange = new_range1;
		new_range1 = NULL;
		lf_create_range(brange, owner, type, 0, start, end);
		TAILQ_INSERT_TAIL(&lock->lf_blocked, brange, lf_link);
		error = tsleep(brange, PCATCH, "lockf", 0);

		/*
		 * We may have been awakened by a signal and/or by a
		 * debugger continuing us (in which case we must remove
		 * ourselves from the blocked list) and/or by another
		 * process releasing/downgrading a lock (in which case
		 * we have already been removed from the blocked list
		 * and our lf_flags field is 1).
		 *
		 * Sleep if it looks like we might be livelocking.
		 */
		if (brange->lf_flags == 0)
			TAILQ_REMOVE(&lock->lf_blocked, brange, lf_link);
		if (error == 0 && count == 2)
			tsleep(brange, 0, "lockfz", 2);
		else
			++count;
		lf_destroy_range(brange);

		if (error)
			goto do_cleanup;
		goto restart;
	}

	/*
	 * If there are no overlapping locks owned by us then creating
	 * the new lock is easy.  This is the most common case.
	 */
	if (first_match == NULL) {
		if (type == F_UNLCK)
			goto do_wakeup;
		if (flags & F_POSIX) {
			if (lf_count_change(owner, 1)) {
				error = ENOLCK;
				goto do_cleanup;
			}
		}
		range = new_range1;
		new_range1 = NULL;
		lf_create_range(range, owner, type, flags, start, end);
		lf_insert(&lock->lf_range, range, insert_point);
		goto do_wakeup;
	}

	/*
	 * double_clip - Calculate a special case where TWO locks may have
	 *		 to be added due to the new lock breaking up an
	 *		 existing incompatible lock in the middle.
	 *
	 * unlock_override - Calculate a special case where NO locks
	 *		 need to be created.  This occurs when an unlock
	 *		 does not clip any locks at the front and rear.
	 *
	 * WARNING!  closef() and fdrop() assume that an F_UNLCK of the
	 *	     entire range will always succeed so the unlock_override
	 *	     case is mandatory.
	 */
	double_clip = 0;
	unlock_override = 0;
	if (first_match->lf_start < start) {
		if (first_match == last_match && last_match->lf_end > end)
			double_clip = 1;
	} else if (type == F_UNLCK && last_match->lf_end <= end) {
		unlock_override = 1;
	}
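
	/*
	 * For example (illustrative ranges): if we hold a POSIX read lock
	 * on [0,99] and now request a write lock on [40,59], the old range
	 * must be broken into [0,39] and [60,99] around the new lock, so
	 * up to two new ranges are needed (double_clip).  Conversely, an
	 * F_UNLCK of [0,99] when we only hold [40,59] clips nothing and
	 * needs no new range at all (unlock_override).
	 */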

	/*
	 * Figure out the worst case net increase in POSIX locks and account
	 * for it now before we start modifying things.  If neither the
	 * first nor the last lock matches we have an issue.  If there is
	 * only one overlapping range which needs to be clipped on both ends
	 * we wind up having to create up to two new locks, else only one.
	 *
	 * When unlocking the worst case is always 1 new lock if our
	 * unlock request cuts the middle out of an existing lock range.
	 *
	 * count represents the 'cleanup' adjustment needed.  It starts
	 * negative, is incremented whenever we create a new POSIX lock,
	 * and decremented whenever we delete an existing one.  At the
	 * end of the day it had better be <= 0 or we didn't calculate the
	 * worst case properly here.
	 */
	count = 0;
	if ((flags & F_POSIX) && !unlock_override) {
		if (!lf_match(first_match, type, flags) &&
		    !lf_match(last_match, type, flags)
		) {
			if (double_clip && type != F_UNLCK)
				count = -2;
			else
				count = -1;
		}
		if (count && lf_count_change(owner, -count)) {
			error = ENOLCK;
			goto do_cleanup;
		}
	}
	/* else flock style lock which encompasses entire range */

	/*
	 * Create and insert the lock representing the requested range.
	 * Adjust the net POSIX lock count.  We have to move our insertion
	 * point since brange now represents the first record >= start.
	 *
	 * When unlocking, no new lock is inserted but we still clip.
	 */
	if (type != F_UNLCK) {
		brange = new_range1;
		new_range1 = NULL;
		lf_create_range(brange, owner, type, flags, start, end);
		lf_insert(&lock->lf_range, brange, insert_point);
		insert_point = brange;
		if (flags & F_POSIX)
			++count;
	} else {
		brange = NULL;
	}

	/*
	 * Handle the double_clip case.  This is the only case where
	 * we wind up having to add TWO locks.
	 */
	if (double_clip) {
		KKASSERT(first_match == last_match);
		last_match = new_range2;
		new_range2 = NULL;
		lf_create_range(last_match, first_match->lf_owner,
				first_match->lf_type, first_match->lf_flags,
				end + 1, first_match->lf_end);
		first_match->lf_end = start - 1;
		first_match->lf_flags &= ~F_NOEND;

		/*
		 * Figure out where to insert the right side clip.
		 */
		lf_insert(&lock->lf_range, last_match, first_match);
		if (last_match->lf_flags & F_POSIX)
			++count;
	}
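
	/*
	 * Continuing the example above: upgrading the middle of a POSIX
	 * read lock [0,99] to a write lock on [40,59] budgets for two new
	 * ranges (count = -2); creating the new write lock and the
	 * right-hand fragment [60,99] brings count back to 0, while the
	 * original range has already been trimmed to the left fragment
	 * [0,39].
	 */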

	/*
	 * Clip or destroy the locks between first_match and last_match,
	 * inclusive.  Ignore the primary lock we created (brange).  Note
	 * that if double-clipped, first_match and last_match will be
	 * outside our clipping range.  Otherwise first_match and last_match
	 * will be deleted.
	 *
	 * We have already taken care of any double clipping.
	 *
	 * The insert_point may become invalid as we delete records, do not
	 * use that pointer any more.  Also, when removing something other
	 * than 'range' we have to check to see if the item we are removing
	 * is 'next' and adjust 'next' properly.
	 *
	 * NOTE: brange will be NULL if F_UNLCKing.
	 */
	TAILQ_INIT(&deadlist);
	next = first_match;

	while ((range = next) != NULL) {
		next = TAILQ_NEXT(range, lf_link);

		/*
		 * Ignore elements that we do not own and ignore the
		 * primary request range which we just created.
		 */
		if (range->lf_owner != owner || range == brange)
			continue;

		/*
		 * We may have to wakeup a waiter when downgrading a lock.
		 */
		if (type == F_UNLCK)
			wakeup_needed = 1;
		if (type == F_RDLCK && range->lf_type == F_WRLCK)
			wakeup_needed = 1;

		/*
		 * Clip left.  This can only occur on first_match.
		 *
		 * Merge the left clip with brange if possible.  This must
		 * be done specifically, not in the optimized merge heuristic
		 * below, since we may have counted on it in our 'count'
		 * calculation above.
		 */
		if (range->lf_start < start) {
			KKASSERT(range == first_match);
			if (brange &&
			    range->lf_end >= start - 1 &&
			    lf_match(range, type, flags)) {
				range->lf_end = brange->lf_end;
				range->lf_flags |= brange->lf_flags & F_NOEND;
				/*
				 * Removing something other than 'range',
				 * adjust 'next' if necessary.
				 */
				if (next == brange)
					next = TAILQ_NEXT(next, lf_link);
				TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
				if (brange->lf_flags & F_POSIX)
					--count;
				TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
				brange = range;
			} else if (range->lf_end >= start) {
				range->lf_end = start - 1;
				if (type != F_UNLCK)
					range->lf_flags &= ~F_NOEND;
			}
			if (range == last_match)
				break;
			continue;
		}

		/*
		 * Clip right.  This can only occur on last_match.
		 *
		 * Merge the right clip if possible.  This must be done
		 * specifically, not in the optimized merge heuristic
		 * below, since we may have counted on it in our 'count'
		 * calculation.
		 *
		 * Since we are adjusting lf_start, we have to move the
		 * record to maintain the sorted list.  Since lf_start is
		 * only getting larger we can use the next element as the
		 * insert point (we don't have to backtrack).
		 */
		if (range->lf_end > end) {
			KKASSERT(range == last_match);
			if (brange &&
			    range->lf_start <= end + 1 &&
			    lf_match(range, type, flags)) {
				brange->lf_end = range->lf_end;
				brange->lf_flags |= range->lf_flags & F_NOEND;
				TAILQ_REMOVE(&lock->lf_range, range, lf_link);
				if (range->lf_flags & F_POSIX)
					--count;
				TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
			} else if (range->lf_start <= end) {
				range->lf_start = end + 1;
				TAILQ_REMOVE(&lock->lf_range, range, lf_link);
				lf_insert(&lock->lf_range, range, next);
			}
			/* range == last_match, we are done */
			break;
		}

		/*
		 * The record must be entirely enclosed.  Note that the
		 * record could be first_match or last_match, and will be
		 * deleted.
		 */
		KKASSERT(range->lf_start >= start && range->lf_end <= end);
		TAILQ_REMOVE(&lock->lf_range, range, lf_link);
		if (range->lf_flags & F_POSIX)
			--count;
		TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
		if (range == last_match)
			break;
	}
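
	/*
	 * For example (illustrative ranges): growing a POSIX write lock
	 * [0,49] with a new write lock on [30,99] takes the left-clip merge
	 * above: the old record is extended to [0,99], the freshly inserted
	 * brange is moved to the deadlist, and count returns to 0.
	 */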

	/*
	 * Attempt to merge locks adjacent to brange.  For example, we may
	 * have had to clip first_match and/or last_match, and they might
	 * be adjacent.  Or there might simply have been an adjacent lock
	 * already there.
	 *
	 * Don't get fancy, just check adjacent elements in the list if they
	 * happen to be owned by us.
	 *
	 * This case only gets hit if we have a situation where a shared
	 * and exclusive lock are adjacent, and the exclusive lock is
	 * downgraded to shared or the shared lock is upgraded to exclusive.
	 */
	if (brange) {
		range = TAILQ_PREV(brange, lockf_range_list, lf_link);
		if (range &&
		    range->lf_owner == owner &&
		    range->lf_end == brange->lf_start - 1 &&
		    lf_match(range, type, flags)
		) {
			/*
			 * Extend range to cover brange and scrap brange.
			 */
			range->lf_end = brange->lf_end;
			range->lf_flags |= brange->lf_flags & F_NOEND;
			TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
			if (brange->lf_flags & F_POSIX)
				--count;
			TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
			brange = range;
		}
		range = TAILQ_NEXT(brange, lf_link);
		if (range &&
		    range->lf_owner == owner &&
		    range->lf_start == brange->lf_end + 1 &&
		    lf_match(range, type, flags)
		) {
			/*
			 * Extend brange to cover range and scrap range.
			 */
			brange->lf_end = range->lf_end;
			brange->lf_flags |= range->lf_flags & F_NOEND;
			TAILQ_REMOVE(&lock->lf_range, range, lf_link);
			if (range->lf_flags & F_POSIX)
				--count;
			TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
		}
	}

	/*
	 * Destroy deleted elements.  We didn't want to do it in the loop
	 * because the free() might have blocked.
	 *
	 * Adjust the count for any posix locks we thought we might create
	 * but didn't.
	 */
	while ((range = TAILQ_FIRST(&deadlist)) != NULL) {
		TAILQ_REMOVE(&deadlist, range, lf_link);
		lf_destroy_range(range);
	}

	KKASSERT(count <= 0);
	if (count < 0)
		lf_count_change(owner, count);
do_wakeup:
	lf_print_lock(lock);
	if (wakeup_needed)
		lf_wakeup(lock, start, end);
	error = 0;
do_cleanup:
	if (new_range1 != NULL)
		lf_destroy_range(new_range1);
	if (new_range2 != NULL)
		lf_destroy_range(new_range2);
	return(error);
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct flock *fl, struct lockf *lock, struct proc *owner,
	   int type, int flags, off_t start, off_t end)
{
	struct lockf_range *range;

	TAILQ_FOREACH(range, &lock->lf_range, lf_link)
		if (range->lf_owner != owner &&
		    lf_overlap(range, start, end) &&
		    (type == F_WRLCK || range->lf_type == F_WRLCK))
			break;
	if (range == NULL) {
		fl->l_type = F_UNLCK;
		return(0);
	}
	fl->l_type = range->lf_type;
	fl->l_whence = SEEK_SET;
	fl->l_start = range->lf_start;
	if (range->lf_flags & F_NOEND)
		fl->l_len = 0;
	else
		fl->l_len = range->lf_end - range->lf_start + 1;
	if (range->lf_owner != NULL && (range->lf_flags & F_POSIX))
		fl->l_pid = range->lf_owner->p_pid;
	else
		fl->l_pid = -1;
	return(0);
}
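
/*
 * Illustrative userland sketch of an F_GETLK request that reaches
 * lf_getlock() through fcntl(2) and VOP_ADVLOCK():
 *
 *	struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET,
 *			    .l_start = 0, .l_len = 0 };
 *	if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
 *		printf("range blocked by pid %ld\n", (long)fl.l_pid);
 */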

/*
 * Wakeup pending lock attempts.  Theoretically we can stop as soon as
 * we encounter an exclusive request that covers the whole range (at least
 * insofar as the sleep code above calls lf_wakeup() if it would otherwise
 * exit instead of loop), but for now just wakeup all overlapping
 * requests.  XXX
 */
static void
lf_wakeup(struct lockf *lock, off_t start, off_t end)
{
	struct lockf_range *range, *nrange;

	TAILQ_FOREACH_MUTABLE(range, &lock->lf_blocked, lf_link, nrange) {
		if (lf_overlap(range, start, end) == 0)
			continue;
		TAILQ_REMOVE(&lock->lf_blocked, range, lf_link);
		range->lf_flags = 1;
		wakeup(range);
	}
}

/*
 * Allocate a range structure and initialize it sufficiently such that
 * lf_destroy_range() does not barf.
 *
 * Most use cases are temporary, so implement a small 2-entry-per-cpu
 * cache.
 */
static struct lockf_range *
lf_alloc_range(void)
{
	struct lockf_range *range;
	struct lf_pcpu *lfpc;

	lfpc = &lf_pcpu_array[mycpuid];
	if ((range = lfpc->free1) != NULL) {
		lfpc->free1 = NULL;
		return range;
	}
	if ((range = lfpc->free2) != NULL) {
		lfpc->free2 = NULL;
		return range;
	}
	range = kmalloc(sizeof(struct lockf_range), M_LOCKF, M_WAITOK);
	range->lf_owner = NULL;

	return(range);
}

static void
lf_insert(struct lockf_range_list *list, struct lockf_range *elm,
	  struct lockf_range *insert_point)
{
	while (insert_point && insert_point->lf_start < elm->lf_start)
		insert_point = TAILQ_NEXT(insert_point, lf_link);
	if (insert_point != NULL)
		TAILQ_INSERT_BEFORE(insert_point, elm, lf_link);
	else
		TAILQ_INSERT_TAIL(list, elm, lf_link);
}

static void
lf_create_range(struct lockf_range *range, struct proc *owner, int type,
		int flags, off_t start, off_t end)
{
	KKASSERT(start <= end);
	range->lf_type = type;
	range->lf_flags = flags;
	range->lf_start = start;
	range->lf_end = end;
	range->lf_owner = owner;

	lf_printf("lf_create_range: %ju..%ju\n",
		  (uintmax_t)range->lf_start, (uintmax_t)range->lf_end);
}

static void
lf_destroy_range(struct lockf_range *range)
{
	struct lf_pcpu *lfpc;

	lf_printf("lf_destroy_range: %ju..%ju\n",
		  (uintmax_t)range->lf_start, (uintmax_t)range->lf_end);

	lfpc = &lf_pcpu_array[mycpuid];
	if (lfpc->free1 == NULL) {
		range->lf_owner = NULL;
		lfpc->free1 = range;
		return;
	}
	if (lfpc->free2 == NULL) {
		range->lf_owner = NULL;
		lfpc->free2 = range;
		return;
	}
	kfree(range, M_LOCKF);
}
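
/*
 * Note that lf_setlock() preallocates new_range1/new_range2 with
 * lf_alloc_range() before modifying anything and returns whatever it did
 * not use through lf_destroy_range() in its cleanup path, so a typical
 * lock or unlock cycle just recycles one or two structures through the
 * per-cpu free1/free2 slots without touching kmalloc()/kfree().
 */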

#ifdef LOCKF_DEBUG

static void
_lf_printf(const char *ctl, ...)
{
	struct proc *p;
	__va_list va;

	if (lf_print_ranges) {
		if ((p = curproc) != NULL)
			kprintf("pid %d (%s): ", p->p_pid, p->p_comm);
	}
	__va_start(va, ctl);
	kvprintf(ctl, va);
	__va_end(va);
}

static void
_lf_print_lock(const struct lockf *lock)
{
	struct lockf_range *range;

	if (lf_print_ranges == 0)
		return;

	if (TAILQ_EMPTY(&lock->lf_range)) {
		lf_printf("lockf %p: no ranges locked\n", lock);
	} else {
		lf_printf("lockf %p:\n", lock);
	}
	TAILQ_FOREACH(range, &lock->lf_range, lf_link)
		kprintf("\t%jd..%jd type %s owned by %d\n",
			(intmax_t)range->lf_start, (intmax_t)range->lf_end,
			range->lf_type == F_RDLCK ? "shared" : "exclusive",
			range->lf_flags & F_POSIX ?
			    range->lf_owner->p_pid : -1);
	if (TAILQ_EMPTY(&lock->lf_blocked))
		kprintf("no process waiting for range\n");
	else
		kprintf("blocked locks:\n");
	TAILQ_FOREACH(range, &lock->lf_blocked, lf_link)
		kprintf("\t%jd..%jd type %s waiting on %p\n",
			(intmax_t)range->lf_start, (intmax_t)range->lf_end,
			range->lf_type == F_RDLCK ? "shared" : "exclusive",
			range);
}
#endif /* LOCKF_DEBUG */

static void
lf_init(void *dummy __unused)
{
	lf_pcpu_array = kmalloc(sizeof(*lf_pcpu_array) * ncpus,
				M_LOCKF, M_WAITOK | M_ZERO);
}

SYSINIT(lockf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, lf_init, NULL);