1 /* 2 * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>. All rights reserved. 3 * Copyright (c) 2006 Matthew Dillon <dillon@backplane.com>. All rights reserved. 4 * 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Scooter Morris at Genentech Inc. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94 36 * $FreeBSD: src/sys/kern/kern_lockf.c,v 1.25 1999/11/16 16:28:56 phk Exp $ 37 */ 38 39 #include "opt_debug_lockf.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/proc.h> 46 #include <sys/unistd.h> 47 #include <sys/vnode.h> 48 #include <sys/malloc.h> 49 #include <sys/fcntl.h> 50 #include <sys/resourcevar.h> 51 52 #include <sys/lockf.h> 53 #include <machine/limits.h> /* for LLONG_MAX */ 54 #include <machine/stdarg.h> 55 56 #include <sys/spinlock2.h> 57 58 #ifdef INVARIANTS 59 int lf_global_counter = 0; 60 #endif 61 62 #ifdef LOCKF_DEBUG 63 int lf_print_ranges = 0; 64 65 static void _lf_print_lock(const struct lockf *); 66 static void _lf_printf(const char *, ...); 67 68 #define lf_print_lock(lock) if (lf_print_ranges) _lf_print_lock(lock) 69 #define lf_printf(ctl, args...) if (lf_print_ranges) _lf_printf(ctl, args) 70 #else 71 #define lf_print_lock(lock) 72 #define lf_printf(ctl, args...) 73 #endif 74 75 static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures"); 76 77 static void lf_wakeup(struct lockf *, off_t, off_t); 78 static struct lockf_range *lf_alloc_range(void); 79 static void lf_create_range(struct lockf_range *, struct proc *, int, int, 80 off_t, off_t); 81 static void lf_insert(struct lockf_range_list *list, 82 struct lockf_range *elm, 83 struct lockf_range *insert_point); 84 static void lf_destroy_range(struct lockf_range *); 85 86 static int lf_setlock(struct lockf *, struct proc *, int, int, 87 off_t, off_t); 88 static int lf_getlock(struct flock *, struct lockf *, struct proc *, 89 int, int, off_t, off_t); 90 91 static int lf_count_change(struct proc *, int); 92 93 /* 94 * Return TRUE (non-zero) if the type and posix flags match. 95 */ 96 static __inline 97 int 98 lf_match(struct lockf_range *range, int type, int flags) 99 { 100 if (range->lf_type != type) 101 return(0); 102 if ((range->lf_flags ^ flags) & F_POSIX) 103 return(0); 104 return(1); 105 } 106 107 /* 108 * Check whether range and [start, end] overlap. 109 */ 110 static __inline 111 int 112 lf_overlap(const struct lockf_range *range, off_t start, off_t end) 113 { 114 if (range->lf_start >= start && range->lf_start <= end) 115 return(1); 116 else if (start >= range->lf_start && start <= range->lf_end) 117 return(1); 118 else 119 return(0); 120 } 121 122 123 /* 124 * Change the POSIX lock accounting for the given process. 125 */ 126 void 127 lf_count_adjust(struct proc *p, int increase) 128 { 129 struct uidinfo *uip; 130 131 KKASSERT(p != NULL); 132 133 uip = p->p_ucred->cr_uidinfo; 134 spin_lock(&uip->ui_lock); 135 136 if (increase) 137 uip->ui_posixlocks += p->p_numposixlocks; 138 else 139 uip->ui_posixlocks -= p->p_numposixlocks; 140 141 KASSERT(uip->ui_posixlocks >= 0, 142 ("Negative number of POSIX locks held by %s user: %d.", 143 increase ? "new" : "old", uip->ui_posixlocks)); 144 spin_unlock(&uip->ui_lock); 145 } 146 147 static int 148 lf_count_change(struct proc *owner, int diff) 149 { 150 struct uidinfo *uip; 151 int max, ret; 152 153 /* we might actually not have a process context */ 154 if (owner == NULL) 155 return(0); 156 157 uip = owner->p_ucred->cr_uidinfo; 158 159 max = MIN(owner->p_rlimit[RLIMIT_POSIXLOCKS].rlim_cur, 160 maxposixlocksperuid); 161 162 spin_lock(&uip->ui_lock); 163 if (diff > 0 && owner->p_ucred->cr_uid != 0 && max != -1 && 164 uip->ui_posixlocks >= max ) { 165 ret = 1; 166 } else { 167 uip->ui_posixlocks += diff; 168 owner->p_numposixlocks += diff; 169 KASSERT(uip->ui_posixlocks >= 0, 170 ("Negative number of POSIX locks held by user: %d.", 171 uip->ui_posixlocks)); 172 KASSERT(owner->p_numposixlocks >= 0, 173 ("Negative number of POSIX locks held by proc: %d.", 174 uip->ui_posixlocks)); 175 ret = 0; 176 } 177 spin_unlock(&uip->ui_lock); 178 return ret; 179 } 180 181 /* 182 * Advisory record locking support 183 */ 184 int 185 lf_advlock(struct vop_advlock_args *ap, struct lockf *lock, u_quad_t size) 186 { 187 struct flock *fl = ap->a_fl; 188 struct proc *owner; 189 off_t start, end; 190 int type, flags, error; 191 lwkt_token_t token; 192 193 /* 194 * Convert the flock structure into a start and end. 195 */ 196 switch (fl->l_whence) { 197 case SEEK_SET: 198 case SEEK_CUR: 199 /* 200 * Caller is responsible for adding any necessary offset 201 * when SEEK_CUR is used. 202 */ 203 start = fl->l_start; 204 break; 205 206 case SEEK_END: 207 start = size + fl->l_start; 208 break; 209 210 default: 211 return(EINVAL); 212 } 213 214 flags = ap->a_flags; 215 if (start < 0) 216 return(EINVAL); 217 if (fl->l_len == 0) { 218 flags |= F_NOEND; 219 end = LLONG_MAX; 220 } else if (fl->l_len < 0) { 221 return(EINVAL); 222 } else { 223 end = start + fl->l_len - 1; 224 if (end < start) 225 return(EINVAL); 226 } 227 228 type = fl->l_type; 229 /* 230 * This isn't really correct for flock-style locks, 231 * but the current handling is somewhat broken anyway. 232 */ 233 owner = (struct proc *)ap->a_id; 234 235 /* 236 * Do the requested operation. 237 */ 238 token = lwkt_getpooltoken(lock); 239 240 if (lock->init_done == 0) { 241 TAILQ_INIT(&lock->lf_range); 242 TAILQ_INIT(&lock->lf_blocked); 243 lock->init_done = 1; 244 } 245 246 switch(ap->a_op) { 247 case F_SETLK: 248 /* 249 * NOTE: It is possible for both lf_range and lf_blocked to 250 * be empty if we block and get woken up, but another process 251 * then gets in and issues an unlock. So VMAYHAVELOCKS must 252 * be set after the lf_setlock() operation completes rather 253 * then before. 254 */ 255 error = lf_setlock(lock, owner, type, flags, start, end); 256 vsetflags(ap->a_vp, VMAYHAVELOCKS); 257 break; 258 259 case F_UNLCK: 260 error = lf_setlock(lock, owner, type, flags, start, end); 261 if (TAILQ_EMPTY(&lock->lf_range) && 262 TAILQ_EMPTY(&lock->lf_blocked)) { 263 vclrflags(ap->a_vp, VMAYHAVELOCKS); 264 } 265 break; 266 267 case F_GETLK: 268 error = lf_getlock(fl, lock, owner, type, flags, start, end); 269 break; 270 271 default: 272 error = EINVAL; 273 break; 274 } 275 lwkt_reltoken(token); 276 return(error); 277 } 278 279 static int 280 lf_setlock(struct lockf *lock, struct proc *owner, int type, int flags, 281 off_t start, off_t end) 282 { 283 struct lockf_range *range; 284 struct lockf_range *brange; 285 struct lockf_range *next; 286 struct lockf_range *first_match; 287 struct lockf_range *last_match; 288 struct lockf_range *insert_point; 289 struct lockf_range *new_range1; 290 struct lockf_range *new_range2; 291 int wakeup_needed; 292 int double_clip; 293 int unlock_override; 294 int error = 0; 295 int count; 296 struct lockf_range_list deadlist; 297 298 new_range1 = NULL; 299 new_range2 = NULL; 300 count = 0; 301 302 restart: 303 /* 304 * Preallocate two ranges so we don't have to worry about blocking 305 * in the middle of the lock code. 306 */ 307 if (new_range1 == NULL) 308 new_range1 = lf_alloc_range(); 309 if (new_range2 == NULL) 310 new_range2 = lf_alloc_range(); 311 first_match = NULL; 312 last_match = NULL; 313 insert_point = NULL; 314 wakeup_needed = 0; 315 316 lf_print_lock(lock); 317 318 /* 319 * Locate the insertion point for the new lock (the first range 320 * with an lf_start >= start). 321 * 322 * Locate the first and latch ranges owned by us that overlap 323 * the requested range. 324 */ 325 TAILQ_FOREACH(range, &lock->lf_range, lf_link) { 326 if (insert_point == NULL && range->lf_start >= start) 327 insert_point = range; 328 329 /* 330 * Skip non-overlapping locks. Locks are sorted by lf_start 331 * So we can terminate the search when lf_start exceeds the 332 * requested range (insert_point is still guarenteed to be 333 * set properly). 334 */ 335 if (range->lf_end < start) 336 continue; 337 if (range->lf_start > end) { 338 range = NULL; 339 break; 340 } 341 342 /* 343 * Overlapping lock. Set first_match and last_match if we 344 * are the owner. 345 */ 346 if (range->lf_owner == owner) { 347 if (first_match == NULL) 348 first_match = range; 349 last_match = range; 350 continue; 351 } 352 353 /* 354 * If we aren't the owner check for a conflicting lock. Only 355 * if not unlocking. 356 */ 357 if (type != F_UNLCK) { 358 if (type == F_WRLCK || range->lf_type == F_WRLCK) 359 break; 360 } 361 } 362 363 /* 364 * If a conflicting lock was observed, block or fail as appropriate. 365 * (this code is skipped when unlocking) 366 */ 367 if (range != NULL) { 368 if ((flags & F_WAIT) == 0) { 369 error = EAGAIN; 370 goto do_cleanup; 371 } 372 373 /* 374 * We are blocked. For POSIX locks we have to check 375 * for deadlocks and return with EDEADLK. This is done 376 * by checking whether range->lf_owner is already 377 * blocked. 378 * 379 * Since flock-style locks cover the whole file, a 380 * deadlock between those is nearly impossible. 381 * This can only occur if a process tries to lock the 382 * same inode exclusively while holding a shared lock 383 * with another descriptor. 384 * XXX How can we cleanly detect this? 385 * XXX The current mixing of flock & fcntl/lockf is evil. 386 * 387 * Handle existing locks of flock-style like POSIX locks. 388 */ 389 if (flags & F_POSIX) { 390 TAILQ_FOREACH(brange, &lock->lf_blocked, lf_link) { 391 if (brange->lf_owner == range->lf_owner) { 392 error = EDEADLK; 393 goto do_cleanup; 394 } 395 } 396 } 397 398 /* 399 * For flock-style locks, we must first remove 400 * any shared locks that we hold before we sleep 401 * waiting for an exclusive lock. 402 */ 403 if ((flags & F_POSIX) == 0 && type == F_WRLCK) 404 lf_setlock(lock, owner, F_UNLCK, 0, start, end); 405 406 brange = new_range1; 407 new_range1 = NULL; 408 lf_create_range(brange, owner, type, 0, start, end); 409 TAILQ_INSERT_TAIL(&lock->lf_blocked, brange, lf_link); 410 error = tsleep(brange, PCATCH, "lockf", 0); 411 412 /* 413 * We may have been awaked by a signal and/or by a 414 * debugger continuing us (in which case we must remove 415 * ourselves from the blocked list) and/or by another 416 * process releasing/downgrading a lock (in which case 417 * we have already been removed from the blocked list 418 * and our lf_flags field is 1). 419 * 420 * Sleep if it looks like we might be livelocking. 421 */ 422 if (brange->lf_flags == 0) 423 TAILQ_REMOVE(&lock->lf_blocked, brange, lf_link); 424 if (count == 2) 425 tsleep(brange, 0, "lockfz", 2); 426 else 427 ++count; 428 lf_destroy_range(brange); 429 430 if (error) 431 goto do_cleanup; 432 goto restart; 433 } 434 435 /* 436 * If there are no overlapping locks owned by us then creating 437 * the new lock is easy. This is the most common case. 438 */ 439 if (first_match == NULL) { 440 if (type == F_UNLCK) 441 goto do_wakeup; 442 if (flags & F_POSIX) { 443 if (lf_count_change(owner, 1)) { 444 error = ENOLCK; 445 goto do_cleanup; 446 } 447 } 448 range = new_range1; 449 new_range1 = NULL; 450 lf_create_range(range, owner, type, flags, start, end); 451 lf_insert(&lock->lf_range, range, insert_point); 452 goto do_wakeup; 453 } 454 455 /* 456 * double_clip - Calculate a special case where TWO locks may have 457 * to be added due to the new lock breaking up an 458 * existing incompatible lock in the middle. 459 * 460 * unlock_override - Calculate a special case where NO locks 461 * need to be created. This occurs when an unlock 462 * does not clip any locks at the front and rear. 463 * 464 * WARNING! closef() and fdrop() assume that an F_UNLCK of the 465 * entire range will always succeed so the unlock_override 466 * case is mandatory. 467 */ 468 double_clip = 0; 469 unlock_override = 0; 470 if (first_match->lf_start < start) { 471 if (first_match == last_match && last_match->lf_end > end) 472 double_clip = 1; 473 } else if (type == F_UNLCK && last_match->lf_end <= end) { 474 unlock_override = 1; 475 } 476 477 /* 478 * Figure out the worst case net increase in POSIX locks and account 479 * for it now before we start modifying things. If neither the 480 * first or last locks match we have an issue. If there is only 481 * one overlapping range which needs to be clipped on both ends 482 * we wind up having to create up to two new locks, else only one. 483 * 484 * When unlocking the worst case is always 1 new lock if our 485 * unlock request cuts the middle out of an existing lock range. 486 * 487 * count represents the 'cleanup' adjustment needed. It starts 488 * negative, is incremented whenever we create a new POSIX lock, 489 * and decremented whenever we delete an existing one. At the 490 * end of the day it had better be <= 0 or we didn't calculate the 491 * worse case properly here. 492 */ 493 count = 0; 494 if ((flags & F_POSIX) && !unlock_override) { 495 if (!lf_match(first_match, type, flags) && 496 !lf_match(last_match, type, flags) 497 ) { 498 if (double_clip && type != F_UNLCK) 499 count = -2; 500 else 501 count = -1; 502 } 503 if (count && lf_count_change(owner, -count)) { 504 error = ENOLCK; 505 goto do_cleanup; 506 } 507 } 508 /* else flock style lock which encompasses entire range */ 509 510 /* 511 * Create and insert the lock represented the requested range. 512 * Adjust the net POSIX lock count. We have to move our insertion 513 * point since brange now represents the first record >= start. 514 * 515 * When unlocking, no new lock is inserted but we still clip. 516 */ 517 if (type != F_UNLCK) { 518 brange = new_range1; 519 new_range1 = NULL; 520 lf_create_range(brange, owner, type, flags, start, end); 521 lf_insert(&lock->lf_range, brange, insert_point); 522 insert_point = brange; 523 if (flags & F_POSIX) 524 ++count; 525 } else { 526 brange = NULL; 527 } 528 529 /* 530 * Handle the double_clip case. This is the only case where 531 * we wind up having to add TWO locks. 532 */ 533 if (double_clip) { 534 KKASSERT(first_match == last_match); 535 last_match = new_range2; 536 new_range2 = NULL; 537 lf_create_range(last_match, first_match->lf_owner, 538 first_match->lf_type, first_match->lf_flags, 539 end + 1, first_match->lf_end); 540 first_match->lf_end = start - 1; 541 first_match->lf_flags &= ~F_NOEND; 542 543 /* 544 * Figure out where to insert the right side clip. 545 */ 546 lf_insert(&lock->lf_range, last_match, first_match); 547 if (last_match->lf_flags & F_POSIX) 548 ++count; 549 } 550 551 /* 552 * Clip or destroy the locks between first_match and last_match, 553 * inclusive. Ignore the primary lock we created (brange). Note 554 * that if double-clipped, first_match and last_match will be 555 * outside our clipping range. Otherwise first_match and last_match 556 * will be deleted. 557 * 558 * We have already taken care of any double clipping. 559 * 560 * The insert_point may become invalid as we delete records, do not 561 * use that pointer any more. Also, when removing something other 562 * then 'range' we have to check to see if the item we are removing 563 * is 'next' and adjust 'next' properly. 564 * 565 * NOTE: brange will be NULL if F_UNLCKing. 566 */ 567 TAILQ_INIT(&deadlist); 568 next = first_match; 569 570 while ((range = next) != NULL) { 571 next = TAILQ_NEXT(range, lf_link); 572 573 /* 574 * Ignore elements that we do not own and ignore the 575 * primary request range which we just created. 576 */ 577 if (range->lf_owner != owner || range == brange) 578 continue; 579 580 /* 581 * We may have to wakeup a waiter when downgrading a lock. 582 */ 583 if (type == F_UNLCK) 584 wakeup_needed = 1; 585 if (type == F_RDLCK && range->lf_type == F_WRLCK) 586 wakeup_needed = 1; 587 588 /* 589 * Clip left. This can only occur on first_match. 590 * 591 * Merge the left clip with brange if possible. This must 592 * be done specifically, not in the optimized merge heuristic 593 * below, since we may have counted on it in our 'count' 594 * calculation above. 595 */ 596 if (range->lf_start < start) { 597 KKASSERT(range == first_match); 598 if (brange && 599 range->lf_end >= start - 1 && 600 lf_match(range, type, flags)) { 601 range->lf_end = brange->lf_end; 602 range->lf_flags |= brange->lf_flags & F_NOEND; 603 /* 604 * Removing something other then 'range', 605 * adjust 'next' if necessary. 606 */ 607 if (next == brange) 608 next = TAILQ_NEXT(next, lf_link); 609 TAILQ_REMOVE(&lock->lf_range, brange, lf_link); 610 if (brange->lf_flags & F_POSIX) 611 --count; 612 TAILQ_INSERT_TAIL(&deadlist, brange, lf_link); 613 brange = range; 614 } else if (range->lf_end >= start) { 615 range->lf_end = start - 1; 616 if (type != F_UNLCK) 617 range->lf_flags &= ~F_NOEND; 618 } 619 if (range == last_match) 620 break; 621 continue; 622 } 623 624 /* 625 * Clip right. This can only occur on last_match. 626 * 627 * Merge the right clip if possible. This must be done 628 * specifically, not in the optimized merge heuristic 629 * below, since we may have counted on it in our 'count' 630 * calculation. 631 * 632 * Since we are adjusting lf_start, we have to move the 633 * record to maintain the sorted list. Since lf_start is 634 * only getting larger we can use the next element as the 635 * insert point (we don't have to backtrack). 636 */ 637 if (range->lf_end > end) { 638 KKASSERT(range == last_match); 639 if (brange && 640 range->lf_start <= end + 1 && 641 lf_match(range, type, flags)) { 642 brange->lf_end = range->lf_end; 643 brange->lf_flags |= range->lf_flags & F_NOEND; 644 TAILQ_REMOVE(&lock->lf_range, range, lf_link); 645 if (range->lf_flags & F_POSIX) 646 --count; 647 TAILQ_INSERT_TAIL(&deadlist, range, lf_link); 648 } else if (range->lf_start <= end) { 649 range->lf_start = end + 1; 650 TAILQ_REMOVE(&lock->lf_range, range, lf_link); 651 lf_insert(&lock->lf_range, range, next); 652 } 653 /* range == last_match, we are done */ 654 break; 655 } 656 657 /* 658 * The record must be entirely enclosed. Note that the 659 * record could be first_match or last_match, and will be 660 * deleted. 661 */ 662 KKASSERT(range->lf_start >= start && range->lf_end <= end); 663 TAILQ_REMOVE(&lock->lf_range, range, lf_link); 664 if (range->lf_flags & F_POSIX) 665 --count; 666 TAILQ_INSERT_TAIL(&deadlist, range, lf_link); 667 if (range == last_match) 668 break; 669 } 670 671 /* 672 * Attempt to merge locks adjacent to brange. For example, we may 673 * have had to clip first_match and/or last_match, and they might 674 * be adjacent. Or there might simply have been an adjacent lock 675 * already there. 676 * 677 * Don't get fancy, just check adjacent elements in the list if they 678 * happen to be owned by us. 679 * 680 * This case only gets hit if we have a situation where a shared 681 * and exclusive lock are adjacent, and the exclusive lock is 682 * downgraded to shared or the shared lock is upgraded to exclusive. 683 */ 684 if (brange) { 685 range = TAILQ_PREV(brange, lockf_range_list, lf_link); 686 if (range && 687 range->lf_owner == owner && 688 range->lf_end == brange->lf_start - 1 && 689 lf_match(range, type, flags) 690 ) { 691 /* 692 * Extend range to cover brange and scrap brange. 693 */ 694 range->lf_end = brange->lf_end; 695 range->lf_flags |= brange->lf_flags & F_NOEND; 696 TAILQ_REMOVE(&lock->lf_range, brange, lf_link); 697 if (brange->lf_flags & F_POSIX) 698 --count; 699 TAILQ_INSERT_TAIL(&deadlist, brange, lf_link); 700 brange = range; 701 } 702 range = TAILQ_NEXT(brange, lf_link); 703 if (range && 704 range->lf_owner == owner && 705 range->lf_start == brange->lf_end + 1 && 706 lf_match(range, type, flags) 707 ) { 708 /* 709 * Extend brange to cover range and scrap range. 710 */ 711 brange->lf_end = range->lf_end; 712 brange->lf_flags |= range->lf_flags & F_NOEND; 713 TAILQ_REMOVE(&lock->lf_range, range, lf_link); 714 if (range->lf_flags & F_POSIX) 715 --count; 716 TAILQ_INSERT_TAIL(&deadlist, range, lf_link); 717 } 718 } 719 720 /* 721 * Destroy deleted elements. We didn't want to do it in the loop 722 * because the free() might have blocked. 723 * 724 * Adjust the count for any posix locks we thought we might create 725 * but didn't. 726 */ 727 while ((range = TAILQ_FIRST(&deadlist)) != NULL) { 728 TAILQ_REMOVE(&deadlist, range, lf_link); 729 lf_destroy_range(range); 730 } 731 732 KKASSERT(count <= 0); 733 if (count < 0) 734 lf_count_change(owner, count); 735 do_wakeup: 736 lf_print_lock(lock); 737 if (wakeup_needed) 738 lf_wakeup(lock, start, end); 739 error = 0; 740 do_cleanup: 741 if (new_range1 != NULL) 742 lf_destroy_range(new_range1); 743 if (new_range2 != NULL) 744 lf_destroy_range(new_range2); 745 return(error); 746 } 747 748 /* 749 * Check whether there is a blocking lock, 750 * and if so return its process identifier. 751 */ 752 static int 753 lf_getlock(struct flock *fl, struct lockf *lock, struct proc *owner, 754 int type, int flags, off_t start, off_t end) 755 { 756 struct lockf_range *range; 757 758 TAILQ_FOREACH(range, &lock->lf_range, lf_link) 759 if (range->lf_owner != owner && 760 lf_overlap(range, start, end) && 761 (type == F_WRLCK || range->lf_type == F_WRLCK)) 762 break; 763 if (range == NULL) { 764 fl->l_type = F_UNLCK; 765 return(0); 766 } 767 fl->l_type = range->lf_type; 768 fl->l_whence = SEEK_SET; 769 fl->l_start = range->lf_start; 770 if (range->lf_flags & F_NOEND) 771 fl->l_len = 0; 772 else 773 fl->l_len = range->lf_end - range->lf_start + 1; 774 if (range->lf_owner != NULL && (range->lf_flags & F_POSIX)) 775 fl->l_pid = range->lf_owner->p_pid; 776 else 777 fl->l_pid = -1; 778 return(0); 779 } 780 781 /* 782 * Wakeup pending lock attempts. Theoretically we can stop as soon as 783 * we encounter an exclusive request that covers the whole range (at least 784 * insofar as the sleep code above calls lf_wakeup() if it would otherwise 785 * exit instead of loop), but for now just wakeup all overlapping 786 * requests. XXX 787 */ 788 static void 789 lf_wakeup(struct lockf *lock, off_t start, off_t end) 790 { 791 struct lockf_range *range, *nrange; 792 793 TAILQ_FOREACH_MUTABLE(range, &lock->lf_blocked, lf_link, nrange) { 794 if (lf_overlap(range, start, end) == 0) 795 continue; 796 TAILQ_REMOVE(&lock->lf_blocked, range, lf_link); 797 range->lf_flags = 1; 798 wakeup(range); 799 } 800 } 801 802 /* 803 * Allocate a range structure and initialize it sufficiently such that 804 * lf_destroy_range() does not barf. 805 */ 806 static struct lockf_range * 807 lf_alloc_range(void) 808 { 809 struct lockf_range *range; 810 811 #ifdef INVARIANTS 812 atomic_add_int(&lf_global_counter, 1); 813 #endif 814 range = kmalloc(sizeof(struct lockf_range), M_LOCKF, M_WAITOK); 815 range->lf_owner = NULL; 816 return(range); 817 } 818 819 static void 820 lf_insert(struct lockf_range_list *list, struct lockf_range *elm, 821 struct lockf_range *insert_point) 822 { 823 while (insert_point && insert_point->lf_start < elm->lf_start) 824 insert_point = TAILQ_NEXT(insert_point, lf_link); 825 if (insert_point != NULL) 826 TAILQ_INSERT_BEFORE(insert_point, elm, lf_link); 827 else 828 TAILQ_INSERT_TAIL(list, elm, lf_link); 829 } 830 831 static void 832 lf_create_range(struct lockf_range *range, struct proc *owner, int type, 833 int flags, off_t start, off_t end) 834 { 835 KKASSERT(start <= end); 836 range->lf_type = type; 837 range->lf_flags = flags; 838 range->lf_start = start; 839 range->lf_end = end; 840 range->lf_owner = owner; 841 842 lf_printf("lf_create_range: %lld..%lld\n", 843 range->lf_start, range->lf_end); 844 } 845 846 static void 847 lf_destroy_range(struct lockf_range *range) 848 { 849 lf_printf("lf_destroy_range: %lld..%lld\n", 850 range->lf_start, range->lf_end); 851 kfree(range, M_LOCKF); 852 #ifdef INVARIANTS 853 atomic_add_int(&lf_global_counter, -1); 854 KKASSERT(lf_global_counter >= 0); 855 #endif 856 } 857 858 #ifdef LOCKF_DEBUG 859 860 static void 861 _lf_printf(const char *ctl, ...) 862 { 863 struct proc *p; 864 __va_list va; 865 866 if (lf_print_ranges) { 867 if ((p = curproc) != NULL) 868 kprintf("pid %d (%s): ", p->p_pid, p->p_comm); 869 } 870 __va_start(va, ctl); 871 kvprintf(ctl, va); 872 __va_end(va); 873 } 874 875 static void 876 _lf_print_lock(const struct lockf *lock) 877 { 878 struct lockf_range *range; 879 880 if (lf_print_ranges == 0) 881 return; 882 883 if (TAILQ_EMPTY(&lock->lf_range)) { 884 lf_printf("lockf %p: no ranges locked\n", lock); 885 } else { 886 lf_printf("lockf %p:\n", lock); 887 } 888 TAILQ_FOREACH(range, &lock->lf_range, lf_link) 889 kprintf("\t%jd..%jd type %s owned by %d\n", 890 (uintmax_t)range->lf_start, (uintmax_t)range->lf_end, 891 range->lf_type == F_RDLCK ? "shared" : "exclusive", 892 range->lf_flags & F_POSIX ? range->lf_owner->p_pid : -1); 893 if (TAILQ_EMPTY(&lock->lf_blocked)) 894 kprintf("no process waiting for range\n"); 895 else 896 kprintf("blocked locks:"); 897 TAILQ_FOREACH(range, &lock->lf_blocked, lf_link) 898 kprintf("\t%jd..%jd type %s waiting on %p\n", 899 (uintmax_t)range->lf_start, (uintmax_t)range->lf_end, 900 range->lf_type == F_RDLCK ? "shared" : "exclusive", 901 range); 902 } 903 #endif /* LOCKF_DEBUG */ 904