/*
 * Copyright (c) 2004 Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
 * Copyright (c) 2006-2018 Matthew Dillon <dillon@backplane.com>.  All rights reserved.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 * $FreeBSD: src/sys/kern/kern_lockf.c,v 1.25 1999/11/16 16:28:56 phk Exp $
 */

#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/resourcevar.h>

#include <sys/lockf.h>
#include <machine/limits.h>	/* for LLONG_MAX */
#include <machine/stdarg.h>

#include <sys/spinlock2.h>

#ifdef INVARIANTS
int lf_global_counter = 0;
#endif

#ifdef LOCKF_DEBUG
int lf_print_ranges = 0;

static void	_lf_print_lock(const struct lockf *);
static void	_lf_printf(const char *, ...) __printflike(1, 2);

#define lf_print_lock(lock) if (lf_print_ranges) _lf_print_lock(lock)
#define lf_printf(ctl, args...) if (lf_print_ranges) _lf_printf(ctl, args)
#else
#define lf_print_lock(lock)
#define lf_printf(ctl, args...)
#endif

static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

static void	lf_wakeup(struct lockf *, off_t, off_t);
static struct lockf_range *lf_alloc_range(void);
static void	lf_create_range(struct lockf_range *, struct proc *, int, int,
				off_t, off_t);
static void	lf_insert(struct lockf_range_list *list,
			  struct lockf_range *elm,
			  struct lockf_range *insert_point);
static void	lf_destroy_range(struct lockf_range *);

static int	lf_setlock(struct lockf *, struct proc *, int, int,
			   off_t, off_t);
static int	lf_getlock(struct flock *, struct lockf *, struct proc *,
			   int, int, off_t, off_t);

static int	lf_count_change(struct proc *, int);

/*
 * Return TRUE (non-zero) if the type and posix flags match.
 */
static __inline
int
lf_match(struct lockf_range *range, int type, int flags)
{
	if (range->lf_type != type)
		return(0);
	if ((range->lf_flags ^ flags) & F_POSIX)
		return(0);
	return(1);
}

/*
 * Check whether range and [start, end] overlap.
 */
static __inline
int
lf_overlap(const struct lockf_range *range, off_t start, off_t end)
{
	if (range->lf_start >= start && range->lf_start <= end)
		return(1);
	else if (start >= range->lf_start && start <= range->lf_end)
		return(1);
	else
		return(0);
}


/*
 * Change the POSIX lock accounting for the given process.
 */
void
lf_count_adjust(struct proc *p, int increase)
{
	struct uidinfo *uip;
	struct uidcount *pup;
	int n;

	KKASSERT(p != NULL);

	uip = p->p_ucred->cr_uidinfo;
	pup = &uip->ui_pcpu[mycpuid];

	if (increase) {
		for (n = 0; n < ncpus; ++n)
			pup->pu_posixlocks += p->p_uidpcpu[n].pu_posixlocks;
	} else {
		for (n = 0; n < ncpus; ++n)
			pup->pu_posixlocks -= p->p_uidpcpu[n].pu_posixlocks;
	}

	if (pup->pu_posixlocks < -PUP_LIMIT ||
	    pup->pu_posixlocks > PUP_LIMIT) {
		atomic_add_int(&uip->ui_posixlocks, pup->pu_posixlocks);
		pup->pu_posixlocks = 0;
	}
}

static int
lf_count_change(struct proc *owner, int diff)
{
	struct uidinfo *uip;
	int max, ret;

	/* we might actually not have a process context */
	if (owner == NULL)
		return(0);

	uip = owner->p_ucred->cr_uidinfo;

	max = MIN(owner->p_rlimit[RLIMIT_POSIXLOCKS].rlim_cur,
		  maxposixlocksperuid);

	if (diff > 0 && owner->p_ucred->cr_uid != 0 && max != -1 &&
	    uip->ui_posixlocks >= max) {
		ret = 1;
	} else {
		struct uidcount *pup;
		int cpu = mycpuid;

		pup = &uip->ui_pcpu[cpu];
		pup->pu_posixlocks += diff;
		if (pup->pu_posixlocks < -PUP_LIMIT ||
		    pup->pu_posixlocks > PUP_LIMIT) {
			atomic_add_int(&uip->ui_posixlocks,
				       pup->pu_posixlocks);
			pup->pu_posixlocks = 0;
		}
		owner->p_uidpcpu[cpu].pu_posixlocks += diff;
		ret = 0;
	}
	return ret;
}

/*
 * Advisory record locking support
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf *lock, u_quad_t size)
{
	struct flock *fl = ap->a_fl;
	struct proc *owner;
	off_t start, end;
	int type, flags, error;
	lwkt_token_t token;

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
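		 *
		 * Illustrative userland request (hypothetical values);
		 * by the time it reaches us the caller has already
		 * folded the current file offset into l_start:
		 *
		 *	struct flock fl;
		 *	fl.l_whence = SEEK_CUR;
		 *	fl.l_start = 100;	(relative to the file offset)
		 *	fl.l_len = 50;
		 *	fcntl(fd, F_SETLK, &fl);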
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return(EINVAL);
	}

	flags = ap->a_flags;
	if (start < 0)
		return(EINVAL);
	if (fl->l_len == 0) {
		flags |= F_NOEND;
		end = LLONG_MAX;
	} else if (fl->l_len < 0) {
		return(EINVAL);
	} else {
		end = start + fl->l_len - 1;
		if (end < start)
			return(EINVAL);
	}

	type = fl->l_type;
	/*
	 * This isn't really correct for flock-style locks,
	 * but the current handling is somewhat broken anyway.
	 */
	owner = (struct proc *)ap->a_id;

	/*
	 * Do the requested operation.
	 */
	token = lwkt_getpooltoken(lock);

	if (lock->init_done == 0) {
		TAILQ_INIT(&lock->lf_range);
		TAILQ_INIT(&lock->lf_blocked);
		lock->init_done = 1;
	}

	switch(ap->a_op) {
	case F_SETLK:
		/*
		 * NOTE: It is possible for both lf_range and lf_blocked to
		 * be empty if we block and get woken up, but another process
		 * then gets in and issues an unlock.  So VMAYHAVELOCKS must
		 * be set after the lf_setlock() operation completes rather
		 * than before.
		 */
		error = lf_setlock(lock, owner, type, flags, start, end);
		vsetflags(ap->a_vp, VMAYHAVELOCKS);
		break;

	case F_UNLCK:
		error = lf_setlock(lock, owner, type, flags, start, end);
		if (TAILQ_EMPTY(&lock->lf_range) &&
		    TAILQ_EMPTY(&lock->lf_blocked)) {
			vclrflags(ap->a_vp, VMAYHAVELOCKS);
		}
		break;

	case F_GETLK:
		error = lf_getlock(fl, lock, owner, type, flags, start, end);
		break;

	default:
		error = EINVAL;
		break;
	}
	lwkt_reltoken(token);
	return(error);
}

static int
lf_setlock(struct lockf *lock, struct proc *owner, int type, int flags,
	   off_t start, off_t end)
{
	struct lockf_range *range;
	struct lockf_range *brange;
	struct lockf_range *next;
	struct lockf_range *first_match;
	struct lockf_range *last_match;
	struct lockf_range *insert_point;
	struct lockf_range *new_range1;
	struct lockf_range *new_range2;
	int wakeup_needed;
	int double_clip;
	int unlock_override;
	int error = 0;
	int count;
	struct lockf_range_list deadlist;

	new_range1 = NULL;
	new_range2 = NULL;
	count = 0;

restart:
	/*
	 * Preallocate two ranges so we don't have to worry about blocking
	 * in the middle of the lock code.
	 */
	if (new_range1 == NULL)
		new_range1 = lf_alloc_range();
	if (new_range2 == NULL)
		new_range2 = lf_alloc_range();
	first_match = NULL;
	last_match = NULL;
	insert_point = NULL;
	wakeup_needed = 0;

	lf_print_lock(lock);

	/*
	 * Locate the insertion point for the new lock (the first range
	 * with an lf_start >= start).
	 *
	 * Locate the first and last ranges owned by us that overlap
	 * the requested range.
	 */
	TAILQ_FOREACH(range, &lock->lf_range, lf_link) {
		if (insert_point == NULL && range->lf_start >= start)
			insert_point = range;

		/*
		 * Skip non-overlapping locks.  Locks are sorted by lf_start,
		 * so we can terminate the search when lf_start exceeds the
		 * requested range (insert_point is still guaranteed to be
		 * set properly).
		 */
		if (range->lf_end < start)
			continue;
		if (range->lf_start > end) {
			range = NULL;
			break;
		}

		/*
		 * Overlapping lock.  Set first_match and last_match if we
		 * are the owner.
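		 *
		 * For example (hypothetical ranges): a request for
		 * [30,70] against our own existing locks [10,40],
		 * [50,60], and [65,90] leaves first_match pointing at
		 * [10,40] and last_match pointing at [65,90].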
		 */
		if (range->lf_owner == owner) {
			if (first_match == NULL)
				first_match = range;
			last_match = range;
			continue;
		}

		/*
		 * If we aren't the owner, check for a conflicting lock.
		 * Only if not unlocking.
		 */
		if (type != F_UNLCK) {
			if (type == F_WRLCK || range->lf_type == F_WRLCK)
				break;
		}
	}

	/*
	 * If a conflicting lock was observed, block or fail as appropriate.
	 * (this code is skipped when unlocking)
	 */
	if (range != NULL) {
		if ((flags & F_WAIT) == 0) {
			error = EAGAIN;
			goto do_cleanup;
		}

		/*
		 * We are blocked.  For POSIX locks we have to check
		 * for deadlocks and return with EDEADLK.  This is done
		 * by checking whether range->lf_owner is already
		 * blocked.
		 *
		 * Since flock-style locks cover the whole file, a
		 * deadlock between those is nearly impossible.
		 * This can only occur if a process tries to lock the
		 * same inode exclusively while holding a shared lock
		 * with another descriptor.
		 * XXX How can we cleanly detect this?
		 * XXX The current mixing of flock & fcntl/lockf is evil.
		 *
		 * Handle existing flock-style locks like POSIX locks.
		 */
		if (flags & F_POSIX) {
			TAILQ_FOREACH(brange, &lock->lf_blocked, lf_link) {
				if (brange->lf_owner == range->lf_owner) {
					error = EDEADLK;
					goto do_cleanup;
				}
			}
		}

		/*
		 * For flock-style locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((flags & F_POSIX) == 0 && type == F_WRLCK)
			lf_setlock(lock, owner, F_UNLCK, 0, start, end);

		brange = new_range1;
		new_range1 = NULL;
		lf_create_range(brange, owner, type, 0, start, end);
		TAILQ_INSERT_TAIL(&lock->lf_blocked, brange, lf_link);
		error = tsleep(brange, PCATCH, "lockf", 0);

		/*
		 * We may have been awakened by a signal and/or by a
		 * debugger continuing us (in which case we must remove
		 * ourselves from the blocked list) and/or by another
		 * process releasing/downgrading a lock (in which case
		 * we have already been removed from the blocked list
		 * and our lf_flags field is 1).
		 *
		 * Sleep if it looks like we might be livelocking.
		 */
		if (brange->lf_flags == 0)
			TAILQ_REMOVE(&lock->lf_blocked, brange, lf_link);
		if (count == 2)
			tsleep(brange, 0, "lockfz", 2);
		else
			++count;
		lf_destroy_range(brange);

		if (error)
			goto do_cleanup;
		goto restart;
	}

	/*
	 * If there are no overlapping locks owned by us then creating
	 * the new lock is easy.  This is the most common case.
	 */
	if (first_match == NULL) {
		if (type == F_UNLCK)
			goto do_wakeup;
		if (flags & F_POSIX) {
			if (lf_count_change(owner, 1)) {
				error = ENOLCK;
				goto do_cleanup;
			}
		}
		range = new_range1;
		new_range1 = NULL;
		lf_create_range(range, owner, type, flags, start, end);
		lf_insert(&lock->lf_range, range, insert_point);
		goto do_wakeup;
	}

	/*
	 * double_clip - Calculate a special case where TWO locks may have
	 *		 to be added due to the new lock breaking up an
	 *		 existing incompatible lock in the middle.
	 *
	 * unlock_override - Calculate a special case where NO locks
	 *		 need to be created.  This occurs when an unlock
	 *		 does not clip any locks at the front and rear.
	 *
	 * WARNING!  closef() and fdrop() assume that an F_UNLCK of the
	 *	     entire range will always succeed so the unlock_override
	 *	     case is mandatory.
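	 *
	 * Illustrative double_clip sketch (hypothetical offsets): a new
	 * incompatible lock over [40,60] against our existing lock
	 * [20,80] splits it, so two ranges must be created in addition
	 * to clipping the original:
	 *
	 *	existing:	[20......................80]
	 *	request:	        [40....60]
	 *	result:		[20..39][40....60][61....80]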
	 */
	double_clip = 0;
	unlock_override = 0;
	if (first_match->lf_start < start) {
		if (first_match == last_match && last_match->lf_end > end)
			double_clip = 1;
	} else if (type == F_UNLCK && last_match->lf_end <= end) {
		unlock_override = 1;
	}

	/*
	 * Figure out the worst case net increase in POSIX locks and account
	 * for it now before we start modifying things.  If neither the
	 * first nor the last locks match we have an issue.  If there is
	 * only one overlapping range which needs to be clipped on both ends
	 * we wind up having to create up to two new locks, else only one.
	 *
	 * When unlocking the worst case is always 1 new lock if our
	 * unlock request cuts the middle out of an existing lock range.
	 *
	 * count represents the 'cleanup' adjustment needed.  It starts
	 * negative, is incremented whenever we create a new POSIX lock,
	 * and decremented whenever we delete an existing one.  At the
	 * end of the day it had better be <= 0 or we didn't calculate the
	 * worst case properly here.
	 */
	count = 0;
	if ((flags & F_POSIX) && !unlock_override) {
		if (!lf_match(first_match, type, flags) &&
		    !lf_match(last_match, type, flags)
		) {
			if (double_clip && type != F_UNLCK)
				count = -2;
			else
				count = -1;
		}
		if (count && lf_count_change(owner, -count)) {
			error = ENOLCK;
			goto do_cleanup;
		}
	}
	/* else flock style lock which encompasses entire range */

	/*
	 * Create and insert the lock representing the requested range.
	 * Adjust the net POSIX lock count.  We have to move our insertion
	 * point since brange now represents the first record >= start.
	 *
	 * When unlocking, no new lock is inserted but we still clip.
	 */
	if (type != F_UNLCK) {
		brange = new_range1;
		new_range1 = NULL;
		lf_create_range(brange, owner, type, flags, start, end);
		lf_insert(&lock->lf_range, brange, insert_point);
		insert_point = brange;
		if (flags & F_POSIX)
			++count;
	} else {
		brange = NULL;
	}

	/*
	 * Handle the double_clip case.  This is the only case where
	 * we wind up having to add TWO locks.
	 */
	if (double_clip) {
		KKASSERT(first_match == last_match);
		last_match = new_range2;
		new_range2 = NULL;
		lf_create_range(last_match, first_match->lf_owner,
				first_match->lf_type, first_match->lf_flags,
				end + 1, first_match->lf_end);
		first_match->lf_end = start - 1;
		first_match->lf_flags &= ~F_NOEND;

		/*
		 * Figure out where to insert the right side clip.
		 */
		lf_insert(&lock->lf_range, last_match, first_match);
		if (last_match->lf_flags & F_POSIX)
			++count;
	}

	/*
	 * Clip or destroy the locks between first_match and last_match,
	 * inclusive.  Ignore the primary lock we created (brange).  Note
	 * that if double-clipped, first_match and last_match will be
	 * outside our clipping range.  Otherwise first_match and last_match
	 * will be deleted.
	 *
	 * We have already taken care of any double clipping.
	 *
	 * The insert_point may become invalid as we delete records, do not
	 * use that pointer any more.  Also, when removing something other
	 * than 'range' we have to check to see if the item we are removing
	 * is 'next' and adjust 'next' properly.
	 *
	 * NOTE: brange will be NULL if F_UNLCKing.
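	 *
	 * Hypothetical example: unlocking [30,60] against our own locks
	 * [10,40] and [50,90] clips the first down to [10,29] and the
	 * second to [61,90], while a lock such as [45,48] that lies
	 * entirely inside the unlocked range is deleted outright.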
	 */
	TAILQ_INIT(&deadlist);
	next = first_match;

	while ((range = next) != NULL) {
		next = TAILQ_NEXT(range, lf_link);

		/*
		 * Ignore elements that we do not own and ignore the
		 * primary request range which we just created.
		 */
		if (range->lf_owner != owner || range == brange)
			continue;

		/*
		 * We may have to wakeup a waiter when downgrading a lock.
		 */
		if (type == F_UNLCK)
			wakeup_needed = 1;
		if (type == F_RDLCK && range->lf_type == F_WRLCK)
			wakeup_needed = 1;

		/*
		 * Clip left.  This can only occur on first_match.
		 *
		 * Merge the left clip with brange if possible.  This must
		 * be done specifically, not in the optimized merge heuristic
		 * below, since we may have counted on it in our 'count'
		 * calculation above.
		 */
		if (range->lf_start < start) {
			KKASSERT(range == first_match);
			if (brange &&
			    range->lf_end >= start - 1 &&
			    lf_match(range, type, flags)) {
				range->lf_end = brange->lf_end;
				range->lf_flags |= brange->lf_flags & F_NOEND;

				/*
				 * Removing something other than 'range',
				 * adjust 'next' if necessary.
				 */
				if (next == brange)
					next = TAILQ_NEXT(next, lf_link);
				TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
				if (brange->lf_flags & F_POSIX)
					--count;
				TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
				brange = range;
			} else if (range->lf_end >= start) {
				range->lf_end = start - 1;
				if (type != F_UNLCK)
					range->lf_flags &= ~F_NOEND;
			}
			if (range == last_match)
				break;
			continue;
		}

		/*
		 * Clip right.  This can only occur on last_match.
		 *
		 * Merge the right clip if possible.  This must be done
		 * specifically, not in the optimized merge heuristic
		 * below, since we may have counted on it in our 'count'
		 * calculation.
		 *
		 * Since we are adjusting lf_start, we have to move the
		 * record to maintain the sorted list.  Since lf_start is
		 * only getting larger we can use the next element as the
		 * insert point (we don't have to backtrack).
		 */
		if (range->lf_end > end) {
			KKASSERT(range == last_match);
			if (brange &&
			    range->lf_start <= end + 1 &&
			    lf_match(range, type, flags)) {
				brange->lf_end = range->lf_end;
				brange->lf_flags |= range->lf_flags & F_NOEND;
				TAILQ_REMOVE(&lock->lf_range, range, lf_link);
				if (range->lf_flags & F_POSIX)
					--count;
				TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
			} else if (range->lf_start <= end) {
				range->lf_start = end + 1;
				TAILQ_REMOVE(&lock->lf_range, range, lf_link);
				lf_insert(&lock->lf_range, range, next);
			}
			/* range == last_match, we are done */
			break;
		}

		/*
		 * The record must be entirely enclosed.  Note that the
		 * record could be first_match or last_match, and will be
		 * deleted.
		 */
		KKASSERT(range->lf_start >= start && range->lf_end <= end);
		TAILQ_REMOVE(&lock->lf_range, range, lf_link);
		if (range->lf_flags & F_POSIX)
			--count;
		TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
		if (range == last_match)
			break;
	}

	/*
	 * Attempt to merge locks adjacent to brange.  For example, we may
	 * have had to clip first_match and/or last_match, and they might
	 * be adjacent.  Or there might simply have been an adjacent lock
	 * already there.
	 *
	 * Don't get fancy, just check adjacent elements in the list if they
	 * happen to be owned by us.
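	 *
	 * For example (hypothetical ranges): if we held a shared lock on
	 * [0,49] and an exclusive lock on [50,99], downgrading [50,99]
	 * to shared leaves two adjacent compatible ranges which get
	 * merged here into a single shared lock on [0,99].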
	 *
	 * This case only gets hit if we have a situation where a shared
	 * and exclusive lock are adjacent, and the exclusive lock is
	 * downgraded to shared or the shared lock is upgraded to exclusive.
	 */
	if (brange) {
		range = TAILQ_PREV(brange, lockf_range_list, lf_link);
		if (range &&
		    range->lf_owner == owner &&
		    range->lf_end == brange->lf_start - 1 &&
		    lf_match(range, type, flags)
		) {
			/*
			 * Extend range to cover brange and scrap brange.
			 */
			range->lf_end = brange->lf_end;
			range->lf_flags |= brange->lf_flags & F_NOEND;
			TAILQ_REMOVE(&lock->lf_range, brange, lf_link);
			if (brange->lf_flags & F_POSIX)
				--count;
			TAILQ_INSERT_TAIL(&deadlist, brange, lf_link);
			brange = range;
		}
		range = TAILQ_NEXT(brange, lf_link);
		if (range &&
		    range->lf_owner == owner &&
		    range->lf_start == brange->lf_end + 1 &&
		    lf_match(range, type, flags)
		) {
			/*
			 * Extend brange to cover range and scrap range.
			 */
			brange->lf_end = range->lf_end;
			brange->lf_flags |= range->lf_flags & F_NOEND;
			TAILQ_REMOVE(&lock->lf_range, range, lf_link);
			if (range->lf_flags & F_POSIX)
				--count;
			TAILQ_INSERT_TAIL(&deadlist, range, lf_link);
		}
	}

	/*
	 * Destroy deleted elements.  We didn't want to do it in the loop
	 * because the free() might have blocked.
	 *
	 * Adjust the count for any posix locks we thought we might create
	 * but didn't.
	 */
	while ((range = TAILQ_FIRST(&deadlist)) != NULL) {
		TAILQ_REMOVE(&deadlist, range, lf_link);
		lf_destroy_range(range);
	}

	KKASSERT(count <= 0);
	if (count < 0)
		lf_count_change(owner, count);
do_wakeup:
	lf_print_lock(lock);
	if (wakeup_needed)
		lf_wakeup(lock, start, end);
	error = 0;
do_cleanup:
	if (new_range1 != NULL)
		lf_destroy_range(new_range1);
	if (new_range2 != NULL)
		lf_destroy_range(new_range2);
	return(error);
}

/*
 * Check whether there is a blocking lock, and if so return its
 * process identifier.
 */
static int
lf_getlock(struct flock *fl, struct lockf *lock, struct proc *owner,
	   int type, int flags, off_t start, off_t end)
{
	struct lockf_range *range;

	TAILQ_FOREACH(range, &lock->lf_range, lf_link)
		if (range->lf_owner != owner &&
		    lf_overlap(range, start, end) &&
		    (type == F_WRLCK || range->lf_type == F_WRLCK))
			break;
	if (range == NULL) {
		fl->l_type = F_UNLCK;
		return(0);
	}
	fl->l_type = range->lf_type;
	fl->l_whence = SEEK_SET;
	fl->l_start = range->lf_start;
	if (range->lf_flags & F_NOEND)
		fl->l_len = 0;
	else
		fl->l_len = range->lf_end - range->lf_start + 1;
	if (range->lf_owner != NULL && (range->lf_flags & F_POSIX))
		fl->l_pid = range->lf_owner->p_pid;
	else
		fl->l_pid = -1;
	return(0);
}

/*
 * Wakeup pending lock attempts.  Theoretically we can stop as soon as
 * we encounter an exclusive request that covers the whole range (at least
 * insofar as the sleep code above calls lf_wakeup() if it would otherwise
 * exit instead of loop), but for now just wakeup all overlapping
 * requests.  XXX
 */
static void
lf_wakeup(struct lockf *lock, off_t start, off_t end)
{
	struct lockf_range *range, *nrange;

	TAILQ_FOREACH_MUTABLE(range, &lock->lf_blocked, lf_link, nrange) {
		if (lf_overlap(range, start, end) == 0)
			continue;
		TAILQ_REMOVE(&lock->lf_blocked, range, lf_link);
		range->lf_flags = 1;
		wakeup(range);
	}
}

/*
 * Allocate a range structure and initialize it sufficiently such that
 * lf_destroy_range() does not barf.
 */
static struct lockf_range *
lf_alloc_range(void)
{
	struct lockf_range *range;

#ifdef INVARIANTS
	atomic_add_int(&lf_global_counter, 1);
#endif
	range = kmalloc(sizeof(struct lockf_range), M_LOCKF, M_WAITOK);
	range->lf_owner = NULL;
	return(range);
}

static void
lf_insert(struct lockf_range_list *list, struct lockf_range *elm,
	  struct lockf_range *insert_point)
{
	while (insert_point && insert_point->lf_start < elm->lf_start)
		insert_point = TAILQ_NEXT(insert_point, lf_link);
	if (insert_point != NULL)
		TAILQ_INSERT_BEFORE(insert_point, elm, lf_link);
	else
		TAILQ_INSERT_TAIL(list, elm, lf_link);
}

static void
lf_create_range(struct lockf_range *range, struct proc *owner, int type,
		int flags, off_t start, off_t end)
{
	KKASSERT(start <= end);
	range->lf_type = type;
	range->lf_flags = flags;
	range->lf_start = start;
	range->lf_end = end;
	range->lf_owner = owner;

	lf_printf("lf_create_range: %ju..%ju\n",
		  (uintmax_t)range->lf_start, (uintmax_t)range->lf_end);
}

static void
lf_destroy_range(struct lockf_range *range)
{
	lf_printf("lf_destroy_range: %ju..%ju\n",
		  (uintmax_t)range->lf_start, (uintmax_t)range->lf_end);
	kfree(range, M_LOCKF);
#ifdef INVARIANTS
	atomic_add_int(&lf_global_counter, -1);
	KKASSERT(lf_global_counter >= 0);
#endif
}

#ifdef LOCKF_DEBUG

static void
_lf_printf(const char *ctl, ...)
{
	struct proc *p;
	__va_list va;

	if (lf_print_ranges) {
		if ((p = curproc) != NULL)
			kprintf("pid %d (%s): ", p->p_pid, p->p_comm);
	}
	__va_start(va, ctl);
	kvprintf(ctl, va);
	__va_end(va);
}

static void
_lf_print_lock(const struct lockf *lock)
{
	struct lockf_range *range;

	if (lf_print_ranges == 0)
		return;

	if (TAILQ_EMPTY(&lock->lf_range)) {
		lf_printf("lockf %p: no ranges locked\n", lock);
	} else {
		lf_printf("lockf %p:\n", lock);
	}
	TAILQ_FOREACH(range, &lock->lf_range, lf_link)
		kprintf("\t%jd..%jd type %s owned by %d\n",
			(intmax_t)range->lf_start, (intmax_t)range->lf_end,
			range->lf_type == F_RDLCK ? "shared" : "exclusive",
			range->lf_flags & F_POSIX ?
			    range->lf_owner->p_pid : -1);
	if (TAILQ_EMPTY(&lock->lf_blocked))
		kprintf("no process waiting for range\n");
	else
		kprintf("blocked locks:\n");
	TAILQ_FOREACH(range, &lock->lf_blocked, lf_link)
		kprintf("\t%jd..%jd type %s waiting on %p\n",
			(intmax_t)range->lf_start, (intmax_t)range->lf_end,
			range->lf_type == F_RDLCK ? "shared" : "exclusive",
			range);
}
#endif	/* LOCKF_DEBUG */