/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

/*
 * Byte-range advisory file locking for UFS inodes.
 *
 * Both POSIX fcntl()-style locks (F_POSIX, lf_id is a struct proc *)
 * and BSD flock()-style locks (F_FLOCK) are supported.  All locks on
 * an inode live on a single list rooted at ip->i_lockf, kept sorted
 * by starting offset.  Each active lock carries a TAILQ (lf_blkhd) of
 * lock requests currently blocked on it; a sleeping request's lf_next
 * points back at the lock it is blocked on.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>

#include <ufs/ufs/lockf.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
int	lockf_debug = 0;
struct ctldebug debug4 = { "lockf_debug", &lockf_debug };
#endif

#define NOLOCKF (struct lockf *)0
/* Selectors for lf_findoverlap(): whose locks to consider. */
#define SELF	0x1
#define OTHERS	0x2

/*
 * Set a byte-range lock.
 *
 * On success the request is linked into the inode's lock list, merging
 * with / splitting / upgrading / downgrading any overlapping locks this
 * process already owns, and 0 is returned.  On failure the lock
 * structure is freed and the return is EAGAIN (range blocked and F_WAIT
 * not set), EDEADLK (waiting would complete a cycle of F_POSIX
 * waiters), or the tsleep() error if the sleep was interrupted.
 */
int
lf_setlock(lock)
	register struct lockf *lock;
{
	register struct lockf *block;
	struct inode *ip = lock->lf_inode;
	struct lockf **prev, *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int ovcase, priority, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority: writers sleep at slightly lower priority
	 * than readers, and the sleep is signal-interruptible (PCATCH).
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while (block = lf_getblock(lock)) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			FREE(lock, M_LOCKF);
			return (EAGAIN);
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us. MAXDEPTH is set just to make sure we
		 * do not go off into neverland.
		 */
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			register struct proc *wproc;
			register struct lockf *waitblock;
			int i = 0;

			/* The block is waiting on something */
			wproc = (struct proc *)block->lf_id;
			while (wproc->p_wchan &&
			       (wproc->p_wmesg == lockstr) &&
			       (i++ < maxlockdepth)) {
				/*
				 * NOTE(review): this relies on the sleep
				 * protocol below — a process sleeping with
				 * wmesg "lockf" has its wchan set to its own
				 * struct lockf, whose lf_next points at the
				 * lock blocking it.
				 */
				waitblock = (struct lockf *)wproc->p_wchan;
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				wproc = (struct proc *)waitblock->lf_id;
				if (wproc == (struct proc *)lock->lf_id) {
					/* Cycle back to us: deadlock. */
					free(lock, M_LOCKF);
					return (EDEADLK);
				}
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		if (error = tsleep((caddr_t)lock, priority, lockstr, 0)) {
			/*
			 * We may have been awakened by a signal (in
			 * which case we must remove ourselves from the
			 * blocked list) and/or by another process
			 * releasing a lock (in which case we have already
			 * been removed from the blocked list and our
			 * lf_next field set to NOLOCKF).
			 */
			if (lock->lf_next)
				TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock,
				    lf_block);
			free(lock, M_LOCKF);
			return (error);
		}
		/* Re-scan: another lock may have appeared while asleep. */
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = &ip->i_lockf;
	block = ip->i_lockf;
	needtolink = 1;
	for (;;) {
		if (ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap))
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			/* Reuse the existing entry; just retype it. */
			overlap->lf_type = lock->lf_type;
			FREE(lock, M_LOCKF);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				/* Same type: already covered, nothing to do. */
				free(lock, M_LOCKF);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				/* Trim the front of overlap, insert lock. */
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				/* Carve lock out of the middle of overlap. */
				lf_split(overlap, lock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				/*
				 * Inherit overlap's waiters: they are now
				 * blocked on us instead.
				 */
				while (ltmp = overlap->lf_blkhd.tqh_first) {
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			free(overlap, M_LOCKF);
			/* May swallow several locks: keep scanning. */
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			/* Continue: lock's tail may still overlap others. */
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return (0);
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 *
 * 'unlock' describes the range to release (lf_type must be F_UNLCK);
 * it is a template owned by the caller and is not freed here.
 * Always returns 0.
 */
int
lf_clearlock(unlock)
	register struct lockf *unlock;
{
	struct inode *ip = unlock->lf_inode;
	register struct lockf *lf = ip->i_lockf;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = &ip->i_lockf;
	while (ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			/* Exact match: unlink and free it. */
			*prev = overlap->lf_next;
			FREE(overlap, M_LOCKF);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				/* Common start: just trim the front. */
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			/*
			 * lf_split() links the tail piece after 'unlock';
			 * splice it back after overlap, dropping the
			 * unlocked middle range.
			 */
			lf_split(overlap, unlock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			/* Delete overlap entirely and keep scanning. */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			free(overlap, M_LOCKF);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return (0);
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 *
 * Implements the F_GETLK query: if a lock held by someone else would
 * block 'lock', describe it in *fl (l_pid is the owner's pid for
 * F_POSIX locks, -1 otherwise); if nothing blocks, set l_type to
 * F_UNLCK.  Always returns 0.
 */
int
lf_getlock(lock, fl)
	register struct lockf *lock;
	register struct flock *fl;
{
	register struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if (block = lf_getblock(lock)) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		/* lf_end == -1 means "to end of file" -> l_len of 0. */
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return (0);
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 *
 * A lock held by another owner blocks 'lock' when the ranges overlap
 * and at least one of the two is exclusive (F_WRLCK).
 */
struct lockf *
lf_getblock(lock)
	register struct lockf *lock;
{
	struct lockf **prev, *overlap, *lf = lock->lf_inode->i_lockf;
	int ovcase;

	prev = &lock->lf_inode->i_lockf;
	while (ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return (overlap);
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return (NOLOCKF);
}

/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * 'type' selects whose locks are examined: SELF (same lf_id as 'lock')
 * or OTHERS.  On a hit, *overlap points at the overlapping lock and
 * *prev at the link that points to it (so callers can unlink/insert);
 * the return value is the case number 1-5 below.  Returns 0 when no
 * overlap exists; the search exploits the list being sorted by
 * lf_start to stop early for SELF scans.
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 * may be more than one.
 */
int
lf_findoverlap(lf, lock, type, prev, overlap)
	register struct lockf *lf;
	struct lockf *lock;
	int type;
	struct lockf ***prev;
	struct lockf **overlap;
{
	off_t start, end;

	*overlap = lf;
	if (lf == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NOLOCKF) {
		/* Skip locks belonging to the wrong class of owner. */
		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 *
		 * An lf_end of -1 means the range extends to end of file.
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			/*
			 * List is sorted: once a SELF scan passes the end
			 * of our range, no later lock can overlap.
			 */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return (0);
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return (1);
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return (2);
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		     (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return (3);
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return (4);
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return (5);
		}
		/* The five cases above are exhaustive for overlapping ranges. */
		panic("lf_findoverlap: default");
	}
	return (0);
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 *
 * lock1 is the existing lock; lock2 describes a strictly contained
 * range.  On return the list reads lock1 (front piece), lock2, then —
 * when lock2 ends before lock1 did — a newly allocated lock covering
 * the remaining tail of lock1's original range.
 */
void
lf_split(lock1, lock2)
	register struct lockf *lock1;
	register struct lockf *lock2;
{
	register struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		/* Common start: lock2 takes the front, lock1 the tail. */
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		/* Common end: lock1 keeps the front, lock2 takes the tail. */
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
	bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
	splitlock->lf_start = lock2->lf_end + 1;
	/* The copy must not inherit lock1's blocked-waiters queue. */
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}

/*
 * Wakeup a blocklist
 *
 * Unhook every request blocked on 'listhead', clear its back pointer
 * (so the sleeper can tell it was released rather than signalled),
 * and wake it so lf_setlock() retries the scan.
 */
void
lf_wakelock(listhead)
	struct lockf *listhead;
{
	register struct lockf *wakelock;

	while (wakelock = listhead->lf_blkhd.tqh_first) {
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif /* LOCKF_DEBUG */
		wakeup((caddr_t)wakelock);
	}
}

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
lf_print(tag, lock)
	char *tag;
	register struct lockf *lock;
{

	printf("%s: lock 0x%lx for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid);
	else
		printf("id 0x%x", lock->lf_id);
	printf(" in ino %d on dev <%d, %d>, %s, start %d, end %d",
		lock->lf_inode->i_number,
		major(lock->lf_inode->i_dev),
		minor(lock->lf_inode->i_dev),
		lock->lf_type == F_RDLCK ? "shared" :
		lock->lf_type == F_WRLCK ? "exclusive" :
		lock->lf_type == F_UNLCK ? "unlock" :
		"unknown", lock->lf_start, lock->lf_end);
	if (lock->lf_blkhd.tqh_first)
		printf(" block 0x%x\n", lock->lf_blkhd.tqh_first);
	else
		printf("\n");
}

/*
 * Print out the inode's entire lock list, including, for each lock,
 * the requests blocked on it.
 */
lf_printlist(tag, lock)
	char *tag;
	struct lockf *lock;
{
	register struct lockf *lf, *blk;

	printf("%s: Lock list for ino %d on dev <%d, %d>:\n",
		tag, lock->lf_inode->i_number,
		major(lock->lf_inode->i_dev),
		minor(lock->lf_inode->i_dev));
	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
		printf("\tlock 0x%lx for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid);
		else
			printf("id 0x%x", lf->lf_id);
		printf(", %s, start %d, end %d",
			lf->lf_type == F_RDLCK ? "shared" :
			lf->lf_type == F_WRLCK ? "exclusive" :
			lf->lf_type == F_UNLCK ? "unlock" :
			"unknown", lf->lf_start, lf->lf_end);
		for (blk = lf->lf_blkhd.tqh_first; blk;
		     blk = blk->lf_block.tqe_next) {
			printf("\n\t\tlock request 0x%lx for ", blk);
			if (blk->lf_flags & F_POSIX)
				printf("proc %d",
				    ((struct proc *)(blk->lf_id))->p_pid);
			else
				printf("id 0x%x", blk->lf_id);
			printf(", %s, start %d, end %d",
				blk->lf_type == F_RDLCK ? "shared" :
				blk->lf_type == F_WRLCK ? "exclusive" :
				blk->lf_type == F_UNLCK ? "unlock" :
				"unknown", blk->lf_start, blk->lf_end);
			/* A blocked request should itself have no waiters. */
			if (blk->lf_blkhd.tqh_first)
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */