/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>

#include <ufs/ufs/lockf.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
int	lockf_debug = 0;
struct ctldebug debug4 = { "lockf_debug", &lockf_debug };
#endif

#define NOLOCKF (struct lockf *)0
#define SELF	0x1
#define OTHERS	0x2
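
/*
 * Each inode heads a sorted, singly linked list of the active locks on
 * it (i_lockf, chained through lf_next); each active lock in turn heads
 * a tail queue (lf_blkhd) of the lock requests sleeping on it.  The
 * SELF and OTHERS flags tell lf_findoverlap() whether to consider locks
 * owned by the requesting process or locks owned by everyone else.
 */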

/*
 * Set a byte-range lock.
 */
int
lf_setlock(lock)
	register struct lockf *lock;
{
	register struct lockf *block;
	struct inode *ip = lock->lf_inode;
	struct lockf **prev, *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int ovcase, priority, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	priority |= PCATCH;
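	/*
	 * PCATCH makes the tsleep() below return early if a signal
	 * arrives; write lock requests sleep at a slightly less
	 * favorable priority (PLOCK + 4) than read lock requests.
	 */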
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NOLOCKF) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			FREE(lock, M_LOCKF);
			return (EAGAIN);
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us. MAXDEPTH is set just to make sure we
		 * do not go off into neverland.
		 */
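		/*
		 * For example, if process A sleeps here on a lock held
		 * by process B while B is already asleep on one of A's
		 * locks, the walk below leads from B's wait channel
		 * back to A, and the request fails with EDEADLK.
		 */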
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			register struct proc *wproc;
			register struct lockf *waitblock;
			int i = 0;

			/* The block is waiting on something */
			wproc = (struct proc *)block->lf_id;
			while (wproc->p_wchan &&
			       (wproc->p_wmesg == lockstr) &&
			       (i++ < maxlockdepth)) {
				waitblock = (struct lockf *)wproc->p_wchan;
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				wproc = (struct proc *)waitblock->lf_id;
				if (wproc == (struct proc *)lock->lf_id) {
					free(lock, M_LOCKF);
					return (EDEADLK);
				}
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock);
			lock->lf_type = F_WRLCK;
		}
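		/*
		 * Releasing our own shared lock before sleeping means
		 * an flock() upgrade is not atomic, but it prevents
		 * two processes that try to upgrade at the same time
		 * from deadlocking on each other's shared locks.
		 */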
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		if ((error = tsleep((caddr_t)lock, priority, lockstr, 0)) != 0) {
			/*
			 * We may have been awakened by a signal (in
			 * which case we must remove ourselves from the
			 * blocked list) and/or by another process
			 * releasing a lock (in which case we have already
			 * been removed from the blocked list and our
			 * lf_next field set to NOLOCKF).
			 */
			if (lock->lf_next)
				TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock,
				    lf_block);
			free(lock, M_LOCKF);
			return (error);
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = &ip->i_lockf;
	block = ip->i_lockf;
	needtolink = 1;
	for (;;) {
		if ((ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap)))
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
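		/*
		 * For example, with a request covering bytes 10-19, an
		 * existing lock at 10-19 is case 1, 5-25 is case 2,
		 * 12-15 is case 3, 5-12 is case 4, and 15-25 is case 5.
		 */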
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			FREE(lock, M_LOCKF);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				free(lock, M_LOCKF);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp = overlap->lf_blkhd.tqh_first)) {
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
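			/*
			 * The sleepers are requeued on the new lock so
			 * that a later release of it, rather than of
			 * the overlap being deleted, retries them.
			 */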
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			free(overlap, M_LOCKF);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return (0);
}
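
/*
 * A caller hands lf_setlock() a fully initialized, malloc'd lockf
 * which this routine either links into the inode's lock list or
 * frees.  A minimal sketch, modeled on the F_SETLK path of
 * ufs_advlock() (the field values here are illustrative only):
 *
 *	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
 *	lock->lf_start = start;			(first byte covered)
 *	lock->lf_end = end;			(last byte, -1 for EOF)
 *	lock->lf_id = ap->a_id;			(owner: proc for F_POSIX)
 *	lock->lf_inode = ip;
 *	lock->lf_type = fl->l_type;		(F_RDLCK or F_WRLCK)
 *	lock->lf_flags = ap->a_flags;		(F_POSIX/F_FLOCK, F_WAIT)
 *	lock->lf_next = NOLOCKF;
 *	TAILQ_INIT(&lock->lf_blkhd);
 *	error = lf_setlock(lock);
 */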

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wake up anyone we can.
 */
int
lf_clearlock(unlock)
	register struct lockf *unlock;
{
	struct inode *ip = unlock->lf_inode;
	register struct lockf *lf = ip->i_lockf;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = &ip->i_lockf;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
		/*
		 * Wake up the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			FREE(overlap, M_LOCKF);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			free(overlap, M_LOCKF);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return (0);
}

/*
 * Check whether there is a blocking lock, and if so
 * return a description of it in the flock structure.
 */
int
lf_getlock(lock, fl)
	register struct lockf *lock;
	register struct flock *fl;
{
	register struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NOLOCKF) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return (0);
}
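
/*
 * Note the F_GETLK conventions used above: l_len of 0 describes a lock
 * that runs to end of file (lf_end == -1), and l_pid is -1 when the
 * blocking lock is an flock() lock, which has no single owning process.
 */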

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
struct lockf *
lf_getblock(lock)
	register struct lockf *lock;
{
	struct lockf **prev, *overlap, *lf = lock->lf_inode->i_lockf;
	int ovcase;

	prev = &lock->lf_inode->i_lockf;
	while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if (lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)
			return (overlap);
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return (NOLOCKF);
}

/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 */
int
lf_findoverlap(lf, lock, type, prev, overlap)
	register struct lockf *lf;
	struct lockf *lock;
	int type;
	struct lockf ***prev;
	struct lockf **overlap;
{
	off_t start, end;

	*overlap = lf;
	if (lf == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
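	/*
	 * In all of the range comparisons below an lf_end of -1 is the
	 * sentinel for a lock extending to end of file, so it must be
	 * checked before any arithmetic on the endpoints.
	 */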
	while (lf != NOLOCKF) {
		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return (0);
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return (1);
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return (2);
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		     (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return (3);
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return (4);
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return (5);
		}
		panic("lf_findoverlap: default");
	}
	return (0);
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
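/*
 * For example, splitting lock2 = [40, 59] out of lock1 = [0, 99]
 * shrinks lock1 to [0, 39], links lock2 in after it, and allocates a
 * third lock for the remainder [60, 99].
 */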
void
lf_split(lock1, lock2)
	register struct lockf *lock1;
	register struct lockf *lock2;
{
	register struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
	bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}

/*
 * Wake up everyone blocked on this lock.
 */
void
lf_wakelock(listhead)
	struct lockf *listhead;
{
	register struct lockf *wakelock;

	while ((wakelock = listhead->lf_blkhd.tqh_first) != NOLOCKF) {
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NOLOCKF;
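		/*
		 * Clearing lf_next before the wakeup tells the sleeper
		 * in lf_setlock() that it has already been taken off
		 * the blocked list.
		 */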
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif /* LOCKF_DEBUG */
		wakeup((caddr_t)wakelock);
	}
}

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
lf_print(tag, lock)
	char *tag;
	register struct lockf *lock;
{

	printf("%s: lock 0x%lx for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid);
	else
		printf("id 0x%x", lock->lf_id);
	printf(" in ino %d on dev <%d, %d>, %s, start %d, end %d",
		lock->lf_inode->i_number,
		major(lock->lf_inode->i_dev),
		minor(lock->lf_inode->i_dev),
		lock->lf_type == F_RDLCK ? "shared" :
		lock->lf_type == F_WRLCK ? "exclusive" :
		lock->lf_type == F_UNLCK ? "unlock" :
		"unknown", lock->lf_start, lock->lf_end);
	if (lock->lf_blkhd.tqh_first)
		printf(" block 0x%x\n", lock->lf_blkhd.tqh_first);
	else
		printf("\n");
}

lf_printlist(tag, lock)
	char *tag;
	struct lockf *lock;
{
	register struct lockf *lf, *blk;

	printf("%s: Lock list for ino %d on dev <%d, %d>:\n",
		tag, lock->lf_inode->i_number,
		major(lock->lf_inode->i_dev),
		minor(lock->lf_inode->i_dev));
	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
		printf("\tlock 0x%lx for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid);
		else
			printf("id 0x%x", lf->lf_id);
		printf(", %s, start %d, end %d",
			lf->lf_type == F_RDLCK ? "shared" :
			lf->lf_type == F_WRLCK ? "exclusive" :
			lf->lf_type == F_UNLCK ? "unlock" :
			"unknown", lf->lf_start, lf->lf_end);
		for (blk = lf->lf_blkhd.tqh_first; blk;
		     blk = blk->lf_block.tqe_next) {
			printf("\n\t\tlock request 0x%lx for ", blk);
			if (blk->lf_flags & F_POSIX)
				printf("proc %d",
				    ((struct proc *)(blk->lf_id))->p_pid);
			else
				printf("id 0x%x", blk->lf_id);
			printf(", %s, start %d, end %d",
				blk->lf_type == F_RDLCK ? "shared" :
				blk->lf_type == F_WRLCK ? "exclusive" :
				blk->lf_type == F_UNLCK ? "unlock" :
				"unknown", blk->lf_start, blk->lf_end);
			if (blk->lf_blkhd.tqh_first)
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */