xref: /original-bsd/sys/ufs/ufs/ufs_lockf.c (revision b78a899d)
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>

#include <ufs/ufs/lockf.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
int	lockf_debug = 0;
struct ctldebug debug4 = { "lockf_debug", &lockf_debug };
#endif

#define NOLOCKF (struct lockf *)0
#define SELF	0x1
#define OTHERS	0x2
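/*
 * NOLOCKF is the null lock pointer.  SELF and OTHERS select which locks
 * lf_findoverlap() will consider: those with the same owner (lf_id) as
 * the request, or those belonging to anyone else.
 */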

/*
 * Set a byte-range lock.
 */
int
lf_setlock(lock)
	register struct lockf *lock;
{
	register struct lockf *block;
	struct inode *ip = lock->lf_inode;
	struct lockf **prev, *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int ovcase, priority, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while (block = lf_getblock(lock)) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			FREE(lock, M_LOCKF);
			return (EAGAIN);
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us. MAXDEPTH is set just to make sure we
		 * do not go off into neverland.
		 */
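		/*
		 * Concretely: the owner of the lock that blocks us may itself
		 * be asleep in lf_setlock(), in which case its p_wchan is the
		 * lock request it is sleeping on; that request's lf_next
		 * points at the lock blocking it, whose lf_id names the next
		 * owner in the chain.  If following the chain leads back to
		 * this process, granting the lock would create a cycle, so
		 * the request fails with EDEADLK.
		 */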
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			register struct proc *wproc;
			register struct lockf *waitblock;
			int i = 0;

			/* The block is waiting on something */
			wproc = (struct proc *)block->lf_id;
			while (wproc->p_wchan &&
			       (wproc->p_wmesg == lockstr) &&
			       (i++ < maxlockdepth)) {
				waitblock = (struct lockf *)wproc->p_wchan;
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				wproc = (struct proc *)waitblock->lf_id;
				if (wproc == (struct proc *)lock->lf_id) {
					free(lock, M_LOCKF);
					return (EDEADLK);
				}
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		if (error = tsleep((caddr_t)lock, priority, lockstr, 0)) {
			/*
			 * We may have been awakened by a signal (in
			 * which case we must remove ourselves from the
			 * blocked list) and/or by another process
			 * releasing a lock (in which case we have already
			 * been removed from the blocked list and our
			 * lf_next field set to NOLOCKF).
			 */
			if (lock->lf_next)
				TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock,
				    lf_block);
			free(lock, M_LOCKF);
			return (error);
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = &ip->i_lockf;
	block = ip->i_lockf;
	needtolink = 1;
	for (;;) {
		if (ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap))
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			FREE(lock, M_LOCKF);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				free(lock, M_LOCKF);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while (ltmp = overlap->lf_blkhd.tqh_first) {
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			free(overlap, M_LOCKF);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return (0);
}
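
/*
 * Example (user level, not part of this kernel source): byte-range
 * requests of the kind lf_setlock() handles arrive from fcntl(2) by way
 * of the vnode advisory-locking operation.  The sketch below, which
 * assumes fd is an open descriptor for a file on a UFS file system,
 * asks for an exclusive lock on bytes 0-99 and waits for it, i.e. a
 * request with lf_start = 0 and lf_end = 99.  If granting it would
 * complete a cycle of waiters, fcntl fails with errno set to EDEADLK;
 * the same request made with F_SETLK instead of F_SETLKW returns
 * EAGAIN rather than sleeping.
 *
 *	struct flock fl;
 *	int error;
 *
 *	fl.l_type = F_WRLCK;
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start = 0;
 *	fl.l_len = 100;
 *	error = fcntl(fd, F_SETLKW, &fl);
 */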

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
int
lf_clearlock(unlock)
	register struct lockf *unlock;
{
	struct inode *ip = unlock->lf_inode;
	register struct lockf *lf = ip->i_lockf;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = &ip->i_lockf;
	while (ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			FREE(overlap, M_LOCKF);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			free(overlap, M_LOCKF);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return (0);
}
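
/*
 * Example: if this process holds a single lock covering bytes 0-99 and
 * lf_clearlock() is asked to unlock bytes 40-59, lf_findoverlap()
 * reports case 2 ("overlap contains lock"); lf_split() then leaves two
 * locks, 0-39 and 60-99, and any requests queued behind the original
 * lock are awakened to retry.
 */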

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
int
lf_getlock(lock, fl)
	register struct lockf *lock;
	register struct flock *fl;
{
	register struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if (block = lf_getblock(lock)) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return (0);
}
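
/*
 * lf_getlock() implements the F_GETLK side of fcntl(2): if nothing
 * would block the described request, l_type is set to F_UNLCK;
 * otherwise the flock structure describes the first blocking lock.
 * Note the encoding used in both directions: an lf_end of -1 means
 * "to end of file" and is reported as l_len = 0, and l_pid is -1 for
 * flock(2)-style locks, which are identified by the open file rather
 * than by a process.
 */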

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
struct lockf *
lf_getblock(lock)
	register struct lockf *lock;
{
	struct lockf **prev, *overlap, *lf = lock->lf_inode->i_lockf;
	int ovcase;

	prev = &lock->lf_inode->i_lockf;
	while (ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return (overlap);
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return (NOLOCKF);
}
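
/*
 * lf_getblock() considers only locks owned by someone else (OTHERS),
 * and an overlap blocks us only if at least one side wants an
 * exclusive (write) lock: any number of shared locks on the same
 * range coexist, while a write lock conflicts with everything.
 */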

/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 */
int
lf_findoverlap(lf, lock, type, prev, overlap)
	register struct lockf *lf;
	struct lockf *lock;
	int type;
	struct lockf ***prev;
	struct lockf **overlap;
{
	off_t start, end;

	*overlap = lf;
	if (lf == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NOLOCKF) {
		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
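		/*
		 * Pictorially, with "lock" the request [start, end] and
		 * "lf" the existing lock being checked (an end of -1
		 * means "to end of file"):
		 *
		 *	case 0:   lock:       |-----|
		 *	            lf: |===|
		 *	case 1:   lock:   |-----|
		 *	            lf:   |=====|
		 *	case 2:   lock:   |-----|
		 *	            lf: |=========|
		 *	case 3:   lock: |---------|
		 *	            lf:   |=====|
		 *	case 4:   lock:   |-----|
		 *	            lf: |=====|
		 *	case 5:   lock:   |-----|
		 *	            lf:      |=====|
		 */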
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return (0);
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return (1);
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return (2);
		}
		if ((start <= lf->lf_start) &&
		    (end == -1 ||
		     (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return (3);
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return (4);
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return (5);
		}
		panic("lf_findoverlap: default");
	}
	return (0);
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
void
lf_split(lock1, lock2)
	register struct lockf *lock1;
	register struct lockf *lock2;
{
	register struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
	bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}
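
/*
 * Example: splitting lock1 = [0, 99] around the contained range
 * lock2 = [40, 59] shrinks lock1 to [0, 39], allocates splitlock for
 * [60, 99], and links the three as lock1 -> lock2 -> splitlock, so the
 * list order follows the starting offsets.  When lock2 shares a
 * boundary with lock1, no third piece is needed.
 */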

/*
 * Wakeup a blocklist
 */
void
lf_wakelock(listhead)
	struct lockf *listhead;
{
	register struct lockf *wakelock;

	while (wakelock = listhead->lf_blkhd.tqh_first) {
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif /* LOCKF_DEBUG */
		wakeup((caddr_t)wakelock);
	}
}

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
lf_print(tag, lock)
	char *tag;
	register struct lockf *lock;
{

	printf("%s: lock 0x%lx for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid);
	else
		printf("id 0x%x", lock->lf_id);
	printf(" in ino %d on dev <%d, %d>, %s, start %d, end %d",
		lock->lf_inode->i_number,
		major(lock->lf_inode->i_dev),
		minor(lock->lf_inode->i_dev),
		lock->lf_type == F_RDLCK ? "shared" :
		lock->lf_type == F_WRLCK ? "exclusive" :
		lock->lf_type == F_UNLCK ? "unlock" :
		"unknown", lock->lf_start, lock->lf_end);
	if (lock->lf_blkhd.tqh_first)
		printf(" block 0x%x\n", lock->lf_blkhd.tqh_first);
	else
		printf("\n");
}

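/*
 * Print out the list of locks for an inode, including any requests
 * queued behind each lock.
 */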
lf_printlist(tag, lock)
	char *tag;
	struct lockf *lock;
{
	register struct lockf *lf, *blk;

	printf("%s: Lock list for ino %d on dev <%d, %d>:\n",
		tag, lock->lf_inode->i_number,
		major(lock->lf_inode->i_dev),
		minor(lock->lf_inode->i_dev));
	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
		printf("\tlock 0x%lx for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid);
		else
			printf("id 0x%x", lf->lf_id);
		printf(", %s, start %d, end %d",
			lf->lf_type == F_RDLCK ? "shared" :
			lf->lf_type == F_WRLCK ? "exclusive" :
			lf->lf_type == F_UNLCK ? "unlock" :
			"unknown", lf->lf_start, lf->lf_end);
		for (blk = lf->lf_blkhd.tqh_first; blk;
		     blk = blk->lf_block.tqe_next) {
			printf("\n\t\tlock request 0x%lx for ", blk);
			if (blk->lf_flags & F_POSIX)
				printf("proc %d",
				    ((struct proc *)(blk->lf_id))->p_pid);
			else
				printf("id 0x%x", blk->lf_id);
			printf(", %s, start %d, end %d",
				blk->lf_type == F_RDLCK ? "shared" :
				blk->lf_type == F_WRLCK ? "exclusive" :
				blk->lf_type == F_UNLCK ? "unlock" :
				"unknown", blk->lf_start, blk->lf_end);
			if (blk->lf_blkhd.tqh_first)
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */