xref: /original-bsd/sys/kern/vfs_subr.c (revision 21b9697b)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
16  *
17  *	@(#)vfs_subr.c	7.22 (Berkeley) 12/31/89
18  */
20 /*
21  * External virtual filesystem routines
22  */
24 #include "param.h"
25 #include "mount.h"
26 #include "time.h"
27 #include "vnode.h"
28 #include "namei.h"
29 #include "ucred.h"
30 #include "errno.h"
31 #include "malloc.h"
33 /*
34  * Remove a mount point from the list of mounted filesystems.
35  * Unmount of the root is illegal.
36  */
37 void
38 vfs_remove(mp)
39 	register struct mount *mp;
40 {
42 	if (mp == rootfs)
43 		panic("vfs_remove: unmounting root");
44 	mp->m_prev->m_next = mp->m_next;
45 	mp->m_next->m_prev = mp->m_prev;
46 	mp->m_vnodecovered->v_mountedhere = (struct mount *)0;
47 	vfs_unlock(mp);
48 }
50 /*
51  * Lock a filesystem.
52  * Used to prevent access to it while mounting and unmounting.
53  */
54 vfs_lock(mp)
55 	register struct mount *mp;
56 {
58 	while(mp->m_flag & M_MLOCK) {
59 		mp->m_flag |= M_MWAIT;
60 		sleep((caddr_t)mp, PVFS);
61 	}
62 	mp->m_flag |= M_MLOCK;
63 	return (0);
64 }
66 /*
67  * Unlock a locked filesystem.
68  * Panic if filesystem is not locked.
69  */
70 void
71 vfs_unlock(mp)
72 	register struct mount *mp;
73 {
75 	if ((mp->m_flag & M_MLOCK) == 0)
76 		panic("vfs_unlock: locked fs");
77 	mp->m_flag &= ~M_MLOCK;
78 	if (mp->m_flag & M_MWAIT) {
79 		mp->m_flag &= ~M_MWAIT;
80 		wakeup((caddr_t)mp);
81 	}
82 }
84 /*
85  * Lookup a mount point by filesystem identifier.
86  */
87 struct mount *
88 getvfs(fsid)
89 	fsid_t *fsid;
90 {
91 	register struct mount *mp;
93 	mp = rootfs;
94 	do {
95 		if (mp->m_fsid.val[0] == fsid->val[0] &&
96 		    mp->m_fsid.val[1] == fsid->val[1]) {
97 			return (mp);
98 		}
99 		mp = mp->m_next;
100 	} while (mp != rootfs);
101 	return ((struct mount *)0);
102 }
104 /*
105  * Set vnode attributes to VNOVAL
106  */
107 void vattr_null(vap)
108 	register struct vattr *vap;
109 {
111 	vap->va_type = VNON;
112 	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
113 		vap->va_fsid = vap->va_fileid = vap->va_size =
114 		vap->va_size1 = vap->va_blocksize = vap->va_rdev =
115 		vap->va_bytes = vap->va_bytes1 =
116 		vap->va_atime.tv_sec = vap->va_atime.tv_usec =
117 		vap->va_mtime.tv_sec = vap->va_mtime.tv_usec =
118 		vap->va_ctime.tv_sec = vap->va_ctime.tv_usec =
119 		vap->va_flags = vap->va_gen = VNOVAL;
120 }
122 /*
123  * Initialize a nameidata structure
124  */
125 ndinit(ndp)
126 	register struct nameidata *ndp;
127 {
129 	bzero((caddr_t)ndp, sizeof(struct nameidata));
130 	ndp->ni_iov = &ndp->ni_nd.nd_iovec;
131 	ndp->ni_iovcnt = 1;
132 	ndp->ni_base = (caddr_t)&ndp->ni_dent;
133 	ndp->ni_rw = UIO_WRITE;
134 	ndp->ni_uioseg = UIO_SYSSPACE;
135 }
137 /*
138  * Duplicate a nameidata structure
139  */
140 nddup(ndp, newndp)
141 	register struct nameidata *ndp, *newndp;
142 {
144 	ndinit(newndp);
145 	newndp->ni_cdir = ndp->ni_cdir;
146 	VREF(newndp->ni_cdir);
147 	newndp->ni_rdir = ndp->ni_rdir;
148 	if (newndp->ni_rdir)
149 		VREF(newndp->ni_rdir);
150 	newndp->ni_cred = ndp->ni_cred;
151 	crhold(newndp->ni_cred);
152 }
154 /*
155  * Release a nameidata structure
156  */
157 ndrele(ndp)
158 	register struct nameidata *ndp;
159 {
161 	vrele(ndp->ni_cdir);
162 	if (ndp->ni_rdir)
163 		vrele(ndp->ni_rdir);
164 	crfree(ndp->ni_cred);
165 }
/*
 * Routines having to do with the management of the vnode table.
 */

/*
 * Free list of vnodes: vfreeh is the first entry, vfreet points
 * at the forward link of the last one.
 */
struct vnode *vfreeh, **vfreet;
extern struct vnodeops dead_vnodeops, spec_vnodeops;
extern void vclean();

/*
 * Hash table of special device vnodes, indexed by SPECHASH() of
 * the device number.  The macro argument is fully parenthesized
 * so that an expression argument hashes the same as its value.
 */
#define	SPECHSZ	64
#if	((SPECHSZ&(SPECHSZ-1)) == 0)
#define	SPECHASH(rdev)	((((rdev)>>5)+(rdev))&(SPECHSZ-1))
#else
#define	SPECHASH(rdev)	(((unsigned)(((rdev)>>5)+(rdev)))%SPECHSZ)
#endif
struct vnode *speclisth[SPECHSZ];
182 /*
183  * Initialize the vnode structures and initialize each file system type.
184  */
185 vfsinit()
186 {
187 	register struct vnode *vp = vnode;
188 	struct vfsops **vfsp;
190 	/*
191 	 * Build vnode free list.
192 	 */
193 	vfreeh = vp;
194 	vfreet = &vp->v_freef;
195 	vp->v_freeb = &vfreeh;
196 	vp->v_op = &dead_vnodeops;
197 	for (vp++; vp < vnodeNVNODE; vp++) {
198 		*vfreet = vp;
199 		vp->v_freeb = vfreet;
200 		vfreet = &vp->v_freef;
201 		vp->v_op = &dead_vnodeops;
202 	}
203 	vp--;
204 	vp->v_freef = NULL;
205 	/*
206 	 * Initialize the vnode name cache
207 	 */
208 	nchinit();
209 	/*
210 	 * Initialize each file system type.
211 	 */
212 	for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) {
213 		if (*vfsp == NULL)
214 			continue;
215 		(*(*vfsp)->vfs_init)();
216 	}
217 }
219 /*
220  * Return the next vnode from the free list.
221  */
222 getnewvnode(tag, mp, vops, vpp)
223 	enum vtagtype tag;
224 	struct mount *mp;
225 	struct vnodeops *vops;
226 	struct vnode **vpp;
227 {
228 	register struct vnode *vp, *vq;
230 	if ((vp = vfreeh) == NULL) {
231 		tablefull("vnode");
232 		*vpp = 0;
233 		return (ENFILE);
234 	}
235 	if (vp->v_usecount)
236 		panic("free vnode isn't");
237 	if (vq = vp->v_freef)
238 		vq->v_freeb = &vfreeh;
239 	vfreeh = vq;
240 	vp->v_freef = NULL;
241 	vp->v_freeb = NULL;
242 	if (vp->v_type != VNON && vp->v_type != VBAD)
243 		vgone(vp);
244 	vp->v_type = VNON;
245 	vp->v_flag = 0;
246 	vp->v_shlockc = 0;
247 	vp->v_exlockc = 0;
248 	vp->v_lastr = 0;
249 	vp->v_socket = 0;
250 	cache_purge(vp);
251 	vp->v_tag = tag;
252 	vp->v_op = vops;
253 	insmntque(vp, mp);
254 	VREF(vp);
255 	*vpp = vp;
256 	return (0);
257 }
259 /*
260  * Move a vnode from one mount queue to another.
261  */
262 insmntque(vp, mp)
263 	register struct vnode *vp;
264 	register struct mount *mp;
265 {
266 	struct vnode *vq;
268 	/*
269 	 * Delete from old mount point vnode list, if on one.
270 	 */
271 	if (vp->v_mountb) {
272 		if (vq = vp->v_mountf)
273 			vq->v_mountb = vp->v_mountb;
274 		*vp->v_mountb = vq;
275 	}
276 	/*
277 	 * Insert into list of vnodes for the new mount point, if available.
278 	 */
279 	vp->v_mount = mp;
280 	if (mp == NULL) {
281 		vp->v_mountf = NULL;
282 		vp->v_mountb = NULL;
283 		return;
284 	}
285 	if (mp->m_mounth) {
286 		vp->v_mountf = mp->m_mounth;
287 		vp->v_mountb = &mp->m_mounth;
288 		mp->m_mounth->v_mountb = &vp->v_mountf;
289 		mp->m_mounth = vp;
290 	} else {
291 		mp->m_mounth = vp;
292 		vp->v_mountb = &mp->m_mounth;
293 		vp->v_mountf = NULL;
294 	}
295 }
297 /*
298  * Create a vnode for a block device.
299  * Used for root filesystem, argdev, and swap areas.
300  * Also used for memory file system special devices.
301  */
302 bdevvp(dev, vpp)
303 	dev_t dev;
304 	struct vnode **vpp;
305 {
306 	register struct vnode *vp;
307 	struct vnode *nvp;
308 	int error;
310 	error = getnewvnode(VT_NON, (struct mount *)0, &spec_vnodeops, &nvp);
311 	if (error) {
312 		*vpp = 0;
313 		return (error);
314 	}
315 	vp = nvp;
316 	vp->v_type = VBLK;
317 	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
318 		vput(vp);
319 		vp = nvp;
320 	}
321 	*vpp = vp;
322 	return (0);
323 }
325 /*
326  * Check to see if the new vnode represents a special device
327  * for which we already have a vnode (either because of
328  * bdevvp() or because of a different vnode representing
329  * the same block device). If such an alias exists, deallocate
330  * the existing contents and return the aliased vnode. The
331  * caller is responsible for filling it with its new contents.
332  */
333 struct vnode *
334 checkalias(nvp, nvp_rdev, mp)
335 	register struct vnode *nvp;
336 	dev_t nvp_rdev;
337 	struct mount *mp;
338 {
339 	register struct vnode *vp;
340 	struct vnode **vpp;
342 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
343 		return ((struct vnode *)0);
345 	vpp = &speclisth[SPECHASH(nvp_rdev)];
346 loop:
347 	for (vp = *vpp; vp; vp = vp->v_specnext) {
348 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
349 			continue;
350 		/*
351 		 * Alias, but not in use, so flush it out.
352 		 */
353 		if (vp->v_usecount == 0) {
354 			vgone(vp);
355 			goto loop;
356 		}
357 		if (vget(vp))
358 			goto loop;
359 		break;
360 	}
361 	if (vp == NULL || vp->v_tag != VT_NON) {
362 		if (vp != NULL) {
363 			nvp->v_flag |= VALIASED;
364 			vp->v_flag |= VALIASED;
365 			vput(vp);
366 		}
367 		MALLOC(nvp->v_specinfo, struct specinfo *,
368 			sizeof(struct specinfo), M_VNODE, M_WAITOK);
369 		nvp->v_rdev = nvp_rdev;
370 		nvp->v_hashchain = vpp;
371 		nvp->v_specnext = *vpp;
372 		*vpp = nvp;
373 		return ((struct vnode *)0);
374 	}
375 	VOP_UNLOCK(vp);
376 	vclean(vp, 0);
377 	vp->v_op = nvp->v_op;
378 	vp->v_tag = nvp->v_tag;
379 	nvp->v_type = VNON;
380 	insmntque(vp, mp);
381 	return (vp);
382 }
384 /*
385  * Grab a particular vnode from the free list, increment its
386  * reference count and lock it. The vnode lock bit is set the
387  * vnode is being eliminated in vgone. The process is awakened
388  * when the transition is completed, and an error returned to
389  * indicate that the vnode is no longer usable (possibly having
390  * been changed to a new file system type).
391  */
392 vget(vp)
393 	register struct vnode *vp;
394 {
395 	register struct vnode *vq;
397 	if (vp->v_flag & VXLOCK) {
398 		vp->v_flag |= VXWANT;
399 		sleep((caddr_t)vp, PINOD);
400 		return (1);
401 	}
402 	if (vp->v_usecount == 0) {
403 		if (vq = vp->v_freef)
404 			vq->v_freeb = vp->v_freeb;
405 		else
406 			vfreet = vp->v_freeb;
407 		*vp->v_freeb = vq;
408 		vp->v_freef = NULL;
409 		vp->v_freeb = NULL;
410 	}
411 	VREF(vp);
412 	VOP_LOCK(vp);
413 	return (0);
414 }
416 /*
417  * Vnode reference, just increment the count
418  */
419 void vref(vp)
420 	struct vnode *vp;
421 {
423 	vp->v_usecount++;
424 }
/*
 * Release a locked, referenced vnode: unlock it first, then
 * drop the reference with vrele().
 */
void vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}
436 /*
437  * Vnode release.
438  * If count drops to zero, call inactive routine and return to freelist.
439  */
440 void vrele(vp)
441 	register struct vnode *vp;
442 {
444 	if (vp == NULL)
445 		panic("vrele: null vp");
446 	vp->v_usecount--;
447 	if (vp->v_usecount < 0)
448 		vprint("vrele: bad ref count", vp);
449 	if (vp->v_usecount > 0)
450 		return;
451 	if (vfreeh == (struct vnode *)0) {
452 		/*
453 		 * insert into empty list
454 		 */
455 		vfreeh = vp;
456 		vp->v_freeb = &vfreeh;
457 	} else {
458 		/*
459 		 * insert at tail of list
460 		 */
461 		*vfreet = vp;
462 		vp->v_freeb = vfreet;
463 	}
464 	vp->v_freef = NULL;
465 	vfreet = &vp->v_freef;
466 	VOP_INACTIVE(vp);
467 }
469 /*
470  * Page or buffer structure gets a reference.
471  */
472 vhold(vp)
473 	register struct vnode *vp;
474 {
476 	vp->v_holdcnt++;
477 }
479 /*
480  * Page or buffer structure frees a reference.
481  */
482 holdrele(vp)
483 	register struct vnode *vp;
484 {
486 	if (vp->v_holdcnt <= 0)
487 		panic("holdrele: holdcnt");
488 	vp->v_holdcnt--;
489 }
491 /*
492  * Remove any vnodes in the vnode table belonging to mount point mp.
493  *
494  * If MNT_NOFORCE is specified, there should not be any active ones,
495  * return error if any are found (nb: this is a user error, not a
496  * system error). If MNT_FORCE is specified, detach any active vnodes
497  * that are found.
498  */
499 int busyprt = 0;	/* patch to print out busy vnodes */
501 vflush(mp, skipvp, flags)
502 	struct mount *mp;
503 	struct vnode *skipvp;
504 	int flags;
505 {
506 	register struct vnode *vp, *nvp;
507 	int busy = 0;
509 	for (vp = mp->m_mounth; vp; vp = nvp) {
510 		nvp = vp->v_mountf;
511 		/*
512 		 * Skip over a selected vnode.
513 		 * Used by ufs to skip over the quota structure inode.
514 		 */
515 		if (vp == skipvp)
516 			continue;
517 		/*
518 		 * With v_usecount == 0, all we need to do is clear
519 		 * out the vnode data structures and we are done.
520 		 */
521 		if (vp->v_usecount == 0) {
522 			vgone(vp);
523 			continue;
524 		}
525 		/*
526 		 * For block or character devices, revert to an
527 		 * anonymous device. For all other files, just kill them.
528 		 */
529 		if (flags & MNT_FORCE) {
530 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
531 				vgone(vp);
532 			} else {
533 				vclean(vp, 0);
534 				vp->v_op = &spec_vnodeops;
535 				insmntque(vp, (struct mount *)0);
536 			}
537 			continue;
538 		}
539 		if (busyprt)
540 			vprint("vflush: busy vnode", vp);
541 		busy++;
542 	}
543 	if (busy)
544 		return (EBUSY);
545 	return (0);
546 }
548 /*
549  * Disassociate the underlying file system from a vnode.
550  */
551 void vclean(vp, doclose)
552 	register struct vnode *vp;
553 	long doclose;
554 {
555 	struct vnodeops *origops;
556 	int active;
558 	/*
559 	 * Check to see if the vnode is in use.
560 	 * If so we have to reference it before we clean it out
561 	 * so that its count cannot fall to zero and generate a
562 	 * race against ourselves to recycle it.
563 	 */
564 	if (active = vp->v_usecount)
565 		VREF(vp);
566 	/*
567 	 * Prevent the vnode from being recycled or
568 	 * brought into use while we clean it out.
569 	 */
570 	if (vp->v_flag & VXLOCK)
571 		panic("vclean: deadlock");
572 	vp->v_flag |= VXLOCK;
573 	/*
574 	 * Even if the count is zero, the VOP_INACTIVE routine may still
575 	 * have the object locked while it cleans it out. The VOP_LOCK
576 	 * ensures that the VOP_INACTIVE routine is done with its work.
577 	 * For active vnodes, it ensures that no other activity can
578 	 * occur while the buffer list is being cleaned out.
579 	 */
580 	VOP_LOCK(vp);
581 	if (doclose)
582 		vinvalbuf(vp, 1);
583 	/*
584 	 * Prevent any further operations on the vnode from
585 	 * being passed through to the old file system.
586 	 */
587 	origops = vp->v_op;
588 	vp->v_op = &dead_vnodeops;
589 	vp->v_tag = VT_NON;
590 	/*
591 	 * If purging an active vnode, it must be unlocked, closed,
592 	 * and deactivated before being reclaimed.
593 	 */
594 	(*(origops->vn_unlock))(vp);
595 	if (active) {
596 		if (doclose)
597 			(*(origops->vn_close))(vp, 0, NOCRED);
598 		(*(origops->vn_inactive))(vp);
599 	}
600 	/*
601 	 * Reclaim the vnode.
602 	 */
603 	if ((*(origops->vn_reclaim))(vp))
604 		panic("vclean: cannot reclaim");
605 	if (active)
606 		vrele(vp);
607 	/*
608 	 * Done with purge, notify sleepers in vget of the grim news.
609 	 */
610 	vp->v_flag &= ~VXLOCK;
611 	if (vp->v_flag & VXWANT) {
612 		vp->v_flag &= ~VXWANT;
613 		wakeup((caddr_t)vp);
614 	}
615 }
617 /*
618  * Eliminate all activity associated with  the requested vnode
619  * and with all vnodes aliased to the requested vnode.
620  */
621 void vgoneall(vp)
622 	register struct vnode *vp;
623 {
624 	register struct vnode *vq;
626 	while (vp->v_flag & VALIASED) {
627 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
628 			if (vq->v_rdev != vp->v_rdev || vp == vq)
629 				continue;
630 			vgone(vq);
631 			break;
632 		}
633 	}
634 	vgone(vp);
635 }
637 /*
638  * Eliminate all activity associated with a vnode
639  * in preparation for reuse.
640  */
641 void vgone(vp)
642 	register struct vnode *vp;
643 {
644 	register struct vnode *vq;
645 	struct vnode *vx;
646 	long count;
648 	/*
649 	 * Clean out the filesystem specific data.
650 	 */
651 	vclean(vp, 1);
652 	/*
653 	 * Delete from old mount point vnode list, if on one.
654 	 */
655 	if (vp->v_mountb) {
656 		if (vq = vp->v_mountf)
657 			vq->v_mountb = vp->v_mountb;
658 		*vp->v_mountb = vq;
659 		vp->v_mountf = NULL;
660 		vp->v_mountb = NULL;
661 	}
662 	/*
663 	 * If special device, remove it from special device alias list.
664 	 */
665 	if (vp->v_type == VBLK || vp->v_type == VCHR) {
666 		if (*vp->v_hashchain == vp) {
667 			*vp->v_hashchain = vp->v_specnext;
668 		} else {
669 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
670 				if (vq->v_specnext != vp)
671 					continue;
672 				vq->v_specnext = vp->v_specnext;
673 				break;
674 			}
675 			if (vq == NULL)
676 				panic("missing bdev");
677 		}
678 		if (vp->v_flag & VALIASED) {
679 			count = 0;
680 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
681 				if (vq->v_rdev != vp->v_rdev)
682 					continue;
683 				count++;
684 				vx = vq;
685 			}
686 			if (count == 0)
687 				panic("missing alias");
688 			if (count == 1)
689 				vx->v_flag &= ~VALIASED;
690 			vp->v_flag &= ~VALIASED;
691 		}
692 		FREE(vp->v_specinfo, M_VNODE);
693 		vp->v_specinfo = NULL;
694 	}
695 	/*
696 	 * If it is on the freelist, move it to the head of the list.
697 	 */
698 	if (vp->v_freeb) {
699 		if (vq = vp->v_freef)
700 			vq->v_freeb = vp->v_freeb;
701 		else
702 			vfreet = vp->v_freeb;
703 		*vp->v_freeb = vq;
704 		vp->v_freef = vfreeh;
705 		vp->v_freeb = &vfreeh;
706 		vfreeh->v_freeb = &vp->v_freef;
707 		vfreeh = vp;
708 	}
709 	vp->v_type = VBAD;
710 }
712 /*
713  * Lookup a vnode by device number.
714  */
715 vfinddev(dev, type, vpp)
716 	dev_t dev;
717 	enum vtype type;
718 	struct vnode **vpp;
719 {
720 	register struct vnode *vp;
722 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
723 		if (dev != vp->v_rdev || type != vp->v_type)
724 			continue;
725 		*vpp = vp;
726 		return (0);
727 	}
728 	return (1);
729 }
731 /*
732  * Calculate the total number of references to a special device.
733  */
734 vcount(vp)
735 	register struct vnode *vp;
736 {
737 	register struct vnode *vq;
738 	int count;
740 	if ((vp->v_flag & VALIASED) == 0)
741 		return (vp->v_usecount);
742 loop:
743 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
744 		if (vq->v_rdev != vp->v_rdev)
745 			continue;
746 		/*
747 		 * Alias, but not in use, so flush it out.
748 		 */
749 		if (vq->v_usecount == 0) {
750 			vgone(vq);
751 			goto loop;
752 		}
753 		count += vq->v_usecount;
754 	}
755 	return (count);
756 }
758 /*
759  * Print out a description of a vnode.
760  */
761 static char *typename[] =
762 	{ "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VBAD" };
764 vprint(label, vp)
765 	char *label;
766 	register struct vnode *vp;
767 {
769 	if (label != NULL)
770 		printf("%s: ", label);
771 	printf("type %s, usecount %d, refcount %d,\n\t", typename[vp->v_type],
772 		vp->v_usecount, vp->v_holdcnt);
773 	VOP_PRINT(vp);
774 }