xref: /dragonfly/sys/vfs/ufs/ufs_vnops.c (revision 7eedf208)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
39  * $FreeBSD: src/sys/ufs/ufs/ufs_vnops.c,v 1.131.2.8 2003/01/02 17:26:19 bde Exp $
40  * $DragonFly: src/sys/vfs/ufs/ufs_vnops.c,v 1.67 2008/09/28 05:04:22 dillon Exp $
41  */
42 
43 #include "opt_quota.h"
44 #include "opt_suiddir.h"
45 #include "opt_ufs.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/fcntl.h>
51 #include <sys/stat.h>
52 #include <sys/buf.h>
53 #include <sys/proc.h>
54 #include <sys/priv.h>
55 #include <sys/namei.h>
56 #include <sys/mount.h>
57 #include <sys/unistd.h>
58 #include <sys/vnode.h>
59 #include <sys/malloc.h>
60 #include <sys/dirent.h>
61 #include <sys/lockf.h>
62 #include <sys/event.h>
63 #include <sys/conf.h>
64 
65 #include <sys/file.h>		/* XXX */
66 #include <sys/jail.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_extern.h>
70 
71 #include <vfs/fifofs/fifo.h>
72 
73 #include "quota.h"
74 #include "inode.h"
75 #include "dir.h"
76 #include "ufsmount.h"
77 #include "ufs_extern.h"
78 #include "ffs_extern.h"
79 #include "fs.h"
80 #ifdef UFS_DIRHASH
81 #include "dirhash.h"
82 #endif
83 
84 static int ufs_access (struct vop_access_args *);
85 static int ufs_advlock (struct vop_advlock_args *);
86 static int ufs_chmod (struct vnode *, int, struct ucred *);
87 static int ufs_chown (struct vnode *, uid_t, gid_t, struct ucred *);
88 static int ufs_close (struct vop_close_args *);
89 static int ufs_create (struct vop_old_create_args *);
90 static int ufs_getattr (struct vop_getattr_args *);
91 static int ufs_link (struct vop_old_link_args *);
92 static int ufs_makeinode (int mode, struct vnode *, struct vnode **, struct componentname *);
93 static int ufs_markatime (struct vop_markatime_args *);
94 static int ufs_missingop (struct vop_generic_args *ap);
95 static int ufs_mkdir (struct vop_old_mkdir_args *);
96 static int ufs_mknod (struct vop_old_mknod_args *);
97 static int ufs_mmap (struct vop_mmap_args *);
98 static int ufs_print (struct vop_print_args *);
99 static int ufs_readdir (struct vop_readdir_args *);
100 static int ufs_readlink (struct vop_readlink_args *);
101 static int ufs_remove (struct vop_old_remove_args *);
102 static int ufs_rename (struct vop_old_rename_args *);
103 static int ufs_rmdir (struct vop_old_rmdir_args *);
104 static int ufs_setattr (struct vop_setattr_args *);
105 static int ufs_strategy (struct vop_strategy_args *);
106 static int ufs_symlink (struct vop_old_symlink_args *);
107 static int ufs_whiteout (struct vop_old_whiteout_args *);
108 static int ufsfifo_close (struct vop_close_args *);
109 static int ufsfifo_kqfilter (struct vop_kqfilter_args *);
110 static int ufsfifo_read (struct vop_read_args *);
111 static int ufsfifo_write (struct vop_write_args *);
112 static int filt_ufsread (struct knote *kn, long hint);
113 static int filt_ufswrite (struct knote *kn, long hint);
114 static int filt_ufsvnode (struct knote *kn, long hint);
115 static void filt_ufsdetach (struct knote *kn);
116 static int ufs_kqfilter (struct vop_kqfilter_args *ap);
117 
118 union _qcvt {	/* views one int64_t as two int32_t words */
119 	int64_t qcvt;
120 	int32_t val[2];
121 };
122 #define SETHIGH(q, h) { /* replace the _QUAD_HIGHWORD word of (q) with (h) */ \
123 	union _qcvt tmp; \
124 	tmp.qcvt = (q); \
125 	tmp.val[_QUAD_HIGHWORD] = (h); \
126 	(q) = tmp.qcvt; \
127 }
128 #define SETLOW(q, l) { /* replace the _QUAD_LOWWORD word of (q) with (l) */ \
129 	union _qcvt tmp; \
130 	tmp.qcvt = (q); \
131 	tmp.val[_QUAD_LOWWORD] = (l); \
132 	(q) = tmp.qcvt; \
133 }
134 #define VN_KNOTE(vp, b) /* hand hint (b) to KNOTE() for the vnode's ki_note list */ \
135 	KNOTE(&(vp)->v_pollinfo.vpi_kqinfo.ki_note, (b))
136 
137 #define OFSFMT(vp)		((vp)->v_mount->mnt_maxsymlinklen <= 0)	/* true when mnt_maxsymlinklen <= 0 ("old format filesystem" per ufs_whiteout's panics) */
138 
139 /*
140  * A virgin directory (no blushing please): initial "." and ".." entries.
141  */
142 static struct dirtemplate mastertemplate = {
143 	0, 12, DT_DIR, 1, ".",	/* NOTE(review): presumably ino, reclen, type, namlen, name - confirm against dir.h */
144 	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."	/* second entry takes the remaining DIRBLKSIZ - 12 */
145 };
146 static struct odirtemplate omastertemplate = {	/* same values minus the DT_DIR type fields */
147 	0, 12, 1, ".",
148 	0, DIRBLKSIZ - 12, 2, ".."
149 };
150 
151 void
152 ufs_itimes(struct vnode *vp)
153 {	/* Apply pending IN_ACCESS/IN_UPDATE/IN_CHANGE marks to the inode's time fields. */
154 	struct inode *ip;
155 	struct timespec ts;
156 
157 	ip = VTOI(vp);
158 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
159 		return;		/* nothing pending */
160 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
161 		ip->i_flag |= IN_LAZYMOD;	/* block/char device vnode without softdep */
162 	else
163 		ip->i_flag |= IN_MODIFIED;
164 	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {	/* time fields are only written on non-read-only mounts */
165 		vfs_timestamp(&ts);	/* one timestamp shared by all updates below */
166 		if (ip->i_flag & IN_ACCESS) {
167 			ip->i_atime = ts.tv_sec;
168 			ip->i_atimensec = ts.tv_nsec;
169 		}
170 		if (ip->i_flag & IN_UPDATE) {
171 			ip->i_mtime = ts.tv_sec;
172 			ip->i_mtimensec = ts.tv_nsec;
173 			ip->i_modrev++;	/* bumped together with mtime */
174 		}
175 		if (ip->i_flag & IN_CHANGE) {
176 			ip->i_ctime = ts.tv_sec;
177 			ip->i_ctimensec = ts.tv_nsec;
178 		}
179 	}
180 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);	/* cleared even when the mount is read-only */
181 }
182 
183 /*
184  * Create a regular file by passing the requested type/mode to ufs_makeinode().
185  * On success NOTE_WRITE is posted on a_dvp; a failure is returned unchanged.
186  * ufs_create(struct vnode *a_dvp, struct vnode **a_vpp,
187  *	      struct componentname *a_cnp, struct vattr *a_vap)
188  */
189 static
190 int
191 ufs_create(struct vop_old_create_args *ap)
192 {
193 	int error;
194 
195 	error =
196 	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
197 	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
198 	if (error)
199 		return (error);	/* no knote on failure */
200 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
201 	return (0);
202 }
203 
204 /*
205  * Mknod vnode call: create the inode, record the device number if one was
206  * given, then discard the vnode and re-fetch it through VFS_VGET().
207  * ufs_mknod(struct vnode *a_dvp, struct vnode **a_vpp,
208  *	     struct componentname *a_cnp, struct vattr *a_vap)
209  */
210 /* ARGSUSED */
211 static
212 int
213 ufs_mknod(struct vop_old_mknod_args *ap)
214 {
215 	struct vattr *vap = ap->a_vap;
216 	struct vnode **vpp = ap->a_vpp;
217 	struct inode *ip;
218 	ino_t ino;
219 	int error;
220 
221 	/*
222 	 * UFS cannot represent the entire major/minor range supported by
223 	 * the kernel.
224 	 */
225 	if (vap->va_rmajor != VNOVAL &&
226 	    makeudev(vap->va_rmajor, vap->va_rminor) == NOUDEV) {
227 		return(EINVAL);	/* rejected before any inode is created */
228 	}
229 
230 	/* no special directory support */
231 	if (vap->va_type == VDIR)
232 		return(EINVAL);
233 
234 	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
235 	    ap->a_dvp, vpp, ap->a_cnp);
236 	if (error)
237 		return (error);
238 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
239 	ip = VTOI(*vpp);
240 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;	/* mark all three times pending */
241 	if (vap->va_rmajor != VNOVAL) {
242 		/*
243 		 * Want to be able to use this to make badblock
244 		 * inodes, so don't truncate the dev number.
245 		 */
246 		ip->i_rdev = makeudev(vap->va_rmajor, vap->va_rminor);
247 	}
248 	/*
249 	 * Remove inode, then reload it through VFS_VGET so it is
250 	 * checked to see if it is an alias of an existing entry in
251 	 * the inode cache.
252 	 */
253 	(*vpp)->v_type = VNON;
254 	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
255 	vgone_vxlocked(*vpp);
256 	vput(*vpp);
257 	error = VFS_VGET(ap->a_dvp->v_mount, NULL, ino, vpp);
258 	if (error) {
259 		*vpp = NULL;	/* caller gets no vnode when the reload fails */
260 		return (error);
261 	}
262 	return (0);
263 }
264 
265 /*
266  * Close called.
267  *
268  * Update the times on the inode, then return the result of vop_stdclose().
269  *
270  * ufs_close(struct vnode *a_vp, int a_fflag)
271  */
272 /* ARGSUSED */
273 static
274 int
275 ufs_close(struct vop_close_args *ap)
276 {
277 	struct vnode *vp = ap->a_vp;
278 
279 	if (vp->v_sysref.refcnt > 1)	/* NOTE(review): presumably "other references remain" - confirm */
280 		ufs_itimes(vp);
281 	return (vop_stdclose(ap));
282 }
283 
284 /*
285  * ufs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
286  * Permission check delegated to vop_helper_access() using the inode's uid, gid and mode. */
287 static
288 int
289 ufs_access(struct vop_access_args *ap)
290 {
291 	struct vnode *vp = ap->a_vp;
292 	struct inode *ip = VTOI(vp);
293 	int error;
294 
295 #ifdef QUOTA
296 	if (ap->a_mode & VWRITE) {	/* write checks on dir/symlink/regular file first call ufs_getinoquota() */
297 		switch (vp->v_type) {
298 		case VDIR:
299 		case VLNK:
300 		case VREG:
301 			if ((error = ufs_getinoquota(ip)) != 0)
302 				return (error);	/* quota lookup failure is returned as the access result */
303 			break;
304 		default:
305 			break;
306 		}
307 	}
308 #endif
309 
310 	error = vop_helper_access(ap, ip->i_uid, ip->i_gid, ip->i_mode, 0);
311 	return (error);
312 }
313 
314 /*
315  * ufs_getattr(struct vnode *a_vp, struct vattr *a_vap)
316  * Fill *a_vap from the in-core inode after applying pending time updates; always returns 0. */
317 /* ARGSUSED */
318 static
319 int
320 ufs_getattr(struct vop_getattr_args *ap)
321 {
322 	struct vnode *vp = ap->a_vp;
323 	struct inode *ip = VTOI(vp);
324 	struct vattr *vap = ap->a_vap;
325 
326 	ufs_itimes(vp);	/* so the times copied below reflect pending marks */
327 	/*
328 	 * Copy from inode table
329 	 */
330 	vap->va_fsid = dev2udev(ip->i_dev);
331 	vap->va_fileid = ip->i_number;
332 	vap->va_mode = ip->i_mode & ~IFMT;	/* permission bits only; type goes in va_type */
333 	vap->va_nlink = VFSTOUFS(vp->v_mount)->um_i_effnlink_valid ?
334 	    ip->i_effnlink : ip->i_nlink;	/* effective count when the mount marks it valid */
335 	vap->va_uid = ip->i_uid;
336 	vap->va_gid = ip->i_gid;
337 	vap->va_rmajor = umajor(ip->i_rdev);
338 	vap->va_rminor = uminor(ip->i_rdev);
339 	vap->va_size = ip->i_din.di_size;
340 	vap->va_atime.tv_sec = ip->i_atime;
341 	vap->va_atime.tv_nsec = ip->i_atimensec;
342 	vap->va_mtime.tv_sec = ip->i_mtime;
343 	vap->va_mtime.tv_nsec = ip->i_mtimensec;
344 	vap->va_ctime.tv_sec = ip->i_ctime;
345 	vap->va_ctime.tv_nsec = ip->i_ctimensec;
346 	vap->va_flags = ip->i_flags;
347 	vap->va_gen = ip->i_gen;
348 	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
349 	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);	/* block count converted with dbtob() */
350 	vap->va_type = IFTOVT(ip->i_mode);
351 	vap->va_filerev = ip->i_modrev;
352 	return (0);
353 }
354 
355 static
356 int
357 ufs_markatime(struct vop_markatime_args *ap)
358 {	/* Flag the inode's access time as pending (IN_ACCESS) and post NOTE_ATTRIB. */
359 	struct vnode *vp = ap->a_vp;
360 	struct inode *ip = VTOI(vp);
361 
362 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
363 		return (EROFS);
364 	if (vp->v_mount->mnt_flag & MNT_NOATIME)
365 		return (0);	/* noatime mount: succeed without marking anything */
366 	ip->i_flag |= IN_ACCESS;	/* the time itself is written later by ufs_itimes() */
367 	VN_KNOTE(vp, NOTE_ATTRIB);
368 	return (0);
369 }
370 
371 /*
372  * Set attribute vnode op. called from several syscalls
373  * Handles, in order: flags, uid/gid, size, atime/mtime, mode.
374  * ufs_setattr(struct vnode *a_vp, struct vattr *a_vap,
375  *		struct ucred *a_cred)
376  */
377 static
378 int
379 ufs_setattr(struct vop_setattr_args *ap)
380 {
381 	struct vattr *vap = ap->a_vap;
382 	struct vnode *vp = ap->a_vp;
383 	struct inode *ip = VTOI(vp);
384 	struct ucred *cred = ap->a_cred;
385 	int error;
386 
387 	/*
388 	 * Check for unsettable attributes.
389 	 */
390 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
391 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
392 	    (vap->va_blocksize != VNOVAL) || (vap->va_rmajor != VNOVAL) ||
393 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
394 		return (EINVAL);
395 	}
396 	if (vap->va_flags != VNOVAL) {
397 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
398 			return (EROFS);
399 		if (cred->cr_uid != ip->i_uid &&
400 		    (error = priv_check_cred(cred, PRIV_VFS_SETATTR, 0)))
401 			return (error);	/* non-owner needs PRIV_VFS_SETATTR */
402 		/*
403 		 * Note that a root chflags becomes a user chflags when
404 		 * we are jailed, unless the jail.chflags_allowed sysctl
405 		 * is set.
406 		 */
407 		if (cred->cr_uid == 0 &&
408 		    (!jailed(cred) || jail_chflags_allowed)) {
409 			if ((ip->i_flags
410 			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) &&
411 			    securelevel > 0)
412 				return (EPERM);	/* these SF_ flags lock the inode once securelevel > 0 */
413 			ip->i_flags = vap->va_flags;	/* root path: flags replaced wholesale */
414 		} else {
415 			if (ip->i_flags
416 			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
417 			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
418 				return (EPERM);	/* SF_ flag already set, or request has bits outside UF_SETTABLE */
419 			ip->i_flags &= SF_SETTABLE;	/* keep only the SF_SETTABLE bits ... */
420 			ip->i_flags |= (vap->va_flags & UF_SETTABLE);	/* ... and install the requested UF_ bits */
421 		}
422 		ip->i_flag |= IN_CHANGE;
423 		if (vap->va_flags & (IMMUTABLE | APPEND))
424 			return (0);	/* returns here: remaining attributes and the NOTE_ATTRIB knote are skipped */
425 	}
426 	if (ip->i_flags & (IMMUTABLE | APPEND))
427 		return (EPERM);	/* no further attribute changes on immutable/append inodes */
428 	/*
429 	 * Go through the fields and update iff not VNOVAL.
430 	 */
431 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
432 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
433 			return (EROFS);
434 		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred)) != 0)
435 			return (error);
436 	}
437 	if (vap->va_size != VNOVAL) {
438 		/*
439 		 * Disallow write attempts on read-only filesystems;
440 		 * unless the file is a socket, fifo, or a block or
441 		 * character device resident on the filesystem.
442 		 */
443 		switch (vp->v_type) {
444 		case VDIR:
445 			return (EISDIR);
446 		case VLNK:
447 		case VREG:
448 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
449 				return (EROFS);
450 			break;
451 		default:
452 			break;
453 		}
454 		if ((error = ffs_truncate(vp, vap->va_size, 0, cred)) != 0)
455 			return (error);
456 	}
457 	ip = VTOI(vp);	/* inode pointer re-read from the vnode before the time/mode steps */
458 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
459 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
460 			return (EROFS);
461 		if (cred->cr_uid != ip->i_uid &&
462 		    (error = priv_check_cred(cred, PRIV_VFS_SETATTR, 0)) &&
463 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
464 		    (error = VOP_EACCESS(vp, VWRITE, cred))))
465 			return (error);	/* non-owner passes with the privilege, or with VA_UTIMES_NULL plus write access */
466 		if (vap->va_atime.tv_sec != VNOVAL)
467 			ip->i_flag |= IN_ACCESS;
468 		if (vap->va_mtime.tv_sec != VNOVAL)
469 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
470 		ufs_itimes(vp);	/* stamps current time first; caller-supplied values overwrite below */
471 		if (vap->va_atime.tv_sec != VNOVAL) {
472 			ip->i_atime = vap->va_atime.tv_sec;
473 			ip->i_atimensec = vap->va_atime.tv_nsec;
474 		}
475 		if (vap->va_mtime.tv_sec != VNOVAL) {
476 			ip->i_mtime = vap->va_mtime.tv_sec;
477 			ip->i_mtimensec = vap->va_mtime.tv_nsec;
478 		}
479 		error = ffs_update(vp, 0);
480 		if (error)
481 			return (error);
482 	}
483 	error = 0;
484 	if (vap->va_mode != (mode_t)VNOVAL) {
485 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
486 			return (EROFS);
487 		error = ufs_chmod(vp, (int)vap->va_mode, cred);
488 	}
489 	VN_KNOTE(vp, NOTE_ATTRIB);	/* posted even when ufs_chmod() returned an error */
490 	return (error);
491 }
492 
493 /*
494  * Change the mode on a file; validation is delegated to vop_helper_chmod().
495  * Inode must be locked before calling.
496  */
497 static int
498 ufs_chmod(struct vnode *vp, int mode, struct ucred *cred)
499 {
500 	struct inode *ip = VTOI(vp);
501 	int error;
502 	mode_t	cur_mode = ip->i_mode;	/* passed by address to the helper, stored back on success */
503 
504 	error = vop_helper_chmod(vp, mode, cred, ip->i_uid, ip->i_gid,
505 				 &cur_mode);
506 	if (error)
507 		return (error);	/* inode left untouched on failure */
508 #if 0
509 	if (cred->cr_uid != ip->i_uid) {
510 	    error = priv_check_cred(cred, PRIV_VFS_CHMOD, 0);
511 	    if (error)
512 		return (error);
513 	}
514 	if (cred->cr_uid) {
515 		if (vp->v_type != VDIR && (mode & S_ISTXT))
516 			return (EFTYPE);
517 		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
518 			return (EPERM);
519 	}
520 #endif
521 	ip->i_mode = cur_mode;
522 	ip->i_flag |= IN_CHANGE;
523 	return (0);
524 }
525 
526 /*
527  * Perform chown operation on inode ip; a VNOVAL uid or gid means "keep current".
528  * inode must be locked prior to call.
529  */
530 static int
531 ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred)
532 {
533 	struct inode *ip = VTOI(vp);
534 	uid_t ouid;
535 	gid_t ogid;
536 	int error = 0;
537 #ifdef QUOTA
538 	int i;
539 	long change;
540 #endif
541 
542 	if (uid == (uid_t)VNOVAL)
543 		uid = ip->i_uid;
544 	if (gid == (gid_t)VNOVAL)
545 		gid = ip->i_gid;
546 	/*
547 	 * If we don't own the file, are trying to change the owner
548 	 * of the file, or are not a member of the target group,
549 	 * the caller must be superuser or the call fails.
550 	 */
551 	if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
552 	    (gid != ip->i_gid && !(cred->cr_gid == gid ||
553 	    groupmember((gid_t)gid, cred)))) &&
554 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
555 		return (error);
556 	ogid = ip->i_gid;	/* old ids kept for quota rollback and the setuid/setgid check */
557 	ouid = ip->i_uid;
558 #ifdef QUOTA
559 	if ((error = ufs_getinoquota(ip)) != 0)
560 		return (error);
561 	if (ouid == uid) {	/* owner unchanged: drop the user dquot before adjusting usage */
562 		ufs_dqrele(vp, ip->i_dquot[USRQUOTA]);
563 		ip->i_dquot[USRQUOTA] = NODQUOT;
564 	}
565 	if (ogid == gid) {	/* group unchanged: drop the group dquot likewise */
566 		ufs_dqrele(vp, ip->i_dquot[GRPQUOTA]);
567 		ip->i_dquot[GRPQUOTA] = NODQUOT;
568 	}
569 	change = ip->i_blocks;
570 	(void) ufs_chkdq(ip, -change, cred, CHOWN);	/* subtract blocks and one inode from the old ids */
571 	(void) ufs_chkiq(ip, -1, cred, CHOWN);
572 	for (i = 0; i < MAXQUOTAS; i++) {
573 		ufs_dqrele(vp, ip->i_dquot[i]);
574 		ip->i_dquot[i] = NODQUOT;
575 	}
576 #endif
577 	ip->i_gid = gid;
578 	ip->i_uid = uid;
579 #ifdef QUOTA
580 	if ((error = ufs_getinoquota(ip)) == 0) {
581 		if (ouid == uid) {
582 			ufs_dqrele(vp, ip->i_dquot[USRQUOTA]);
583 			ip->i_dquot[USRQUOTA] = NODQUOT;
584 		}
585 		if (ogid == gid) {
586 			ufs_dqrele(vp, ip->i_dquot[GRPQUOTA]);
587 			ip->i_dquot[GRPQUOTA] = NODQUOT;
588 		}
589 		if ((error = ufs_chkdq(ip, change, cred, CHOWN)) == 0) {	/* add blocks, then the inode, to the new ids */
590 			if ((error = ufs_chkiq(ip, 1, cred, CHOWN)) == 0)
591 				goto good;
592 			else
593 				(void)ufs_chkdq(ip, -change, cred, CHOWN|FORCE);	/* inode charge failed: undo the block charge */
594 		}
595 		for (i = 0; i < MAXQUOTAS; i++) {
596 			ufs_dqrele(vp, ip->i_dquot[i]);
597 			ip->i_dquot[i] = NODQUOT;
598 		}
599 	}
600 	ip->i_gid = ogid;	/* failure path: restore the old ids and re-add their usage with FORCE */
601 	ip->i_uid = ouid;
602 	if (ufs_getinoquota(ip) == 0) {
603 		if (ouid == uid) {
604 			ufs_dqrele(vp, ip->i_dquot[USRQUOTA]);
605 			ip->i_dquot[USRQUOTA] = NODQUOT;
606 		}
607 		if (ogid == gid) {
608 			ufs_dqrele(vp, ip->i_dquot[GRPQUOTA]);
609 			ip->i_dquot[GRPQUOTA] = NODQUOT;
610 		}
611 		(void) ufs_chkdq(ip, change, cred, FORCE|CHOWN);
612 		(void) ufs_chkiq(ip, 1, cred, FORCE|CHOWN);
613 		(void) ufs_getinoquota(ip);
614 	}
615 	return (error);
616 good:
617 	if (ufs_getinoquota(ip))
618 		panic("ufs_chown: lost quota");
619 #endif /* QUOTA */
620 	ip->i_flag |= IN_CHANGE;
621 	if (cred->cr_uid != 0 && (ouid != uid || ogid != gid))
622 		ip->i_mode &= ~(ISUID | ISGID);	/* non-root caller actually changed an id: clear setuid/setgid */
623 	return (0);
624 }
625 
626 /*
627  * Mmap a file
628  *
629  * NB Currently unsupported: the argument is ignored and EINVAL is always returned.
630  *
631  * ufs_mmap(struct vnode *a_vp, int a_fflags, struct ucred *a_cred)
632  */
633 /* ARGSUSED */
634 static
635 int
636 ufs_mmap(struct vop_mmap_args *ap)
637 {
638 	return (EINVAL);
639 }
640 
641 /*
642  * ufs_remove(struct vnode *a_dvp, struct vnode *a_vp,
643  *	      struct componentname *a_cnp)
644  * Remove a_vp's entry from a_dvp via ufs_dirremove() and return its result. */
645 static
646 int
647 ufs_remove(struct vop_old_remove_args *ap)
648 {
649 	struct inode *ip;
650 	struct vnode *vp = ap->a_vp;
651 	struct vnode *dvp = ap->a_dvp;
652 	int error;
653 
654 	ip = VTOI(vp);
655 #if 0	/* handled by kernel now */
656 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
657 	    (VTOI(dvp)->i_flags & APPEND)) {
658 		error = EPERM;
659 		goto out;
660 	}
661 #endif
662 	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
663 	VN_KNOTE(vp, NOTE_DELETE);	/* both knotes are posted whether or not the removal succeeded */
664 	VN_KNOTE(dvp, NOTE_WRITE);
665 #if 0
666 out:
667 #endif
668 	return (error);
669 }
670 
671 /*
672  * link vnode call: bump a_vp's link count, then enter the new name in a_tdvp.
673  * The count is rolled back if the update or the directory entry fails.
674  * ufs_link(struct vnode *a_tdvp, struct vnode *a_vp,
675  *	    struct componentname *a_cnp)
676  */
677 static
678 int
679 ufs_link(struct vop_old_link_args *ap)
680 {
681 	struct vnode *vp = ap->a_vp;
682 	struct vnode *tdvp = ap->a_tdvp;
683 	struct componentname *cnp = ap->a_cnp;
684 	struct inode *ip;
685 	struct direct newdir;
686 	int error;
687 
688 	if (tdvp->v_mount != vp->v_mount) {
689 		error = EXDEV;	/* no links across mounts */
690 		goto out2;
691 	}
692 	if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE))) {	/* vp is locked here only when distinct from tdvp */
693 		goto out2;
694 	}
695 	ip = VTOI(vp);
696 	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
697 		error = EMLINK;
698 		goto out1;
699 	}
700 #if 0	/* handled by kernel now, also DragonFly allows this */
701 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
702 		error = EPERM;
703 		goto out1;
704 	}
705 #endif
706 	ip->i_effnlink++;
707 	ip->i_nlink++;
708 	ip->i_flag |= IN_CHANGE;
709 	if (DOINGSOFTDEP(vp))
710 		softdep_change_linkcnt(ip);
711 	error = ffs_update(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));	/* second argument is 0 under softdep or async */
712 	if (!error) {
713 		ufs_makedirentry(ip, cnp, &newdir);
714 		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
715 	}
716 
717 	if (error) {	/* undo the link-count bump */
718 		ip->i_effnlink--;
719 		ip->i_nlink--;
720 		ip->i_flag |= IN_CHANGE;
721 		if (DOINGSOFTDEP(vp))
722 			softdep_change_linkcnt(ip);
723 	}
724 out1:
725 	if (tdvp != vp)
726 		vn_unlock(vp);
727 out2:
728 	VN_KNOTE(vp, NOTE_LINK);	/* reached on every path, including the error exits above */
729 	VN_KNOTE(tdvp, NOTE_WRITE);
730 	return (error);
731 }
732 
733 /*
734  * whiteout vnode call: a_flags selects a support query, a create, or a delete.
735  *
736  * ufs_whiteout(struct vnode *a_dvp, struct componentname *a_cnp, int a_flags)
737  */
738 static
739 int
740 ufs_whiteout(struct vop_old_whiteout_args *ap)
741 {
742 	struct vnode *dvp = ap->a_dvp;
743 	struct componentname *cnp = ap->a_cnp;
744 	struct direct newdir;
745 	int error = 0;
746 
747 	switch (ap->a_flags) {
748 	case NAMEI_LOOKUP:
749 		/* 4.4 format directories support whiteout operations */
750 		if (dvp->v_mount->mnt_maxsymlinklen > 0)
751 			return (0);
752 		return (EOPNOTSUPP);
753 
754 	case NAMEI_CREATE:
755 		/* create a new directory whiteout */
756 #ifdef DIAGNOSTIC
757 		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
758 			panic("ufs_whiteout: old format filesystem");
759 #endif
760 
761 		newdir.d_ino = WINO;
762 		newdir.d_namlen = cnp->cn_namelen;
763 		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);	/* copies cn_namelen + 1 bytes - presumably the name plus its NUL, TODO confirm */
764 		newdir.d_type = DT_WHT;
765 		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
766 		break;
767 
768 	case NAMEI_DELETE:
769 		/* remove an existing directory whiteout */
770 #ifdef DIAGNOSTIC
771 		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
772 			panic("ufs_whiteout: old format filesystem");
773 #endif
774 
775 		cnp->cn_flags &= ~CNP_DOWHITEOUT;	/* flag cleared before it is handed to ufs_dirremove() */
776 		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
777 		break;
778 	default:
779 		panic("ufs_whiteout: unknown op");
780 	}
781 	return (error);
782 }
783 
784 /*
785  * Rename system call.
786  * 	rename("foo", "bar");
787  * is essentially
788  *	unlink("bar");
789  *	link("foo", "bar");
790  *	unlink("foo");
791  * but ``atomically''.  Can't do full commit without saving state in the
792  * inode on disk which isn't feasible at this time.  Best we can do is
793  * always guarantee the target exists.
794  *
795  * Basic algorithm is:
796  *
797  * 1) Bump link count on source while we're linking it to the
798  *    target.  This also ensures the inode won't be deleted out
799  *    from underneath us while we work (it may be truncated by
800  *    a concurrent `trunc' or `open' for creation).
801  * 2) Link source to destination.  If destination already exists,
802  *    delete it first.
803  * 3) Unlink source reference to inode if still around. If a
804  *    directory was moved and the parent of the destination
805  *    is different from the source, patch the ".." entry in the
806  *    directory.
807  *
808  * ufs_rename(struct vnode *a_fdvp, struct vnode *a_fvp,
809  *	      struct componentname *a_fcnp, struct vnode *a_tdvp,
810  *	      struct vnode *a_tvp, struct componentname *a_tcnp)
811  */
812 static
813 int
814 ufs_rename(struct vop_old_rename_args *ap)
815 {
816 	struct vnode *tvp = ap->a_tvp;
817 	struct vnode *tdvp = ap->a_tdvp;
818 	struct vnode *fvp = ap->a_fvp;
819 	struct vnode *fdvp = ap->a_fdvp;
820 	struct componentname *tcnp = ap->a_tcnp;
821 	struct componentname *fcnp = ap->a_fcnp;
822 	struct inode *ip, *xp, *dp;
823 	struct direct newdir;
824 	ino_t oldparent = 0, newparent = 0;
825 	int doingdirectory = 0;
826 	int error = 0, ioflag;
827 
828 	/*
829 	 * Check for cross-device rename.
830 	 */
831 	if ((fvp->v_mount != tdvp->v_mount) ||
832 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
833 		error = EXDEV;
834 abortit:
835 		if (tdvp == tvp)
836 			vrele(tdvp);
837 		else
838 			vput(tdvp);
839 		if (tvp)
840 			vput(tvp);
841 		vrele(fdvp);
842 		vrele(fvp);
843 		return (error);
844 	}
845 
846 #if 0	/* handled by kernel now */
847 	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
848 	    (VTOI(tdvp)->i_flags & APPEND))) {
849 		error = EPERM;
850 		goto abortit;
851 	}
852 #endif
853 
854 	/*
855 	 * Renaming a file to itself has no effect.  The upper layers should
856 	 * not call us in that case.  Temporarily just warn if they do.
857 	 */
858 	if (fvp == tvp) {
859 		kprintf("ufs_rename: fvp == tvp (can't happen)\n");
860 		error = 0;
861 		goto abortit;
862 	}
863 
864 	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
865 		goto abortit;
866 
867 	/*
868 	 * Note: now that fvp is locked we have to be sure to unlock it before
869 	 * using the 'abortit' target.
870 	 */
871 	dp = VTOI(fdvp);
872 	ip = VTOI(fvp);
873 	if (ip->i_nlink >= LINK_MAX) {
874 		vn_unlock(fvp);
875 		error = EMLINK;
876 		goto abortit;
877 	}
878 #if 0	/* handled by kernel now */
879 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
880 	    || (dp->i_flags & APPEND)) {
881 		vn_unlock(fvp);
882 		error = EPERM;
883 		goto abortit;
884 	}
885 #endif
886 	if ((ip->i_mode & IFMT) == IFDIR) {
887 		/*
888 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
889 		 */
890 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
891 		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & CNP_ISDOTDOT ||
892 		    (ip->i_flag & IN_RENAME)) {
893 			vn_unlock(fvp);
894 			error = EINVAL;
895 			goto abortit;
896 		}
897 		ip->i_flag |= IN_RENAME;
898 		oldparent = dp->i_number;
899 		doingdirectory = 1;
900 	}
901 	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXX right place? */
902 
903 	/*
904 	 * fvp still locked.  ip->i_flag has IN_RENAME set if doingdirectory.
905 	 * Cleanup fvp requirements so we can unlock it.
906 	 *
907 	 * tvp and tdvp are locked.  tvp may be NULL.  Now that dp and xp
908 	 * are set up we can use the 'bad' target if we unlock fvp.  We cannot
909 	 * use the abortit target anymore because of IN_RENAME.
910 	 */
911 	dp = VTOI(tdvp);
912 	if (tvp)
913 		xp = VTOI(tvp);
914 	else
915 		xp = NULL;
916 
917 	/*
918 	 * 1) Bump link count while we're moving stuff
919 	 *    around.  If we crash somewhere before
920 	 *    completing our work, the link count
921 	 *    may be wrong, but correctable.
922 	 */
923 	ip->i_effnlink++;
924 	ip->i_nlink++;
925 	ip->i_flag |= IN_CHANGE;
926 	if (DOINGSOFTDEP(fvp))
927 		softdep_change_linkcnt(ip);
928 	if ((error = ffs_update(fvp, !(DOINGSOFTDEP(fvp) |
929 				       DOINGASYNC(fvp)))) != 0) {
930 		vn_unlock(fvp);
931 		goto bad;
932 	}
933 
934 	/*
935 	 * If ".." must be changed (ie the directory gets a new
936 	 * parent) then the source directory must not be in the
937 	 * directory hierarchy above the target, as this would
938 	 * orphan everything below the source directory. Also
939 	 * the user must have write permission in the source so
940 	 * as to be able to change "..". We must repeat the call
941 	 * to namei, as the parent directory is unlocked by the
942 	 * call to checkpath().
943 	 */
944 	error = VOP_EACCESS(fvp, VWRITE, tcnp->cn_cred);
945 	vn_unlock(fvp);
946 
947 	/*
948 	 * We are now back to where we were in that fvp, fdvp are unlocked
949 	 * and tvp, tdvp are locked.  tvp may be NULL.  IN_RENAME may be
950 	 * set.  Only the bad target or, if we clean up tvp and tdvp, the
951 	 * out target, may be used.
952 	 */
953 	if (oldparent != dp->i_number)
954 		newparent = dp->i_number;
955 	if (doingdirectory && newparent) {
956 		if (error)	/* write access check above */
957 			goto bad;
958 
959 		/*
960 		 * Once we start messing with tvp and tdvp we cannot use the
961 		 * 'bad' target, only finish cleaning tdvp and tvp up and
962 		 * use the 'out' target.
963 		 *
964 		 * This cleans up tvp.
965 		 */
966 		if (xp != NULL) {
967 			vput(tvp);
968 			xp = NULL;
969 		}
970 
971 		/*
972 		 * This is a real mess. ufs_checkpath vput's the target
973 		 * directory so retain an extra ref and note that tdvp will
974 		 * lose its lock on return.  This leaves us with one good
975 		 * ref after ufs_checkpath returns.
976 		 */
977 		vref(tdvp);
978 		error = ufs_checkpath(ip, dp, tcnp->cn_cred);
979 		tcnp->cn_flags |= CNP_PDIRUNLOCK;
980 		if (error) {
981 			vrele(tdvp);
982 			goto out;
983 	        }
984 
985 		/*
986 		 * relookup no longer messes with tdvp's refs. tdvp must be
987 		 * unlocked on entry and will be locked on a successful
988 		 * return.
989 		 */
990 		error = relookup(tdvp, &tvp, tcnp);
991 		if (error) {
992 			if (tcnp->cn_flags & CNP_PDIRUNLOCK)
993 				vrele(tdvp);
994 			else
995 				vput(tdvp);
996 			goto out;
997 		}
998 		KKASSERT((tcnp->cn_flags & CNP_PDIRUNLOCK) == 0);
999 		dp = VTOI(tdvp);
1000 		if (tvp)
1001 			xp = VTOI(tvp);
1002 	}
1003 
1004 	/*
1005 	 * We are back to fvp, fdvp unlocked, tvp, tdvp locked.  tvp may
1006 	 * be NULL (xp will also be NULL in that case), and IN_RENAME will
1007 	 * be set if doingdirectory.  This means we can use the 'bad' target
1008 	 * again.
1009 	 */
1010 
1011 	/*
1012 	 * 2) If target doesn't exist, link the target
1013 	 *    to the source and unlink the source.
1014 	 *    Otherwise, rewrite the target directory
1015 	 *    entry to reference the source inode and
1016 	 *    expunge the original entry's existence.
1017 	 */
1018 	if (xp == NULL) {
1019 		if (dp->i_dev != ip->i_dev)
1020 			panic("ufs_rename: EXDEV");
1021 		/*
1022 		 * Account for ".." in new directory.
1023 		 * When source and destination have the same
1024 		 * parent we don't fool with the link count.
1025 		 */
1026 		if (doingdirectory && newparent) {
1027 			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1028 				error = EMLINK;
1029 				goto bad;
1030 			}
1031 			dp->i_effnlink++;
1032 			dp->i_nlink++;
1033 			dp->i_flag |= IN_CHANGE;
1034 			if (DOINGSOFTDEP(tdvp))
1035 				softdep_change_linkcnt(dp);
1036 			error = ffs_update(tdvp, !(DOINGSOFTDEP(tdvp) |
1037 						   DOINGASYNC(tdvp)));
1038 			if (error)
1039 				goto bad;
1040 		}
1041 		ufs_makedirentry(ip, tcnp, &newdir);
1042 		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
1043 		if (error) {
1044 			if (doingdirectory && newparent) {
1045 				dp->i_effnlink--;
1046 				dp->i_nlink--;
1047 				dp->i_flag |= IN_CHANGE;
1048 				if (DOINGSOFTDEP(tdvp))
1049 					softdep_change_linkcnt(dp);
1050 				(void)ffs_update(tdvp, 1);
1051 			}
1052 			goto bad;
1053 		}
1054 		VN_KNOTE(tdvp, NOTE_WRITE);
1055 		vput(tdvp);
1056 	} else {
1057 		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1058 			panic("ufs_rename: EXDEV");
1059 		/*
1060 		 * Short circuit rename(foo, foo).
1061 		 */
1062 		if (xp->i_number == ip->i_number)
1063 			panic("ufs_rename: same file");
1064 		/*
1065 		 * If the parent directory is "sticky", then the user must
1066 		 * own the parent directory, or the destination of the rename,
1067 		 * otherwise the destination may not be changed (except by
1068 		 * root). This implements append-only directories.
1069 		 */
1070 		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
1071 		    tcnp->cn_cred->cr_uid != dp->i_uid &&
1072 		    xp->i_uid != tcnp->cn_cred->cr_uid) {
1073 			error = EPERM;
1074 			goto bad;
1075 		}
1076 		/*
1077 		 * Target must be empty if a directory and have no links
1078 		 * to it. Also, ensure source and target are compatible
1079 		 * (both directories, or both not directories).
1080 		 *
1081 		 * Purge the file or directory being replaced from the
1082 		 * nameccache.
1083 		 */
1084 		if ((xp->i_mode&IFMT) == IFDIR) {
1085 			if ((xp->i_effnlink > 2) ||
1086 			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
1087 				error = ENOTEMPTY;
1088 				goto bad;
1089 			}
1090 			if (!doingdirectory) {
1091 				error = ENOTDIR;
1092 				goto bad;
1093 			}
1094 			/* cache_purge removed - handled by VFS compat layer */
1095 		} else if (doingdirectory == 0) {
1096 			/* cache_purge removed - handled by VFS compat layer */
1097 		} else {
1098 			error = EISDIR;
1099 			goto bad;
1100 		}
1101 		/*
1102 		 * note: inode passed to ufs_dirrewrite() is 0 for a
1103 		 * non-directory file rename, 1 for a directory rename
1104 		 * in the same directory, and > 1 for an inode representing
1105 		 * the new directory.
1106 		 */
1107 		error = ufs_dirrewrite(dp, xp, ip->i_number,
1108 		    IFTODT(ip->i_mode),
1109 		    (doingdirectory && newparent) ?
1110 			newparent : (ino_t)doingdirectory);
1111 		if (error)
1112 			goto bad;
1113 		if (doingdirectory) {
1114 			if (!newparent) {
1115 				dp->i_effnlink--;
1116 				if (DOINGSOFTDEP(tdvp))
1117 					softdep_change_linkcnt(dp);
1118 			}
1119 			xp->i_effnlink--;
1120 			if (DOINGSOFTDEP(tvp))
1121 				softdep_change_linkcnt(xp);
1122 		}
1123 		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1124 			/*
1125 			 * Truncate inode. The only stuff left in the directory
1126 			 * is "." and "..". The "." reference is inconsequential
1127 			 * since we are quashing it. We have removed the "."
1128 			 * reference and the reference in the parent directory,
1129 			 * but there may be other hard links. The soft
1130 			 * dependency code will arrange to do these operations
1131 			 * after the parent directory entry has been deleted on
1132 			 * disk, so when running with that code we avoid doing
1133 			 * them now.
1134 			 */
1135 			if (!newparent) {
1136 				dp->i_nlink--;
1137 				dp->i_flag |= IN_CHANGE;
1138 			}
1139 			xp->i_nlink--;
1140 			xp->i_flag |= IN_CHANGE;
1141 			ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
1142 			error = ffs_truncate(tvp, (off_t)0, ioflag,
1143 					     tcnp->cn_cred);
1144 			if (error)
1145 				goto bad;
1146 		}
1147 		VN_KNOTE(tdvp, NOTE_WRITE);
1148 		vput(tdvp);
1149 		VN_KNOTE(tvp, NOTE_DELETE);
1150 		vput(tvp);
1151 		xp = NULL;
1152 	}
1153 
1154 	/*
1155 	 * tvp and tdvp have been cleaned up.  only fvp and fdvp (both
1156 	 * unlocked) remain.  We are about to overwrite fvp but we have to
1157 	 * keep 'ip' intact so we cannot release the old fvp, which is still
1158 	 * refd and accessible via ap->a_fvp.
1159 	 *
1160 	 * This means we cannot use either 'bad' or 'out' to cleanup any
1161 	 * more.
1162 	 */
1163 
1164 	/*
1165 	 * 3) Unlink the source.
1166 	 */
1167 	fcnp->cn_flags &= ~CNP_MODMASK;
1168 	fcnp->cn_flags |= CNP_LOCKPARENT;
1169 	error = relookup(fdvp, &fvp, fcnp);
1170 	if (error || fvp == NULL) {
1171 		/*
1172 		 * From name has disappeared.  IN_RENAME will not be set if
1173 		 * we get past the panic so we don't have to clean it up.
1174 		 */
1175 		if (doingdirectory)
1176 			panic("ufs_rename: lost dir entry");
1177 		vrele(ap->a_fvp);
1178 		if (fcnp->cn_flags & CNP_PDIRUNLOCK)
1179 			vrele(fdvp);
1180 		else
1181 			vput(fdvp);
1182 		return(0);
1183 	}
1184 	KKASSERT((fcnp->cn_flags & CNP_PDIRUNLOCK) == 0);
1185 
1186 	/*
1187 	 * fdvp and fvp are locked.
1188 	 */
1189 	xp = VTOI(fvp);
1190 	dp = VTOI(fdvp);
1191 
1192 	/*
1193 	 * Ensure that the directory entry still exists and has not
1194 	 * changed while the new name has been entered. If the source is
1195 	 * a file then the entry may have been unlinked or renamed. In
1196 	 * either case there is no further work to be done. If the source
1197 	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
1198 	 * flag ensures that it cannot be moved by another rename or removed
1199 	 * by a rmdir.  Cleanup IN_RENAME.
1200 	 */
1201 	if (xp != ip) {
1202 		if (doingdirectory)
1203 			panic("ufs_rename: lost dir entry");
1204 	} else {
1205 		/*
1206 		 * If the source is a directory with a
1207 		 * new parent, the link count of the old
1208 		 * parent directory must be decremented
1209 		 * and ".." set to point to the new parent.
1210 		 */
1211 		if (doingdirectory && newparent) {
1212 			xp->i_offset = mastertemplate.dot_reclen;
1213 			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
1214 			/* cache_purge removed - handled by VFS compat layer */
1215 		}
1216 		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
1217 		xp->i_flag &= ~IN_RENAME;
1218 	}
1219 
1220 	VN_KNOTE(fvp, NOTE_RENAME);
1221 	vput(fdvp);
1222 	vput(fvp);
1223 	vrele(ap->a_fvp);
1224 	return (error);
1225 
1226 bad:
1227 	if (xp)
1228 		vput(ITOV(xp));
1229 	vput(ITOV(dp));
1230 out:
1231 	if (doingdirectory)
1232 		ip->i_flag &= ~IN_RENAME;
1233 	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
1234 		ip->i_effnlink--;
1235 		ip->i_nlink--;
1236 		ip->i_flag |= IN_CHANGE;
1237 		ip->i_flag &= ~IN_RENAME;
1238 		if (DOINGSOFTDEP(fvp))
1239 			softdep_change_linkcnt(ip);
1240 		vput(fvp);
1241 	} else {
1242 		vrele(fvp);
1243 	}
1244 	return (error);
1245 }
1246 
1247 /*
1248  * Mkdir system call
1249  *
1250  * ufs_mkdir(struct vnode *a_dvp, struct vnode **a_vpp,
1251  *	     struct componentname *a_cnp, struct vattr *a_vap)
1252  */
1253 static
1254 int
1255 ufs_mkdir(struct vop_old_mkdir_args *ap)
1256 {
1257 	struct vnode *dvp = ap->a_dvp;
1258 	struct vattr *vap = ap->a_vap;
1259 	struct componentname *cnp = ap->a_cnp;
1260 	struct inode *ip, *dp;
1261 	struct vnode *tvp;
1262 	struct buf *bp;
1263 	struct dirtemplate dirtemplate, *dtp;
1264 	struct direct newdir;
1265 	int error, dmode;
1266 	long blkoff;
1267 
1268 	dp = VTOI(dvp);
1269 	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1270 		error = EMLINK;
1271 		goto out;
1272 	}
1273 	dmode = vap->va_mode & 0777;
1274 	dmode |= IFDIR;
1275 	/*
1276 	 * Must simulate part of ufs_makeinode here to acquire the inode,
1277 	 * but not have it entered in the parent directory. The entry is
1278 	 * made later after writing "." and ".." entries.
1279 	 */
1280 	error = ffs_valloc(dvp, dmode, cnp->cn_cred, &tvp);
1281 	if (error)
1282 		goto out;
1283 	ip = VTOI(tvp);
1284 	ip->i_gid = dp->i_gid;
1285 #ifdef SUIDDIR
1286 	{
1287 #ifdef QUOTA
1288 		struct ucred ucred, *ucp;
1289 		ucp = cnp->cn_cred;
1290 #endif
1291 		/*
1292 		 * If we are hacking owners here, (only do this where told to)
1293 		 * and we are not giving it TO root, (would subvert quotas)
1294 		 * then go ahead and give it to the other user.
1295 		 * The new directory also inherits the SUID bit.
1296 		 * If user's UID and dir UID are the same,
1297 		 * 'give it away' so that the SUID is still forced on.
1298 		 */
1299 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1300 		    (dp->i_mode & ISUID) && dp->i_uid) {
1301 			dmode |= ISUID;
1302 			ip->i_uid = dp->i_uid;
1303 #ifdef QUOTA
1304 			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1305 				/*
1306 				 * Make sure the correct user gets charged
1307 				 * for the space.
1308 				 * Make a dummy credential for the victim.
1309 				 * XXX This seems to never be accessed out of
1310 				 * our context so a stack variable is ok.
1311 				 */
1312 				ucred.cr_ref = 1;
1313 				ucred.cr_uid = ip->i_uid;
1314 				ucred.cr_ngroups = 1;
1315 				ucred.cr_groups[0] = dp->i_gid;
1316 				ucp = &ucred;
1317 			}
1318 #endif
1319 		} else
1320 			ip->i_uid = cnp->cn_cred->cr_uid;
1321 #ifdef QUOTA
1322 		if ((error = ufs_getinoquota(ip)) ||
1323 	    	    (error = ufs_chkiq(ip, 1, ucp, 0))) {
1324 			ffs_vfree(tvp, ip->i_number, dmode);
1325 			vput(tvp);
1326 			return (error);
1327 		}
1328 #endif
1329 	}
1330 #else	/* !SUIDDIR */
1331 	ip->i_uid = cnp->cn_cred->cr_uid;
1332 #ifdef QUOTA
1333 	if ((error = ufs_getinoquota(ip)) ||
1334 	    (error = ufs_chkiq(ip, 1, cnp->cn_cred, 0))) {
1335 		ffs_vfree(tvp, ip->i_number, dmode);
1336 		vput(tvp);
1337 		return (error);
1338 	}
1339 #endif
1340 #endif	/* !SUIDDIR */
1341 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1342 	ip->i_mode = dmode;
1343 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1344 	ip->i_effnlink = 2;
1345 	ip->i_nlink = 2;
1346 	if (DOINGSOFTDEP(tvp))
1347 		softdep_change_linkcnt(ip);
1348 	if (cnp->cn_flags & CNP_ISWHITEOUT)
1349 		ip->i_flags |= UF_OPAQUE;
1350 
1351 	/*
1352 	 * Bump link count in parent directory to reflect work done below.
1353 	 * Should be done before reference is created so cleanup is
1354 	 * possible if we crash.
1355 	 */
1356 	dp->i_effnlink++;
1357 	dp->i_nlink++;
1358 	dp->i_flag |= IN_CHANGE;
1359 	if (DOINGSOFTDEP(dvp))
1360 		softdep_change_linkcnt(dp);
1361 	error = ffs_update(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1362 	if (error)
1363 		goto bad;
1364 
1365 	/*
1366 	 * The vnode must have a VM object in order to issue buffer cache
1367 	 * ops on it.
1368 	 */
1369 	vinitvmio(tvp, DIRBLKSIZ, DIRBLKSIZ, -1);
1370 
1371 	/*
1372 	 * Initialize directory with "." and ".." from static template.
1373 	 */
1374 	if (dvp->v_mount->mnt_maxsymlinklen > 0)
1375 		dtp = &mastertemplate;
1376 	else
1377 		dtp = (struct dirtemplate *)&omastertemplate;
1378 	dirtemplate = *dtp;
1379 	dirtemplate.dot_ino = ip->i_number;
1380 	dirtemplate.dotdot_ino = dp->i_number;
1381 	nvnode_pager_setsize(tvp, DIRBLKSIZ, DIRBLKSIZ, -1);
1382 	error = VOP_BALLOC(tvp, 0LL, DIRBLKSIZ, cnp->cn_cred, B_CLRBUF, &bp);
1383 	if (error)
1384 		goto bad;
1385 	ip->i_size = DIRBLKSIZ;
1386 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1387 	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1388 	if (DOINGSOFTDEP(tvp)) {
1389 		/*
1390 		 * Ensure that the entire newly allocated block is a
1391 		 * valid directory so that future growth within the
1392 		 * block does not have to ensure that the block is
1393 		 * written before the inode.
1394 		 */
1395 		blkoff = DIRBLKSIZ;
1396 		while (blkoff < bp->b_bcount) {
1397 			((struct direct *)
1398 			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1399 			blkoff += DIRBLKSIZ;
1400 		}
1401 	}
1402 	if ((error = ffs_update(tvp, !(DOINGSOFTDEP(tvp) |
1403 				       DOINGASYNC(tvp)))) != 0) {
1404 		bwrite(bp);
1405 		goto bad;
1406 	}
1407 	/*
1408 	 * Directory set up, now install its entry in the parent directory.
1409 	 *
1410 	 * If we are not doing soft dependencies, then we must write out the
1411 	 * buffer containing the new directory body before entering the new
1412 	 * name in the parent. If we are doing soft dependencies, then the
1413 	 * buffer containing the new directory body will be passed to and
1414 	 * released in the soft dependency code after the code has attached
1415 	 * an appropriate ordering dependency to the buffer which ensures that
1416 	 * the buffer is written before the new name is written in the parent.
1417 	 */
1418 	if (DOINGASYNC(dvp))
1419 		bdwrite(bp);
1420 	else if (!DOINGSOFTDEP(dvp) && (error = bwrite(bp)) != 0)
1421 		goto bad;
1422 	ufs_makedirentry(ip, cnp, &newdir);
1423 	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
1424 
1425 bad:
1426 	if (error == 0) {
1427 		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1428 		*ap->a_vpp = tvp;
1429 	} else {
1430 		dp->i_effnlink--;
1431 		dp->i_nlink--;
1432 		dp->i_flag |= IN_CHANGE;
1433 		if (DOINGSOFTDEP(dvp))
1434 			softdep_change_linkcnt(dp);
1435 		/*
1436 		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1437 		 * do this for us because we set the link count to 0.
1438 		 */
1439 		ip->i_effnlink = 0;
1440 		ip->i_nlink = 0;
1441 		ip->i_flag |= IN_CHANGE;
1442 		if (DOINGSOFTDEP(tvp))
1443 			softdep_change_linkcnt(ip);
1444 		vput(tvp);
1445 	}
1446 out:
1447 	return (error);
1448 }
1449 
1450 /*
1451  * Rmdir system call.
1452  *
1453  * ufs_rmdir(struct vnode *a_dvp, struct vnode *a_vp,
1454  *	     struct componentname *a_cnp)
1455  */
1456 static
1457 int
1458 ufs_rmdir(struct vop_old_rmdir_args *ap)
1459 {
1460 	struct vnode *vp = ap->a_vp;
1461 	struct vnode *dvp = ap->a_dvp;
1462 	struct componentname *cnp = ap->a_cnp;
1463 	struct inode *ip, *dp;
1464 	int error, ioflag;
1465 
1466 	ip = VTOI(vp);
1467 	dp = VTOI(dvp);
1468 
1469 	/*
1470 	 * Do not remove a directory that is in the process of being renamed.
1471 	 * Verify the directory is empty (and valid). Rmdir ".." will not be
1472 	 * valid since ".." will contain a reference to the current directory
1473 	 * and thus be non-empty. Do not allow the removal of mounted on
1474 	 * directories (this can happen when an NFS exported filesystem
1475 	 * tries to remove a locally mounted on directory).
1476 	 */
1477 	error = 0;
1478 	if (ip->i_flag & IN_RENAME) {
1479 		error = EINVAL;
1480 		goto out;
1481 	}
1482 	if (ip->i_effnlink != 2 ||
1483 	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1484 		error = ENOTEMPTY;
1485 		goto out;
1486 	}
1487 #if 0	/* handled by kernel now */
1488 	if ((dp->i_flags & APPEND)
1489 	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1490 		error = EPERM;
1491 		goto out;
1492 	}
1493 #endif
1494 	/*
1495 	 * Delete reference to directory before purging
1496 	 * inode.  If we crash in between, the directory
1497 	 * will be reattached to lost+found,
1498 	 */
1499 	dp->i_effnlink--;
1500 	ip->i_effnlink--;
1501 	if (DOINGSOFTDEP(vp)) {
1502 		softdep_change_linkcnt(dp);
1503 		softdep_change_linkcnt(ip);
1504 	}
1505 	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
1506 	if (error) {
1507 		dp->i_effnlink++;
1508 		ip->i_effnlink++;
1509 		if (DOINGSOFTDEP(vp)) {
1510 			softdep_change_linkcnt(dp);
1511 			softdep_change_linkcnt(ip);
1512 		}
1513 		goto out;
1514 	}
1515 	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1516 	/*
1517 	 * Truncate inode. The only stuff left in the directory is "." and
1518 	 * "..". The "." reference is inconsequential since we are quashing
1519 	 * it. The soft dependency code will arrange to do these operations
1520 	 * after the parent directory entry has been deleted on disk, so
1521 	 * when running with that code we avoid doing them now.
1522 	 */
1523 	if (!DOINGSOFTDEP(vp)) {
1524 		dp->i_nlink--;
1525 		dp->i_flag |= IN_CHANGE;
1526 		ip->i_nlink--;
1527 		ip->i_flag |= IN_CHANGE;
1528 		ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
1529 		error = ffs_truncate(vp, (off_t)0, ioflag, cnp->cn_cred);
1530 	}
1531 	/* cache_purge removed - handled by VFS compat layer */
1532 #ifdef UFS_DIRHASH
1533 	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
1534 	if (ip->i_dirhash != NULL)
1535 		ufsdirhash_free(ip);
1536 #endif
1537 out:
1538 	VN_KNOTE(vp, NOTE_DELETE);
1539 	return (error);
1540 }
1541 
1542 /*
1543  * symlink -- make a symbolic link
1544  *
1545  * ufs_symlink(struct vnode *a_dvp, struct vnode **a_vpp,
1546  *		struct componentname *a_cnp, struct vattr *a_vap,
1547  *		char *a_target)
1548  */
1549 static
1550 int
1551 ufs_symlink(struct vop_old_symlink_args *ap)
1552 {
1553 	struct vnode *vp, **vpp = ap->a_vpp;
1554 	struct inode *ip;
1555 	int len, error;
1556 
1557 	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1558 			      vpp, ap->a_cnp);
1559 	if (error)
1560 		return (error);
1561 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
1562 	vp = *vpp;
1563 	len = strlen(ap->a_target);
1564 	if (len < vp->v_mount->mnt_maxsymlinklen) {
1565 		ip = VTOI(vp);
1566 		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
1567 		ip->i_size = len;
1568 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1569 	} else {
1570 		/*
1571 		 * Make sure we have a VM object in order to use
1572 		 * the buffer cache.
1573 		 */
1574 		if (vp->v_object == NULL)
1575 			vinitvmio(vp, 0, PAGE_SIZE, -1);
1576 		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1577 				UIO_SYSSPACE, IO_NODELOCKED,
1578 				ap->a_cnp->cn_cred, NULL);
1579 	}
1580 	if (error)
1581 		vput(vp);
1582 	return (error);
1583 }
1584 
1585 /*
1586  * Vnode op for reading directories.
1587  *
1588  * ufs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred,
1589  *		int *a_eofflag, int *ncookies, off_t **a_cookies)
1590  */
1591 static
1592 int
1593 ufs_readdir(struct vop_readdir_args *ap)
1594 {
1595 	struct uio *uio = ap->a_uio;
1596 	struct vnode *vp = ap->a_vp;
1597 	struct direct *dp;
1598 	struct buf *bp;
1599 	int retval;
1600 	int error;
1601 	int offset;	/* offset into buffer cache buffer */
1602 	int eoffset;	/* end of buffer clipped to file EOF */
1603 	int pickup;	/* pickup point */
1604 	int ncookies;
1605 	int cookie_index;
1606 	off_t *cookies;
1607 
1608 	if (uio->uio_offset < 0)
1609 		return (EINVAL);
1610 	/*
1611 	 * Guess the number of cookies needed.  Make sure we compute at
1612 	 * least 1, and no more then a reasonable limit.
1613 	 */
1614 	if (ap->a_ncookies) {
1615 		ncookies = uio->uio_resid / 16 + 1;
1616 		if (ncookies > 1024)
1617 			ncookies = 1024;
1618 		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
1619 	} else {
1620 		ncookies = -1;	/* force conditionals below */
1621 		cookies = NULL;
1622 	}
1623 	cookie_index = 0;
1624 
1625 	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
1626 		return (error);
1627 
1628 	/*
1629 	 * Past or at EOF
1630 	 */
1631 	if (uio->uio_offset >= VTOI(vp)->i_size) {
1632 		if (ap->a_eofflag)
1633 			*ap->a_eofflag = 1;
1634 		if (ap->a_ncookies) {
1635 			*ap->a_ncookies = cookie_index;
1636 			*ap->a_cookies = cookies;
1637 		}
1638 		goto done;
1639 	}
1640 
1641 	/*
1642 	 * Loop until we run out of cookies, we run out of user buffer,
1643 	 * or we hit the directory EOF.
1644 	 *
1645 	 * Always start scans at the beginning of the buffer, don't trust
1646 	 * the offset supplied by userland.
1647 	 */
1648 	while ((error = ffs_blkatoff_ra(vp, uio->uio_offset, NULL, &bp, 2)) == 0) {
1649 		pickup = (int)(uio->uio_offset - bp->b_loffset);
1650 		offset = 0;
1651 		retval = 0;
1652 		if (bp->b_loffset + bp->b_bcount > VTOI(vp)->i_size)
1653 			eoffset = (int)(VTOI(vp)->i_size - bp->b_loffset);
1654 		else
1655 			eoffset = bp->b_bcount;
1656 
1657 		while (offset < eoffset) {
1658 			dp = (struct direct *)(bp->b_data + offset);
1659 			if (dp->d_reclen <= 0 || (dp->d_reclen & 3) ||
1660 			    offset + dp->d_reclen > bp->b_bcount) {
1661 				error = EIO;
1662 				break;
1663 			}
1664 			if (offsetof(struct direct, d_name[dp->d_namlen]) >				     dp->d_reclen) {
1665 				error = EIO;
1666 				break;
1667 			}
1668 			if (offset < pickup) {
1669 				offset += dp->d_reclen;
1670 				continue;
1671 			}
1672 #if BYTE_ORDER == LITTLE_ENDIAN
1673 			if (OFSFMT(vp)) {
1674 				retval = vop_write_dirent(&error, uio,
1675 				    dp->d_ino, dp->d_namlen, dp->d_type,
1676 				    dp->d_name);
1677 			} else
1678 #endif
1679 			{
1680 				retval = vop_write_dirent(&error, uio,
1681 				    dp->d_ino, dp->d_type, dp->d_namlen,
1682 				    dp->d_name);
1683 			}
1684 			if (retval)
1685 				break;
1686 			if (cookies)
1687 				cookies[cookie_index] = bp->b_loffset + offset;
1688 			++cookie_index;
1689 			offset += dp->d_reclen;
1690 			if (cookie_index == ncookies)
1691 				break;
1692 		}
1693 
1694 		/*
1695 		 * This will align the next loop to the beginning of the
1696 		 * next block, and pickup will calculate to 0.
1697 		 */
1698 		uio->uio_offset = bp->b_loffset + offset;
1699 		brelse(bp);
1700 
1701 		if (retval || error || cookie_index == ncookies ||
1702 		    uio->uio_offset >= VTOI(vp)->i_size) {
1703 			break;
1704 		}
1705 	}
1706 	if (ap->a_eofflag)
1707 		*ap->a_eofflag = VTOI(vp)->i_size <= uio->uio_offset;
1708 
1709 	/*
1710 	 * Report errors only if we didn't manage to read anything
1711 	 */
1712 	if (error && cookie_index == 0) {
1713 		if (cookies) {
1714 			kfree(cookies, M_TEMP);
1715 			*ap->a_ncookies = 0;
1716 			*ap->a_cookies = NULL;
1717 		}
1718 	} else {
1719 		error = 0;
1720 		if (cookies) {
1721 			*ap->a_ncookies = cookie_index;
1722 			*ap->a_cookies = cookies;
1723 		}
1724 	}
1725 done:
1726 	vn_unlock(vp);
1727         return (error);
1728 }
1729 
1730 /*
1731  * Return target name of a symbolic link
1732  *
1733  * ufs_readlink(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
1734  */
1735 static
1736 int
1737 ufs_readlink(struct vop_readlink_args *ap)
1738 {
1739 	struct vnode *vp = ap->a_vp;
1740 	struct inode *ip = VTOI(vp);
1741 	int isize;
1742 
1743 	isize = ip->i_size;
1744 	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
1745 	    (ip->i_din.di_blocks == 0)) {   /* XXX - for old fastlink support */
1746 		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
1747 		return (0);
1748 	}
1749 
1750 	/*
1751 	 * Perform the equivalent of an OPEN on vp so we can issue a
1752 	 * VOP_READ.
1753 	 */
1754 	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1755 }
1756 
1757 /*
1758  * Calculate the logical to physical mapping if not done already,
1759  * then call the device strategy routine.
1760  *
1761  * In order to be able to swap to a file, the VOP_BMAP operation may not
1762  * deadlock on memory.  See ufs_bmap() for details.
1763  *
1764  * ufs_strategy(struct vnode *a_vp, struct bio *a_bio)
1765  */
1766 static
1767 int
1768 ufs_strategy(struct vop_strategy_args *ap)
1769 {
1770 	struct bio *bio = ap->a_bio;
1771 	struct bio *nbio;
1772 	struct buf *bp = bio->bio_buf;
1773 	struct vnode *vp = ap->a_vp;
1774 	struct inode *ip;
1775 	int error;
1776 
1777 	ip = VTOI(vp);
1778 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1779 		panic("ufs_strategy: spec");
1780 	nbio = push_bio(bio);
1781 	if (nbio->bio_offset == NOOFFSET) {
1782 		error = VOP_BMAP(vp, bio->bio_offset, &nbio->bio_offset,
1783 				 NULL, NULL, bp->b_cmd);
1784 		if (error) {
1785 			bp->b_error = error;
1786 			bp->b_flags |= B_ERROR;
1787 			/* I/O was never started on nbio, must biodone(bio) */
1788 			biodone(bio);
1789 			return (error);
1790 		}
1791 		if (nbio->bio_offset == NOOFFSET)
1792 			vfs_bio_clrbuf(bp);
1793 	}
1794 	if (nbio->bio_offset == NOOFFSET) {
1795 		/*
1796 		 * We hit a hole in the file.  The buffer has been zero-filled
1797 		 * so just biodone() it.
1798 		 */
1799 		biodone(bio);
1800 	} else {
1801 		vn_strategy(ip->i_devvp, nbio);
1802 	}
1803 	return (0);
1804 }
1805 
1806 /*
1807  * Print out the contents of an inode.
1808  *
1809  * ufs_print(struct vnode *a_vp)
1810  */
1811 static
1812 int
1813 ufs_print(struct vop_print_args *ap)
1814 {
1815 	struct vnode *vp = ap->a_vp;
1816 	struct inode *ip = VTOI(vp);
1817 
1818 	kprintf("tag VT_UFS, ino %lu, on dev %s (%d, %d)",
1819 	    (u_long)ip->i_number, devtoname(ip->i_dev), major(ip->i_dev),
1820 	    minor(ip->i_dev));
1821 	if (vp->v_type == VFIFO)
1822 		fifo_printinfo(vp);
1823 	lockmgr_printinfo(&vp->v_lock);
1824 	kprintf("\n");
1825 	return (0);
1826 }
1827 
1828 /*
1829  * Read wrapper for fifos.
1830  *
1831  * ufsfifo_read(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1832  *		struct ucred *a_cred)
1833  */
1834 static
1835 int
1836 ufsfifo_read(struct vop_read_args *ap)
1837 {
1838 	int error, resid;
1839 	struct inode *ip;
1840 	struct uio *uio;
1841 
1842 	uio = ap->a_uio;
1843 	resid = uio->uio_resid;
1844 	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1845 	ip = VTOI(ap->a_vp);
1846 	if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL &&
1847 	    (uio->uio_resid != resid || (error == 0 && resid != 0)))
1848 		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1849 	return (error);
1850 }
1851 
1852 /*
1853  * Write wrapper for fifos.
1854  *
1855  * ufsfifo_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1856  *		 struct ucred *a_cred)
1857  */
1858 static
1859 int
1860 ufsfifo_write(struct vop_write_args *ap)
1861 {
1862 	int error, resid;
1863 	struct inode *ip;
1864 	struct uio *uio;
1865 
1866 	uio = ap->a_uio;
1867 	resid = uio->uio_resid;
1868 	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1869 	ip = VTOI(ap->a_vp);
1870 	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1871 		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1872 	return (error);
1873 }
1874 
1875 /*
1876  * Close wrapper for fifos.
1877  *
1878  * Update the times on the inode then do device close.
1879  *
1880  * ufsfifo_close(struct vnode *a_vp, int a_fflag)
1881  */
1882 static
1883 int
1884 ufsfifo_close(struct vop_close_args *ap)
1885 {
1886 	struct vnode *vp = ap->a_vp;
1887 
1888 	if (vp->v_sysref.refcnt > 1)
1889 		ufs_itimes(vp);
1890 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
1891 }
1892 
1893 /*
1894  * Kqfilter wrapper for fifos.
1895  *
1896  * Fall through to ufs kqfilter routines if needed
1897  */
1898 static
1899 int
1900 ufsfifo_kqfilter(struct vop_kqfilter_args *ap)
1901 {
1902 	int error;
1903 
1904 	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1905 	if (error)
1906 		error = ufs_kqfilter(ap);
1907 	return (error);
1908 }
1909 
1910 /*
1911  * Advisory record locking support
1912  *
1913  * ufs_advlock(struct vnode *a_vp, caddr_t a_id, int a_op, struct flock *a_fl,
1914  *	       int a_flags)
1915  */
1916 static
1917 int
1918 ufs_advlock(struct vop_advlock_args *ap)
1919 {
1920 	struct inode *ip = VTOI(ap->a_vp);
1921 
1922 	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
1923 }
1924 
1925 /*
1926  * Initialize the vnode associated with a new inode, handle aliased
1927  * vnodes.
1928  *
1929  * Make sure directories have their VM object now rather then later,
1930  * saving us from having to check on all the myrid directory VOPs
1931  * that might be executed without a VOP_OPEN being performed.
1932  */
1933 int
1934 ufs_vinit(struct mount *mntp, struct vnode **vpp)
1935 {
1936 	struct inode *ip;
1937 	struct vnode *vp;
1938 	struct timeval tv;
1939 
1940 	vp = *vpp;
1941 	ip = VTOI(vp);
1942 
1943 	vp->v_type = IFTOVT(ip->i_mode);
1944 
1945 	switch(vp->v_type) {
1946 	case VCHR:
1947 	case VBLK:
1948 		vp->v_ops = &mntp->mnt_vn_spec_ops;
1949 		addaliasu(vp, umajor(ip->i_rdev), uminor(ip->i_rdev));
1950 		break;
1951 	case VFIFO:
1952 		vp->v_ops = &mntp->mnt_vn_fifo_ops;
1953 		break;
1954 	case VDIR:
1955 	case VREG:
1956 		vinitvmio(vp, ip->i_size,
1957 			  blkoffsize(ip->i_fs, ip, ip->i_size),
1958 			  blkoff(ip->i_fs, ip->i_size));
1959 		break;
1960 	case VLNK:
1961 		if (ip->i_size >= vp->v_mount->mnt_maxsymlinklen) {
1962 			vinitvmio(vp, ip->i_size,
1963 				  blkoffsize(ip->i_fs, ip, ip->i_size),
1964 				  blkoff(ip->i_fs, ip->i_size));
1965 		}
1966 		break;
1967 	default:
1968 		break;
1969 
1970 	}
1971 
1972 	if (ip->i_number == ROOTINO)
1973 		vsetflags(vp, VROOT);
1974 	/*
1975 	 * Initialize modrev times
1976 	 */
1977 	getmicrouptime(&tv);
1978 	SETHIGH(ip->i_modrev, tv.tv_sec);
1979 	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
1980 	*vpp = vp;
1981 	return (0);
1982 }
1983 
1984 /*
1985  * Allocate a new inode.
1986  */
1987 static
1988 int
1989 ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
1990 	      struct componentname *cnp)
1991 {
1992 	struct inode *ip, *pdir;
1993 	struct direct newdir;
1994 	struct vnode *tvp;
1995 	int error;
1996 
1997 	pdir = VTOI(dvp);
1998 	*vpp = NULL;
1999 	if ((mode & IFMT) == 0)
2000 		mode |= IFREG;
2001 
2002 	error = ffs_valloc(dvp, mode, cnp->cn_cred, &tvp);
2003 	if (error)
2004 		return (error);
2005 	ip = VTOI(tvp);
2006 	ip->i_flags = pdir->i_flags & (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP);
2007 	ip->i_gid = pdir->i_gid;
2008 #ifdef SUIDDIR
2009 	{
2010 #ifdef QUOTA
2011 		struct ucred ucred, *ucp;
2012 		ucp = cnp->cn_cred;
2013 #endif
2014 		/*
2015 		 * If we are not the owner of the directory,
2016 		 * and we are hacking owners here, (only do this where told to)
2017 		 * and we are not giving it TO root, (would subvert quotas)
2018 		 * then go ahead and give it to the other user.
2019 		 * Note that this drops off the execute bits for security.
2020 		 */
2021 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2022 		    (pdir->i_mode & ISUID) &&
2023 		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2024 			ip->i_uid = pdir->i_uid;
2025 			mode &= ~07111;
2026 #ifdef QUOTA
2027 			/*
2028 			 * Make sure the correct user gets charged
2029 			 * for the space.
2030 			 * Quickly knock up a dummy credential for the victim.
2031 			 * XXX This seems to never be accessed out of our
2032 			 * context so a stack variable is ok.
2033 			 */
2034 			ucred.cr_ref = 1;
2035 			ucred.cr_uid = ip->i_uid;
2036 			ucred.cr_ngroups = 1;
2037 			ucred.cr_groups[0] = pdir->i_gid;
2038 			ucp = &ucred;
2039 #endif
2040 		} else
2041 			ip->i_uid = cnp->cn_cred->cr_uid;
2042 
2043 #ifdef QUOTA
2044 		if ((error = ufs_getinoquota(ip)) ||
2045 	    	    (error = ufs_chkiq(ip, 1, ucp, 0))) {
2046 			ffs_vfree(tvp, ip->i_number, mode);
2047 			vput(tvp);
2048 			return (error);
2049 		}
2050 #endif
2051 	}
2052 #else	/* !SUIDDIR */
2053 	ip->i_uid = cnp->cn_cred->cr_uid;
2054 #ifdef QUOTA
2055 	if ((error = ufs_getinoquota(ip)) ||
2056 	    (error = ufs_chkiq(ip, 1, cnp->cn_cred, 0))) {
2057 		ffs_vfree(tvp, ip->i_number, mode);
2058 		vput(tvp);
2059 		return (error);
2060 	}
2061 #endif
2062 #endif	/* !SUIDDIR */
2063 	ip->i_din.di_spare[0] = 0;
2064 	ip->i_din.di_spare[1] = 0;
2065 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2066 	ip->i_mode = mode;
2067 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2068 	ip->i_effnlink = 1;
2069 	ip->i_nlink = 1;
2070 	if (DOINGSOFTDEP(tvp))
2071 		softdep_change_linkcnt(ip);
2072 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2073 	    priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) {
2074 		ip->i_mode &= ~ISGID;
2075 	}
2076 
2077 	if (cnp->cn_flags & CNP_ISWHITEOUT)
2078 		ip->i_flags |= UF_OPAQUE;
2079 
2080 	/*
2081 	 * Regular files and directories need VM objects.  Softlinks do
2082 	 * not (not immediately anyway).
2083 	 */
2084 	if (tvp->v_type == VREG || tvp->v_type == VDIR)
2085 		vinitvmio(tvp, 0, PAGE_SIZE, -1);
2086 
2087 	/*
2088 	 * Make sure inode goes to disk before directory entry.
2089 	 */
2090 	error = ffs_update(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2091 	if (error)
2092 		goto bad;
2093 	ufs_makedirentry(ip, cnp, &newdir);
2094 	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
2095 	if (error)
2096 		goto bad;
2097 	*vpp = tvp;
2098 	return (0);
2099 
2100 bad:
2101 	/*
2102 	 * Write error occurred trying to update the inode
2103 	 * or the directory so must deallocate the inode.
2104 	 */
2105 	ip->i_effnlink = 0;
2106 	ip->i_nlink = 0;
2107 	ip->i_flag |= IN_CHANGE;
2108 	if (DOINGSOFTDEP(tvp))
2109 		softdep_change_linkcnt(ip);
2110 	vput(tvp);
2111 	return (error);
2112 }
2113 
2114 static int
2115 ufs_missingop(struct vop_generic_args *ap)
2116 {
2117 	panic("no vop function for %s in ufs child", ap->a_desc->sd_name);
2118 	return (EOPNOTSUPP);
2119 }
2120 
2121 static struct filterops ufsread_filtops =
2122 	{ FILTEROP_ISFD, NULL, filt_ufsdetach, filt_ufsread };
2123 static struct filterops ufswrite_filtops =
2124 	{ FILTEROP_ISFD, NULL, filt_ufsdetach, filt_ufswrite };
2125 static struct filterops ufsvnode_filtops =
2126 	{ FILTEROP_ISFD, NULL, filt_ufsdetach, filt_ufsvnode };
2127 
2128 /*
2129  * ufs_kqfilter(struct vnode *a_vp, struct knote *a_kn)
2130  */
2131 static int
2132 ufs_kqfilter(struct vop_kqfilter_args *ap)
2133 {
2134 	struct vnode *vp = ap->a_vp;
2135 	struct knote *kn = ap->a_kn;
2136 
2137 	switch (kn->kn_filter) {
2138 	case EVFILT_READ:
2139 		kn->kn_fop = &ufsread_filtops;
2140 		break;
2141 	case EVFILT_WRITE:
2142 		kn->kn_fop = &ufswrite_filtops;
2143 		break;
2144 	case EVFILT_VNODE:
2145 		kn->kn_fop = &ufsvnode_filtops;
2146 		break;
2147 	default:
2148 		return (EOPNOTSUPP);
2149 	}
2150 
2151 	kn->kn_hook = (caddr_t)vp;
2152 
2153 	/* XXX: kq token actually protects the list */
2154 	lwkt_gettoken(&vp->v_token);
2155 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2156 	lwkt_reltoken(&vp->v_token);
2157 
2158 	return (0);
2159 }
2160 
2161 static void
2162 filt_ufsdetach(struct knote *kn)
2163 {
2164 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2165 
2166 	lwkt_gettoken(&vp->v_token);
2167 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2168 	lwkt_reltoken(&vp->v_token);
2169 }
2170 
2171 /*ARGSUSED*/
2172 static int
2173 filt_ufsread(struct knote *kn, long hint)
2174 {
2175 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2176 	struct inode *ip = VTOI(vp);
2177 	off_t off;
2178 
2179 	/*
2180 	 * filesystem is gone, so set the EOF flag and schedule
2181 	 * the knote for deletion.
2182 	 */
2183 	if (hint == NOTE_REVOKE) {
2184 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2185 		return (1);
2186 	}
2187 
2188 	off = ip->i_size - kn->kn_fp->f_offset;
2189 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2190 	if (kn->kn_sfflags & NOTE_OLDAPI)
2191 		return(1);
2192         return (kn->kn_data != 0);
2193 }
2194 
2195 /*ARGSUSED*/
2196 static int
2197 filt_ufswrite(struct knote *kn, long hint)
2198 {
2199 	/*
2200 	 * filesystem is gone, so set the EOF flag and schedule
2201 	 * the knote for deletion.
2202 	 */
2203 	if (hint == NOTE_REVOKE)
2204 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2205 
2206         kn->kn_data = 0;
2207         return (1);
2208 }
2209 
2210 static int
2211 filt_ufsvnode(struct knote *kn, long hint)
2212 {
2213 	if (kn->kn_sfflags & hint)
2214 		kn->kn_fflags |= hint;
2215 	if (hint == NOTE_REVOKE) {
2216 		kn->kn_flags |= (EV_EOF | EV_NODATA);
2217 		return (1);
2218 	}
2219 	return (kn->kn_fflags != 0);
2220 }
2221 
2222 /* Global vfs data structures for ufs. */
2223 static struct vop_ops ufs_vnode_vops = {
2224 	.vop_default =		vop_defaultop,
2225 	.vop_fsync =		(void *)ufs_missingop,
2226 	.vop_read =		(void *)ufs_missingop,
2227 	.vop_reallocblks =	(void *)ufs_missingop,
2228 	.vop_write =		(void *)ufs_missingop,
2229 	.vop_access =		ufs_access,
2230 	.vop_advlock =		ufs_advlock,
2231 	.vop_bmap =		ufs_bmap,
2232 	.vop_old_lookup =	ufs_lookup,
2233 	.vop_close =		ufs_close,
2234 	.vop_old_create =	ufs_create,
2235 	.vop_getattr =		ufs_getattr,
2236 	.vop_inactive =		ufs_inactive,
2237 	.vop_old_link =		ufs_link,
2238 	.vop_old_mkdir =	ufs_mkdir,
2239 	.vop_old_mknod =	ufs_mknod,
2240 	.vop_mmap =		ufs_mmap,
2241 	.vop_open =		vop_stdopen,
2242 	.vop_pathconf =		vop_stdpathconf,
2243 	.vop_kqfilter =		ufs_kqfilter,
2244 	.vop_print =		ufs_print,
2245 	.vop_readdir =		ufs_readdir,
2246 	.vop_readlink =		ufs_readlink,
2247 	.vop_reclaim =		ufs_reclaim,
2248 	.vop_old_remove =	ufs_remove,
2249 	.vop_old_rename =	ufs_rename,
2250 	.vop_old_rmdir =	ufs_rmdir,
2251 	.vop_setattr =		ufs_setattr,
2252 	.vop_markatime =	ufs_markatime,
2253 	.vop_strategy =		ufs_strategy,
2254 	.vop_old_symlink =	ufs_symlink,
2255 	.vop_old_whiteout =	ufs_whiteout
2256 };
2257 
2258 static struct vop_ops ufs_spec_vops = {
2259 	.vop_default =		vop_defaultop,
2260 	.vop_fsync =		(void *)ufs_missingop,
2261 	.vop_access =		ufs_access,
2262 	.vop_close =		ufs_close,
2263 	.vop_getattr =		ufs_getattr,
2264 	.vop_inactive =		ufs_inactive,
2265 	.vop_print =		ufs_print,
2266 	.vop_read =		vop_stdnoread,
2267 	.vop_reclaim =		ufs_reclaim,
2268 	.vop_setattr =		ufs_setattr,
2269 	.vop_markatime =	ufs_markatime,
2270 	.vop_write =		vop_stdnowrite
2271 };
2272 
2273 static struct vop_ops ufs_fifo_vops = {
2274 	.vop_default =		fifo_vnoperate,
2275 	.vop_fsync =		(void *)ufs_missingop,
2276 	.vop_access =		ufs_access,
2277 	.vop_close =		ufsfifo_close,
2278 	.vop_getattr =		ufs_getattr,
2279 	.vop_inactive =		ufs_inactive,
2280 	.vop_kqfilter =		ufsfifo_kqfilter,
2281 	.vop_print =		ufs_print,
2282 	.vop_read =		ufsfifo_read,
2283 	.vop_reclaim =		ufs_reclaim,
2284 	.vop_setattr =		ufs_setattr,
2285 	.vop_markatime =	ufs_markatime,
2286 	.vop_write =		ufsfifo_write
2287 };
2288 
/*
 * Hand each of the three ufs operation tables to VNODEOP_SET.
 * NOTE(review): presumably this registers the tables with the VFS layer
 * at load time; the macro is defined outside this file, so confirm there.
 */
VNODEOP_SET(ufs_vnode_vops);
VNODEOP_SET(ufs_spec_vops);
VNODEOP_SET(ufs_fifo_vops);
2292 
2293 /*
2294  * ufs_vnoperate()
2295  */
2296 int
2297 ufs_vnoperate(struct vop_generic_args *ap)
2298 {
2299 	return (VOCALL(&ufs_vnode_vops, ap));
2300 }
2301 
2302 /*
2303  * ufs_vnoperatefifo()
2304  */
2305 int
2306 ufs_vnoperatefifo(struct vop_generic_args *ap)
2307 {
2308 	return (VOCALL(&ufs_fifo_vops, ap));
2309 }
2310 
2311 /*
2312  * ufs_vnoperatespec()
2313  */
2314 int
2315 ufs_vnoperatespec(struct vop_generic_args *ap)
2316 {
2317 	return (VOCALL(&ufs_spec_vops, ap));
2318 }
2319