xref: /dragonfly/sys/vfs/ufs/ufs_vnops.c (revision 9bb2a92d)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
39  * $FreeBSD: src/sys/ufs/ufs/ufs_vnops.c,v 1.131.2.8 2003/01/02 17:26:19 bde Exp $
40  * $DragonFly: src/sys/vfs/ufs/ufs_vnops.c,v 1.11 2004/03/01 06:33:23 dillon Exp $
41  */
42 
43 #include "opt_quota.h"
44 #include "opt_suiddir.h"
45 #include "opt_ufs.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/fcntl.h>
51 #include <sys/stat.h>
52 #include <sys/buf.h>
53 #include <sys/proc.h>
54 #include <sys/namei.h>
55 #include <sys/mount.h>
56 #include <sys/unistd.h>
57 #include <sys/vnode.h>
58 #include <sys/malloc.h>
59 #include <sys/dirent.h>
60 #include <sys/lockf.h>
61 #include <sys/event.h>
62 #include <sys/conf.h>
63 
64 #include <sys/file.h>		/* XXX */
65 
66 #include <vm/vm.h>
67 #include <vm/vm_extern.h>
68 
69 #include <vfs/fifofs/fifo.h>
70 
71 #include "quota.h"
72 #include "inode.h"
73 #include "dir.h"
74 #include "ufsmount.h"
75 #include "ufs_extern.h"
76 #ifdef UFS_DIRHASH
77 #include "dirhash.h"
78 #endif
79 
80 static int ufs_access (struct vop_access_args *);
81 static int ufs_advlock (struct vop_advlock_args *);
82 static int ufs_chmod (struct vnode *, int, struct ucred *, struct thread *);
83 static int ufs_chown (struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
84 static int ufs_close (struct vop_close_args *);
85 static int ufs_create (struct vop_create_args *);
86 static int ufs_getattr (struct vop_getattr_args *);
87 static int ufs_link (struct vop_link_args *);
88 static int ufs_makeinode (int mode, struct vnode *, struct vnode **, struct componentname *);
89 static int ufs_missingop (struct vop_generic_args *ap);
90 static int ufs_mkdir (struct vop_mkdir_args *);
91 static int ufs_mknod (struct vop_mknod_args *);
92 static int ufs_mmap (struct vop_mmap_args *);
93 static int ufs_open (struct vop_open_args *);
94 static int ufs_pathconf (struct vop_pathconf_args *);
95 static int ufs_print (struct vop_print_args *);
96 static int ufs_readdir (struct vop_readdir_args *);
97 static int ufs_readlink (struct vop_readlink_args *);
98 static int ufs_remove (struct vop_remove_args *);
99 static int ufs_rename (struct vop_rename_args *);
100 static int ufs_rmdir (struct vop_rmdir_args *);
101 static int ufs_setattr (struct vop_setattr_args *);
102 static int ufs_strategy (struct vop_strategy_args *);
103 static int ufs_symlink (struct vop_symlink_args *);
104 static int ufs_whiteout (struct vop_whiteout_args *);
105 static int ufsfifo_close (struct vop_close_args *);
106 static int ufsfifo_kqfilter (struct vop_kqfilter_args *);
107 static int ufsfifo_read (struct vop_read_args *);
108 static int ufsfifo_write (struct vop_write_args *);
109 static int ufsspec_close (struct vop_close_args *);
110 static int ufsspec_read (struct vop_read_args *);
111 static int ufsspec_write (struct vop_write_args *);
112 static int filt_ufsread (struct knote *kn, long hint);
113 static int filt_ufswrite (struct knote *kn, long hint);
114 static int filt_ufsvnode (struct knote *kn, long hint);
115 static void filt_ufsdetach (struct knote *kn);
116 static int ufs_kqfilter (struct vop_kqfilter_args *ap);
117 
/*
 * Overlay used to read or replace one 32-bit half of a 64-bit quantity.
 * _QUAD_HIGHWORD and _QUAD_LOWWORD select the val[] slot holding each half.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};

/*
 * SETHIGH(q, h): replace the high 32-bit word of the 64-bit lvalue q with h,
 * leaving the low word unchanged.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (it can be followed by ';' inside an unbraced if/else).  The
 * temporary has a macro-specific name so it cannot shadow an argument
 * called "tmp".
 */
#define SETHIGH(q, h) do { \
	union _qcvt qcvt_tmp_; \
	qcvt_tmp_.qcvt = (q); \
	qcvt_tmp_.val[_QUAD_HIGHWORD] = (h); \
	(q) = qcvt_tmp_.qcvt; \
} while (0)

/*
 * SETLOW(q, l): replace the low 32-bit word of the 64-bit lvalue q with l,
 * leaving the high word unchanged.  Same statement-safety notes as SETHIGH.
 */
#define SETLOW(q, l) do { \
	union _qcvt qcvt_tmp_; \
	qcvt_tmp_.qcvt = (q); \
	qcvt_tmp_.val[_QUAD_LOWWORD] = (l); \
	(q) = qcvt_tmp_.qcvt; \
} while (0)
/*
 * Invoke KNOTE() with hint b on the si_note list found in vnode vp's
 * v_pollinfo.vpi_selinfo.  The vp argument is parenthesised so that
 * expressions such as *vpp or (a ? b : c) may be passed safely.
 */
#define VN_KNOTE(vp, b) \
	KNOTE(&(vp)->v_pollinfo.vpi_selinfo.si_note, (b))
136 
137 /*
138  * A virgin directory (no blushing please).
139  */
140 static struct dirtemplate mastertemplate = {
141 	0, 12, DT_DIR, 1, ".",
142 	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
143 };
144 static struct odirtemplate omastertemplate = {
145 	0, 12, 1, ".",
146 	0, DIRBLKSIZ - 12, 2, ".."
147 };
148 
149 void
150 ufs_itimes(vp)
151 	struct vnode *vp;
152 {
153 	struct inode *ip;
154 	struct timespec ts;
155 
156 	ip = VTOI(vp);
157 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
158 		return;
159 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
160 		ip->i_flag |= IN_LAZYMOD;
161 	else
162 		ip->i_flag |= IN_MODIFIED;
163 	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
164 		vfs_timestamp(&ts);
165 		if (ip->i_flag & IN_ACCESS) {
166 			ip->i_atime = ts.tv_sec;
167 			ip->i_atimensec = ts.tv_nsec;
168 		}
169 		if (ip->i_flag & IN_UPDATE) {
170 			ip->i_mtime = ts.tv_sec;
171 			ip->i_mtimensec = ts.tv_nsec;
172 			ip->i_modrev++;
173 		}
174 		if (ip->i_flag & IN_CHANGE) {
175 			ip->i_ctime = ts.tv_sec;
176 			ip->i_ctimensec = ts.tv_nsec;
177 		}
178 	}
179 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
180 }
181 
182 /*
183  * Create a regular file
184  */
185 int
186 ufs_create(ap)
187 	struct vop_create_args /* {
188 		struct vnode *a_dvp;
189 		struct vnode **a_vpp;
190 		struct componentname *a_cnp;
191 		struct vattr *a_vap;
192 	} */ *ap;
193 {
194 	int error;
195 
196 	error =
197 	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
198 	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
199 	if (error)
200 		return (error);
201 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
202 	return (0);
203 }
204 
205 /*
206  * Mknod vnode call
207  */
208 /* ARGSUSED */
209 int
210 ufs_mknod(ap)
211 	struct vop_mknod_args /* {
212 		struct vnode *a_dvp;
213 		struct vnode **a_vpp;
214 		struct componentname *a_cnp;
215 		struct vattr *a_vap;
216 	} */ *ap;
217 {
218 	struct vattr *vap = ap->a_vap;
219 	struct vnode **vpp = ap->a_vpp;
220 	struct inode *ip;
221 	ino_t ino;
222 	int error;
223 
224 	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
225 	    ap->a_dvp, vpp, ap->a_cnp);
226 	if (error)
227 		return (error);
228 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
229 	ip = VTOI(*vpp);
230 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
231 	if (vap->va_rdev != VNOVAL) {
232 		/*
233 		 * Want to be able to use this to make badblock
234 		 * inodes, so don't truncate the dev number.
235 		 */
236 		ip->i_rdev = vap->va_rdev;
237 	}
238 	/*
239 	 * Remove inode, then reload it through VFS_VGET so it is
240 	 * checked to see if it is an alias of an existing entry in
241 	 * the inode cache.
242 	 */
243 	vput(*vpp);
244 	(*vpp)->v_type = VNON;
245 	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
246 	vgone(*vpp);
247 	error = VFS_VGET(ap->a_dvp->v_mount, ino, vpp);
248 	if (error) {
249 		*vpp = NULL;
250 		return (error);
251 	}
252 	return (0);
253 }
254 
255 /*
256  * Open called.
257  *
258  * Nothing to do.
259  */
260 /* ARGSUSED */
261 int
262 ufs_open(ap)
263 	struct vop_open_args /* {
264 		struct vnode *a_vp;
265 		int  a_mode;
266 		struct ucred *a_cred;
267 		struct thread *a_td;
268 	} */ *ap;
269 {
270 
271 	/*
272 	 * Files marked append-only must be opened for appending.
273 	 */
274 	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
275 	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
276 		return (EPERM);
277 	return (0);
278 }
279 
280 /*
281  * Close called.
282  *
283  * Update the times on the inode.
284  */
285 /* ARGSUSED */
286 int
287 ufs_close(ap)
288 	struct vop_close_args /* {
289 		struct vnode *a_vp;
290 		int  a_fflag;
291 		struct ucred *a_cred;
292 		struct thread *a_td;
293 	} */ *ap;
294 {
295 	struct vnode *vp = ap->a_vp;
296 	lwkt_tokref vlock;
297 
298 	lwkt_gettoken(&vlock, vp->v_interlock);
299 	if (vp->v_usecount > 1)
300 		ufs_itimes(vp);
301 	lwkt_reltoken(&vlock);
302 	return (0);
303 }
304 
305 int
306 ufs_access(ap)
307 	struct vop_access_args /* {
308 		struct vnode *a_vp;
309 		int  a_mode;
310 		struct ucred *a_cred;
311 		struct thread *a_td;
312 	} */ *ap;
313 {
314 	struct vnode *vp = ap->a_vp;
315 	struct inode *ip = VTOI(vp);
316 	struct ucred *cred = ap->a_cred;
317 	mode_t mask, mode = ap->a_mode;
318 	gid_t *gp;
319 	int i;
320 #ifdef QUOTA
321 	int error;
322 #endif
323 
324 	/*
325 	 * Disallow write attempts on read-only file systems;
326 	 * unless the file is a socket, fifo, or a block or
327 	 * character device resident on the file system.
328 	 */
329 	if (mode & VWRITE) {
330 		switch (vp->v_type) {
331 		case VDIR:
332 		case VLNK:
333 		case VREG:
334 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
335 				return (EROFS);
336 #ifdef QUOTA
337 			if ((error = getinoquota(ip)) != 0)
338 				return (error);
339 #endif
340 			break;
341 		default:
342 			break;
343 		}
344 	}
345 
346 	/* If immutable bit set, nobody gets to write it. */
347 	if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE))
348 		return (EPERM);
349 
350 	/* Otherwise, user id 0 always gets access. */
351 	if (cred->cr_uid == 0)
352 		return (0);
353 
354 	mask = 0;
355 
356 	/* Otherwise, check the owner. */
357 	if (cred->cr_uid == ip->i_uid) {
358 		if (mode & VEXEC)
359 			mask |= S_IXUSR;
360 		if (mode & VREAD)
361 			mask |= S_IRUSR;
362 		if (mode & VWRITE)
363 			mask |= S_IWUSR;
364 		return ((ip->i_mode & mask) == mask ? 0 : EACCES);
365 	}
366 
367 	/* Otherwise, check the groups. */
368 	for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
369 		if (ip->i_gid == *gp) {
370 			if (mode & VEXEC)
371 				mask |= S_IXGRP;
372 			if (mode & VREAD)
373 				mask |= S_IRGRP;
374 			if (mode & VWRITE)
375 				mask |= S_IWGRP;
376 			return ((ip->i_mode & mask) == mask ? 0 : EACCES);
377 		}
378 
379 	/* Otherwise, check everyone else. */
380 	if (mode & VEXEC)
381 		mask |= S_IXOTH;
382 	if (mode & VREAD)
383 		mask |= S_IROTH;
384 	if (mode & VWRITE)
385 		mask |= S_IWOTH;
386 	return ((ip->i_mode & mask) == mask ? 0 : EACCES);
387 }
388 
389 /* ARGSUSED */
390 int
391 ufs_getattr(ap)
392 	struct vop_getattr_args /* {
393 		struct vnode *a_vp;
394 		struct vattr *a_vap;
395 		struct thread *a_td;
396 	} */ *ap;
397 {
398 	struct vnode *vp = ap->a_vp;
399 	struct inode *ip = VTOI(vp);
400 	struct vattr *vap = ap->a_vap;
401 
402 	ufs_itimes(vp);
403 	/*
404 	 * Copy from inode table
405 	 */
406 	vap->va_fsid = dev2udev(ip->i_dev);
407 	vap->va_fileid = ip->i_number;
408 	vap->va_mode = ip->i_mode & ~IFMT;
409 	vap->va_nlink = VFSTOUFS(vp->v_mount)->um_i_effnlink_valid ?
410 	    ip->i_effnlink : ip->i_nlink;
411 	vap->va_uid = ip->i_uid;
412 	vap->va_gid = ip->i_gid;
413 	vap->va_rdev = ip->i_rdev;
414 	vap->va_size = ip->i_din.di_size;
415 	vap->va_atime.tv_sec = ip->i_atime;
416 	vap->va_atime.tv_nsec = ip->i_atimensec;
417 	vap->va_mtime.tv_sec = ip->i_mtime;
418 	vap->va_mtime.tv_nsec = ip->i_mtimensec;
419 	vap->va_ctime.tv_sec = ip->i_ctime;
420 	vap->va_ctime.tv_nsec = ip->i_ctimensec;
421 	vap->va_flags = ip->i_flags;
422 	vap->va_gen = ip->i_gen;
423 	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
424 	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
425 	vap->va_type = IFTOVT(ip->i_mode);
426 	vap->va_filerev = ip->i_modrev;
427 	return (0);
428 }
429 
430 /*
431  * Set attribute vnode op. called from several syscalls
432  */
433 int
434 ufs_setattr(ap)
435 	struct vop_setattr_args /* {
436 		struct vnode *a_vp;
437 		struct vattr *a_vap;
438 		struct ucred *a_cred;
439 		struct thread *a_td;
440 	} */ *ap;
441 {
442 	struct vattr *vap = ap->a_vap;
443 	struct vnode *vp = ap->a_vp;
444 	struct inode *ip = VTOI(vp);
445 	struct ucred *cred = ap->a_cred;
446 	int error;
447 
448 	/*
449 	 * Check for unsettable attributes.
450 	 */
451 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
452 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
453 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
454 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
455 		return (EINVAL);
456 	}
457 	if (vap->va_flags != VNOVAL) {
458 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
459 			return (EROFS);
460 		if (cred->cr_uid != ip->i_uid &&
461 		    (error = suser_cred(cred, PRISON_ROOT)))
462 			return (error);
463 		if ((cred->cr_uid == 0) && (cred->cr_prison == NULL)) {
464 			if ((ip->i_flags
465 			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) &&
466 			    securelevel > 0)
467 				return (EPERM);
468 			ip->i_flags = vap->va_flags;
469 		} else {
470 			if (ip->i_flags
471 			    & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
472 			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
473 				return (EPERM);
474 			ip->i_flags &= SF_SETTABLE;
475 			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
476 		}
477 		ip->i_flag |= IN_CHANGE;
478 		if (vap->va_flags & (IMMUTABLE | APPEND))
479 			return (0);
480 	}
481 	if (ip->i_flags & (IMMUTABLE | APPEND))
482 		return (EPERM);
483 	/*
484 	 * Go through the fields and update iff not VNOVAL.
485 	 */
486 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
487 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
488 			return (EROFS);
489 		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, ap->a_td)) != 0)
490 			return (error);
491 	}
492 	if (vap->va_size != VNOVAL) {
493 		/*
494 		 * Disallow write attempts on read-only file systems;
495 		 * unless the file is a socket, fifo, or a block or
496 		 * character device resident on the file system.
497 		 */
498 		switch (vp->v_type) {
499 		case VDIR:
500 			return (EISDIR);
501 		case VLNK:
502 		case VREG:
503 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
504 				return (EROFS);
505 			break;
506 		default:
507 			break;
508 		}
509 		if ((error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, ap->a_td)) != 0)
510 			return (error);
511 	}
512 	ip = VTOI(vp);
513 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
514 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
515 			return (EROFS);
516 		if (cred->cr_uid != ip->i_uid &&
517 		    (error = suser_cred(cred, PRISON_ROOT)) &&
518 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
519 		    (error = VOP_ACCESS(vp, VWRITE, cred, ap->a_td))))
520 			return (error);
521 		if (vap->va_atime.tv_sec != VNOVAL)
522 			ip->i_flag |= IN_ACCESS;
523 		if (vap->va_mtime.tv_sec != VNOVAL)
524 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
525 		ufs_itimes(vp);
526 		if (vap->va_atime.tv_sec != VNOVAL) {
527 			ip->i_atime = vap->va_atime.tv_sec;
528 			ip->i_atimensec = vap->va_atime.tv_nsec;
529 		}
530 		if (vap->va_mtime.tv_sec != VNOVAL) {
531 			ip->i_mtime = vap->va_mtime.tv_sec;
532 			ip->i_mtimensec = vap->va_mtime.tv_nsec;
533 		}
534 		error = UFS_UPDATE(vp, 0);
535 		if (error)
536 			return (error);
537 	}
538 	error = 0;
539 	if (vap->va_mode != (mode_t)VNOVAL) {
540 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
541 			return (EROFS);
542 		error = ufs_chmod(vp, (int)vap->va_mode, cred, ap->a_td);
543 	}
544 	VN_KNOTE(vp, NOTE_ATTRIB);
545 	return (error);
546 }
547 
548 /*
549  * Change the mode on a file.
550  * Inode must be locked before calling.
551  */
552 static int
553 ufs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
554 {
555 	struct inode *ip = VTOI(vp);
556 	int error;
557 
558 	if (cred->cr_uid != ip->i_uid) {
559 	    error = suser_cred(cred, PRISON_ROOT);
560 	    if (error)
561 		return (error);
562 	}
563 	if (cred->cr_uid) {
564 		if (vp->v_type != VDIR && (mode & S_ISTXT))
565 			return (EFTYPE);
566 		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
567 			return (EPERM);
568 	}
569 	ip->i_mode &= ~ALLPERMS;
570 	ip->i_mode |= (mode & ALLPERMS);
571 	ip->i_flag |= IN_CHANGE;
572 	return (0);
573 }
574 
575 /*
576  * Perform chown operation on inode ip;
577  * inode must be locked prior to call.
578  */
579 static int
580 ufs_chown(vp, uid, gid, cred, td)
581 	struct vnode *vp;
582 	uid_t uid;
583 	gid_t gid;
584 	struct ucred *cred;
585 	struct thread *td;
586 {
587 	struct inode *ip = VTOI(vp);
588 	uid_t ouid;
589 	gid_t ogid;
590 	int error = 0;
591 #ifdef QUOTA
592 	int i;
593 	long change;
594 #endif
595 
596 	if (uid == (uid_t)VNOVAL)
597 		uid = ip->i_uid;
598 	if (gid == (gid_t)VNOVAL)
599 		gid = ip->i_gid;
600 	/*
601 	 * If we don't own the file, are trying to change the owner
602 	 * of the file, or are not a member of the target group,
603 	 * the caller must be superuser or the call fails.
604 	 */
605 	if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
606 	    (gid != ip->i_gid && !groupmember((gid_t)gid, cred))) &&
607 	    (error = suser_cred(cred, PRISON_ROOT)))
608 		return (error);
609 	ogid = ip->i_gid;
610 	ouid = ip->i_uid;
611 #ifdef QUOTA
612 	if ((error = getinoquota(ip)) != 0)
613 		return (error);
614 	if (ouid == uid) {
615 		dqrele(vp, ip->i_dquot[USRQUOTA]);
616 		ip->i_dquot[USRQUOTA] = NODQUOT;
617 	}
618 	if (ogid == gid) {
619 		dqrele(vp, ip->i_dquot[GRPQUOTA]);
620 		ip->i_dquot[GRPQUOTA] = NODQUOT;
621 	}
622 	change = ip->i_blocks;
623 	(void) chkdq(ip, -change, cred, CHOWN);
624 	(void) chkiq(ip, -1, cred, CHOWN);
625 	for (i = 0; i < MAXQUOTAS; i++) {
626 		dqrele(vp, ip->i_dquot[i]);
627 		ip->i_dquot[i] = NODQUOT;
628 	}
629 #endif
630 	ip->i_gid = gid;
631 	ip->i_uid = uid;
632 #ifdef QUOTA
633 	if ((error = getinoquota(ip)) == 0) {
634 		if (ouid == uid) {
635 			dqrele(vp, ip->i_dquot[USRQUOTA]);
636 			ip->i_dquot[USRQUOTA] = NODQUOT;
637 		}
638 		if (ogid == gid) {
639 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
640 			ip->i_dquot[GRPQUOTA] = NODQUOT;
641 		}
642 		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
643 			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
644 				goto good;
645 			else
646 				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
647 		}
648 		for (i = 0; i < MAXQUOTAS; i++) {
649 			dqrele(vp, ip->i_dquot[i]);
650 			ip->i_dquot[i] = NODQUOT;
651 		}
652 	}
653 	ip->i_gid = ogid;
654 	ip->i_uid = ouid;
655 	if (getinoquota(ip) == 0) {
656 		if (ouid == uid) {
657 			dqrele(vp, ip->i_dquot[USRQUOTA]);
658 			ip->i_dquot[USRQUOTA] = NODQUOT;
659 		}
660 		if (ogid == gid) {
661 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
662 			ip->i_dquot[GRPQUOTA] = NODQUOT;
663 		}
664 		(void) chkdq(ip, change, cred, FORCE|CHOWN);
665 		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
666 		(void) getinoquota(ip);
667 	}
668 	return (error);
669 good:
670 	if (getinoquota(ip))
671 		panic("ufs_chown: lost quota");
672 #endif /* QUOTA */
673 	ip->i_flag |= IN_CHANGE;
674 	if (cred->cr_uid != 0 && (ouid != uid || ogid != gid))
675 		ip->i_mode &= ~(ISUID | ISGID);
676 	return (0);
677 }
678 
679 /*
680  * Mmap a file
681  *
682  * NB Currently unsupported.
683  */
684 /* ARGSUSED */
685 int
686 ufs_mmap(ap)
687 	struct vop_mmap_args /* {
688 		struct vnode *a_vp;
689 		int  a_fflags;
690 		struct ucred *a_cred;
691 		struct thread *a_td;
692 	} */ *ap;
693 {
694 
695 	return (EINVAL);
696 }
697 
698 int
699 ufs_remove(ap)
700 	struct vop_remove_args /* {
701 		struct vnode *a_dvp;
702 		struct vnode *a_vp;
703 		struct componentname *a_cnp;
704 	} */ *ap;
705 {
706 	struct inode *ip;
707 	struct vnode *vp = ap->a_vp;
708 	struct vnode *dvp = ap->a_dvp;
709 	int error;
710 
711 	ip = VTOI(vp);
712 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
713 	    (VTOI(dvp)->i_flags & APPEND)) {
714 		error = EPERM;
715 		goto out;
716 	}
717 	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
718 	VN_KNOTE(vp, NOTE_DELETE);
719 	VN_KNOTE(dvp, NOTE_WRITE);
720 out:
721 	return (error);
722 }
723 
724 /*
725  * link vnode call
726  */
727 int
728 ufs_link(ap)
729 	struct vop_link_args /* {
730 		struct vnode *a_tdvp;
731 		struct vnode *a_vp;
732 		struct componentname *a_cnp;
733 	} */ *ap;
734 {
735 	struct vnode *vp = ap->a_vp;
736 	struct vnode *tdvp = ap->a_tdvp;
737 	struct componentname *cnp = ap->a_cnp;
738 	struct thread *td = cnp->cn_td;
739 	struct inode *ip;
740 	struct direct newdir;
741 	int error;
742 
743 #ifdef DIAGNOSTIC
744 	if ((cnp->cn_flags & CNP_HASBUF) == 0)
745 		panic("ufs_link: no name");
746 #endif
747 	if (tdvp->v_mount != vp->v_mount) {
748 		error = EXDEV;
749 		goto out2;
750 	}
751 	if (tdvp != vp && (error = vn_lock(vp, NULL, LK_EXCLUSIVE, td))) {
752 		goto out2;
753 	}
754 	ip = VTOI(vp);
755 	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
756 		error = EMLINK;
757 		goto out1;
758 	}
759 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
760 		error = EPERM;
761 		goto out1;
762 	}
763 	ip->i_effnlink++;
764 	ip->i_nlink++;
765 	ip->i_flag |= IN_CHANGE;
766 	if (DOINGSOFTDEP(vp))
767 		softdep_change_linkcnt(ip);
768 	error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
769 	if (!error) {
770 		ufs_makedirentry(ip, cnp, &newdir);
771 		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
772 	}
773 
774 	if (error) {
775 		ip->i_effnlink--;
776 		ip->i_nlink--;
777 		ip->i_flag |= IN_CHANGE;
778 		if (DOINGSOFTDEP(vp))
779 			softdep_change_linkcnt(ip);
780 	}
781 out1:
782 	if (tdvp != vp)
783 		VOP_UNLOCK(vp, NULL, 0, td);
784 out2:
785 	VN_KNOTE(vp, NOTE_LINK);
786 	VN_KNOTE(tdvp, NOTE_WRITE);
787 	return (error);
788 }
789 
790 /*
791  * whiteout vnode call
792  */
793 int
794 ufs_whiteout(ap)
795 	struct vop_whiteout_args /* {
796 		struct vnode *a_dvp;
797 		struct componentname *a_cnp;
798 		int a_flags;
799 	} */ *ap;
800 {
801 	struct vnode *dvp = ap->a_dvp;
802 	struct componentname *cnp = ap->a_cnp;
803 	struct direct newdir;
804 	int error = 0;
805 
806 	switch (ap->a_flags) {
807 	case NAMEI_LOOKUP:
808 		/* 4.4 format directories support whiteout operations */
809 		if (dvp->v_mount->mnt_maxsymlinklen > 0)
810 			return (0);
811 		return (EOPNOTSUPP);
812 
813 	case NAMEI_CREATE:
814 		/* create a new directory whiteout */
815 #ifdef DIAGNOSTIC
816 		if ((cnp->cn_flags & CNP_SAVENAME) == 0)
817 			panic("ufs_whiteout: missing name");
818 		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
819 			panic("ufs_whiteout: old format filesystem");
820 #endif
821 
822 		newdir.d_ino = WINO;
823 		newdir.d_namlen = cnp->cn_namelen;
824 		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
825 		newdir.d_type = DT_WHT;
826 		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
827 		break;
828 
829 	case NAMEI_DELETE:
830 		/* remove an existing directory whiteout */
831 #ifdef DIAGNOSTIC
832 		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
833 			panic("ufs_whiteout: old format filesystem");
834 #endif
835 
836 		cnp->cn_flags &= ~CNP_DOWHITEOUT;
837 		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
838 		break;
839 	default:
840 		panic("ufs_whiteout: unknown op");
841 	}
842 	return (error);
843 }
844 
845 /*
846  * Rename system call.
847  * 	rename("foo", "bar");
848  * is essentially
849  *	unlink("bar");
850  *	link("foo", "bar");
851  *	unlink("foo");
852  * but ``atomically''.  Can't do full commit without saving state in the
853  * inode on disk which isn't feasible at this time.  Best we can do is
854  * always guarantee the target exists.
855  *
856  * Basic algorithm is:
857  *
858  * 1) Bump link count on source while we're linking it to the
859  *    target.  This also ensure the inode won't be deleted out
860  *    from underneath us while we work (it may be truncated by
861  *    a concurrent `trunc' or `open' for creation).
862  * 2) Link source to destination.  If destination already exists,
863  *    delete it first.
864  * 3) Unlink source reference to inode if still around. If a
865  *    directory was moved and the parent of the destination
866  *    is different from the source, patch the ".." entry in the
867  *    directory.
868  */
869 int
870 ufs_rename(ap)
871 	struct vop_rename_args  /* {
872 		struct vnode *a_fdvp;
873 		struct vnode *a_fvp;
874 		struct componentname *a_fcnp;
875 		struct vnode *a_tdvp;
876 		struct vnode *a_tvp;
877 		struct componentname *a_tcnp;
878 	} */ *ap;
879 {
880 	struct vnode *tvp = ap->a_tvp;
881 	struct vnode *tdvp = ap->a_tdvp;
882 	struct vnode *fvp = ap->a_fvp;
883 	struct vnode *fdvp = ap->a_fdvp;
884 	struct componentname *tcnp = ap->a_tcnp;
885 	struct componentname *fcnp = ap->a_fcnp;
886 	struct thread *td = fcnp->cn_td;
887 	struct inode *ip, *xp, *dp;
888 	struct direct newdir;
889 	int doingdirectory = 0, oldparent = 0, newparent = 0;
890 	int error = 0, ioflag;
891 
892 #ifdef DIAGNOSTIC
893 	if ((tcnp->cn_flags & CNP_HASBUF) == 0 ||
894 	    (fcnp->cn_flags & CNP_HASBUF) == 0)
895 		panic("ufs_rename: no name");
896 #endif
897 	/*
898 	 * Check for cross-device rename.
899 	 */
900 	if ((fvp->v_mount != tdvp->v_mount) ||
901 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
902 		error = EXDEV;
903 abortit:
904 		if (tdvp == tvp)
905 			vrele(tdvp);
906 		else
907 			vput(tdvp);
908 		if (tvp)
909 			vput(tvp);
910 		vrele(fdvp);
911 		vrele(fvp);
912 		return (error);
913 	}
914 
915 	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
916 	    (VTOI(tdvp)->i_flags & APPEND))) {
917 		error = EPERM;
918 		goto abortit;
919 	}
920 
921 	/*
922 	 * Renaming a file to itself has no effect.  The upper layers should
923 	 * not call us in that case.  Temporarily just warn if they do.
924 	 */
925 	if (fvp == tvp) {
926 		printf("ufs_rename: fvp == tvp (can't happen)\n");
927 		error = 0;
928 		goto abortit;
929 	}
930 
931 	if ((error = vn_lock(fvp, NULL, LK_EXCLUSIVE, td)) != 0)
932 		goto abortit;
933 	dp = VTOI(fdvp);
934 	ip = VTOI(fvp);
935 	if (ip->i_nlink >= LINK_MAX) {
936 		VOP_UNLOCK(fvp, NULL, 0, td);
937 		error = EMLINK;
938 		goto abortit;
939 	}
940 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
941 	    || (dp->i_flags & APPEND)) {
942 		VOP_UNLOCK(fvp, NULL, 0, td);
943 		error = EPERM;
944 		goto abortit;
945 	}
946 	if ((ip->i_mode & IFMT) == IFDIR) {
947 		/*
948 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
949 		 */
950 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
951 		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & CNP_ISDOTDOT ||
952 		    (ip->i_flag & IN_RENAME)) {
953 			VOP_UNLOCK(fvp, NULL, 0, td);
954 			error = EINVAL;
955 			goto abortit;
956 		}
957 		ip->i_flag |= IN_RENAME;
958 		oldparent = dp->i_number;
959 		doingdirectory = 1;
960 	}
961 	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXX right place? */
962 	vrele(fdvp);
963 
964 	/*
965 	 * When the target exists, both the directory
966 	 * and target vnodes are returned locked.
967 	 */
968 	dp = VTOI(tdvp);
969 	xp = NULL;
970 	if (tvp)
971 		xp = VTOI(tvp);
972 
973 	/*
974 	 * 1) Bump link count while we're moving stuff
975 	 *    around.  If we crash somewhere before
976 	 *    completing our work, the link count
977 	 *    may be wrong, but correctable.
978 	 */
979 	ip->i_effnlink++;
980 	ip->i_nlink++;
981 	ip->i_flag |= IN_CHANGE;
982 	if (DOINGSOFTDEP(fvp))
983 		softdep_change_linkcnt(ip);
984 	if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
985 				       DOINGASYNC(fvp)))) != 0) {
986 		VOP_UNLOCK(fvp, NULL, 0, td);
987 		goto bad;
988 	}
989 
990 	/*
991 	 * If ".." must be changed (ie the directory gets a new
992 	 * parent) then the source directory must not be in the
993 	 * directory heirarchy above the target, as this would
994 	 * orphan everything below the source directory. Also
995 	 * the user must have write permission in the source so
996 	 * as to be able to change "..". We must repeat the call
997 	 * to namei, as the parent directory is unlocked by the
998 	 * call to checkpath().
999 	 */
1000 	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_td);
1001 	VOP_UNLOCK(fvp, NULL, 0, td);
1002 	if (oldparent != dp->i_number)
1003 		newparent = dp->i_number;
1004 	if (doingdirectory && newparent) {
1005 		if (error)	/* write access check above */
1006 			goto bad;
1007 		if (xp != NULL)
1008 			vput(tvp);
1009 		error = ufs_checkpath(ip, dp, tcnp->cn_cred);
1010 		if (error)
1011 			goto out;
1012 		if ((tcnp->cn_flags & CNP_SAVESTART) == 0)
1013 			panic("ufs_rename: lost to startdir");
1014 		VREF(tdvp);
1015 		error = relookup(tdvp, &tvp, tcnp);
1016 		if (error)
1017 			goto out;
1018 		vrele(tdvp);
1019 		dp = VTOI(tdvp);
1020 		xp = NULL;
1021 		if (tvp)
1022 			xp = VTOI(tvp);
1023 	}
1024 	/*
1025 	 * 2) If target doesn't exist, link the target
1026 	 *    to the source and unlink the source.
1027 	 *    Otherwise, rewrite the target directory
1028 	 *    entry to reference the source inode and
1029 	 *    expunge the original entry's existence.
1030 	 */
1031 	if (xp == NULL) {
1032 		if (dp->i_dev != ip->i_dev)
1033 			panic("ufs_rename: EXDEV");
1034 		/*
1035 		 * Account for ".." in new directory.
1036 		 * When source and destination have the same
1037 		 * parent we don't fool with the link count.
1038 		 */
1039 		if (doingdirectory && newparent) {
1040 			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1041 				error = EMLINK;
1042 				goto bad;
1043 			}
1044 			dp->i_effnlink++;
1045 			dp->i_nlink++;
1046 			dp->i_flag |= IN_CHANGE;
1047 			if (DOINGSOFTDEP(tdvp))
1048 				softdep_change_linkcnt(dp);
1049 			error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
1050 						   DOINGASYNC(tdvp)));
1051 			if (error)
1052 				goto bad;
1053 		}
1054 		ufs_makedirentry(ip, tcnp, &newdir);
1055 		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
1056 		if (error) {
1057 			if (doingdirectory && newparent) {
1058 				dp->i_effnlink--;
1059 				dp->i_nlink--;
1060 				dp->i_flag |= IN_CHANGE;
1061 				if (DOINGSOFTDEP(tdvp))
1062 					softdep_change_linkcnt(dp);
1063 				(void)UFS_UPDATE(tdvp, 1);
1064 			}
1065 			goto bad;
1066 		}
1067 		VN_KNOTE(tdvp, NOTE_WRITE);
1068 		vput(tdvp);
1069 	} else {
1070 		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1071 			panic("ufs_rename: EXDEV");
1072 		/*
1073 		 * Short circuit rename(foo, foo).
1074 		 */
1075 		if (xp->i_number == ip->i_number)
1076 			panic("ufs_rename: same file");
1077 		/*
1078 		 * If the parent directory is "sticky", then the user must
1079 		 * own the parent directory, or the destination of the rename,
1080 		 * otherwise the destination may not be changed (except by
1081 		 * root). This implements append-only directories.
1082 		 */
1083 		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
1084 		    tcnp->cn_cred->cr_uid != dp->i_uid &&
1085 		    xp->i_uid != tcnp->cn_cred->cr_uid) {
1086 			error = EPERM;
1087 			goto bad;
1088 		}
1089 		/*
1090 		 * Target must be empty if a directory and have no links
1091 		 * to it. Also, ensure source and target are compatible
1092 		 * (both directories, or both not directories).
1093 		 */
1094 		if ((xp->i_mode&IFMT) == IFDIR) {
1095 			if ((xp->i_effnlink > 2) ||
1096 			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
1097 				error = ENOTEMPTY;
1098 				goto bad;
1099 			}
1100 			if (!doingdirectory) {
1101 				error = ENOTDIR;
1102 				goto bad;
1103 			}
1104 			cache_purge(tdvp);
1105 		} else if (doingdirectory) {
1106 			error = EISDIR;
1107 			goto bad;
1108 		}
1109 		error = ufs_dirrewrite(dp, xp, ip->i_number,
1110 		    IFTODT(ip->i_mode),
1111 		    (doingdirectory && newparent) ? newparent : doingdirectory);
1112 		if (error)
1113 			goto bad;
1114 		if (doingdirectory) {
1115 			if (!newparent) {
1116 				dp->i_effnlink--;
1117 				if (DOINGSOFTDEP(tdvp))
1118 					softdep_change_linkcnt(dp);
1119 			}
1120 			xp->i_effnlink--;
1121 			if (DOINGSOFTDEP(tvp))
1122 				softdep_change_linkcnt(xp);
1123 		}
1124 		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
1125 			/*
1126 			 * Truncate inode. The only stuff left in the directory
1127 			 * is "." and "..". The "." reference is inconsequential
1128 			 * since we are quashing it. We have removed the "."
1129 			 * reference and the reference in the parent directory,
1130 			 * but there may be other hard links. The soft
1131 			 * dependency code will arrange to do these operations
1132 			 * after the parent directory entry has been deleted on
1133 			 * disk, so when running with that code we avoid doing
1134 			 * them now.
1135 			 */
1136 			if (!newparent) {
1137 				dp->i_nlink--;
1138 				dp->i_flag |= IN_CHANGE;
1139 			}
1140 			xp->i_nlink--;
1141 			xp->i_flag |= IN_CHANGE;
1142 			ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
1143 			if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
1144 			    tcnp->cn_cred, tcnp->cn_td)) != 0)
1145 				goto bad;
1146 		}
1147 		VN_KNOTE(tdvp, NOTE_WRITE);
1148 		vput(tdvp);
1149 		VN_KNOTE(tvp, NOTE_DELETE);
1150 		vput(tvp);
1151 		xp = NULL;
1152 	}
1153 
1154 	/*
1155 	 * 3) Unlink the source.
1156 	 */
1157 	fcnp->cn_flags &= ~CNP_MODMASK;
1158 	fcnp->cn_flags |= CNP_LOCKPARENT | CNP_LOCKLEAF;
1159 	if ((fcnp->cn_flags & CNP_SAVESTART) == 0)
1160 		panic("ufs_rename: lost from startdir");
1161 	VREF(fdvp);
1162 	error = relookup(fdvp, &fvp, fcnp);
1163 	if (error == 0)
1164 		vrele(fdvp);
1165 	if (fvp != NULL) {
1166 		xp = VTOI(fvp);
1167 		dp = VTOI(fdvp);
1168 	} else {
1169 		/*
1170 		 * From name has disappeared.
1171 		 */
1172 		if (doingdirectory)
1173 			panic("ufs_rename: lost dir entry");
1174 		vrele(ap->a_fvp);
1175 		return (0);
1176 	}
1177 	/*
1178 	 * Ensure that the directory entry still exists and has not
1179 	 * changed while the new name has been entered. If the source is
1180 	 * a file then the entry may have been unlinked or renamed. In
1181 	 * either case there is no further work to be done. If the source
1182 	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
1183 	 * flag ensures that it cannot be moved by another rename or removed
1184 	 * by a rmdir.
1185 	 */
1186 	if (xp != ip) {
1187 		if (doingdirectory)
1188 			panic("ufs_rename: lost dir entry");
1189 	} else {
1190 		/*
1191 		 * If the source is a directory with a
1192 		 * new parent, the link count of the old
1193 		 * parent directory must be decremented
1194 		 * and ".." set to point to the new parent.
1195 		 */
1196 		if (doingdirectory && newparent) {
1197 			xp->i_offset = mastertemplate.dot_reclen;
1198 			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
1199 			cache_purge(fdvp);
1200 		}
1201 		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
1202 		xp->i_flag &= ~IN_RENAME;
1203 	}
1204 	VN_KNOTE(fvp, NOTE_RENAME);
1205 	if (dp)
1206 		vput(fdvp);
1207 	if (xp)
1208 		vput(fvp);
1209 	vrele(ap->a_fvp);
1210 	return (error);
1211 
1212 bad:
1213 	if (xp)
1214 		vput(ITOV(xp));
1215 	vput(ITOV(dp));
1216 out:
1217 	if (doingdirectory)
1218 		ip->i_flag &= ~IN_RENAME;
1219 	if (vn_lock(fvp, NULL, LK_EXCLUSIVE, td) == 0) {
1220 		ip->i_effnlink--;
1221 		ip->i_nlink--;
1222 		ip->i_flag |= IN_CHANGE;
1223 		ip->i_flag &= ~IN_RENAME;
1224 		if (DOINGSOFTDEP(fvp))
1225 			softdep_change_linkcnt(ip);
1226 		vput(fvp);
1227 	} else
1228 		vrele(fvp);
1229 	return (error);
1230 }
1231 
1232 /*
1233  * Mkdir system call
1234  */
1235 int
1236 ufs_mkdir(ap)
1237 	struct vop_mkdir_args /* {
1238 		struct vnode *a_dvp;
1239 		struct vnode **a_vpp;
1240 		struct componentname *a_cnp;
1241 		struct vattr *a_vap;
1242 	} */ *ap;
1243 {
1244 	struct vnode *dvp = ap->a_dvp;
1245 	struct vattr *vap = ap->a_vap;
1246 	struct componentname *cnp = ap->a_cnp;
1247 	struct inode *ip, *dp;
1248 	struct vnode *tvp;
1249 	struct buf *bp;
1250 	struct dirtemplate dirtemplate, *dtp;
1251 	struct direct newdir;
1252 	int error, dmode;
1253 	long blkoff;
1254 
1255 #ifdef DIAGNOSTIC
1256 	if ((cnp->cn_flags & CNP_HASBUF) == 0)
1257 		panic("ufs_mkdir: no name");
1258 #endif
1259 	dp = VTOI(dvp);
1260 	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1261 		error = EMLINK;
1262 		goto out;
1263 	}
1264 	dmode = vap->va_mode & 0777;
1265 	dmode |= IFDIR;
1266 	/*
1267 	 * Must simulate part of ufs_makeinode here to acquire the inode,
1268 	 * but not have it entered in the parent directory. The entry is
1269 	 * made later after writing "." and ".." entries.
1270 	 */
1271 	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
1272 	if (error)
1273 		goto out;
1274 	ip = VTOI(tvp);
1275 	ip->i_gid = dp->i_gid;
1276 #ifdef SUIDDIR
1277 	{
1278 #ifdef QUOTA
1279 		struct ucred ucred, *ucp;
1280 		ucp = cnp->cn_cred;
1281 #endif
1282 		/*
1283 		 * If we are hacking owners here, (only do this where told to)
1284 		 * and we are not giving it TO root, (would subvert quotas)
1285 		 * then go ahead and give it to the other user.
1286 		 * The new directory also inherits the SUID bit.
1287 		 * If user's UID and dir UID are the same,
1288 		 * 'give it away' so that the SUID is still forced on.
1289 		 */
1290 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1291 		    (dp->i_mode & ISUID) && dp->i_uid) {
1292 			dmode |= ISUID;
1293 			ip->i_uid = dp->i_uid;
1294 #ifdef QUOTA
1295 			if (dp->i_uid != cnp->cn_cred->cr_uid) {
1296 				/*
1297 				 * Make sure the correct user gets charged
1298 				 * for the space.
1299 				 * Make a dummy credential for the victim.
1300 				 * XXX This seems to never be accessed out of
1301 				 * our context so a stack variable is ok.
1302 				 */
1303 				ucred.cr_ref = 1;
1304 				ucred.cr_uid = ip->i_uid;
1305 				ucred.cr_ngroups = 1;
1306 				ucred.cr_groups[0] = dp->i_gid;
1307 				ucp = &ucred;
1308 			}
1309 #endif
1310 		} else
1311 			ip->i_uid = cnp->cn_cred->cr_uid;
1312 #ifdef QUOTA
1313 		if ((error = getinoquota(ip)) ||
1314 	    	    (error = chkiq(ip, 1, ucp, 0))) {
1315 			UFS_VFREE(tvp, ip->i_number, dmode);
1316 			vput(tvp);
1317 			return (error);
1318 		}
1319 #endif
1320 	}
1321 #else	/* !SUIDDIR */
1322 	ip->i_uid = cnp->cn_cred->cr_uid;
1323 #ifdef QUOTA
1324 	if ((error = getinoquota(ip)) ||
1325 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
1326 		UFS_VFREE(tvp, ip->i_number, dmode);
1327 		vput(tvp);
1328 		return (error);
1329 	}
1330 #endif
1331 #endif	/* !SUIDDIR */
1332 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1333 	ip->i_mode = dmode;
1334 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
1335 	ip->i_effnlink = 2;
1336 	ip->i_nlink = 2;
1337 	if (DOINGSOFTDEP(tvp))
1338 		softdep_change_linkcnt(ip);
1339 	if (cnp->cn_flags & CNP_ISWHITEOUT)
1340 		ip->i_flags |= UF_OPAQUE;
1341 
1342 	/*
1343 	 * Bump link count in parent directory to reflect work done below.
1344 	 * Should be done before reference is created so cleanup is
1345 	 * possible if we crash.
1346 	 */
1347 	dp->i_effnlink++;
1348 	dp->i_nlink++;
1349 	dp->i_flag |= IN_CHANGE;
1350 	if (DOINGSOFTDEP(dvp))
1351 		softdep_change_linkcnt(dp);
1352 	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
1353 	if (error)
1354 		goto bad;
1355 
1356 	/*
1357 	 * Initialize directory with "." and ".." from static template.
1358 	 */
1359 	if (dvp->v_mount->mnt_maxsymlinklen > 0
1360 	)
1361 		dtp = &mastertemplate;
1362 	else
1363 		dtp = (struct dirtemplate *)&omastertemplate;
1364 	dirtemplate = *dtp;
1365 	dirtemplate.dot_ino = ip->i_number;
1366 	dirtemplate.dotdot_ino = dp->i_number;
1367 	if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
1368 	    B_CLRBUF, &bp)) != 0)
1369 		goto bad;
1370 	ip->i_size = DIRBLKSIZ;
1371 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
1372 	vnode_pager_setsize(tvp, (u_long)ip->i_size);
1373 	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
1374 	if (DOINGSOFTDEP(tvp)) {
1375 		/*
1376 		 * Ensure that the entire newly allocated block is a
1377 		 * valid directory so that future growth within the
1378 		 * block does not have to ensure that the block is
1379 		 * written before the inode.
1380 		 */
1381 		blkoff = DIRBLKSIZ;
1382 		while (blkoff < bp->b_bcount) {
1383 			((struct direct *)
1384 			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
1385 			blkoff += DIRBLKSIZ;
1386 		}
1387 	}
1388 	if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) |
1389 				       DOINGASYNC(tvp)))) != 0) {
1390 		(void)VOP_BWRITE(bp->b_vp, bp);
1391 		goto bad;
1392 	}
1393 	/*
1394 	 * Directory set up, now install its entry in the parent directory.
1395 	 *
1396 	 * If we are not doing soft dependencies, then we must write out the
1397 	 * buffer containing the new directory body before entering the new
1398 	 * name in the parent. If we are doing soft dependencies, then the
1399 	 * buffer containing the new directory body will be passed to and
1400 	 * released in the soft dependency code after the code has attached
1401 	 * an appropriate ordering dependency to the buffer which ensures that
1402 	 * the buffer is written before the new name is written in the parent.
1403 	 */
1404 	if (DOINGASYNC(dvp))
1405 		bdwrite(bp);
1406 	else if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp->b_vp, bp))))
1407 		goto bad;
1408 	ufs_makedirentry(ip, cnp, &newdir);
1409 	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
1410 
1411 bad:
1412 	if (error == 0) {
1413 		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1414 		*ap->a_vpp = tvp;
1415 	} else {
1416 		dp->i_effnlink--;
1417 		dp->i_nlink--;
1418 		dp->i_flag |= IN_CHANGE;
1419 		if (DOINGSOFTDEP(dvp))
1420 			softdep_change_linkcnt(dp);
1421 		/*
1422 		 * No need to do an explicit VOP_TRUNCATE here, vrele will
1423 		 * do this for us because we set the link count to 0.
1424 		 */
1425 		ip->i_effnlink = 0;
1426 		ip->i_nlink = 0;
1427 		ip->i_flag |= IN_CHANGE;
1428 		if (DOINGSOFTDEP(tvp))
1429 			softdep_change_linkcnt(ip);
1430 		vput(tvp);
1431 	}
1432 out:
1433 	return (error);
1434 }
1435 
1436 /*
1437  * Rmdir system call.
1438  */
1439 int
1440 ufs_rmdir(ap)
1441 	struct vop_rmdir_args /* {
1442 		struct vnode *a_dvp;
1443 		struct vnode *a_vp;
1444 		struct componentname *a_cnp;
1445 	} */ *ap;
1446 {
1447 	struct vnode *vp = ap->a_vp;
1448 	struct vnode *dvp = ap->a_dvp;
1449 	struct componentname *cnp = ap->a_cnp;
1450 	struct inode *ip, *dp;
1451 	int error, ioflag;
1452 
1453 	ip = VTOI(vp);
1454 	dp = VTOI(dvp);
1455 
1456 	/*
1457 	 * Do not remove a directory that is in the process of being renamed.
1458 	 * Verify the directory is empty (and valid). Rmdir ".." will not be
1459 	 * valid since ".." will contain a reference to the current directory
1460 	 * and thus be non-empty. Do not allow the removal of mounted on
1461 	 * directories (this can happen when an NFS exported filesystem
1462 	 * tries to remove a locally mounted on directory).
1463 	 */
1464 	error = 0;
1465 	if (ip->i_flag & IN_RENAME) {
1466 		error = EINVAL;
1467 		goto out;
1468 	}
1469 	if (ip->i_effnlink != 2 ||
1470 	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1471 		error = ENOTEMPTY;
1472 		goto out;
1473 	}
1474 	if ((dp->i_flags & APPEND)
1475 	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1476 		error = EPERM;
1477 		goto out;
1478 	}
1479 	if (vp->v_mountedhere != 0) {
1480 		error = EINVAL;
1481 		goto out;
1482 	}
1483 	/*
1484 	 * Delete reference to directory before purging
1485 	 * inode.  If we crash in between, the directory
1486 	 * will be reattached to lost+found,
1487 	 */
1488 	dp->i_effnlink--;
1489 	ip->i_effnlink--;
1490 	if (DOINGSOFTDEP(vp)) {
1491 		softdep_change_linkcnt(dp);
1492 		softdep_change_linkcnt(ip);
1493 	}
1494 	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
1495 	if (error) {
1496 		dp->i_effnlink++;
1497 		ip->i_effnlink++;
1498 		if (DOINGSOFTDEP(vp)) {
1499 			softdep_change_linkcnt(dp);
1500 			softdep_change_linkcnt(ip);
1501 		}
1502 		goto out;
1503 	}
1504 	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1505 	cache_purge(dvp);
1506 	/*
1507 	 * Truncate inode. The only stuff left in the directory is "." and
1508 	 * "..". The "." reference is inconsequential since we are quashing
1509 	 * it. The soft dependency code will arrange to do these operations
1510 	 * after the parent directory entry has been deleted on disk, so
1511 	 * when running with that code we avoid doing them now.
1512 	 */
1513 	if (!DOINGSOFTDEP(vp)) {
1514 		dp->i_nlink--;
1515 		dp->i_flag |= IN_CHANGE;
1516 		ip->i_nlink--;
1517 		ip->i_flag |= IN_CHANGE;
1518 		ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
1519 		error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
1520 		    cnp->cn_td);
1521 	}
1522 	cache_purge(vp);
1523 #ifdef UFS_DIRHASH
1524 	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
1525 	if (ip->i_dirhash != NULL)
1526 		ufsdirhash_free(ip);
1527 #endif
1528 out:
1529 	VN_KNOTE(vp, NOTE_DELETE);
1530 	return (error);
1531 }
1532 
1533 /*
1534  * symlink -- make a symbolic link
1535  */
1536 int
1537 ufs_symlink(ap)
1538 	struct vop_symlink_args /* {
1539 		struct vnode *a_dvp;
1540 		struct vnode **a_vpp;
1541 		struct componentname *a_cnp;
1542 		struct vattr *a_vap;
1543 		char *a_target;
1544 	} */ *ap;
1545 {
1546 	struct vnode *vp, **vpp = ap->a_vpp;
1547 	struct inode *ip;
1548 	int len, error;
1549 
1550 	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1551 	    vpp, ap->a_cnp);
1552 	if (error)
1553 		return (error);
1554 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
1555 	vp = *vpp;
1556 	len = strlen(ap->a_target);
1557 	if (len < vp->v_mount->mnt_maxsymlinklen) {
1558 		ip = VTOI(vp);
1559 		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
1560 		ip->i_size = len;
1561 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1562 	} else
1563 		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1564 		    UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred,
1565 		    (int *)0, NULL);
1566 	if (error)
1567 		vput(vp);
1568 	return (error);
1569 }
1570 
1571 /*
1572  * Vnode op for reading directories.
1573  *
1574  * The routine below assumes that the on-disk format of a directory
1575  * is the same as that defined by <sys/dirent.h>. If the on-disk
1576  * format changes, then it will be necessary to do a conversion
1577  * from the on-disk format that read returns to the format defined
1578  * by <sys/dirent.h>.
1579  */
1580 int
1581 ufs_readdir(ap)
1582 	struct vop_readdir_args /* {
1583 		struct vnode *a_vp;
1584 		struct uio *a_uio;
1585 		struct ucred *a_cred;
1586 		int *a_eofflag;
1587 		int *ncookies;
1588 		u_long **a_cookies;
1589 	} */ *ap;
1590 {
1591 	struct uio *uio = ap->a_uio;
1592 	int error;
1593 	size_t count, lost;
1594 	off_t off;
1595 
1596 	if (ap->a_ncookies != NULL)
1597 		/*
1598 		 * Ensure that the block is aligned.  The caller can use
1599 		 * the cookies to determine where in the block to start.
1600 		 */
1601 		uio->uio_offset &= ~(DIRBLKSIZ - 1);
1602 	off = uio->uio_offset;
1603 	count = uio->uio_resid;
1604 	/* Make sure we don't return partial entries. */
1605 	if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
1606 		return (EINVAL);
1607 	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
1608 	lost = uio->uio_resid - count;
1609 	uio->uio_resid = count;
1610 	uio->uio_iov->iov_len = count;
1611 #	if (BYTE_ORDER == LITTLE_ENDIAN)
1612 		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
1613 			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1614 		} else {
1615 			struct dirent *dp, *edp;
1616 			struct uio auio;
1617 			struct iovec aiov;
1618 			caddr_t dirbuf;
1619 			int readcnt;
1620 			u_char tmp;
1621 
1622 			auio = *uio;
1623 			auio.uio_iov = &aiov;
1624 			auio.uio_iovcnt = 1;
1625 			auio.uio_segflg = UIO_SYSSPACE;
1626 			aiov.iov_len = count;
1627 			MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
1628 			aiov.iov_base = dirbuf;
1629 			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
1630 			if (error == 0) {
1631 				readcnt = count - auio.uio_resid;
1632 				edp = (struct dirent *)&dirbuf[readcnt];
1633 				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
1634 					tmp = dp->d_namlen;
1635 					dp->d_namlen = dp->d_type;
1636 					dp->d_type = tmp;
1637 					if (dp->d_reclen > 0) {
1638 						dp = (struct dirent *)
1639 						    ((char *)dp + dp->d_reclen);
1640 					} else {
1641 						error = EIO;
1642 						break;
1643 					}
1644 				}
1645 				if (dp >= edp)
1646 					error = uiomove(dirbuf, readcnt, uio);
1647 			}
1648 			FREE(dirbuf, M_TEMP);
1649 		}
1650 #	else
1651 		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
1652 #	endif
1653 	if (!error && ap->a_ncookies != NULL) {
1654 		struct dirent* dpStart;
1655 		struct dirent* dpEnd;
1656 		struct dirent* dp;
1657 		int ncookies;
1658 		u_long *cookies;
1659 		u_long *cookiep;
1660 
1661 		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
1662 			panic("ufs_readdir: unexpected uio from NFS server");
1663 		dpStart = (struct dirent *)
1664 		     (uio->uio_iov->iov_base - (uio->uio_offset - off));
1665 		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
1666 		for (dp = dpStart, ncookies = 0;
1667 		     dp < dpEnd;
1668 		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
1669 			ncookies++;
1670 		MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
1671 		    M_WAITOK);
1672 		for (dp = dpStart, cookiep = cookies;
1673 		     dp < dpEnd;
1674 		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
1675 			off += dp->d_reclen;
1676 			*cookiep++ = (u_long) off;
1677 		}
1678 		*ap->a_ncookies = ncookies;
1679 		*ap->a_cookies = cookies;
1680 	}
1681 	uio->uio_resid += lost;
1682 	if (ap->a_eofflag)
1683 	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
1684 	return (error);
1685 }
1686 
1687 /*
1688  * Return target name of a symbolic link
1689  */
1690 int
1691 ufs_readlink(ap)
1692 	struct vop_readlink_args /* {
1693 		struct vnode *a_vp;
1694 		struct uio *a_uio;
1695 		struct ucred *a_cred;
1696 	} */ *ap;
1697 {
1698 	struct vnode *vp = ap->a_vp;
1699 	struct inode *ip = VTOI(vp);
1700 	int isize;
1701 
1702 	isize = ip->i_size;
1703 	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
1704 	    (ip->i_din.di_blocks == 0)) {	/* XXX - for old fastlink support */
1705 		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
1706 		return (0);
1707 	}
1708 	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1709 }
1710 
1711 /*
1712  * Calculate the logical to physical mapping if not done already,
1713  * then call the device strategy routine.
1714  *
1715  * In order to be able to swap to a file, the VOP_BMAP operation may not
1716  * deadlock on memory.  See ufs_bmap() for details.
1717  */
1718 int
1719 ufs_strategy(ap)
1720 	struct vop_strategy_args /* {
1721 		struct vnode *a_vp;
1722 		struct buf *a_bp;
1723 	} */ *ap;
1724 {
1725 	struct buf *bp = ap->a_bp;
1726 	struct vnode *vp = ap->a_vp;
1727 	struct inode *ip;
1728 	int error;
1729 
1730 	ip = VTOI(vp);
1731 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1732 		panic("ufs_strategy: spec");
1733 	if (bp->b_blkno == bp->b_lblkno) {
1734 		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
1735 		if (error) {
1736 			bp->b_error = error;
1737 			bp->b_flags |= B_ERROR;
1738 			biodone(bp);
1739 			return (error);
1740 		}
1741 		if ((long)bp->b_blkno == -1)
1742 			vfs_bio_clrbuf(bp);
1743 	}
1744 	if ((long)bp->b_blkno == -1) {
1745 		biodone(bp);
1746 		return (0);
1747 	}
1748 	vp = ip->i_devvp;
1749 	bp->b_dev = vp->v_rdev;
1750 	VOP_STRATEGY(vp, bp);
1751 	return (0);
1752 }
1753 
1754 /*
1755  * Print out the contents of an inode.
1756  */
1757 int
1758 ufs_print(ap)
1759 	struct vop_print_args /* {
1760 		struct vnode *a_vp;
1761 	} */ *ap;
1762 {
1763 	struct vnode *vp = ap->a_vp;
1764 	struct inode *ip = VTOI(vp);
1765 
1766 	printf("tag VT_UFS, ino %lu, on dev %s (%d, %d)",
1767 	    (u_long)ip->i_number, devtoname(ip->i_dev), major(ip->i_dev),
1768 	    minor(ip->i_dev));
1769 	if (vp->v_type == VFIFO)
1770 		fifo_printinfo(vp);
1771 	lockmgr_printinfo(&ip->i_lock);
1772 	printf("\n");
1773 	return (0);
1774 }
1775 
1776 /*
1777  * Read wrapper for special devices.
1778  */
1779 int
1780 ufsspec_read(ap)
1781 	struct vop_read_args /* {
1782 		struct vnode *a_vp;
1783 		struct uio *a_uio;
1784 		int  a_ioflag;
1785 		struct ucred *a_cred;
1786 	} */ *ap;
1787 {
1788 	int error, resid;
1789 	struct inode *ip;
1790 	struct uio *uio;
1791 
1792 	uio = ap->a_uio;
1793 	resid = uio->uio_resid;
1794 	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap);
1795 	/*
1796 	 * The inode may have been revoked during the call, so it must not
1797 	 * be accessed blindly here or in the other wrapper functions.
1798 	 */
1799 	ip = VTOI(ap->a_vp);
1800 	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1801 		ip->i_flag |= IN_ACCESS;
1802 	return (error);
1803 }
1804 
1805 /*
1806  * Write wrapper for special devices.
1807  */
1808 int
1809 ufsspec_write(ap)
1810 	struct vop_write_args /* {
1811 		struct vnode *a_vp;
1812 		struct uio *a_uio;
1813 		int  a_ioflag;
1814 		struct ucred *a_cred;
1815 	} */ *ap;
1816 {
1817 	int error, resid;
1818 	struct inode *ip;
1819 	struct uio *uio;
1820 
1821 	uio = ap->a_uio;
1822 	resid = uio->uio_resid;
1823 	error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap);
1824 	ip = VTOI(ap->a_vp);
1825 	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1826 		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1827 	return (error);
1828 }
1829 
1830 /*
1831  * Close wrapper for special devices.
1832  *
1833  * Update the times on the inode then do device close.
1834  */
1835 int
1836 ufsspec_close(ap)
1837 	struct vop_close_args /* {
1838 		struct vnode *a_vp;
1839 		int  a_fflag;
1840 		struct ucred *a_cred;
1841 		struct thread *a_td;
1842 	} */ *ap;
1843 {
1844 	struct vnode *vp = ap->a_vp;
1845 	lwkt_tokref vlock;
1846 
1847 	lwkt_gettoken(&vlock, vp->v_interlock);
1848 	if (vp->v_usecount > 1)
1849 		ufs_itimes(vp);
1850 	lwkt_reltoken(&vlock);
1851 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
1852 }
1853 
1854 /*
1855  * Read wrapper for fifos.
1856  */
1857 int
1858 ufsfifo_read(ap)
1859 	struct vop_read_args /* {
1860 		struct vnode *a_vp;
1861 		struct uio *a_uio;
1862 		int  a_ioflag;
1863 		struct ucred *a_cred;
1864 	} */ *ap;
1865 {
1866 	int error, resid;
1867 	struct inode *ip;
1868 	struct uio *uio;
1869 
1870 	uio = ap->a_uio;
1871 	resid = uio->uio_resid;
1872 	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap);
1873 	ip = VTOI(ap->a_vp);
1874 	if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL &&
1875 	    (uio->uio_resid != resid || (error == 0 && resid != 0)))
1876 		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1877 	return (error);
1878 }
1879 
1880 /*
1881  * Write wrapper for fifos.
1882  */
1883 int
1884 ufsfifo_write(ap)
1885 	struct vop_write_args /* {
1886 		struct vnode *a_vp;
1887 		struct uio *a_uio;
1888 		int  a_ioflag;
1889 		struct ucred *a_cred;
1890 	} */ *ap;
1891 {
1892 	int error, resid;
1893 	struct inode *ip;
1894 	struct uio *uio;
1895 
1896 	uio = ap->a_uio;
1897 	resid = uio->uio_resid;
1898 	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap);
1899 	ip = VTOI(ap->a_vp);
1900 	if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1901 		VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1902 	return (error);
1903 }
1904 
1905 /*
1906  * Close wrapper for fifos.
1907  *
1908  * Update the times on the inode then do device close.
1909  */
1910 int
1911 ufsfifo_close(ap)
1912 	struct vop_close_args /* {
1913 		struct vnode *a_vp;
1914 		int  a_fflag;
1915 		struct ucred *a_cred;
1916 		struct thread *a_td;
1917 	} */ *ap;
1918 {
1919 	struct vnode *vp = ap->a_vp;
1920 	lwkt_tokref vlock;
1921 
1922 	lwkt_gettoken(&vlock, vp->v_interlock);
1923 	if (vp->v_usecount > 1)
1924 		ufs_itimes(vp);
1925 	lwkt_reltoken(&vlock);
1926 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
1927 }
1928 
1929 /*
1930  * Kqfilter wrapper for fifos.
1931  *
1932  * Fall through to ufs kqfilter routines if needed
1933  */
1934 int
1935 ufsfifo_kqfilter(ap)
1936 	struct vop_kqfilter_args *ap;
1937 {
1938 	int error;
1939 
1940 	error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilter), ap);
1941 	if (error)
1942 		error = ufs_kqfilter(ap);
1943 	return (error);
1944 }
1945 
1946 /*
1947  * Return POSIX pathconf information applicable to ufs filesystems.
1948  */
1949 int
1950 ufs_pathconf(ap)
1951 	struct vop_pathconf_args /* {
1952 		struct vnode *a_vp;
1953 		int a_name;
1954 		int *a_retval;
1955 	} */ *ap;
1956 {
1957 
1958 	switch (ap->a_name) {
1959 	case _PC_LINK_MAX:
1960 		*ap->a_retval = LINK_MAX;
1961 		return (0);
1962 	case _PC_NAME_MAX:
1963 		*ap->a_retval = NAME_MAX;
1964 		return (0);
1965 	case _PC_PATH_MAX:
1966 		*ap->a_retval = PATH_MAX;
1967 		return (0);
1968 	case _PC_PIPE_BUF:
1969 		*ap->a_retval = PIPE_BUF;
1970 		return (0);
1971 	case _PC_CHOWN_RESTRICTED:
1972 		*ap->a_retval = 1;
1973 		return (0);
1974 	case _PC_NO_TRUNC:
1975 		*ap->a_retval = 1;
1976 		return (0);
1977 	default:
1978 		return (EINVAL);
1979 	}
1980 	/* NOTREACHED */
1981 }
1982 
1983 /*
1984  * Advisory record locking support
1985  */
1986 int
1987 ufs_advlock(ap)
1988 	struct vop_advlock_args /* {
1989 		struct vnode *a_vp;
1990 		caddr_t  a_id;
1991 		int  a_op;
1992 		struct flock *a_fl;
1993 		int  a_flags;
1994 	} */ *ap;
1995 {
1996 	struct inode *ip = VTOI(ap->a_vp);
1997 
1998 	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
1999 }
2000 
2001 /*
2002  * Initialize the vnode associated with a new inode, handle aliased
2003  * vnodes.
2004  */
2005 int
2006 ufs_vinit(mntp, specops, fifoops, vpp)
2007 	struct mount *mntp;
2008 	vop_t **specops;
2009 	vop_t **fifoops;
2010 	struct vnode **vpp;
2011 {
2012 	struct inode *ip;
2013 	struct vnode *vp;
2014 	struct timeval tv;
2015 
2016 	vp = *vpp;
2017 	ip = VTOI(vp);
2018 	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2019 	case VCHR:
2020 	case VBLK:
2021 		vp->v_op = specops;
2022 		addaliasu(vp, ip->i_rdev);
2023 		break;
2024 	case VFIFO:
2025 		vp->v_op = fifoops;
2026 		break;
2027 	default:
2028 		break;
2029 
2030 	}
2031 	if (ip->i_number == ROOTINO)
2032 		vp->v_flag |= VROOT;
2033 	/*
2034 	 * Initialize modrev times
2035 	 */
2036 	getmicrouptime(&tv);
2037 	SETHIGH(ip->i_modrev, tv.tv_sec);
2038 	SETLOW(ip->i_modrev, tv.tv_usec * 4294);
2039 	*vpp = vp;
2040 	return (0);
2041 }
2042 
2043 /*
2044  * Allocate a new inode.
2045  */
2046 int
2047 ufs_makeinode(mode, dvp, vpp, cnp)
2048 	int mode;
2049 	struct vnode *dvp;
2050 	struct vnode **vpp;
2051 	struct componentname *cnp;
2052 {
2053 	struct inode *ip, *pdir;
2054 	struct direct newdir;
2055 	struct vnode *tvp;
2056 	int error;
2057 
2058 	pdir = VTOI(dvp);
2059 #ifdef DIAGNOSTIC
2060 	if ((cnp->cn_flags & CNP_HASBUF) == 0)
2061 		panic("ufs_makeinode: no name");
2062 #endif
2063 	*vpp = NULL;
2064 	if ((mode & IFMT) == 0)
2065 		mode |= IFREG;
2066 
2067 	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
2068 	if (error)
2069 		return (error);
2070 	ip = VTOI(tvp);
2071 	ip->i_gid = pdir->i_gid;
2072 #ifdef SUIDDIR
2073 	{
2074 #ifdef QUOTA
2075 		struct ucred ucred, *ucp;
2076 		ucp = cnp->cn_cred;
2077 #endif
2078 		/*
2079 		 * If we are not the owner of the directory,
2080 		 * and we are hacking owners here, (only do this where told to)
2081 		 * and we are not giving it TO root, (would subvert quotas)
2082 		 * then go ahead and give it to the other user.
2083 		 * Note that this drops off the execute bits for security.
2084 		 */
2085 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
2086 		    (pdir->i_mode & ISUID) &&
2087 		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
2088 			ip->i_uid = pdir->i_uid;
2089 			mode &= ~07111;
2090 #ifdef QUOTA
2091 			/*
2092 			 * Make sure the correct user gets charged
2093 			 * for the space.
2094 			 * Quickly knock up a dummy credential for the victim.
2095 			 * XXX This seems to never be accessed out of our
2096 			 * context so a stack variable is ok.
2097 			 */
2098 			ucred.cr_ref = 1;
2099 			ucred.cr_uid = ip->i_uid;
2100 			ucred.cr_ngroups = 1;
2101 			ucred.cr_groups[0] = pdir->i_gid;
2102 			ucp = &ucred;
2103 #endif
2104 		} else
2105 			ip->i_uid = cnp->cn_cred->cr_uid;
2106 
2107 #ifdef QUOTA
2108 		if ((error = getinoquota(ip)) ||
2109 	    	    (error = chkiq(ip, 1, ucp, 0))) {
2110 			UFS_VFREE(tvp, ip->i_number, mode);
2111 			vput(tvp);
2112 			return (error);
2113 		}
2114 #endif
2115 	}
2116 #else	/* !SUIDDIR */
2117 	ip->i_uid = cnp->cn_cred->cr_uid;
2118 #ifdef QUOTA
2119 	if ((error = getinoquota(ip)) ||
2120 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2121 		UFS_VFREE(tvp, ip->i_number, mode);
2122 		vput(tvp);
2123 		return (error);
2124 	}
2125 #endif
2126 #endif	/* !SUIDDIR */
2127 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2128 	ip->i_mode = mode;
2129 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2130 	ip->i_effnlink = 1;
2131 	ip->i_nlink = 1;
2132 	if (DOINGSOFTDEP(tvp))
2133 		softdep_change_linkcnt(ip);
2134 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
2135 	    suser_cred(cnp->cn_cred, 0)) {
2136 		ip->i_mode &= ~ISGID;
2137 	}
2138 
2139 	if (cnp->cn_flags & CNP_ISWHITEOUT)
2140 		ip->i_flags |= UF_OPAQUE;
2141 
2142 	/*
2143 	 * Make sure inode goes to disk before directory entry.
2144 	 */
2145 	error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)));
2146 	if (error)
2147 		goto bad;
2148 	ufs_makedirentry(ip, cnp, &newdir);
2149 	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
2150 	if (error)
2151 		goto bad;
2152 	*vpp = tvp;
2153 	return (0);
2154 
2155 bad:
2156 	/*
2157 	 * Write error occurred trying to update the inode
2158 	 * or the directory so must deallocate the inode.
2159 	 */
2160 	ip->i_effnlink = 0;
2161 	ip->i_nlink = 0;
2162 	ip->i_flag |= IN_CHANGE;
2163 	if (DOINGSOFTDEP(tvp))
2164 		softdep_change_linkcnt(ip);
2165 	vput(tvp);
2166 	return (error);
2167 }
2168 
2169 static int
2170 ufs_missingop(ap)
2171 	struct vop_generic_args *ap;
2172 {
2173 
2174 	panic("no vop function for %s in ufs child", ap->a_desc->vdesc_name);
2175 	return (EOPNOTSUPP);
2176 }
2177 
2178 static struct filterops ufsread_filtops =
2179 	{ 1, NULL, filt_ufsdetach, filt_ufsread };
2180 static struct filterops ufswrite_filtops =
2181 	{ 1, NULL, filt_ufsdetach, filt_ufswrite };
2182 static struct filterops ufsvnode_filtops =
2183 	{ 1, NULL, filt_ufsdetach, filt_ufsvnode };
2184 
2185 static int
2186 ufs_kqfilter(ap)
2187 	struct vop_kqfilter_args /* {
2188 		struct vnode *a_vp;
2189 		struct knote *a_kn;
2190 	} */ *ap;
2191 {
2192 	struct vnode *vp = ap->a_vp;
2193 	struct knote *kn = ap->a_kn;
2194 	lwkt_tokref ilock;
2195 
2196 	switch (kn->kn_filter) {
2197 	case EVFILT_READ:
2198 		kn->kn_fop = &ufsread_filtops;
2199 		break;
2200 	case EVFILT_WRITE:
2201 		kn->kn_fop = &ufswrite_filtops;
2202 		break;
2203 	case EVFILT_VNODE:
2204 		kn->kn_fop = &ufsvnode_filtops;
2205 		break;
2206 	default:
2207 		return (1);
2208 	}
2209 
2210 	kn->kn_hook = (caddr_t)vp;
2211 
2212 	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
2213 	SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext);
2214 	lwkt_reltoken(&ilock);
2215 
2216 	return (0);
2217 }
2218 
2219 static void
2220 filt_ufsdetach(struct knote *kn)
2221 {
2222 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2223 	lwkt_tokref ilock;
2224 
2225 	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
2226 	SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note,
2227 	    kn, knote, kn_selnext);
2228 	lwkt_reltoken(&ilock);
2229 }
2230 
2231 /*ARGSUSED*/
2232 static int
2233 filt_ufsread(struct knote *kn, long hint)
2234 {
2235 	struct vnode *vp = (struct vnode *)kn->kn_hook;
2236 	struct inode *ip = VTOI(vp);
2237 
2238 	/*
2239 	 * filesystem is gone, so set the EOF flag and schedule
2240 	 * the knote for deletion.
2241 	 */
2242 	if (hint == NOTE_REVOKE) {
2243 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2244 		return (1);
2245 	}
2246 
2247         kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
2248         return (kn->kn_data != 0);
2249 }
2250 
2251 /*ARGSUSED*/
2252 static int
2253 filt_ufswrite(struct knote *kn, long hint)
2254 {
2255 
2256 	/*
2257 	 * filesystem is gone, so set the EOF flag and schedule
2258 	 * the knote for deletion.
2259 	 */
2260 	if (hint == NOTE_REVOKE)
2261 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
2262 
2263         kn->kn_data = 0;
2264         return (1);
2265 }
2266 
2267 static int
2268 filt_ufsvnode(struct knote *kn, long hint)
2269 {
2270 
2271 	if (kn->kn_sfflags & hint)
2272 		kn->kn_fflags |= hint;
2273 	if (hint == NOTE_REVOKE) {
2274 		kn->kn_flags |= EV_EOF;
2275 		return (1);
2276 	}
2277 	return (kn->kn_fflags != 0);
2278 }
2279 
2280 /* Global vfs data structures for ufs. */
2281 static vop_t **ufs_vnodeop_p;
2282 static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
2283 	{ &vop_default_desc,		(vop_t *) vop_defaultop },
2284 	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2285 	{ &vop_read_desc,		(vop_t *) ufs_missingop },
2286 	{ &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
2287 	{ &vop_write_desc,		(vop_t *) ufs_missingop },
2288 	{ &vop_access_desc,		(vop_t *) ufs_access },
2289 	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
2290 	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
2291 	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
2292 	{ &vop_close_desc,		(vop_t *) ufs_close },
2293 	{ &vop_create_desc,		(vop_t *) ufs_create },
2294 	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2295 	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2296 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2297 	{ &vop_link_desc,		(vop_t *) ufs_link },
2298 	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2299 	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
2300 	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
2301 	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
2302 	{ &vop_mmap_desc,		(vop_t *) ufs_mmap },
2303 	{ &vop_open_desc,		(vop_t *) ufs_open },
2304 	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
2305 	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
2306 	{ &vop_kqfilter_desc,		(vop_t *) ufs_kqfilter },
2307 	{ &vop_print_desc,		(vop_t *) ufs_print },
2308 	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
2309 	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
2310 	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2311 	{ &vop_remove_desc,		(vop_t *) ufs_remove },
2312 	{ &vop_rename_desc,		(vop_t *) ufs_rename },
2313 	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
2314 	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2315 	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
2316 	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
2317 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2318 	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
2319 	{ NULL, NULL }
2320 };
2321 static struct vnodeopv_desc ufs_vnodeop_opv_desc =
2322 	{ &ufs_vnodeop_p, ufs_vnodeop_entries };
2323 
2324 static vop_t **ufs_specop_p;
2325 static struct vnodeopv_entry_desc ufs_specop_entries[] = {
2326 	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
2327 	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2328 	{ &vop_access_desc,		(vop_t *) ufs_access },
2329 	{ &vop_close_desc,		(vop_t *) ufsspec_close },
2330 	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2331 	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2332 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2333 	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2334 	{ &vop_print_desc,		(vop_t *) ufs_print },
2335 	{ &vop_read_desc,		(vop_t *) ufsspec_read },
2336 	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2337 	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2338 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2339 	{ &vop_write_desc,		(vop_t *) ufsspec_write },
2340 	{ NULL, NULL }
2341 };
2342 static struct vnodeopv_desc ufs_specop_opv_desc =
2343 	{ &ufs_specop_p, ufs_specop_entries };
2344 
2345 static vop_t **ufs_fifoop_p;
2346 static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
2347 	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
2348 	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
2349 	{ &vop_access_desc,		(vop_t *) ufs_access },
2350 	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
2351 	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
2352 	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
2353 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
2354 	{ &vop_kqfilter_desc,		(vop_t *) ufsfifo_kqfilter },
2355 	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
2356 	{ &vop_print_desc,		(vop_t *) ufs_print },
2357 	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
2358 	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
2359 	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
2360 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
2361 	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
2362 	{ NULL, NULL }
2363 };
2364 static struct vnodeopv_desc ufs_fifoop_opv_desc =
2365 	{ &ufs_fifoop_p, ufs_fifoop_entries };
2366 
/*
 * Hand the three operation-vector descriptions (regular, special-file
 * and fifo vnodes) to VNODEOP_SET.
 * NOTE(review): VNODEOP_SET is defined outside this file; presumably it
 * registers each description so its vector gets built -- confirm.
 */
2367 VNODEOP_SET(ufs_vnodeop_opv_desc);
2368 VNODEOP_SET(ufs_specop_opv_desc);
2369 VNODEOP_SET(ufs_fifoop_opv_desc);
2370 
2371 int
2372 ufs_vnoperate(ap)
2373 	struct vop_generic_args /* {
2374 		struct vnodeop_desc *a_desc;
2375 	} */ *ap;
2376 {
2377 	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
2378 }
2379 
2380 int
2381 ufs_vnoperatefifo(ap)
2382 	struct vop_generic_args /* {
2383 		struct vnodeop_desc *a_desc;
2384 	} */ *ap;
2385 {
2386 	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
2387 }
2388 
2389 int
2390 ufs_vnoperatespec(ap)
2391 	struct vop_generic_args /* {
2392 		struct vnodeop_desc *a_desc;
2393 	} */ *ap;
2394 {
2395 	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
2396 }
2397