xref: /original-bsd/sys/ufs/ffs/ffs_vnops.c (revision 99986382)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)ffs_vnops.c	7.57 (Berkeley) 03/11/91
8  */
9 
10 #include "param.h"
11 #include "systm.h"
12 #include "user.h"
13 #include "kernel.h"
14 #include "file.h"
15 #include "stat.h"
16 #include "buf.h"
17 #include "proc.h"
18 #include "socket.h"
19 #include "socketvar.h"
20 #include "conf.h"
21 #include "mount.h"
22 #include "vnode.h"
23 #include "specdev.h"
24 #include "fcntl.h"
25 #include "malloc.h"
26 #include "../ufs/lockf.h"
27 #include "../ufs/quota.h"
28 #include "../ufs/inode.h"
29 #include "../ufs/fs.h"
30 
/*
 * Global vfs data structures for ufs
 *
 * Forward declarations of the ufs vnode operations.  They are needed
 * because the operation vectors below name these functions before any
 * of them has been defined.
 */
int	ufs_lookup();
int	ufs_create();
int	ufs_mknod();
int	ufs_open();
int	ufs_close();
int	ufs_access();
int	ufs_getattr();
int	ufs_setattr();
int	ufs_read();
int	ufs_write();
int	ufs_ioctl();
int	ufs_select();
int	ufs_mmap();
int	ufs_fsync();
int	ufs_seek();
int	ufs_remove();
int	ufs_link();
int	ufs_rename();
int	ufs_mkdir();
int	ufs_rmdir();
int	ufs_symlink();
int	ufs_readdir();
int	ufs_readlink();
int	ufs_abortop();
int	ufs_inactive();
int	ufs_reclaim();
int	ufs_lock();
int	ufs_unlock();
int	ufs_bmap();
int	ufs_strategy();
int	ufs_print();
int	ufs_islocked();
int	ufs_advlock();
68 
69 struct vnodeops ufs_vnodeops = {
70 	ufs_lookup,		/* lookup */
71 	ufs_create,		/* create */
72 	ufs_mknod,		/* mknod */
73 	ufs_open,		/* open */
74 	ufs_close,		/* close */
75 	ufs_access,		/* access */
76 	ufs_getattr,		/* getattr */
77 	ufs_setattr,		/* setattr */
78 	ufs_read,		/* read */
79 	ufs_write,		/* write */
80 	ufs_ioctl,		/* ioctl */
81 	ufs_select,		/* select */
82 	ufs_mmap,		/* mmap */
83 	ufs_fsync,		/* fsync */
84 	ufs_seek,		/* seek */
85 	ufs_remove,		/* remove */
86 	ufs_link,		/* link */
87 	ufs_rename,		/* rename */
88 	ufs_mkdir,		/* mkdir */
89 	ufs_rmdir,		/* rmdir */
90 	ufs_symlink,		/* symlink */
91 	ufs_readdir,		/* readdir */
92 	ufs_readlink,		/* readlink */
93 	ufs_abortop,		/* abortop */
94 	ufs_inactive,		/* inactive */
95 	ufs_reclaim,		/* reclaim */
96 	ufs_lock,		/* lock */
97 	ufs_unlock,		/* unlock */
98 	ufs_bmap,		/* bmap */
99 	ufs_strategy,		/* strategy */
100 	ufs_print,		/* print */
101 	ufs_islocked,		/* islocked */
102 	ufs_advlock,		/* advlock */
103 };
104 
/*
 * Forward declarations of the special-device operations (and the
 * generic nullop) named by the operation vectors in this file.
 */
int	spec_lookup();
int	spec_open();
int	ufsspec_read();
int	ufsspec_write();
int	spec_strategy();
int	spec_bmap();
int	spec_ioctl();
int	spec_select();
int	ufsspec_close();
int	spec_advlock();
int	spec_badop();
int	nullop();
117 
118 struct vnodeops spec_inodeops = {
119 	spec_lookup,		/* lookup */
120 	spec_badop,		/* create */
121 	spec_badop,		/* mknod */
122 	spec_open,		/* open */
123 	ufsspec_close,		/* close */
124 	ufs_access,		/* access */
125 	ufs_getattr,		/* getattr */
126 	ufs_setattr,		/* setattr */
127 	ufsspec_read,		/* read */
128 	ufsspec_write,		/* write */
129 	spec_ioctl,		/* ioctl */
130 	spec_select,		/* select */
131 	spec_badop,		/* mmap */
132 	nullop,			/* fsync */
133 	spec_badop,		/* seek */
134 	spec_badop,		/* remove */
135 	spec_badop,		/* link */
136 	spec_badop,		/* rename */
137 	spec_badop,		/* mkdir */
138 	spec_badop,		/* rmdir */
139 	spec_badop,		/* symlink */
140 	spec_badop,		/* readdir */
141 	spec_badop,		/* readlink */
142 	spec_badop,		/* abortop */
143 	ufs_inactive,		/* inactive */
144 	ufs_reclaim,		/* reclaim */
145 	ufs_lock,		/* lock */
146 	ufs_unlock,		/* unlock */
147 	spec_bmap,		/* bmap */
148 	spec_strategy,		/* strategy */
149 	ufs_print,		/* print */
150 	ufs_islocked,		/* islocked */
151 	spec_advlock,		/* advlock */
152 };
153 
#ifdef FIFO
/*
 * Forward declarations of the fifo operations named by fifo_inodeops.
 */
int	fifo_lookup();
int	fifo_open();
int	ufsfifo_read();
int	ufsfifo_write();
int	fifo_bmap();
int	fifo_ioctl();
int	fifo_select();
int	ufsfifo_close();
int	fifo_print();
int	fifo_advlock();
int	fifo_badop();

/*
 * Operation vector for fifo vnodes that live on a ufs.  Only compiled
 * when FIFO is defined.
 *
 * Positional initializer: the order of entries must not change.
 * fifo_badop fills the slots that have no fifo implementation here.
 */
struct vnodeops fifo_inodeops = {
	/* name lookup and node creation */
	fifo_lookup,		/* lookup */
	fifo_badop,		/* create */
	fifo_badop,		/* mknod */

	/* open/close, permissions and attributes */
	fifo_open,		/* open */
	ufsfifo_close,		/* close */
	ufs_access,		/* access */
	ufs_getattr,		/* getattr */
	ufs_setattr,		/* setattr */

	/* data access */
	ufsfifo_read,		/* read */
	ufsfifo_write,		/* write */
	fifo_ioctl,		/* ioctl */
	fifo_select,		/* select */
	fifo_badop,		/* mmap */
	nullop,			/* fsync */
	fifo_badop,		/* seek */

	/* directory and name space manipulation */
	fifo_badop,		/* remove */
	fifo_badop,		/* link */
	fifo_badop,		/* rename */
	fifo_badop,		/* mkdir */
	fifo_badop,		/* rmdir */
	fifo_badop,		/* symlink */
	fifo_badop,		/* readdir */
	fifo_badop,		/* readlink */
	fifo_badop,		/* abortop */

	/* vnode life cycle and locking */
	ufs_inactive,		/* inactive */
	ufs_reclaim,		/* reclaim */
	ufs_lock,		/* lock */
	ufs_unlock,		/* unlock */

	/* block mapping, I/O and miscellany */
	fifo_bmap,		/* bmap */
	fifo_badop,		/* strategy */
	ufs_print,		/* print */
	ufs_islocked,		/* islocked */
	fifo_advlock,		/* advlock */
};
#endif /* FIFO */
203 
204 enum vtype iftovt_tab[16] = {
205 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
206 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
207 };
208 int	vttoif_tab[9] = {
209 	0, IFREG, IFDIR, IFBLK, IFCHR, IFLNK, IFSOCK, IFIFO, IFMT,
210 };
211 
212 /*
213  * Create a regular file
214  */
215 ufs_create(ndp, vap)
216 	struct nameidata *ndp;
217 	struct vattr *vap;
218 {
219 	struct inode *ip;
220 	int error;
221 
222 	if (error = maknode(MAKEIMODE(vap->va_type, vap->va_mode), ndp, &ip))
223 		return (error);
224 	ndp->ni_vp = ITOV(ip);
225 	return (0);
226 }
227 
228 /*
229  * Mknod vnode call
230  */
231 /* ARGSUSED */
232 ufs_mknod(ndp, vap, cred)
233 	struct nameidata *ndp;
234 	struct ucred *cred;
235 	struct vattr *vap;
236 {
237 	register struct vnode *vp;
238 	struct inode *ip;
239 	int error;
240 
241 	if (error = maknode(MAKEIMODE(vap->va_type, vap->va_mode), ndp, &ip))
242 		return (error);
243 	ip->i_flag |= IACC|IUPD|ICHG;
244 	if (vap->va_rdev != VNOVAL) {
245 		/*
246 		 * Want to be able to use this to make badblock
247 		 * inodes, so don't truncate the dev number.
248 		 */
249 		ip->i_rdev = vap->va_rdev;
250 	}
251 	/*
252 	 * Remove inode so that it will be reloaded by iget and
253 	 * checked to see if it is an alias of an existing entry
254 	 * in the inode cache.
255 	 */
256 	vp = ITOV(ip);
257 	vput(vp);
258 	vp->v_type = VNON;
259 	vgone(vp);
260 	return (0);
261 }
262 
/*
 * Open vnode call.
 *
 * ufs keeps no per-open state, so there is nothing to do here:
 * the arguments are ignored and the call always succeeds (returns 0).
 */
/* ARGSUSED */
int
ufs_open(vp, mode, cred)
	struct vnode *vp;
	int mode;
	struct ucred *cred;
{

	return (0);
}
277 
278 /*
279  * Close called
280  *
281  * Update the times on the inode.
282  */
283 /* ARGSUSED */
284 ufs_close(vp, fflag, cred)
285 	struct vnode *vp;
286 	int fflag;
287 	struct ucred *cred;
288 {
289 	register struct inode *ip = VTOI(vp);
290 
291 	if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
292 		ITIMES(ip, &time, &time);
293 	return (0);
294 }
295 
296 /*
297  * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC.
298  * The mode is shifted to select the owner/group/other fields. The
299  * super user is granted all permissions.
300  */
301 ufs_access(vp, mode, cred)
302 	struct vnode *vp;
303 	register int mode;
304 	struct ucred *cred;
305 {
306 	register struct inode *ip = VTOI(vp);
307 	register gid_t *gp;
308 	int i, error;
309 
310 #ifdef DIAGNOSTIC
311 	if (!VOP_ISLOCKED(vp)) {
312 		vprint("ufs_access: not locked", vp);
313 		panic("ufs_access: not locked");
314 	}
315 #endif
316 #ifdef QUOTA
317 	if (mode & VWRITE) {
318 		switch (vp->v_type) {
319 		case VREG: case VDIR: case VLNK:
320 			if (error = getinoquota(ip))
321 				return (error);
322 		}
323 	}
324 #endif /* QUOTA */
325 	/*
326 	 * If you're the super-user, you always get access.
327 	 */
328 	if (cred->cr_uid == 0)
329 		return (0);
330 	/*
331 	 * Access check is based on only one of owner, group, public.
332 	 * If not owner, then check group. If not a member of the
333 	 * group, then check public access.
334 	 */
335 	if (cred->cr_uid != ip->i_uid) {
336 		mode >>= 3;
337 		gp = cred->cr_groups;
338 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
339 			if (ip->i_gid == *gp)
340 				goto found;
341 		mode >>= 3;
342 found:
343 		;
344 	}
345 	if ((ip->i_mode & mode) != 0)
346 		return (0);
347 	return (EACCES);
348 }
349 
350 /* ARGSUSED */
351 ufs_getattr(vp, vap, cred)
352 	struct vnode *vp;
353 	register struct vattr *vap;
354 	struct ucred *cred;
355 {
356 	register struct inode *ip = VTOI(vp);
357 
358 	ITIMES(ip, &time, &time);
359 	/*
360 	 * Copy from inode table
361 	 */
362 	vap->va_fsid = ip->i_dev;
363 	vap->va_fileid = ip->i_number;
364 	vap->va_mode = ip->i_mode & ~IFMT;
365 	vap->va_nlink = ip->i_nlink;
366 	vap->va_uid = ip->i_uid;
367 	vap->va_gid = ip->i_gid;
368 	vap->va_rdev = (dev_t)ip->i_rdev;
369 #ifdef tahoe
370 	vap->va_size = ip->i_size;
371 	vap->va_size_rsv = 0;
372 #else
373 	vap->va_qsize = ip->i_din.di_qsize;
374 #endif
375 	vap->va_atime.tv_sec = ip->i_atime;
376 	vap->va_atime.tv_usec = 0;
377 	vap->va_mtime.tv_sec = ip->i_mtime;
378 	vap->va_mtime.tv_usec = 0;
379 	vap->va_ctime.tv_sec = ip->i_ctime;
380 	vap->va_ctime.tv_usec = 0;
381 	vap->va_flags = ip->i_flags;
382 	vap->va_gen = ip->i_gen;
383 	/* this doesn't belong here */
384 	if (vp->v_type == VBLK)
385 		vap->va_blocksize = BLKDEV_IOSIZE;
386 	else if (vp->v_type == VCHR)
387 		vap->va_blocksize = MAXBSIZE;
388 	else
389 		vap->va_blocksize = ip->i_fs->fs_bsize;
390 	vap->va_bytes = dbtob(ip->i_blocks);
391 	vap->va_bytes_rsv = 0;
392 	vap->va_type = vp->v_type;
393 	return (0);
394 }
395 
396 /*
397  * Set attribute vnode op. called from several syscalls
398  */
399 ufs_setattr(vp, vap, cred)
400 	register struct vnode *vp;
401 	register struct vattr *vap;
402 	register struct ucred *cred;
403 {
404 	register struct inode *ip = VTOI(vp);
405 	int error = 0;
406 
407 	/*
408 	 * Check for unsetable attributes.
409 	 */
410 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
411 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
412 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
413 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
414 		return (EINVAL);
415 	}
416 	/*
417 	 * Go through the fields and update iff not VNOVAL.
418 	 */
419 	if (vap->va_uid != (u_short)VNOVAL || vap->va_gid != (u_short)VNOVAL)
420 		if (error = chown1(vp, vap->va_uid, vap->va_gid, cred))
421 			return (error);
422 	if (vap->va_size != VNOVAL) {
423 		if (vp->v_type == VDIR)
424 			return (EISDIR);
425 		if (error = itrunc(ip, vap->va_size, 0)) /* XXX IO_SYNC? */
426 			return (error);
427 	}
428 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
429 		if (cred->cr_uid != ip->i_uid &&
430 		    (error = suser(cred, &u.u_acflag)))
431 			return (error);
432 		if (vap->va_atime.tv_sec != VNOVAL)
433 			ip->i_flag |= IACC;
434 		if (vap->va_mtime.tv_sec != VNOVAL)
435 			ip->i_flag |= IUPD;
436 		ip->i_flag |= ICHG;
437 		if (error = iupdat(ip, &vap->va_atime, &vap->va_mtime, 1))
438 			return (error);
439 	}
440 	if (vap->va_mode != (u_short)VNOVAL)
441 		error = chmod1(vp, (int)vap->va_mode, cred);
442 	if (vap->va_flags != VNOVAL) {
443 		if (cred->cr_uid != ip->i_uid &&
444 		    (error = suser(cred, &u.u_acflag)))
445 			return (error);
446 		if (cred->cr_uid == 0) {
447 			ip->i_flags = vap->va_flags;
448 		} else {
449 			ip->i_flags &= 0xffff0000;
450 			ip->i_flags |= (vap->va_flags & 0xffff);
451 		}
452 		ip->i_flag |= ICHG;
453 	}
454 	return (error);
455 }
456 
457 /*
458  * Change the mode on a file.
459  * Inode must be locked before calling.
460  */
461 chmod1(vp, mode, cred)
462 	register struct vnode *vp;
463 	register int mode;
464 	struct ucred *cred;
465 {
466 	register struct inode *ip = VTOI(vp);
467 	int error;
468 
469 	if (cred->cr_uid != ip->i_uid &&
470 	    (error = suser(cred, &u.u_acflag)))
471 		return (error);
472 	if (cred->cr_uid) {
473 		if (vp->v_type != VDIR && (mode & ISVTX))
474 			return (EFTYPE);
475 		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
476 			return (EPERM);
477 	}
478 	ip->i_mode &= ~07777;
479 	ip->i_mode |= mode & 07777;
480 	ip->i_flag |= ICHG;
481 	if ((vp->v_flag & VTEXT) && (ip->i_mode & ISVTX) == 0)
482 		(void) vnode_pager_uncache(vp);
483 	return (0);
484 }
485 
486 /*
487  * Perform chown operation on inode ip;
488  * inode must be locked prior to call.
489  */
490 chown1(vp, uid, gid, cred)
491 	register struct vnode *vp;
492 	uid_t uid;
493 	gid_t gid;
494 	struct ucred *cred;
495 {
496 	register struct inode *ip = VTOI(vp);
497 	uid_t ouid;
498 	gid_t ogid;
499 	int error = 0;
500 #ifdef QUOTA
501 	register int i;
502 	long change;
503 #endif
504 
505 	if (uid == (u_short)VNOVAL)
506 		uid = ip->i_uid;
507 	if (gid == (u_short)VNOVAL)
508 		gid = ip->i_gid;
509 	/*
510 	 * If we don't own the file, are trying to change the owner
511 	 * of the file, or are not a member of the target group,
512 	 * the caller must be superuser or the call fails.
513 	 */
514 	if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
515 	    !groupmember((gid_t)gid, cred)) &&
516 	    (error = suser(cred, &u.u_acflag)))
517 		return (error);
518 	ouid = ip->i_uid;
519 	ogid = ip->i_gid;
520 #ifdef QUOTA
521 	if (error = getinoquota(ip))
522 		return (error);
523 	if (ouid == uid) {
524 		dqrele(vp, ip->i_dquot[USRQUOTA]);
525 		ip->i_dquot[USRQUOTA] = NODQUOT;
526 	}
527 	if (ogid == gid) {
528 		dqrele(vp, ip->i_dquot[GRPQUOTA]);
529 		ip->i_dquot[GRPQUOTA] = NODQUOT;
530 	}
531 	change = ip->i_blocks;
532 	(void) chkdq(ip, -change, cred, CHOWN);
533 	(void) chkiq(ip, -1, cred, CHOWN);
534 	for (i = 0; i < MAXQUOTAS; i++) {
535 		dqrele(vp, ip->i_dquot[i]);
536 		ip->i_dquot[i] = NODQUOT;
537 	}
538 #endif
539 	ip->i_uid = uid;
540 	ip->i_gid = gid;
541 #ifdef QUOTA
542 	if ((error = getinoquota(ip)) == 0) {
543 		if (ouid == uid) {
544 			dqrele(vp, ip->i_dquot[USRQUOTA]);
545 			ip->i_dquot[USRQUOTA] = NODQUOT;
546 		}
547 		if (ogid == gid) {
548 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
549 			ip->i_dquot[GRPQUOTA] = NODQUOT;
550 		}
551 		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
552 			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
553 				goto good;
554 			else
555 				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
556 		}
557 		for (i = 0; i < MAXQUOTAS; i++) {
558 			dqrele(vp, ip->i_dquot[i]);
559 			ip->i_dquot[i] = NODQUOT;
560 		}
561 	}
562 	ip->i_uid = ouid;
563 	ip->i_gid = ogid;
564 	if (getinoquota(ip) == 0) {
565 		if (ouid == uid) {
566 			dqrele(vp, ip->i_dquot[USRQUOTA]);
567 			ip->i_dquot[USRQUOTA] = NODQUOT;
568 		}
569 		if (ogid == gid) {
570 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
571 			ip->i_dquot[GRPQUOTA] = NODQUOT;
572 		}
573 		(void) chkdq(ip, change, cred, FORCE|CHOWN);
574 		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
575 		(void) getinoquota(ip);
576 	}
577 	return (error);
578 good:
579 	if (getinoquota(ip))
580 		panic("chown: lost quota");
581 #endif /* QUOTA */
582 	if (ouid != uid || ogid != gid)
583 		ip->i_flag |= ICHG;
584 	if (ouid != uid && cred->cr_uid != 0)
585 		ip->i_mode &= ~ISUID;
586 	if (ogid != gid && cred->cr_uid != 0)
587 		ip->i_mode &= ~ISGID;
588 	return (0);
589 }
590 
591 /*
592  * Vnode op for reading.
593  */
594 /* ARGSUSED */
595 ufs_read(vp, uio, ioflag, cred)
596 	struct vnode *vp;
597 	register struct uio *uio;
598 	int ioflag;
599 	struct ucred *cred;
600 {
601 	register struct inode *ip = VTOI(vp);
602 	register struct fs *fs;
603 	struct buf *bp;
604 	daddr_t lbn, bn, rablock;
605 	int size, diff, error = 0;
606 	long n, on, type;
607 
608 	if (uio->uio_rw != UIO_READ)
609 		panic("ufs_read mode");
610 	type = ip->i_mode & IFMT;
611 	if (type != IFDIR && type != IFREG && type != IFLNK)
612 		panic("ufs_read type");
613 	if (uio->uio_resid == 0)
614 		return (0);
615 	if (uio->uio_offset < 0)
616 		return (EINVAL);
617 	ip->i_flag |= IACC;
618 	fs = ip->i_fs;
619 	do {
620 		lbn = lblkno(fs, uio->uio_offset);
621 		on = blkoff(fs, uio->uio_offset);
622 		n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
623 		diff = ip->i_size - uio->uio_offset;
624 		if (diff <= 0)
625 			return (0);
626 		if (diff < n)
627 			n = diff;
628 		size = blksize(fs, ip, lbn);
629 		rablock = lbn + 1;
630 		if (vp->v_lastr + 1 == lbn &&
631 		    lblktosize(fs, rablock) < ip->i_size)
632 			error = breada(ITOV(ip), lbn, size, rablock,
633 				blksize(fs, ip, rablock), NOCRED, &bp);
634 		else
635 			error = bread(ITOV(ip), lbn, size, NOCRED, &bp);
636 		vp->v_lastr = lbn;
637 		n = MIN(n, size - bp->b_resid);
638 		if (error) {
639 			brelse(bp);
640 			return (error);
641 		}
642 		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
643 		if (n + on == fs->fs_bsize || uio->uio_offset == ip->i_size)
644 			bp->b_flags |= B_AGE;
645 		brelse(bp);
646 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
647 	return (error);
648 }
649 
650 /*
651  * Vnode op for writing.
652  */
653 ufs_write(vp, uio, ioflag, cred)
654 	register struct vnode *vp;
655 	struct uio *uio;
656 	int ioflag;
657 	struct ucred *cred;
658 {
659 	register struct inode *ip = VTOI(vp);
660 	register struct fs *fs;
661 	struct buf *bp;
662 	daddr_t lbn, bn;
663 	u_long osize;
664 	int n, on, flags;
665 	int size, resid, error = 0;
666 
667 	if (uio->uio_rw != UIO_WRITE)
668 		panic("ufs_write mode");
669 	switch (vp->v_type) {
670 	case VREG:
671 		if (ioflag & IO_APPEND)
672 			uio->uio_offset = ip->i_size;
673 		/* fall through */
674 	case VLNK:
675 		break;
676 
677 	case VDIR:
678 		if ((ioflag & IO_SYNC) == 0)
679 			panic("ufs_write nonsync dir write");
680 		break;
681 
682 	default:
683 		panic("ufs_write type");
684 	}
685 	if (uio->uio_offset < 0)
686 		return (EINVAL);
687 	if (uio->uio_resid == 0)
688 		return (0);
689 	/*
690 	 * Maybe this should be above the vnode op call, but so long as
691 	 * file servers have no limits, i don't think it matters
692 	 */
693 	if (vp->v_type == VREG &&
694 	    uio->uio_offset + uio->uio_resid >
695 	      u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
696 		psignal(u.u_procp, SIGXFSZ);
697 		return (EFBIG);
698 	}
699 	resid = uio->uio_resid;
700 	osize = ip->i_size;
701 	fs = ip->i_fs;
702 	flags = 0;
703 	if (ioflag & IO_SYNC)
704 		flags = B_SYNC;
705 	do {
706 		lbn = lblkno(fs, uio->uio_offset);
707 		on = blkoff(fs, uio->uio_offset);
708 		n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
709 		if (n < fs->fs_bsize)
710 			flags |= B_CLRBUF;
711 		else
712 			flags &= ~B_CLRBUF;
713 		if (error = balloc(ip, lbn, (int)(on + n), &bp, flags))
714 			break;
715 		bn = bp->b_blkno;
716 		if (uio->uio_offset + n > ip->i_size) {
717 			ip->i_size = uio->uio_offset + n;
718 			vnode_pager_setsize(vp, ip->i_size);
719 		}
720 		size = blksize(fs, ip, lbn);
721 		(void) vnode_pager_uncache(vp);
722 		n = MIN(n, size - bp->b_resid);
723 		error = uiomove(bp->b_un.b_addr + on, n, uio);
724 		if (ioflag & IO_SYNC)
725 			(void) bwrite(bp);
726 		else if (n + on == fs->fs_bsize) {
727 			bp->b_flags |= B_AGE;
728 			bawrite(bp);
729 		} else
730 			bdwrite(bp);
731 		ip->i_flag |= IUPD|ICHG;
732 		if (cred->cr_uid != 0)
733 			ip->i_mode &= ~(ISUID|ISGID);
734 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
735 	if (error && (ioflag & IO_UNIT)) {
736 		(void) itrunc(ip, osize, ioflag & IO_SYNC);
737 		uio->uio_offset -= resid - uio->uio_resid;
738 		uio->uio_resid = resid;
739 	}
740 	if (!error && (ioflag & IO_SYNC))
741 		error = iupdat(ip, &time, &time, 1);
742 	return (error);
743 }
744 
745 /* ARGSUSED */
746 ufs_ioctl(vp, com, data, fflag, cred)
747 	struct vnode *vp;
748 	int com;
749 	caddr_t data;
750 	int fflag;
751 	struct ucred *cred;
752 {
753 
754 	return (ENOTTY);
755 }
756 
/*
 * Select vnode call.
 *
 * Every argument is ignored and the result is always 1.
 */
/* ARGSUSED */
int
ufs_select(vp, which, fflags, cred)
	struct vnode *vp;
	int which, fflags;
	struct ucred *cred;
{

	return (1);		/* XXX */
}
766 
/*
 * Mmap a file
 *
 * NB Currently unsupported: every argument is ignored and the call
 * always fails with EINVAL.
 */
/* ARGSUSED */
int
ufs_mmap(vp, fflags, cred)
	struct vnode *vp;
	int fflags;
	struct ucred *cred;
{

	return (EINVAL);
}
781 
782 /*
783  * Synch an open file.
784  */
785 /* ARGSUSED */
786 ufs_fsync(vp, fflags, cred, waitfor)
787 	struct vnode *vp;
788 	int fflags;
789 	struct ucred *cred;
790 	int waitfor;
791 {
792 	struct inode *ip = VTOI(vp);
793 
794 	if (fflags&FWRITE)
795 		ip->i_flag |= ICHG;
796 	vflushbuf(vp, waitfor == MNT_WAIT ? B_SYNC : 0);
797 	return (iupdat(ip, &time, &time, waitfor == MNT_WAIT));
798 }
799 
/*
 * Seek on a file
 *
 * Nothing to do: the offsets are not examined and the call always
 * succeeds (returns 0).
 */
/* ARGSUSED */
int
ufs_seek(vp, oldoff, newoff, cred)
	struct vnode *vp;
	off_t oldoff, newoff;
	struct ucred *cred;
{

	return (0);
}
814 
815 /*
816  * ufs remove
817  * Hard to avoid races here, especially
818  * in unlinking directories.
819  */
820 ufs_remove(ndp)
821 	struct nameidata *ndp;
822 {
823 	register struct inode *ip, *dp;
824 	int error;
825 
826 	ip = VTOI(ndp->ni_vp);
827 	dp = VTOI(ndp->ni_dvp);
828 	error = dirremove(ndp);
829 	if (!error) {
830 		ip->i_nlink--;
831 		ip->i_flag |= ICHG;
832 	}
833 	if (dp == ip)
834 		vrele(ITOV(ip));
835 	else
836 		iput(ip);
837 	iput(dp);
838 	return (error);
839 }
840 
841 /*
842  * link vnode call
843  */
844 ufs_link(vp, ndp)
845 	register struct vnode *vp;
846 	register struct nameidata *ndp;
847 {
848 	register struct inode *ip = VTOI(vp);
849 	int error;
850 
851 	if ((unsigned short)ip->i_nlink >= LINK_MAX)
852 		return (EMLINK);
853 	if (ndp->ni_dvp != vp)
854 		ILOCK(ip);
855 	ip->i_nlink++;
856 	ip->i_flag |= ICHG;
857 	error = iupdat(ip, &time, &time, 1);
858 	if (!error)
859 		error = direnter(ip, ndp);
860 	if (ndp->ni_dvp != vp)
861 		IUNLOCK(ip);
862 	vput(ndp->ni_dvp);
863 	if (error) {
864 		ip->i_nlink--;
865 		ip->i_flag |= ICHG;
866 	}
867 	return (error);
868 }
869 
870 /*
871  * Rename system call.
872  * 	rename("foo", "bar");
873  * is essentially
874  *	unlink("bar");
875  *	link("foo", "bar");
876  *	unlink("foo");
877  * but ``atomically''.  Can't do full commit without saving state in the
878  * inode on disk which isn't feasible at this time.  Best we can do is
879  * always guarantee the target exists.
880  *
881  * Basic algorithm is:
882  *
883  * 1) Bump link count on source while we're linking it to the
884  *    target.  This also ensure the inode won't be deleted out
885  *    from underneath us while we work (it may be truncated by
886  *    a concurrent `trunc' or `open' for creation).
887  * 2) Link source to destination.  If destination already exists,
888  *    delete it first.
889  * 3) Unlink source reference to inode if still around. If a
890  *    directory was moved and the parent of the destination
891  *    is different from the source, patch the ".." entry in the
892  *    directory.
893  */
894 ufs_rename(fndp, tndp)
895 	register struct nameidata *fndp, *tndp;
896 {
897 	register struct inode *ip, *xp, *dp;
898 	struct dirtemplate dirbuf;
899 	int doingdirectory = 0, oldparent = 0, newparent = 0;
900 	int error = 0;
901 
902 	dp = VTOI(fndp->ni_dvp);
903 	ip = VTOI(fndp->ni_vp);
904 	ILOCK(ip);
905 	if ((ip->i_mode&IFMT) == IFDIR) {
906 		register struct direct *d = &fndp->ni_dent;
907 
908 		/*
909 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
910 		 */
911 		if ((d->d_namlen == 1 && d->d_name[0] == '.') || dp == ip ||
912 		    fndp->ni_isdotdot || (ip->i_flag & IRENAME)) {
913 			VOP_ABORTOP(tndp);
914 			vput(tndp->ni_dvp);
915 			if (tndp->ni_vp)
916 				vput(tndp->ni_vp);
917 			VOP_ABORTOP(fndp);
918 			vrele(fndp->ni_dvp);
919 			vput(fndp->ni_vp);
920 			return (EINVAL);
921 		}
922 		ip->i_flag |= IRENAME;
923 		oldparent = dp->i_number;
924 		doingdirectory++;
925 	}
926 	vrele(fndp->ni_dvp);
927 
928 	/*
929 	 * 1) Bump link count while we're moving stuff
930 	 *    around.  If we crash somewhere before
931 	 *    completing our work, the link count
932 	 *    may be wrong, but correctable.
933 	 */
934 	ip->i_nlink++;
935 	ip->i_flag |= ICHG;
936 	error = iupdat(ip, &time, &time, 1);
937 	IUNLOCK(ip);
938 
939 	/*
940 	 * When the target exists, both the directory
941 	 * and target vnodes are returned locked.
942 	 */
943 	dp = VTOI(tndp->ni_dvp);
944 	xp = NULL;
945 	if (tndp->ni_vp)
946 		xp = VTOI(tndp->ni_vp);
947 	/*
948 	 * If ".." must be changed (ie the directory gets a new
949 	 * parent) then the source directory must not be in the
950 	 * directory heirarchy above the target, as this would
951 	 * orphan everything below the source directory. Also
952 	 * the user must have write permission in the source so
953 	 * as to be able to change "..". We must repeat the call
954 	 * to namei, as the parent directory is unlocked by the
955 	 * call to checkpath().
956 	 */
957 	if (oldparent != dp->i_number)
958 		newparent = dp->i_number;
959 	if (doingdirectory && newparent) {
960 		VOP_LOCK(fndp->ni_vp);
961 		error = ufs_access(fndp->ni_vp, VWRITE, tndp->ni_cred);
962 		VOP_UNLOCK(fndp->ni_vp);
963 		if (error)
964 			goto bad;
965 		tndp->ni_nameiop &= ~(MODMASK | OPMASK);
966 		tndp->ni_nameiop |= RENAME | LOCKPARENT | LOCKLEAF | NOCACHE;
967 		do {
968 			dp = VTOI(tndp->ni_dvp);
969 			if (xp != NULL)
970 				iput(xp);
971 			if (error = checkpath(ip, dp, tndp->ni_cred))
972 				goto out;
973 			if (error = namei(tndp))
974 				goto out;
975 			xp = NULL;
976 			if (tndp->ni_vp)
977 				xp = VTOI(tndp->ni_vp);
978 		} while (dp != VTOI(tndp->ni_dvp));
979 	}
980 	/*
981 	 * 2) If target doesn't exist, link the target
982 	 *    to the source and unlink the source.
983 	 *    Otherwise, rewrite the target directory
984 	 *    entry to reference the source inode and
985 	 *    expunge the original entry's existence.
986 	 */
987 	if (xp == NULL) {
988 		if (dp->i_dev != ip->i_dev)
989 			panic("rename: EXDEV");
990 		/*
991 		 * Account for ".." in new directory.
992 		 * When source and destination have the same
993 		 * parent we don't fool with the link count.
994 		 */
995 		if (doingdirectory && newparent) {
996 			if ((unsigned short)dp->i_nlink >= LINK_MAX) {
997 				error = EMLINK;
998 				goto bad;
999 			}
1000 			dp->i_nlink++;
1001 			dp->i_flag |= ICHG;
1002 			if (error = iupdat(dp, &time, &time, 1))
1003 				goto bad;
1004 		}
1005 		if (error = direnter(ip, tndp)) {
1006 			if (doingdirectory && newparent) {
1007 				dp->i_nlink--;
1008 				dp->i_flag |= ICHG;
1009 				(void) iupdat(dp, &time, &time, 1);
1010 			}
1011 			goto bad;
1012 		}
1013 		iput(dp);
1014 	} else {
1015 		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1016 			panic("rename: EXDEV");
1017 		/*
1018 		 * Short circuit rename(foo, foo).
1019 		 */
1020 		if (xp->i_number == ip->i_number)
1021 			panic("rename: same file");
1022 		/*
1023 		 * If the parent directory is "sticky", then the user must
1024 		 * own the parent directory, or the destination of the rename,
1025 		 * otherwise the destination may not be changed (except by
1026 		 * root). This implements append-only directories.
1027 		 */
1028 		if ((dp->i_mode & ISVTX) && tndp->ni_cred->cr_uid != 0 &&
1029 		    tndp->ni_cred->cr_uid != dp->i_uid &&
1030 		    xp->i_uid != tndp->ni_cred->cr_uid) {
1031 			error = EPERM;
1032 			goto bad;
1033 		}
1034 		/*
1035 		 * Target must be empty if a directory
1036 		 * and have no links to it.
1037 		 * Also, insure source and target are
1038 		 * compatible (both directories, or both
1039 		 * not directories).
1040 		 */
1041 		if ((xp->i_mode&IFMT) == IFDIR) {
1042 			if (!dirempty(xp, dp->i_number, tndp->ni_cred) ||
1043 			    xp->i_nlink > 2) {
1044 				error = ENOTEMPTY;
1045 				goto bad;
1046 			}
1047 			if (!doingdirectory) {
1048 				error = ENOTDIR;
1049 				goto bad;
1050 			}
1051 			cache_purge(ITOV(dp));
1052 		} else if (doingdirectory) {
1053 			error = EISDIR;
1054 			goto bad;
1055 		}
1056 		if (error = dirrewrite(dp, ip, tndp))
1057 			goto bad;
1058 		/*
1059 		 * If the target directory is in the same
1060 		 * directory as the source directory,
1061 		 * decrement the link count on the parent
1062 		 * of the target directory.
1063 		 */
1064 		 if (doingdirectory && !newparent) {
1065 			dp->i_nlink--;
1066 			dp->i_flag |= ICHG;
1067 		}
1068 		vput(ITOV(dp));
1069 		/*
1070 		 * Adjust the link count of the target to
1071 		 * reflect the dirrewrite above.  If this is
1072 		 * a directory it is empty and there are
1073 		 * no links to it, so we can squash the inode and
1074 		 * any space associated with it.  We disallowed
1075 		 * renaming over top of a directory with links to
1076 		 * it above, as the remaining link would point to
1077 		 * a directory without "." or ".." entries.
1078 		 */
1079 		xp->i_nlink--;
1080 		if (doingdirectory) {
1081 			if (--xp->i_nlink != 0)
1082 				panic("rename: linked directory");
1083 			error = itrunc(xp, (u_long)0, IO_SYNC);
1084 		}
1085 		xp->i_flag |= ICHG;
1086 		iput(xp);
1087 		xp = NULL;
1088 	}
1089 
1090 	/*
1091 	 * 3) Unlink the source.
1092 	 */
1093 	fndp->ni_nameiop &= ~(MODMASK | OPMASK);
1094 	fndp->ni_nameiop |= DELETE | LOCKPARENT | LOCKLEAF;
1095 	(void)namei(fndp);
1096 	if (fndp->ni_vp != NULL) {
1097 		xp = VTOI(fndp->ni_vp);
1098 		dp = VTOI(fndp->ni_dvp);
1099 	} else {
1100 		/*
1101 		 * From name has disappeared.
1102 		 */
1103 		if (doingdirectory)
1104 			panic("rename: lost dir entry");
1105 		vrele(ITOV(ip));
1106 		return (0);
1107 	}
1108 	/*
1109 	 * Ensure that the directory entry still exists and has not
1110 	 * changed while the new name has been entered. If the source is
1111 	 * a file then the entry may have been unlinked or renamed. In
1112 	 * either case there is no further work to be done. If the source
1113 	 * is a directory then it cannot have been rmdir'ed; its link
1114 	 * count of three would cause a rmdir to fail with ENOTEMPTY.
1115 	 * The IRENAME flag ensures that it cannot be moved by another
1116 	 * rename.
1117 	 */
1118 	if (xp != ip) {
1119 		if (doingdirectory)
1120 			panic("rename: lost dir entry");
1121 	} else {
1122 		/*
1123 		 * If the source is a directory with a
1124 		 * new parent, the link count of the old
1125 		 * parent directory must be decremented
1126 		 * and ".." set to point to the new parent.
1127 		 */
1128 		if (doingdirectory && newparent) {
1129 			dp->i_nlink--;
1130 			dp->i_flag |= ICHG;
1131 			error = vn_rdwr(UIO_READ, ITOV(xp), (caddr_t)&dirbuf,
1132 				sizeof (struct dirtemplate), (off_t)0,
1133 				UIO_SYSSPACE, IO_NODELOCKED,
1134 				tndp->ni_cred, (int *)0);
1135 			if (error == 0) {
1136 				if (dirbuf.dotdot_namlen != 2 ||
1137 				    dirbuf.dotdot_name[0] != '.' ||
1138 				    dirbuf.dotdot_name[1] != '.') {
1139 					dirbad(xp, 12, "rename: mangled dir");
1140 				} else {
1141 					dirbuf.dotdot_ino = newparent;
1142 					(void) vn_rdwr(UIO_WRITE, ITOV(xp),
1143 					    (caddr_t)&dirbuf,
1144 					    sizeof (struct dirtemplate),
1145 					    (off_t)0, UIO_SYSSPACE,
1146 					    IO_NODELOCKED|IO_SYNC,
1147 					    tndp->ni_cred, (int *)0);
1148 					cache_purge(ITOV(dp));
1149 				}
1150 			}
1151 		}
1152 		error = dirremove(fndp);
1153 		if (!error) {
1154 			xp->i_nlink--;
1155 			xp->i_flag |= ICHG;
1156 		}
1157 		xp->i_flag &= ~IRENAME;
1158 	}
1159 	if (dp)
1160 		vput(ITOV(dp));
1161 	if (xp)
1162 		vput(ITOV(xp));
1163 	vrele(ITOV(ip));
1164 	return (error);
1165 
1166 bad:
1167 	if (xp)
1168 		vput(ITOV(xp));
1169 	vput(ITOV(dp));
1170 out:
1171 	ip->i_nlink--;
1172 	ip->i_flag |= ICHG;
1173 	vrele(ITOV(ip));
1174 	return (error);
1175 }
1176 
1177 /*
1178  * A virgin directory (no blushing please).
1179  */
1180 struct dirtemplate mastertemplate = {
1181 	0, 12, 1, ".",
1182 	0, DIRBLKSIZ - 12, 2, ".."
1183 };
1184 
1185 /*
1186  * Mkdir system call
1187  */
1188 ufs_mkdir(ndp, vap)
1189 	struct nameidata *ndp;
1190 	struct vattr *vap;
1191 {
1192 	register struct inode *ip, *dp;
1193 	struct inode *tip;
1194 	struct vnode *dvp;
1195 	struct dirtemplate dirtemplate;
1196 	int error;
1197 	int dmode;
1198 
1199 	dvp = ndp->ni_dvp;
1200 	dp = VTOI(dvp);
1201 	if ((unsigned short)dp->i_nlink >= LINK_MAX) {
1202 		iput(dp);
1203 		return (EMLINK);
1204 	}
1205 	dmode = vap->va_mode&0777;
1206 	dmode |= IFDIR;
1207 	/*
1208 	 * Must simulate part of maknode here
1209 	 * in order to acquire the inode, but
1210 	 * not have it entered in the parent
1211 	 * directory.  The entry is made later
1212 	 * after writing "." and ".." entries out.
1213 	 */
1214 	if (error = ialloc(dp, dirpref(dp->i_fs), dmode, ndp->ni_cred, &tip)) {
1215 		iput(dp);
1216 		return (error);
1217 	}
1218 	ip = tip;
1219 	ip->i_uid = ndp->ni_cred->cr_uid;
1220 	ip->i_gid = dp->i_gid;
1221 #ifdef QUOTA
1222 	if ((error = getinoquota(ip)) ||
1223 	    (error = chkiq(ip, 1, ndp->ni_cred, 0))) {
1224 		ifree(ip, ip->i_number, dmode);
1225 		iput(ip);
1226 		iput(dp);
1227 		return (error);
1228 	}
1229 #endif
1230 	ip->i_flag |= IACC|IUPD|ICHG;
1231 	ip->i_mode = dmode;
1232 	ITOV(ip)->v_type = VDIR;	/* Rest init'd in iget() */
1233 	ip->i_nlink = 2;
1234 	error = iupdat(ip, &time, &time, 1);
1235 
1236 	/*
1237 	 * Bump link count in parent directory
1238 	 * to reflect work done below.  Should
1239 	 * be done before reference is created
1240 	 * so reparation is possible if we crash.
1241 	 */
1242 	dp->i_nlink++;
1243 	dp->i_flag |= ICHG;
1244 	if (error = iupdat(dp, &time, &time, 1))
1245 		goto bad;
1246 
1247 	/*
1248 	 * Initialize directory with "."
1249 	 * and ".." from static template.
1250 	 */
1251 	dirtemplate = mastertemplate;
1252 	dirtemplate.dot_ino = ip->i_number;
1253 	dirtemplate.dotdot_ino = dp->i_number;
1254 	error = vn_rdwr(UIO_WRITE, ITOV(ip), (caddr_t)&dirtemplate,
1255 		sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
1256 		IO_NODELOCKED|IO_SYNC, ndp->ni_cred, (int *)0);
1257 	if (error) {
1258 		dp->i_nlink--;
1259 		dp->i_flag |= ICHG;
1260 		goto bad;
1261 	}
1262 	if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
1263 		panic("mkdir: blksize");     /* XXX - should grow w/balloc() */
1264 	} else {
1265 		ip->i_size = DIRBLKSIZ;
1266 		ip->i_flag |= ICHG;
1267 	}
1268 	/*
1269 	 * Directory all set up, now
1270 	 * install the entry for it in
1271 	 * the parent directory.
1272 	 */
1273 	if (error = direnter(ip, ndp)) {
1274 		ndp->ni_nameiop &= ~(MODMASK | OPMASK);
1275 		ndp->ni_nameiop |= LOOKUP | LOCKLEAF | NOCACHE;
1276 		error = namei(ndp);
1277 		if (!error) {
1278 			iput(dp);
1279 			dp = VTOI(ndp->ni_vp);
1280 			dp->i_nlink--;
1281 			dp->i_flag |= ICHG;
1282 		}
1283 	}
1284 bad:
1285 	/*
1286 	 * No need to do an explicit itrunc here,
1287 	 * vrele will do this for us because we set
1288 	 * the link count to 0.
1289 	 */
1290 	if (error) {
1291 		ip->i_nlink = 0;
1292 		ip->i_flag |= ICHG;
1293 		iput(ip);
1294 	} else
1295 		ndp->ni_vp = ITOV(ip);
1296 	iput(dp);
1297 	return (error);
1298 }
1299 
1300 /*
1301  * Rmdir system call.
1302  */
1303 ufs_rmdir(ndp)
1304 	register struct nameidata *ndp;
1305 {
1306 	register struct inode *ip, *dp;
1307 	int error = 0;
1308 
1309 	ip = VTOI(ndp->ni_vp);
1310 	dp = VTOI(ndp->ni_dvp);
1311 	/*
1312 	 * No rmdir "." please.
1313 	 */
1314 	if (dp == ip) {
1315 		vrele(ITOV(dp));
1316 		iput(ip);
1317 		return (EINVAL);
1318 	}
1319 	/*
1320 	 * Verify the directory is empty (and valid).
1321 	 * (Rmdir ".." won't be valid since
1322 	 *  ".." will contain a reference to
1323 	 *  the current directory and thus be
1324 	 *  non-empty.)
1325 	 */
1326 	if (ip->i_nlink != 2 || !dirempty(ip, dp->i_number, ndp->ni_cred)) {
1327 		error = ENOTEMPTY;
1328 		goto out;
1329 	}
1330 	/*
1331 	 * Delete reference to directory before purging
1332 	 * inode.  If we crash in between, the directory
1333 	 * will be reattached to lost+found,
1334 	 */
1335 	if (error = dirremove(ndp))
1336 		goto out;
1337 	dp->i_nlink--;
1338 	dp->i_flag |= ICHG;
1339 	cache_purge(ITOV(dp));
1340 	iput(dp);
1341 	ndp->ni_dvp = NULL;
1342 	/*
1343 	 * Truncate inode.  The only stuff left
1344 	 * in the directory is "." and "..".  The
1345 	 * "." reference is inconsequential since
1346 	 * we're quashing it.  The ".." reference
1347 	 * has already been adjusted above.  We've
1348 	 * removed the "." reference and the reference
1349 	 * in the parent directory, but there may be
1350 	 * other hard links so decrement by 2 and
1351 	 * worry about them later.
1352 	 */
1353 	ip->i_nlink -= 2;
1354 	error = itrunc(ip, (u_long)0, IO_SYNC);
1355 	cache_purge(ITOV(ip));
1356 out:
1357 	if (ndp->ni_dvp)
1358 		iput(dp);
1359 	iput(ip);
1360 	return (error);
1361 }
1362 
1363 /*
1364  * symlink -- make a symbolic link
1365  */
1366 ufs_symlink(ndp, vap, target)
1367 	struct nameidata *ndp;
1368 	struct vattr *vap;
1369 	char *target;
1370 {
1371 	struct inode *ip;
1372 	int error;
1373 
1374 	error = maknode(IFLNK | vap->va_mode, ndp, &ip);
1375 	if (error)
1376 		return (error);
1377 	error = vn_rdwr(UIO_WRITE, ITOV(ip), target, strlen(target), (off_t)0,
1378 		UIO_SYSSPACE, IO_NODELOCKED, ndp->ni_cred, (int *)0);
1379 	iput(ip);
1380 	return (error);
1381 }
1382 
1383 /*
1384  * Vnode op for read and write
1385  */
1386 ufs_readdir(vp, uio, cred, eofflagp)
1387 	struct vnode *vp;
1388 	register struct uio *uio;
1389 	struct ucred *cred;
1390 	int *eofflagp;
1391 {
1392 	int count, lost, error;
1393 
1394 	count = uio->uio_resid;
1395 	count &= ~(DIRBLKSIZ - 1);
1396 	lost = uio->uio_resid - count;
1397 	if (count < DIRBLKSIZ || (uio->uio_offset & (DIRBLKSIZ -1)))
1398 		return (EINVAL);
1399 	uio->uio_resid = count;
1400 	uio->uio_iov->iov_len = count;
1401 	error = ufs_read(vp, uio, 0, cred);
1402 	uio->uio_resid += lost;
1403 	if ((VTOI(vp)->i_size - uio->uio_offset) <= 0)
1404 		*eofflagp = 1;
1405 	else
1406 		*eofflagp = 0;
1407 	return (error);
1408 }
1409 
/*
 * Return target name of a symbolic link
 *
 * The contents of the link are fetched with ufs_read(), passing an
 * ioflag of 0, and its result is returned unchanged.
 */
ufs_readlink(vp, uiop, cred)
	struct vnode *vp;
	struct uio *uiop;
	struct ucred *cred;
{
	int error;

	error = ufs_read(vp, uiop, 0, cred);
	return (error);
}
1421 
/*
 * Ufs abort op, called after namei() when the CREATE or DELETE that
 * was looked up for is not carried out.  There is nothing to undo at
 * the moment, so this always returns 0.
 */
/* ARGSUSED */
ufs_abortop(ndp)
	struct nameidata *ndp;
{
	int error = 0;

	return (error);
}
1433 
/*
 * Lock an inode.
 *
 * Applies ILOCK() to the inode behind vp.  Always returns 0.
 */
ufs_lock(vp)
	struct vnode *vp;
{
	register struct inode *ip;

	ip = VTOI(vp);
	ILOCK(ip);
	return (0);
}
1445 
1446 /*
1447  * Unlock an inode.
1448  */
1449 ufs_unlock(vp)
1450 	struct vnode *vp;
1451 {
1452 	register struct inode *ip = VTOI(vp);
1453 
1454 	if (!(ip->i_flag & ILOCKED))
1455 		panic("ufs_unlock NOT LOCKED");
1456 	IUNLOCK(ip);
1457 	return (0);
1458 }
1459 
1460 /*
1461  * Check for a locked inode.
1462  */
1463 ufs_islocked(vp)
1464 	struct vnode *vp;
1465 {
1466 
1467 	if (VTOI(vp)->i_flag & ILOCKED)
1468 		return (1);
1469 	return (0);
1470 }
1471 
1472 /*
1473  * Get access to bmap
1474  */
1475 ufs_bmap(vp, bn, vpp, bnp)
1476 	struct vnode *vp;
1477 	daddr_t bn;
1478 	struct vnode **vpp;
1479 	daddr_t *bnp;
1480 {
1481 	struct inode *ip = VTOI(vp);
1482 
1483 	if (vpp != NULL)
1484 		*vpp = ip->i_devvp;
1485 	if (bnp == NULL)
1486 		return (0);
1487 	return (bmap(ip, bn, bnp));
1488 }
1489 
1490 /*
1491  * Calculate the logical to physical mapping if not done already,
1492  * then call the device strategy routine.
1493  */
1494 int checkoverlap = 0;
1495 
1496 ufs_strategy(bp)
1497 	register struct buf *bp;
1498 {
1499 	register struct inode *ip = VTOI(bp->b_vp);
1500 	struct vnode *vp;
1501 	int error;
1502 
1503 	if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR)
1504 		panic("ufs_strategy: spec");
1505 	if (bp->b_blkno == bp->b_lblkno) {
1506 		if (error = bmap(ip, bp->b_lblkno, &bp->b_blkno))
1507 			return (error);
1508 		if ((long)bp->b_blkno == -1)
1509 			clrbuf(bp);
1510 	}
1511 	if ((long)bp->b_blkno == -1) {
1512 		biodone(bp);
1513 		return (0);
1514 	}
1515 #ifdef DIAGNOSTIC
1516 	if (checkoverlap) {
1517 		register struct buf *ep;
1518 		struct buf *ebp;
1519 		daddr_t start, last;
1520 
1521 		ebp = &buf[nbuf];
1522 		start = bp->b_blkno;
1523 		last = start + btodb(bp->b_bcount) - 1;
1524 		for (ep = buf; ep < ebp; ep++) {
1525 			if (ep == bp || (ep->b_flags & B_INVAL) ||
1526 			    ep->b_vp == NULLVP)
1527 				continue;
1528 			if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0))
1529 				continue;
1530 			if (vp != ip->i_devvp)
1531 				continue;
1532 			/* look for overlap */
1533 			if (ep->b_bcount == 0 || ep->b_blkno > last ||
1534 			    ep->b_blkno + btodb(ep->b_bcount) <= start)
1535 				continue;
1536 			vprint("Disk overlap", vp);
1537 			printf("\tstart %d, end %d overlap start %d, end %d\n",
1538 				start, last, ep->b_blkno,
1539 				ep->b_blkno + btodb(ep->b_bcount) - 1);
1540 			panic("Disk buffer overlap");
1541 		}
1542 	}
1543 #endif /* DIAGNOSTIC */
1544 	vp = ip->i_devvp;
1545 	bp->b_dev = vp->v_rdev;
1546 	(*(vp->v_op->vn_strategy))(bp);
1547 	return (0);
1548 }
1549 
1550 /*
1551  * Print out the contents of an inode.
1552  */
1553 ufs_print(vp)
1554 	struct vnode *vp;
1555 {
1556 	register struct inode *ip = VTOI(vp);
1557 
1558 	printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number,
1559 		major(ip->i_dev), minor(ip->i_dev));
1560 #ifdef FIFO
1561 	if (vp->v_type == VFIFO)
1562 		fifo_printinfo(vp);
1563 #endif /* FIFO */
1564 	printf("%s\n", (ip->i_flag & ILOCKED) ? " (LOCKED)" : "");
1565 	if (ip->i_spare0 == 0)
1566 		return;
1567 	printf("\towner pid %d", ip->i_spare0);
1568 	if (ip->i_spare1)
1569 		printf(" waiting pid %d", ip->i_spare1);
1570 	printf("\n");
1571 }
1572 
1573 /*
1574  * Read wrapper for special devices.
1575  */
1576 ufsspec_read(vp, uio, ioflag, cred)
1577 	struct vnode *vp;
1578 	struct uio *uio;
1579 	int ioflag;
1580 	struct ucred *cred;
1581 {
1582 
1583 	/*
1584 	 * Set access flag.
1585 	 */
1586 	VTOI(vp)->i_flag |= IACC;
1587 	return (spec_read(vp, uio, ioflag, cred));
1588 }
1589 
1590 /*
1591  * Write wrapper for special devices.
1592  */
1593 ufsspec_write(vp, uio, ioflag, cred)
1594 	struct vnode *vp;
1595 	struct uio *uio;
1596 	int ioflag;
1597 	struct ucred *cred;
1598 {
1599 
1600 	/*
1601 	 * Set update and change flags.
1602 	 */
1603 	VTOI(vp)->i_flag |= IUPD|ICHG;
1604 	return (spec_write(vp, uio, ioflag, cred));
1605 }
1606 
1607 /*
1608  * Close wrapper for special devices.
1609  *
1610  * Update the times on the inode then do device close.
1611  */
1612 ufsspec_close(vp, fflag, cred)
1613 	struct vnode *vp;
1614 	int fflag;
1615 	struct ucred *cred;
1616 {
1617 	register struct inode *ip = VTOI(vp);
1618 
1619 	if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
1620 		ITIMES(ip, &time, &time);
1621 	return (spec_close(vp, fflag, cred));
1622 }
1623 
1624 #ifdef FIFO
1625 /*
1626  * Read wrapper for fifo's
1627  */
1628 ufsfifo_read(vp, uio, ioflag, cred)
1629 	struct vnode *vp;
1630 	struct uio *uio;
1631 	int ioflag;
1632 	struct ucred *cred;
1633 {
1634 
1635 	/*
1636 	 * Set access flag.
1637 	 */
1638 	VTOI(vp)->i_flag |= IACC;
1639 	return (fifo_read(vp, uio, ioflag, cred));
1640 }
1641 
1642 /*
1643  * Write wrapper for fifo's.
1644  */
1645 ufsfifo_write(vp, uio, ioflag, cred)
1646 	struct vnode *vp;
1647 	struct uio *uio;
1648 	int ioflag;
1649 	struct ucred *cred;
1650 {
1651 
1652 	/*
1653 	 * Set update and change flags.
1654 	 */
1655 	VTOI(vp)->i_flag |= IUPD|ICHG;
1656 	return (fifo_write(vp, uio, ioflag, cred));
1657 }
1658 
1659 /*
1660  * Close wrapper for fifo's.
1661  *
1662  * Update the times on the inode then do device close.
1663  */
1664 ufsfifo_close(vp, fflag, cred)
1665 	struct vnode *vp;
1666 	int fflag;
1667 	struct ucred *cred;
1668 {
1669 	register struct inode *ip = VTOI(vp);
1670 
1671 	if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
1672 		ITIMES(ip, &time, &time);
1673 	return (fifo_close(vp, fflag, cred));
1674 }
1675 #endif /* FIFO */
1676 
1677 /*
1678  * Make a new file.
1679  */
1680 maknode(mode, ndp, ipp)
1681 	int mode;
1682 	register struct nameidata *ndp;
1683 	struct inode **ipp;
1684 {
1685 	register struct inode *ip;
1686 	struct inode *tip;
1687 	register struct inode *pdir = VTOI(ndp->ni_dvp);
1688 	ino_t ipref;
1689 	int error;
1690 
1691 	*ipp = 0;
1692 	if ((mode & IFMT) == 0)
1693 		mode |= IFREG;
1694 	if ((mode & IFMT) == IFDIR)
1695 		ipref = dirpref(pdir->i_fs);
1696 	else
1697 		ipref = pdir->i_number;
1698 	if (error = ialloc(pdir, ipref, mode, ndp->ni_cred, &tip)) {
1699 		iput(pdir);
1700 		return (error);
1701 	}
1702 	ip = tip;
1703 	ip->i_uid = ndp->ni_cred->cr_uid;
1704 	ip->i_gid = pdir->i_gid;
1705 #ifdef QUOTA
1706 	if ((error = getinoquota(ip)) ||
1707 	    (error = chkiq(ip, 1, ndp->ni_cred, 0))) {
1708 		ifree(ip, ip->i_number, mode);
1709 		iput(ip);
1710 		iput(pdir);
1711 		return (error);
1712 	}
1713 #endif
1714 	ip->i_flag |= IACC|IUPD|ICHG;
1715 	ip->i_mode = mode;
1716 	ITOV(ip)->v_type = IFTOVT(mode);	/* Rest init'd in iget() */
1717 	ip->i_nlink = 1;
1718 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, ndp->ni_cred) &&
1719 	    suser(ndp->ni_cred, NULL))
1720 		ip->i_mode &= ~ISGID;
1721 
1722 	/*
1723 	 * Make sure inode goes to disk before directory entry.
1724 	 */
1725 	if (error = iupdat(ip, &time, &time, 1))
1726 		goto bad;
1727 	if (error = direnter(ip, ndp))
1728 		goto bad;
1729 	iput(pdir);
1730 	*ipp = ip;
1731 	return (0);
1732 
1733 bad:
1734 	/*
1735 	 * Write error occurred trying to update the inode
1736 	 * or the directory so must deallocate the inode.
1737 	 */
1738 	iput(pdir);
1739 	ip->i_nlink = 0;
1740 	ip->i_flag |= ICHG;
1741 	iput(ip);
1742 	return (error);
1743 }
1744 
1745 /*
1746  * Advisory record locking support
1747  */
1748 ufs_advlock(vp, id, op, fl, flags)
1749 	struct vnode *vp;
1750 	caddr_t id;
1751 	int op;
1752 	register struct flock *fl;
1753 	int flags;
1754 {
1755 	register struct inode *ip = VTOI(vp);
1756 	register struct lockf *lock;
1757 	off_t start, end;
1758 	int error;
1759 
1760 	/*
1761 	 * Avoid the common case of unlocking when inode has no locks.
1762 	 */
1763 	if (ip->i_lockf == (struct lockf *)0) {
1764 		if (op != F_SETLK) {
1765 			fl->l_type = F_UNLCK;
1766 			return (0);
1767 		}
1768 	}
1769 	/*
1770 	 * Convert the flock structure into a start and end.
1771 	 */
1772 	switch (fl->l_whence) {
1773 
1774 	case SEEK_SET:
1775 	case SEEK_CUR:
1776 		/*
1777 		 * Caller is responsible for adding any necessary offset
1778 		 * when SEEK_CUR is used.
1779 		 */
1780 		start = fl->l_start;
1781 		break;
1782 
1783 	case SEEK_END:
1784 		start = ip->i_size + fl->l_start;
1785 		break;
1786 
1787 	default:
1788 		return (EINVAL);
1789 	}
1790 	if (start < 0)
1791 		return (EINVAL);
1792 	if (fl->l_len == 0)
1793 		end = -1;
1794 	else
1795 		end = start + fl->l_len - 1;
1796 	/*
1797 	 * Create the lockf structure
1798 	 */
1799 	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
1800 	lock->lf_start = start;
1801 	lock->lf_end = end;
1802 	lock->lf_id = id;
1803 	lock->lf_inode = ip;
1804 	lock->lf_type = fl->l_type;
1805 	lock->lf_next = (struct lockf *)0;
1806 	lock->lf_block = (struct lockf *)0;
1807 	lock->lf_flags = flags;
1808 	/*
1809 	 * Do the requested operation.
1810 	 */
1811 	switch(op) {
1812 	case F_SETLK:
1813 		return (lf_setlock(lock));
1814 
1815 	case F_UNLCK:
1816 		error = lf_clearlock(lock);
1817 		FREE(lock, M_LOCKF);
1818 		return (error);
1819 
1820 	case F_GETLK:
1821 		error = lf_getlock(lock, fl);
1822 		FREE(lock, M_LOCKF);
1823 		return (error);
1824 
1825 	default:
1826 		free(lock, M_LOCKF);
1827 		return (EINVAL);
1828 	}
1829 	/* NOTREACHED */
1830 }
1831