xref: /original-bsd/sys/ufs/lfs/lfs_vnops.c (revision c6d5c0d7)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)lfs_vnops.c	7.54 (Berkeley) 02/21/91
8  */
9 
10 #include "param.h"
11 #include "systm.h"
12 #include "user.h"
13 #include "kernel.h"
14 #include "file.h"
15 #include "stat.h"
16 #include "buf.h"
17 #include "proc.h"
18 #include "socket.h"
19 #include "socketvar.h"
20 #include "conf.h"
21 #include "mount.h"
22 #include "vnode.h"
23 #include "specdev.h"
24 #include "fcntl.h"
25 #include "malloc.h"
26 #include "../ufs/lockf.h"
27 #include "../ufs/quota.h"
28 #include "../ufs/inode.h"
29 #include "../ufs/fs.h"
30 
31 /*
32  * Global vfs data structures for ufs
33  */
34 
35 int	ufs_lookup(),
36 	ufs_create(),
37 	ufs_mknod(),
38 	ufs_open(),
39 	ufs_close(),
40 	ufs_access(),
41 	ufs_getattr(),
42 	ufs_setattr(),
43 	ufs_read(),
44 	ufs_write(),
45 	ufs_ioctl(),
46 	ufs_select(),
47 	ufs_mmap(),
48 	ufs_fsync(),
49 	ufs_seek(),
50 	ufs_remove(),
51 	ufs_link(),
52 	ufs_rename(),
53 	ufs_mkdir(),
54 	ufs_rmdir(),
55 	ufs_symlink(),
56 	ufs_readdir(),
57 	ufs_readlink(),
58 	ufs_abortop(),
59 	ufs_inactive(),
60 	ufs_reclaim(),
61 	ufs_lock(),
62 	ufs_unlock(),
63 	ufs_bmap(),
64 	ufs_strategy(),
65 	ufs_print(),
66 	ufs_islocked(),
67 	ufs_advlock();
68 
69 struct vnodeops ufs_vnodeops = {
70 	ufs_lookup,		/* lookup */
71 	ufs_create,		/* create */
72 	ufs_mknod,		/* mknod */
73 	ufs_open,		/* open */
74 	ufs_close,		/* close */
75 	ufs_access,		/* access */
76 	ufs_getattr,		/* getattr */
77 	ufs_setattr,		/* setattr */
78 	ufs_read,		/* read */
79 	ufs_write,		/* write */
80 	ufs_ioctl,		/* ioctl */
81 	ufs_select,		/* select */
82 	ufs_mmap,		/* mmap */
83 	ufs_fsync,		/* fsync */
84 	ufs_seek,		/* seek */
85 	ufs_remove,		/* remove */
86 	ufs_link,		/* link */
87 	ufs_rename,		/* rename */
88 	ufs_mkdir,		/* mkdir */
89 	ufs_rmdir,		/* rmdir */
90 	ufs_symlink,		/* symlink */
91 	ufs_readdir,		/* readdir */
92 	ufs_readlink,		/* readlink */
93 	ufs_abortop,		/* abortop */
94 	ufs_inactive,		/* inactive */
95 	ufs_reclaim,		/* reclaim */
96 	ufs_lock,		/* lock */
97 	ufs_unlock,		/* unlock */
98 	ufs_bmap,		/* bmap */
99 	ufs_strategy,		/* strategy */
100 	ufs_print,		/* print */
101 	ufs_islocked,		/* islocked */
102 	ufs_advlock,		/* advlock */
103 };
104 
105 int	spec_lookup(),
106 	spec_open(),
107 	ufsspec_read(),
108 	ufsspec_write(),
109 	spec_strategy(),
110 	spec_bmap(),
111 	spec_ioctl(),
112 	spec_select(),
113 	ufsspec_close(),
114 	spec_advlock(),
115 	spec_badop(),
116 	spec_nullop();
117 
118 struct vnodeops spec_inodeops = {
119 	spec_lookup,		/* lookup */
120 	spec_badop,		/* create */
121 	spec_badop,		/* mknod */
122 	spec_open,		/* open */
123 	ufsspec_close,		/* close */
124 	ufs_access,		/* access */
125 	ufs_getattr,		/* getattr */
126 	ufs_setattr,		/* setattr */
127 	ufsspec_read,		/* read */
128 	ufsspec_write,		/* write */
129 	spec_ioctl,		/* ioctl */
130 	spec_select,		/* select */
131 	spec_badop,		/* mmap */
132 	spec_nullop,		/* fsync */
133 	spec_badop,		/* seek */
134 	spec_badop,		/* remove */
135 	spec_badop,		/* link */
136 	spec_badop,		/* rename */
137 	spec_badop,		/* mkdir */
138 	spec_badop,		/* rmdir */
139 	spec_badop,		/* symlink */
140 	spec_badop,		/* readdir */
141 	spec_badop,		/* readlink */
142 	spec_badop,		/* abortop */
143 	ufs_inactive,		/* inactive */
144 	ufs_reclaim,		/* reclaim */
145 	ufs_lock,		/* lock */
146 	ufs_unlock,		/* unlock */
147 	spec_bmap,		/* bmap */
148 	spec_strategy,		/* strategy */
149 	ufs_print,		/* print */
150 	ufs_islocked,		/* islocked */
151 	spec_advlock,		/* advlock */
152 };
153 
#ifdef FIFO
/*
 * Forward declarations of the fifo operations used below; only
 * compiled when FIFO is defined.
 */
int	fifo_lookup();
int	fifo_open();
int	ufsfifo_read();
int	ufsfifo_write();
int	fifo_bmap();
int	fifo_ioctl();
int	fifo_select();
int	ufsfifo_close();
int	fifo_print();
int	fifo_advlock();
int	fifo_badop();
int	fifo_nullop();

/*
 * Operations vector for fifo inodes.
 * Attribute, locking and reclaim slots reuse the ufs routines; most
 * name-space operations are filled with fifo_badop.  The initializer
 * is positional, so entry order matters.
 */
struct vnodeops fifo_inodeops = {
	fifo_lookup,		/* lookup */
	fifo_badop,		/* create */
	fifo_badop,		/* mknod */
	fifo_open,		/* open */
	ufsfifo_close,		/* close */
	ufs_access,		/* access */
	ufs_getattr,		/* getattr */
	ufs_setattr,		/* setattr */
	ufsfifo_read,		/* read */
	ufsfifo_write,		/* write */
	fifo_ioctl,		/* ioctl */
	fifo_select,		/* select */
	fifo_badop,		/* mmap */
	fifo_nullop,		/* fsync */
	fifo_badop,		/* seek */
	fifo_badop,		/* remove */
	fifo_badop,		/* link */
	fifo_badop,		/* rename */
	fifo_badop,		/* mkdir */
	fifo_badop,		/* rmdir */
	fifo_badop,		/* symlink */
	fifo_badop,		/* readdir */
	fifo_badop,		/* readlink */
	fifo_badop,		/* abortop */
	ufs_inactive,		/* inactive */
	ufs_reclaim,		/* reclaim */
	ufs_lock,		/* lock */
	ufs_unlock,		/* unlock */
	fifo_bmap,		/* bmap */
	fifo_badop,		/* strategy */
	ufs_print,		/* print */
	ufs_islocked,		/* islocked */
	fifo_advlock,		/* advlock */
};
#endif /* FIFO */
204 
205 enum vtype iftovt_tab[16] = {
206 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
207 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
208 };
209 int	vttoif_tab[9] = {
210 	0, IFREG, IFDIR, IFBLK, IFCHR, IFLNK, IFSOCK, IFIFO, IFMT,
211 };
212 
213 /*
214  * Create a regular file
215  */
216 ufs_create(ndp, vap)
217 	struct nameidata *ndp;
218 	struct vattr *vap;
219 {
220 	struct inode *ip;
221 	int error;
222 
223 	if (error = maknode(MAKEIMODE(vap->va_type, vap->va_mode), ndp, &ip))
224 		return (error);
225 	ndp->ni_vp = ITOV(ip);
226 	return (0);
227 }
228 
229 /*
230  * Mknod vnode call
231  */
232 /* ARGSUSED */
233 ufs_mknod(ndp, vap, cred)
234 	struct nameidata *ndp;
235 	struct ucred *cred;
236 	struct vattr *vap;
237 {
238 	register struct vnode *vp;
239 	struct inode *ip;
240 	int error;
241 
242 	if (error = maknode(MAKEIMODE(vap->va_type, vap->va_mode), ndp, &ip))
243 		return (error);
244 	ip->i_flag |= IACC|IUPD|ICHG;
245 	if (vap->va_rdev != VNOVAL) {
246 		/*
247 		 * Want to be able to use this to make badblock
248 		 * inodes, so don't truncate the dev number.
249 		 */
250 		ip->i_rdev = vap->va_rdev;
251 	}
252 	/*
253 	 * Remove inode so that it will be reloaded by iget and
254 	 * checked to see if it is an alias of an existing entry
255 	 * in the inode cache.
256 	 */
257 	vp = ITOV(ip);
258 	vput(vp);
259 	vp->v_type = VNON;
260 	vgone(vp);
261 	return (0);
262 }
263 
/*
 * Open vnode call.
 *
 * ufs keeps no per-open state, so there is nothing to do here;
 * the arguments are ignored and 0 is always returned.
 */
/* ARGSUSED */
int
ufs_open(vp, mode, cred)
	struct vnode *vp;
	int mode;
	struct ucred *cred;
{

	return 0;
}
278 
279 /*
280  * Close called
281  *
282  * Update the times on the inode.
283  */
284 /* ARGSUSED */
285 ufs_close(vp, fflag, cred)
286 	struct vnode *vp;
287 	int fflag;
288 	struct ucred *cred;
289 {
290 	register struct inode *ip = VTOI(vp);
291 
292 	if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
293 		ITIMES(ip, &time, &time);
294 	return (0);
295 }
296 
297 /*
298  * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC.
299  * The mode is shifted to select the owner/group/other fields. The
300  * super user is granted all permissions.
301  */
302 ufs_access(vp, mode, cred)
303 	struct vnode *vp;
304 	register int mode;
305 	struct ucred *cred;
306 {
307 	register struct inode *ip = VTOI(vp);
308 	register gid_t *gp;
309 	int i, error;
310 
311 #ifdef DIAGNOSTIC
312 	if (!VOP_ISLOCKED(vp)) {
313 		vprint("ufs_access: not locked", vp);
314 		panic("ufs_access: not locked");
315 	}
316 #endif
317 #ifdef QUOTA
318 	if (mode & VWRITE) {
319 		switch (vp->v_type) {
320 		case VREG: case VDIR: case VLNK:
321 			if (error = getinoquota(ip))
322 				return (error);
323 		}
324 	}
325 #endif /* QUOTA */
326 	/*
327 	 * If you're the super-user, you always get access.
328 	 */
329 	if (cred->cr_uid == 0)
330 		return (0);
331 	/*
332 	 * Access check is based on only one of owner, group, public.
333 	 * If not owner, then check group. If not a member of the
334 	 * group, then check public access.
335 	 */
336 	if (cred->cr_uid != ip->i_uid) {
337 		mode >>= 3;
338 		gp = cred->cr_groups;
339 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
340 			if (ip->i_gid == *gp)
341 				goto found;
342 		mode >>= 3;
343 found:
344 		;
345 	}
346 	if ((ip->i_mode & mode) != 0)
347 		return (0);
348 	return (EACCES);
349 }
350 
351 /* ARGSUSED */
352 ufs_getattr(vp, vap, cred)
353 	struct vnode *vp;
354 	register struct vattr *vap;
355 	struct ucred *cred;
356 {
357 	register struct inode *ip = VTOI(vp);
358 
359 	ITIMES(ip, &time, &time);
360 	/*
361 	 * Copy from inode table
362 	 */
363 	vap->va_fsid = ip->i_dev;
364 	vap->va_fileid = ip->i_number;
365 	vap->va_mode = ip->i_mode & ~IFMT;
366 	vap->va_nlink = ip->i_nlink;
367 	vap->va_uid = ip->i_uid;
368 	vap->va_gid = ip->i_gid;
369 	vap->va_rdev = (dev_t)ip->i_rdev;
370 #ifdef tahoe
371 	vap->va_size = ip->i_size;
372 	vap->va_size_rsv = 0;
373 #else
374 	vap->va_qsize = ip->i_din.di_qsize;
375 #endif
376 	vap->va_atime.tv_sec = ip->i_atime;
377 	vap->va_atime.tv_usec = 0;
378 	vap->va_mtime.tv_sec = ip->i_mtime;
379 	vap->va_mtime.tv_usec = 0;
380 	vap->va_ctime.tv_sec = ip->i_ctime;
381 	vap->va_ctime.tv_usec = 0;
382 	vap->va_flags = ip->i_flags;
383 	vap->va_gen = ip->i_gen;
384 	/* this doesn't belong here */
385 	if (vp->v_type == VBLK)
386 		vap->va_blocksize = BLKDEV_IOSIZE;
387 	else if (vp->v_type == VCHR)
388 		vap->va_blocksize = MAXBSIZE;
389 	else
390 		vap->va_blocksize = ip->i_fs->fs_bsize;
391 	vap->va_bytes = dbtob(ip->i_blocks);
392 	vap->va_bytes_rsv = 0;
393 	vap->va_type = vp->v_type;
394 	return (0);
395 }
396 
397 /*
398  * Set attribute vnode op. called from several syscalls
399  */
400 ufs_setattr(vp, vap, cred)
401 	register struct vnode *vp;
402 	register struct vattr *vap;
403 	register struct ucred *cred;
404 {
405 	register struct inode *ip = VTOI(vp);
406 	int error = 0;
407 
408 	/*
409 	 * Check for unsetable attributes.
410 	 */
411 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
412 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
413 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
414 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
415 		return (EINVAL);
416 	}
417 	/*
418 	 * Go through the fields and update iff not VNOVAL.
419 	 */
420 	if (vap->va_uid != (u_short)VNOVAL || vap->va_gid != (u_short)VNOVAL)
421 		if (error = chown1(vp, vap->va_uid, vap->va_gid, cred))
422 			return (error);
423 	if (vap->va_size != VNOVAL) {
424 		if (vp->v_type == VDIR)
425 			return (EISDIR);
426 		if (error = itrunc(ip, vap->va_size, 0)) /* XXX IO_SYNC? */
427 			return (error);
428 	}
429 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
430 		if (cred->cr_uid != ip->i_uid &&
431 		    (error = suser(cred, &u.u_acflag)))
432 			return (error);
433 		if (vap->va_atime.tv_sec != VNOVAL)
434 			ip->i_flag |= IACC;
435 		if (vap->va_mtime.tv_sec != VNOVAL)
436 			ip->i_flag |= IUPD;
437 		ip->i_flag |= ICHG;
438 		if (error = iupdat(ip, &vap->va_atime, &vap->va_mtime, 1))
439 			return (error);
440 	}
441 	if (vap->va_mode != (u_short)VNOVAL)
442 		error = chmod1(vp, (int)vap->va_mode, cred);
443 	if (vap->va_flags != VNOVAL) {
444 		if (cred->cr_uid != ip->i_uid &&
445 		    (error = suser(cred, &u.u_acflag)))
446 			return (error);
447 		if (cred->cr_uid == 0) {
448 			ip->i_flags = vap->va_flags;
449 		} else {
450 			ip->i_flags &= 0xffff0000;
451 			ip->i_flags |= (vap->va_flags & 0xffff);
452 		}
453 		ip->i_flag |= ICHG;
454 	}
455 	return (error);
456 }
457 
458 /*
459  * Change the mode on a file.
460  * Inode must be locked before calling.
461  */
462 chmod1(vp, mode, cred)
463 	register struct vnode *vp;
464 	register int mode;
465 	struct ucred *cred;
466 {
467 	register struct inode *ip = VTOI(vp);
468 	int error;
469 
470 	if (cred->cr_uid != ip->i_uid &&
471 	    (error = suser(cred, &u.u_acflag)))
472 		return (error);
473 	if (cred->cr_uid) {
474 		if (vp->v_type != VDIR && (mode & ISVTX))
475 			return (EFTYPE);
476 		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
477 			return (EPERM);
478 	}
479 	ip->i_mode &= ~07777;
480 	ip->i_mode |= mode & 07777;
481 	ip->i_flag |= ICHG;
482 	if ((vp->v_flag & VTEXT) && (ip->i_mode & ISVTX) == 0)
483 		(void) vnode_pager_uncache(vp);
484 	return (0);
485 }
486 
487 /*
488  * Perform chown operation on inode ip;
489  * inode must be locked prior to call.
490  */
491 chown1(vp, uid, gid, cred)
492 	register struct vnode *vp;
493 	uid_t uid;
494 	gid_t gid;
495 	struct ucred *cred;
496 {
497 	register struct inode *ip = VTOI(vp);
498 	uid_t ouid;
499 	gid_t ogid;
500 	int error = 0;
501 #ifdef QUOTA
502 	register int i;
503 	long change;
504 #endif
505 
506 	if (uid == (u_short)VNOVAL)
507 		uid = ip->i_uid;
508 	if (gid == (u_short)VNOVAL)
509 		gid = ip->i_gid;
510 	/*
511 	 * If we don't own the file, are trying to change the owner
512 	 * of the file, or are not a member of the target group,
513 	 * the caller must be superuser or the call fails.
514 	 */
515 	if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
516 	    !groupmember((gid_t)gid, cred)) &&
517 	    (error = suser(cred, &u.u_acflag)))
518 		return (error);
519 	ouid = ip->i_uid;
520 	ogid = ip->i_gid;
521 #ifdef QUOTA
522 	if (error = getinoquota(ip))
523 		return (error);
524 	if (ouid == uid) {
525 		dqrele(vp, ip->i_dquot[USRQUOTA]);
526 		ip->i_dquot[USRQUOTA] = NODQUOT;
527 	}
528 	if (ogid == gid) {
529 		dqrele(vp, ip->i_dquot[GRPQUOTA]);
530 		ip->i_dquot[GRPQUOTA] = NODQUOT;
531 	}
532 	change = ip->i_blocks;
533 	(void) chkdq(ip, -change, cred, CHOWN);
534 	(void) chkiq(ip, -1, cred, CHOWN);
535 	for (i = 0; i < MAXQUOTAS; i++) {
536 		dqrele(vp, ip->i_dquot[i]);
537 		ip->i_dquot[i] = NODQUOT;
538 	}
539 #endif
540 	ip->i_uid = uid;
541 	ip->i_gid = gid;
542 #ifdef QUOTA
543 	if ((error = getinoquota(ip)) == 0) {
544 		if (ouid == uid) {
545 			dqrele(vp, ip->i_dquot[USRQUOTA]);
546 			ip->i_dquot[USRQUOTA] = NODQUOT;
547 		}
548 		if (ogid == gid) {
549 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
550 			ip->i_dquot[GRPQUOTA] = NODQUOT;
551 		}
552 		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
553 			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
554 				goto good;
555 			else
556 				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
557 		}
558 		for (i = 0; i < MAXQUOTAS; i++) {
559 			dqrele(vp, ip->i_dquot[i]);
560 			ip->i_dquot[i] = NODQUOT;
561 		}
562 	}
563 	ip->i_uid = ouid;
564 	ip->i_gid = ogid;
565 	if (getinoquota(ip) == 0) {
566 		if (ouid == uid) {
567 			dqrele(vp, ip->i_dquot[USRQUOTA]);
568 			ip->i_dquot[USRQUOTA] = NODQUOT;
569 		}
570 		if (ogid == gid) {
571 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
572 			ip->i_dquot[GRPQUOTA] = NODQUOT;
573 		}
574 		(void) chkdq(ip, change, cred, FORCE|CHOWN);
575 		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
576 		(void) getinoquota(ip);
577 	}
578 	return (error);
579 good:
580 	if (getinoquota(ip))
581 		panic("chown: lost quota");
582 #endif /* QUOTA */
583 	if (ouid != uid || ogid != gid)
584 		ip->i_flag |= ICHG;
585 	if (ouid != uid && cred->cr_uid != 0)
586 		ip->i_mode &= ~ISUID;
587 	if (ogid != gid && cred->cr_uid != 0)
588 		ip->i_mode &= ~ISGID;
589 	return (0);
590 }
591 
592 /*
593  * Vnode op for reading.
594  */
595 /* ARGSUSED */
596 ufs_read(vp, uio, ioflag, cred)
597 	struct vnode *vp;
598 	register struct uio *uio;
599 	int ioflag;
600 	struct ucred *cred;
601 {
602 	register struct inode *ip = VTOI(vp);
603 	register struct fs *fs;
604 	struct buf *bp;
605 	daddr_t lbn, bn, rablock;
606 	int size, diff, error = 0;
607 	long n, on, type;
608 
609 	if (uio->uio_rw != UIO_READ)
610 		panic("ufs_read mode");
611 	type = ip->i_mode & IFMT;
612 	if (type != IFDIR && type != IFREG && type != IFLNK)
613 		panic("ufs_read type");
614 	if (uio->uio_resid == 0)
615 		return (0);
616 	if (uio->uio_offset < 0)
617 		return (EINVAL);
618 	ip->i_flag |= IACC;
619 	fs = ip->i_fs;
620 	do {
621 		lbn = lblkno(fs, uio->uio_offset);
622 		on = blkoff(fs, uio->uio_offset);
623 		n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
624 		diff = ip->i_size - uio->uio_offset;
625 		if (diff <= 0)
626 			return (0);
627 		if (diff < n)
628 			n = diff;
629 		size = blksize(fs, ip, lbn);
630 		rablock = lbn + 1;
631 		if (vp->v_lastr + 1 == lbn &&
632 		    lblktosize(fs, rablock) < ip->i_size)
633 			error = breada(ITOV(ip), lbn, size, rablock,
634 				blksize(fs, ip, rablock), NOCRED, &bp);
635 		else
636 			error = bread(ITOV(ip), lbn, size, NOCRED, &bp);
637 		vp->v_lastr = lbn;
638 		n = MIN(n, size - bp->b_resid);
639 		if (error) {
640 			brelse(bp);
641 			return (error);
642 		}
643 		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
644 		if (n + on == fs->fs_bsize || uio->uio_offset == ip->i_size)
645 			bp->b_flags |= B_AGE;
646 		brelse(bp);
647 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
648 	return (error);
649 }
650 
651 /*
652  * Vnode op for writing.
653  */
654 ufs_write(vp, uio, ioflag, cred)
655 	register struct vnode *vp;
656 	struct uio *uio;
657 	int ioflag;
658 	struct ucred *cred;
659 {
660 	register struct inode *ip = VTOI(vp);
661 	register struct fs *fs;
662 	struct buf *bp;
663 	daddr_t lbn, bn;
664 	u_long osize;
665 	int n, on, flags;
666 	int size, resid, error = 0;
667 
668 	if (uio->uio_rw != UIO_WRITE)
669 		panic("ufs_write mode");
670 	switch (vp->v_type) {
671 	case VREG:
672 		if (ioflag & IO_APPEND)
673 			uio->uio_offset = ip->i_size;
674 		/* fall through */
675 	case VLNK:
676 		break;
677 
678 	case VDIR:
679 		if ((ioflag & IO_SYNC) == 0)
680 			panic("ufs_write nonsync dir write");
681 		break;
682 
683 	default:
684 		panic("ufs_write type");
685 	}
686 	if (uio->uio_offset < 0)
687 		return (EINVAL);
688 	if (uio->uio_resid == 0)
689 		return (0);
690 	/*
691 	 * Maybe this should be above the vnode op call, but so long as
692 	 * file servers have no limits, i don't think it matters
693 	 */
694 	if (vp->v_type == VREG &&
695 	    uio->uio_offset + uio->uio_resid >
696 	      u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
697 		psignal(u.u_procp, SIGXFSZ);
698 		return (EFBIG);
699 	}
700 	resid = uio->uio_resid;
701 	osize = ip->i_size;
702 	fs = ip->i_fs;
703 	flags = 0;
704 	if (ioflag & IO_SYNC)
705 		flags = B_SYNC;
706 	do {
707 		lbn = lblkno(fs, uio->uio_offset);
708 		on = blkoff(fs, uio->uio_offset);
709 		n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
710 		if (n < fs->fs_bsize)
711 			flags |= B_CLRBUF;
712 		else
713 			flags &= ~B_CLRBUF;
714 		if (error = balloc(ip, lbn, (int)(on + n), &bp, flags))
715 			break;
716 		bn = bp->b_blkno;
717 		if (uio->uio_offset + n > ip->i_size) {
718 			ip->i_size = uio->uio_offset + n;
719 			vnode_pager_setsize(vp, ip->i_size);
720 		}
721 		size = blksize(fs, ip, lbn);
722 		(void) vnode_pager_uncache(vp);
723 		n = MIN(n, size - bp->b_resid);
724 		error = uiomove(bp->b_un.b_addr + on, n, uio);
725 		if (ioflag & IO_SYNC)
726 			(void) bwrite(bp);
727 		else if (n + on == fs->fs_bsize) {
728 			bp->b_flags |= B_AGE;
729 			bawrite(bp);
730 		} else
731 			bdwrite(bp);
732 		ip->i_flag |= IUPD|ICHG;
733 		if (cred->cr_uid != 0)
734 			ip->i_mode &= ~(ISUID|ISGID);
735 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
736 	if (error && (ioflag & IO_UNIT)) {
737 		(void) itrunc(ip, osize, ioflag & IO_SYNC);
738 		uio->uio_offset -= resid - uio->uio_resid;
739 		uio->uio_resid = resid;
740 	}
741 	if (!error && (ioflag & IO_SYNC))
742 		error = iupdat(ip, &time, &time, 1);
743 	return (error);
744 }
745 
746 /* ARGSUSED */
747 ufs_ioctl(vp, com, data, fflag, cred)
748 	struct vnode *vp;
749 	int com;
750 	caddr_t data;
751 	int fflag;
752 	struct ucred *cred;
753 {
754 
755 	return (ENOTTY);
756 }
757 
/*
 * Select vnode call.
 *
 * Always returns 1, whatever was asked for; the arguments are
 * ignored.
 */
/* ARGSUSED */
int
ufs_select(vp, which, fflags, cred)
	struct vnode *vp;
	int which, fflags;
	struct ucred *cred;
{

	return 1;		/* XXX */
}
767 
/*
 * Mmap vnode call.
 *
 * NB: currently unsupported -- every request fails with EINVAL and
 * the arguments are ignored.
 */
/* ARGSUSED */
int
ufs_mmap(vp, fflags, cred)
	struct vnode *vp;
	int fflags;
	struct ucred *cred;
{

	return EINVAL;
}
782 
783 /*
784  * Synch an open file.
785  */
786 /* ARGSUSED */
787 ufs_fsync(vp, fflags, cred, waitfor)
788 	struct vnode *vp;
789 	int fflags;
790 	struct ucred *cred;
791 	int waitfor;
792 {
793 	struct inode *ip = VTOI(vp);
794 
795 	if (fflags&FWRITE)
796 		ip->i_flag |= ICHG;
797 	vflushbuf(vp, waitfor == MNT_WAIT ? B_SYNC : 0);
798 	return (iupdat(ip, &time, &time, waitfor == MNT_WAIT));
799 }
800 
/*
 * Seek vnode call.
 *
 * Nothing needs to be done for a seek, so both offsets are ignored
 * and 0 is always returned.
 */
/* ARGSUSED */
int
ufs_seek(vp, oldoff, newoff, cred)
	struct vnode *vp;
	off_t oldoff, newoff;
	struct ucred *cred;
{

	return 0;
}
815 
816 /*
817  * ufs remove
818  * Hard to avoid races here, especially
819  * in unlinking directories.
820  */
821 ufs_remove(ndp)
822 	struct nameidata *ndp;
823 {
824 	register struct inode *ip, *dp;
825 	int error;
826 
827 	ip = VTOI(ndp->ni_vp);
828 	dp = VTOI(ndp->ni_dvp);
829 	error = dirremove(ndp);
830 	if (!error) {
831 		ip->i_nlink--;
832 		ip->i_flag |= ICHG;
833 	}
834 	if (dp == ip)
835 		vrele(ITOV(ip));
836 	else
837 		iput(ip);
838 	iput(dp);
839 	return (error);
840 }
841 
842 /*
843  * link vnode call
844  */
845 ufs_link(vp, ndp)
846 	register struct vnode *vp;
847 	register struct nameidata *ndp;
848 {
849 	register struct inode *ip = VTOI(vp);
850 	int error;
851 
852 	if ((unsigned short)ip->i_nlink >= LINK_MAX)
853 		return (EMLINK);
854 	if (ndp->ni_dvp != vp)
855 		ILOCK(ip);
856 	ip->i_nlink++;
857 	ip->i_flag |= ICHG;
858 	error = iupdat(ip, &time, &time, 1);
859 	if (!error)
860 		error = direnter(ip, ndp);
861 	if (ndp->ni_dvp != vp)
862 		IUNLOCK(ip);
863 	if (error) {
864 		ip->i_nlink--;
865 		ip->i_flag |= ICHG;
866 	}
867 	return (error);
868 }
869 
870 /*
871  * Rename system call.
872  * 	rename("foo", "bar");
873  * is essentially
874  *	unlink("bar");
875  *	link("foo", "bar");
876  *	unlink("foo");
877  * but ``atomically''.  Can't do full commit without saving state in the
878  * inode on disk which isn't feasible at this time.  Best we can do is
879  * always guarantee the target exists.
880  *
881  * Basic algorithm is:
882  *
883  * 1) Bump link count on source while we're linking it to the
884  *    target.  This also ensure the inode won't be deleted out
885  *    from underneath us while we work (it may be truncated by
886  *    a concurrent `trunc' or `open' for creation).
887  * 2) Link source to destination.  If destination already exists,
888  *    delete it first.
889  * 3) Unlink source reference to inode if still around. If a
890  *    directory was moved and the parent of the destination
891  *    is different from the source, patch the ".." entry in the
892  *    directory.
893  */
894 ufs_rename(fndp, tndp)
895 	register struct nameidata *fndp, *tndp;
896 {
897 	register struct inode *ip, *xp, *dp;
898 	struct dirtemplate dirbuf;
899 	int doingdirectory = 0, oldparent = 0, newparent = 0;
900 	int error = 0;
901 
902 	dp = VTOI(fndp->ni_dvp);
903 	ip = VTOI(fndp->ni_vp);
904 	ILOCK(ip);
905 	if ((ip->i_mode&IFMT) == IFDIR) {
906 		register struct direct *d = &fndp->ni_dent;
907 
908 		/*
909 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
910 		 */
911 		if ((d->d_namlen == 1 && d->d_name[0] == '.') || dp == ip ||
912 		    fndp->ni_isdotdot || (ip->i_flag & IRENAME)) {
913 			VOP_ABORTOP(tndp);
914 			vput(tndp->ni_dvp);
915 			if (tndp->ni_vp)
916 				vput(tndp->ni_vp);
917 			VOP_ABORTOP(fndp);
918 			vrele(fndp->ni_dvp);
919 			vput(fndp->ni_vp);
920 			return (EINVAL);
921 		}
922 		ip->i_flag |= IRENAME;
923 		oldparent = dp->i_number;
924 		doingdirectory++;
925 	}
926 	vrele(fndp->ni_dvp);
927 
928 	/*
929 	 * 1) Bump link count while we're moving stuff
930 	 *    around.  If we crash somewhere before
931 	 *    completing our work, the link count
932 	 *    may be wrong, but correctable.
933 	 */
934 	ip->i_nlink++;
935 	ip->i_flag |= ICHG;
936 	error = iupdat(ip, &time, &time, 1);
937 	IUNLOCK(ip);
938 
939 	/*
940 	 * When the target exists, both the directory
941 	 * and target vnodes are returned locked.
942 	 */
943 	dp = VTOI(tndp->ni_dvp);
944 	xp = NULL;
945 	if (tndp->ni_vp)
946 		xp = VTOI(tndp->ni_vp);
947 	/*
948 	 * If ".." must be changed (ie the directory gets a new
949 	 * parent) then the source directory must not be in the
950 	 * directory heirarchy above the target, as this would
951 	 * orphan everything below the source directory. Also
952 	 * the user must have write permission in the source so
953 	 * as to be able to change "..". We must repeat the call
954 	 * to namei, as the parent directory is unlocked by the
955 	 * call to checkpath().
956 	 */
957 	if (oldparent != dp->i_number)
958 		newparent = dp->i_number;
959 	if (doingdirectory && newparent) {
960 		VOP_LOCK(fndp->ni_vp);
961 		error = ufs_access(fndp->ni_vp, VWRITE, tndp->ni_cred);
962 		VOP_UNLOCK(fndp->ni_vp);
963 		if (error)
964 			goto bad;
965 		tndp->ni_nameiop &= ~(MODMASK | OPMASK);
966 		tndp->ni_nameiop |= RENAME | LOCKPARENT | LOCKLEAF | NOCACHE;
967 		do {
968 			dp = VTOI(tndp->ni_dvp);
969 			if (xp != NULL)
970 				iput(xp);
971 			if (error = checkpath(ip, dp, tndp->ni_cred))
972 				goto out;
973 			if (error = namei(tndp))
974 				goto out;
975 			xp = NULL;
976 			if (tndp->ni_vp)
977 				xp = VTOI(tndp->ni_vp);
978 		} while (dp != VTOI(tndp->ni_dvp));
979 	}
980 	/*
981 	 * 2) If target doesn't exist, link the target
982 	 *    to the source and unlink the source.
983 	 *    Otherwise, rewrite the target directory
984 	 *    entry to reference the source inode and
985 	 *    expunge the original entry's existence.
986 	 */
987 	if (xp == NULL) {
988 		if (dp->i_dev != ip->i_dev)
989 			panic("rename: EXDEV");
990 		/*
991 		 * Account for ".." in new directory.
992 		 * When source and destination have the same
993 		 * parent we don't fool with the link count.
994 		 */
995 		if (doingdirectory && newparent) {
996 			if ((unsigned short)dp->i_nlink >= LINK_MAX) {
997 				error = EMLINK;
998 				goto bad;
999 			}
1000 			dp->i_nlink++;
1001 			dp->i_flag |= ICHG;
1002 			if (error = iupdat(dp, &time, &time, 1))
1003 				goto bad;
1004 		}
1005 		if (error = direnter(ip, tndp))
1006 			goto out;
1007 	} else {
1008 		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
1009 			panic("rename: EXDEV");
1010 		/*
1011 		 * Short circuit rename(foo, foo).
1012 		 */
1013 		if (xp->i_number == ip->i_number)
1014 			panic("rename: same file");
1015 		/*
1016 		 * If the parent directory is "sticky", then the user must
1017 		 * own the parent directory, or the destination of the rename,
1018 		 * otherwise the destination may not be changed (except by
1019 		 * root). This implements append-only directories.
1020 		 */
1021 		if ((dp->i_mode & ISVTX) && tndp->ni_cred->cr_uid != 0 &&
1022 		    tndp->ni_cred->cr_uid != dp->i_uid &&
1023 		    xp->i_uid != tndp->ni_cred->cr_uid) {
1024 			error = EPERM;
1025 			goto bad;
1026 		}
1027 		/*
1028 		 * Target must be empty if a directory
1029 		 * and have no links to it.
1030 		 * Also, insure source and target are
1031 		 * compatible (both directories, or both
1032 		 * not directories).
1033 		 */
1034 		if ((xp->i_mode&IFMT) == IFDIR) {
1035 			if (!dirempty(xp, dp->i_number, tndp->ni_cred) ||
1036 			    xp->i_nlink > 2) {
1037 				error = ENOTEMPTY;
1038 				goto bad;
1039 			}
1040 			if (!doingdirectory) {
1041 				error = ENOTDIR;
1042 				goto bad;
1043 			}
1044 			cache_purge(ITOV(dp));
1045 		} else if (doingdirectory) {
1046 			error = EISDIR;
1047 			goto bad;
1048 		}
1049 		if (error = dirrewrite(dp, ip, tndp))
1050 			goto bad;
1051 		/*
1052 		 * If the target directory is in the same
1053 		 * directory as the source directory,
1054 		 * decrement the link count on the parent
1055 		 * of the target directory.
1056 		 */
1057 		 if (doingdirectory && !newparent) {
1058 			dp->i_nlink--;
1059 			dp->i_flag |= ICHG;
1060 		}
1061 		vput(ITOV(dp));
1062 		/*
1063 		 * Adjust the link count of the target to
1064 		 * reflect the dirrewrite above.  If this is
1065 		 * a directory it is empty and there are
1066 		 * no links to it, so we can squash the inode and
1067 		 * any space associated with it.  We disallowed
1068 		 * renaming over top of a directory with links to
1069 		 * it above, as the remaining link would point to
1070 		 * a directory without "." or ".." entries.
1071 		 */
1072 		xp->i_nlink--;
1073 		if (doingdirectory) {
1074 			if (--xp->i_nlink != 0)
1075 				panic("rename: linked directory");
1076 			error = itrunc(xp, (u_long)0, IO_SYNC);
1077 		}
1078 		xp->i_flag |= ICHG;
1079 		iput(xp);
1080 		xp = NULL;
1081 	}
1082 
1083 	/*
1084 	 * 3) Unlink the source.
1085 	 */
1086 	fndp->ni_nameiop &= ~(MODMASK | OPMASK);
1087 	fndp->ni_nameiop |= DELETE | LOCKPARENT | LOCKLEAF;
1088 	(void)namei(fndp);
1089 	if (fndp->ni_vp != NULL) {
1090 		xp = VTOI(fndp->ni_vp);
1091 		dp = VTOI(fndp->ni_dvp);
1092 	} else {
1093 		/*
1094 		 * From name has disappeared.
1095 		 */
1096 		if (doingdirectory)
1097 			panic("rename: lost dir entry");
1098 		vrele(ITOV(ip));
1099 		return (0);
1100 	}
1101 	/*
1102 	 * Ensure that the directory entry still exists and has not
1103 	 * changed while the new name has been entered. If the source is
1104 	 * a file then the entry may have been unlinked or renamed. In
1105 	 * either case there is no further work to be done. If the source
1106 	 * is a directory then it cannot have been rmdir'ed; its link
1107 	 * count of three would cause a rmdir to fail with ENOTEMPTY.
1108 	 * The IRENAME flag ensures that it cannot be moved by another
1109 	 * rename.
1110 	 */
1111 	if (xp != ip) {
1112 		if (doingdirectory)
1113 			panic("rename: lost dir entry");
1114 	} else {
1115 		/*
1116 		 * If the source is a directory with a
1117 		 * new parent, the link count of the old
1118 		 * parent directory must be decremented
1119 		 * and ".." set to point to the new parent.
1120 		 */
1121 		if (doingdirectory && newparent) {
1122 			dp->i_nlink--;
1123 			dp->i_flag |= ICHG;
1124 			error = vn_rdwr(UIO_READ, ITOV(xp), (caddr_t)&dirbuf,
1125 				sizeof (struct dirtemplate), (off_t)0,
1126 				UIO_SYSSPACE, IO_NODELOCKED,
1127 				tndp->ni_cred, (int *)0);
1128 			if (error == 0) {
1129 				if (dirbuf.dotdot_namlen != 2 ||
1130 				    dirbuf.dotdot_name[0] != '.' ||
1131 				    dirbuf.dotdot_name[1] != '.') {
1132 					dirbad(xp, 12, "rename: mangled dir");
1133 				} else {
1134 					dirbuf.dotdot_ino = newparent;
1135 					(void) vn_rdwr(UIO_WRITE, ITOV(xp),
1136 					    (caddr_t)&dirbuf,
1137 					    sizeof (struct dirtemplate),
1138 					    (off_t)0, UIO_SYSSPACE,
1139 					    IO_NODELOCKED|IO_SYNC,
1140 					    tndp->ni_cred, (int *)0);
1141 					cache_purge(ITOV(dp));
1142 				}
1143 			}
1144 		}
1145 		error = dirremove(fndp);
1146 		if (!error) {
1147 			xp->i_nlink--;
1148 			xp->i_flag |= ICHG;
1149 		}
1150 		xp->i_flag &= ~IRENAME;
1151 	}
1152 	if (dp)
1153 		vput(ITOV(dp));
1154 	if (xp)
1155 		vput(ITOV(xp));
1156 	vrele(ITOV(ip));
1157 	return (error);
1158 
1159 bad:
1160 	if (xp)
1161 		vput(ITOV(xp));
1162 	vput(ITOV(dp));
1163 out:
1164 	ip->i_nlink--;
1165 	ip->i_flag |= ICHG;
1166 	vrele(ITOV(ip));
1167 	return (error);
1168 }
1169 
1170 /*
1171  * A virgin directory (no blushing please).
1172  */
1173 struct dirtemplate mastertemplate = {
1174 	0, 12, 1, ".",
1175 	0, DIRBLKSIZ - 12, 2, ".."
1176 };
1177 
1178 /*
1179  * Mkdir system call
1180  */
1181 ufs_mkdir(ndp, vap)
1182 	struct nameidata *ndp;
1183 	struct vattr *vap;
1184 {
1185 	register struct inode *ip, *dp;
1186 	struct inode *tip;
1187 	struct vnode *dvp;
1188 	struct dirtemplate dirtemplate;
1189 	int error;
1190 	int dmode;
1191 
1192 	dvp = ndp->ni_dvp;
1193 	dp = VTOI(dvp);
1194 	if ((unsigned short)dp->i_nlink >= LINK_MAX) {
1195 		iput(dp);
1196 		return (EMLINK);
1197 	}
1198 	dmode = vap->va_mode&0777;
1199 	dmode |= IFDIR;
1200 	/*
1201 	 * Must simulate part of maknode here
1202 	 * in order to acquire the inode, but
1203 	 * not have it entered in the parent
1204 	 * directory.  The entry is made later
1205 	 * after writing "." and ".." entries out.
1206 	 */
1207 	if (error = ialloc(dp, dirpref(dp->i_fs), dmode, ndp->ni_cred, &tip)) {
1208 		iput(dp);
1209 		return (error);
1210 	}
1211 	ip = tip;
1212 	ip->i_uid = ndp->ni_cred->cr_uid;
1213 	ip->i_gid = dp->i_gid;
1214 #ifdef QUOTA
1215 	if ((error = getinoquota(ip)) ||
1216 	    (error = chkiq(ip, 1, ndp->ni_cred, 0))) {
1217 		ifree(ip, ip->i_number, dmode);
1218 		iput(ip);
1219 		iput(dp);
1220 		return (error);
1221 	}
1222 #endif
1223 	ip->i_flag |= IACC|IUPD|ICHG;
1224 	ip->i_mode = dmode;
1225 	ITOV(ip)->v_type = VDIR;	/* Rest init'd in iget() */
1226 	ip->i_nlink = 2;
1227 	error = iupdat(ip, &time, &time, 1);
1228 
1229 	/*
1230 	 * Bump link count in parent directory
1231 	 * to reflect work done below.  Should
1232 	 * be done before reference is created
1233 	 * so reparation is possible if we crash.
1234 	 */
1235 	dp->i_nlink++;
1236 	dp->i_flag |= ICHG;
1237 	error = iupdat(dp, &time, &time, 1);
1238 
1239 	/*
1240 	 * Initialize directory with "."
1241 	 * and ".." from static template.
1242 	 */
1243 	dirtemplate = mastertemplate;
1244 	dirtemplate.dot_ino = ip->i_number;
1245 	dirtemplate.dotdot_ino = dp->i_number;
1246 	error = vn_rdwr(UIO_WRITE, ITOV(ip), (caddr_t)&dirtemplate,
1247 		sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
1248 		IO_NODELOCKED|IO_SYNC, ndp->ni_cred, (int *)0);
1249 	if (error) {
1250 		dp->i_nlink--;
1251 		dp->i_flag |= ICHG;
1252 		goto bad;
1253 	}
1254 	if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
1255 		panic("mkdir: blksize");     /* XXX - should grow w/balloc() */
1256 	} else {
1257 		ip->i_size = DIRBLKSIZ;
1258 		ip->i_flag |= ICHG;
1259 	}
1260 	/*
1261 	 * Directory all set up, now
1262 	 * install the entry for it in
1263 	 * the parent directory.
1264 	 */
1265 	error = direnter(ip, ndp);
1266 	dp = NULL;
1267 	if (error) {
1268 		ndp->ni_nameiop &= ~(MODMASK | OPMASK);
1269 		ndp->ni_nameiop |= LOOKUP | NOCACHE;
1270 		error = namei(ndp);
1271 		if (!error) {
1272 			dp = VTOI(ndp->ni_vp);
1273 			dp->i_nlink--;
1274 			dp->i_flag |= ICHG;
1275 		}
1276 	}
1277 bad:
1278 	/*
1279 	 * No need to do an explicit itrunc here,
1280 	 * vrele will do this for us because we set
1281 	 * the link count to 0.
1282 	 */
1283 	if (error) {
1284 		ip->i_nlink = 0;
1285 		ip->i_flag |= ICHG;
1286 		iput(ip);
1287 	} else
1288 		ndp->ni_vp = ITOV(ip);
1289 	if (dp)
1290 		iput(dp);
1291 	return (error);
1292 }
1293 
1294 /*
1295  * Rmdir system call.
1296  */
1297 ufs_rmdir(ndp)
1298 	register struct nameidata *ndp;
1299 {
1300 	register struct inode *ip, *dp;
1301 	int error = 0;
1302 
1303 	ip = VTOI(ndp->ni_vp);
1304 	dp = VTOI(ndp->ni_dvp);
1305 	/*
1306 	 * No rmdir "." please.
1307 	 */
1308 	if (dp == ip) {
1309 		vrele(ITOV(dp));
1310 		iput(ip);
1311 		return (EINVAL);
1312 	}
1313 	/*
1314 	 * Verify the directory is empty (and valid).
1315 	 * (Rmdir ".." won't be valid since
1316 	 *  ".." will contain a reference to
1317 	 *  the current directory and thus be
1318 	 *  non-empty.)
1319 	 */
1320 	if (ip->i_nlink != 2 || !dirempty(ip, dp->i_number, ndp->ni_cred)) {
1321 		error = ENOTEMPTY;
1322 		goto out;
1323 	}
1324 	/*
1325 	 * Delete reference to directory before purging
1326 	 * inode.  If we crash in between, the directory
1327 	 * will be reattached to lost+found,
1328 	 */
1329 	if (error = dirremove(ndp))
1330 		goto out;
1331 	dp->i_nlink--;
1332 	dp->i_flag |= ICHG;
1333 	cache_purge(ITOV(dp));
1334 	iput(dp);
1335 	ndp->ni_dvp = NULL;
1336 	/*
1337 	 * Truncate inode.  The only stuff left
1338 	 * in the directory is "." and "..".  The
1339 	 * "." reference is inconsequential since
1340 	 * we're quashing it.  The ".." reference
1341 	 * has already been adjusted above.  We've
1342 	 * removed the "." reference and the reference
1343 	 * in the parent directory, but there may be
1344 	 * other hard links so decrement by 2 and
1345 	 * worry about them later.
1346 	 */
1347 	ip->i_nlink -= 2;
1348 	error = itrunc(ip, (u_long)0, IO_SYNC);
1349 	cache_purge(ITOV(ip));
1350 out:
1351 	if (ndp->ni_dvp)
1352 		iput(dp);
1353 	iput(ip);
1354 	return (error);
1355 }
1356 
1357 /*
1358  * symlink -- make a symbolic link
1359  */
1360 ufs_symlink(ndp, vap, target)
1361 	struct nameidata *ndp;
1362 	struct vattr *vap;
1363 	char *target;
1364 {
1365 	struct inode *ip;
1366 	int error;
1367 
1368 	error = maknode(IFLNK | vap->va_mode, ndp, &ip);
1369 	if (error)
1370 		return (error);
1371 	error = vn_rdwr(UIO_WRITE, ITOV(ip), target, strlen(target), (off_t)0,
1372 		UIO_SYSSPACE, IO_NODELOCKED, ndp->ni_cred, (int *)0);
1373 	iput(ip);
1374 	return (error);
1375 }
1376 
1377 /*
1378  * Vnode op for read and write
1379  */
1380 ufs_readdir(vp, uio, cred, eofflagp)
1381 	struct vnode *vp;
1382 	register struct uio *uio;
1383 	struct ucred *cred;
1384 	int *eofflagp;
1385 {
1386 	int count, lost, error;
1387 
1388 	count = uio->uio_resid;
1389 	count &= ~(DIRBLKSIZ - 1);
1390 	lost = uio->uio_resid - count;
1391 	if (count < DIRBLKSIZ || (uio->uio_offset & (DIRBLKSIZ -1)))
1392 		return (EINVAL);
1393 	uio->uio_resid = count;
1394 	uio->uio_iov->iov_len = count;
1395 	error = ufs_read(vp, uio, 0, cred);
1396 	uio->uio_resid += lost;
1397 	if ((VTOI(vp)->i_size - uio->uio_offset) <= 0)
1398 		*eofflagp = 1;
1399 	else
1400 		*eofflagp = 0;
1401 	return (error);
1402 }
1403 
/*
 * Return target name of a symbolic link.
 *
 * The link text is read through ufs_read() with no I/O flags set.
 */
ufs_readlink(vp, uiop, cred)
	struct vnode *vp;
	struct uio *uiop;
	struct ucred *cred;
{
	int error;

	error = ufs_read(vp, uiop, 0, cred);
	return (error);
}
1415 
/*
 * Ufs abort op, called after namei() when a CREATE/DELETE is not
 * actually carried out.  There is nothing to undo at present, so
 * this simply reports success.
 */
/* ARGSUSED */
ufs_abortop(ndp)
	struct nameidata *ndp;
{
	int status = 0;

	return (status);
}
1427 
/*
 * Lock the inode behind a vnode.  Always returns 0.
 */
ufs_lock(vp)
	struct vnode *vp;
{
	struct inode *ip;

	ip = VTOI(vp);
	ILOCK(ip);
	return (0);
}
1439 
1440 /*
1441  * Unlock an inode.
1442  */
1443 ufs_unlock(vp)
1444 	struct vnode *vp;
1445 {
1446 	register struct inode *ip = VTOI(vp);
1447 
1448 	if (!(ip->i_flag & ILOCKED))
1449 		panic("ufs_unlock NOT LOCKED");
1450 	IUNLOCK(ip);
1451 	return (0);
1452 }
1453 
1454 /*
1455  * Check for a locked inode.
1456  */
1457 ufs_islocked(vp)
1458 	struct vnode *vp;
1459 {
1460 
1461 	if (VTOI(vp)->i_flag & ILOCKED)
1462 		return (1);
1463 	return (0);
1464 }
1465 
1466 /*
1467  * Get access to bmap
1468  */
1469 ufs_bmap(vp, bn, vpp, bnp)
1470 	struct vnode *vp;
1471 	daddr_t bn;
1472 	struct vnode **vpp;
1473 	daddr_t *bnp;
1474 {
1475 	struct inode *ip = VTOI(vp);
1476 
1477 	if (vpp != NULL)
1478 		*vpp = ip->i_devvp;
1479 	if (bnp == NULL)
1480 		return (0);
1481 	return (bmap(ip, bn, bnp));
1482 }
1483 
1484 /*
1485  * Calculate the logical to physical mapping if not done already,
1486  * then call the device strategy routine.
1487  */
1488 int checkoverlap = 0;
1489 
1490 ufs_strategy(bp)
1491 	register struct buf *bp;
1492 {
1493 	register struct inode *ip = VTOI(bp->b_vp);
1494 	struct vnode *vp;
1495 	int error;
1496 
1497 	if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR)
1498 		panic("ufs_strategy: spec");
1499 	if (bp->b_blkno == bp->b_lblkno) {
1500 		if (error = bmap(ip, bp->b_lblkno, &bp->b_blkno))
1501 			return (error);
1502 		if ((long)bp->b_blkno == -1)
1503 			clrbuf(bp);
1504 	}
1505 	if ((long)bp->b_blkno == -1) {
1506 		biodone(bp);
1507 		return (0);
1508 	}
1509 #ifdef DIAGNOSTIC
1510 	if (checkoverlap) {
1511 		register struct buf *ep;
1512 		struct buf *ebp;
1513 		daddr_t start, last;
1514 
1515 		ebp = &buf[nbuf];
1516 		start = bp->b_blkno;
1517 		last = start + btodb(bp->b_bcount) - 1;
1518 		for (ep = buf; ep < ebp; ep++) {
1519 			if (ep == bp || (ep->b_flags & B_INVAL) ||
1520 			    ep->b_vp == NULLVP)
1521 				continue;
1522 			if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0))
1523 				continue;
1524 			if (vp != ip->i_devvp)
1525 				continue;
1526 			/* look for overlap */
1527 			if (ep->b_bcount == 0 || ep->b_blkno > last ||
1528 			    ep->b_blkno + btodb(ep->b_bcount) <= start)
1529 				continue;
1530 			vprint("Disk overlap", vp);
1531 			printf("\tstart %d, end %d overlap start %d, end %d\n",
1532 				start, last, ep->b_blkno,
1533 				ep->b_blkno + btodb(ep->b_bcount) - 1);
1534 			panic("Disk buffer overlap");
1535 		}
1536 	}
1537 #endif /* DIAGNOSTIC */
1538 	vp = ip->i_devvp;
1539 	bp->b_dev = vp->v_rdev;
1540 	(*(vp->v_op->vn_strategy))(bp);
1541 	return (0);
1542 }
1543 
1544 /*
1545  * Print out the contents of an inode.
1546  */
1547 ufs_print(vp)
1548 	struct vnode *vp;
1549 {
1550 	register struct inode *ip = VTOI(vp);
1551 
1552 	printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number,
1553 		major(ip->i_dev), minor(ip->i_dev));
1554 #ifdef FIFO
1555 	if (vp->v_type == VFIFO)
1556 		fifo_printinfo(vp);
1557 #endif /* FIFO */
1558 	printf("%s\n", (ip->i_flag & ILOCKED) ? " (LOCKED)" : "");
1559 	if (ip->i_spare0 == 0)
1560 		return;
1561 	printf("\towner pid %d", ip->i_spare0);
1562 	if (ip->i_spare1)
1563 		printf(" waiting pid %d", ip->i_spare1);
1564 	printf("\n");
1565 }
1566 
1567 /*
1568  * Read wrapper for special devices.
1569  */
1570 ufsspec_read(vp, uio, ioflag, cred)
1571 	struct vnode *vp;
1572 	struct uio *uio;
1573 	int ioflag;
1574 	struct ucred *cred;
1575 {
1576 
1577 	/*
1578 	 * Set access flag.
1579 	 */
1580 	VTOI(vp)->i_flag |= IACC;
1581 	return (spec_read(vp, uio, ioflag, cred));
1582 }
1583 
1584 /*
1585  * Write wrapper for special devices.
1586  */
1587 ufsspec_write(vp, uio, ioflag, cred)
1588 	struct vnode *vp;
1589 	struct uio *uio;
1590 	int ioflag;
1591 	struct ucred *cred;
1592 {
1593 
1594 	/*
1595 	 * Set update and change flags.
1596 	 */
1597 	VTOI(vp)->i_flag |= IUPD|ICHG;
1598 	return (spec_write(vp, uio, ioflag, cred));
1599 }
1600 
1601 /*
1602  * Close wrapper for special devices.
1603  *
1604  * Update the times on the inode then do device close.
1605  */
1606 ufsspec_close(vp, fflag, cred)
1607 	struct vnode *vp;
1608 	int fflag;
1609 	struct ucred *cred;
1610 {
1611 	register struct inode *ip = VTOI(vp);
1612 
1613 	if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
1614 		ITIMES(ip, &time, &time);
1615 	return (spec_close(vp, fflag, cred));
1616 }
1617 
1618 #ifdef FIFO
1619 /*
1620  * Read wrapper for fifo's
1621  */
1622 ufsfifo_read(vp, uio, ioflag, cred)
1623 	struct vnode *vp;
1624 	struct uio *uio;
1625 	int ioflag;
1626 	struct ucred *cred;
1627 {
1628 
1629 	/*
1630 	 * Set access flag.
1631 	 */
1632 	VTOI(vp)->i_flag |= IACC;
1633 	return (fifo_read(vp, uio, ioflag, cred));
1634 }
1635 
1636 /*
1637  * Write wrapper for fifo's.
1638  */
1639 ufsfifo_write(vp, uio, ioflag, cred)
1640 	struct vnode *vp;
1641 	struct uio *uio;
1642 	int ioflag;
1643 	struct ucred *cred;
1644 {
1645 
1646 	/*
1647 	 * Set update and change flags.
1648 	 */
1649 	VTOI(vp)->i_flag |= IUPD|ICHG;
1650 	return (fifo_write(vp, uio, ioflag, cred));
1651 }
1652 
1653 /*
1654  * Close wrapper for fifo's.
1655  *
1656  * Update the times on the inode then do device close.
1657  */
1658 ufsfifo_close(vp, fflag, cred)
1659 	struct vnode *vp;
1660 	int fflag;
1661 	struct ucred *cred;
1662 {
1663 	register struct inode *ip = VTOI(vp);
1664 
1665 	if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
1666 		ITIMES(ip, &time, &time);
1667 	return (fifo_close(vp, fflag, cred));
1668 }
1669 #endif /* FIFO */
1670 
1671 /*
1672  * Make a new file.
1673  */
1674 maknode(mode, ndp, ipp)
1675 	int mode;
1676 	register struct nameidata *ndp;
1677 	struct inode **ipp;
1678 {
1679 	register struct inode *ip;
1680 	struct inode *tip;
1681 	register struct inode *pdir = VTOI(ndp->ni_dvp);
1682 	ino_t ipref;
1683 	int error;
1684 
1685 	*ipp = 0;
1686 	if ((mode & IFMT) == 0)
1687 		mode |= IFREG;
1688 	if ((mode & IFMT) == IFDIR)
1689 		ipref = dirpref(pdir->i_fs);
1690 	else
1691 		ipref = pdir->i_number;
1692 	if (error = ialloc(pdir, ipref, mode, ndp->ni_cred, &tip)) {
1693 		iput(pdir);
1694 		return (error);
1695 	}
1696 	ip = tip;
1697 	ip->i_uid = ndp->ni_cred->cr_uid;
1698 	ip->i_gid = pdir->i_gid;
1699 #ifdef QUOTA
1700 	if ((error = getinoquota(ip)) ||
1701 	    (error = chkiq(ip, 1, ndp->ni_cred, 0))) {
1702 		ifree(ip, ip->i_number, mode);
1703 		iput(ip);
1704 		iput(pdir);
1705 		return (error);
1706 	}
1707 #endif
1708 	ip->i_flag |= IACC|IUPD|ICHG;
1709 	ip->i_mode = mode;
1710 	ITOV(ip)->v_type = IFTOVT(mode);	/* Rest init'd in iget() */
1711 	ip->i_nlink = 1;
1712 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, ndp->ni_cred) &&
1713 	    suser(ndp->ni_cred, NULL))
1714 		ip->i_mode &= ~ISGID;
1715 
1716 	/*
1717 	 * Make sure inode goes to disk before directory entry.
1718 	 */
1719 	if (error = iupdat(ip, &time, &time, 1))
1720 		goto bad;
1721 	if (error = direnter(ip, ndp)) {
1722 		pdir = NULL;
1723 		goto bad;
1724 	}
1725 	*ipp = ip;
1726 	return (0);
1727 
1728 bad:
1729 	/*
1730 	 * Write error occurred trying to update the inode
1731 	 * or the directory so must deallocate the inode.
1732 	 */
1733 	if (pdir)
1734 		iput(pdir);
1735 	ip->i_nlink = 0;
1736 	ip->i_flag |= ICHG;
1737 	iput(ip);
1738 	return (error);
1739 }
1740 
1741 /*
1742  * Advisory record locking support
1743  */
1744 ufs_advlock(vp, id, op, fl, flags)
1745 	struct vnode *vp;
1746 	caddr_t id;
1747 	int op;
1748 	register struct flock *fl;
1749 	int flags;
1750 {
1751 	register struct inode *ip = VTOI(vp);
1752 	register struct lockf *lock;
1753 	off_t start, end;
1754 	int error;
1755 
1756 	/*
1757 	 * Avoid the common case of unlocking when inode has no locks.
1758 	 */
1759 	if (ip->i_lockf == (struct lockf *)0) {
1760 		if (op != F_SETLK) {
1761 			fl->l_type = F_UNLCK;
1762 			return (0);
1763 		}
1764 	}
1765 	/*
1766 	 * Convert the flock structure into a start and end.
1767 	 */
1768 	switch (fl->l_whence) {
1769 
1770 	case SEEK_SET:
1771 	case SEEK_CUR:
1772 		/*
1773 		 * Caller is responsible for adding any necessary offset
1774 		 * when SEEK_CUR is used.
1775 		 */
1776 		start = fl->l_start;
1777 		break;
1778 
1779 	case SEEK_END:
1780 		start = ip->i_size + fl->l_start;
1781 		break;
1782 
1783 	default:
1784 		return (EINVAL);
1785 	}
1786 	if (start < 0)
1787 		return (EINVAL);
1788 	if (fl->l_len == 0)
1789 		end = -1;
1790 	else
1791 		end = start + fl->l_len - 1;
1792 	/*
1793 	 * Create the lockf structure
1794 	 */
1795 	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
1796 	lock->lf_start = start;
1797 	lock->lf_end = end;
1798 	lock->lf_id = id;
1799 	lock->lf_inode = ip;
1800 	lock->lf_type = fl->l_type;
1801 	lock->lf_next = (struct lockf *)0;
1802 	lock->lf_block = (struct lockf *)0;
1803 	lock->lf_flags = flags;
1804 	/*
1805 	 * Do the requested operation.
1806 	 */
1807 	switch(op) {
1808 	case F_SETLK:
1809 		return (ufs_setlock(lock));
1810 
1811 	case F_UNLCK:
1812 		return (ufs_advunlock(lock));
1813 
1814 	case F_GETLK:
1815 		return (ufs_advgetlock(lock, fl));
1816 
1817 	default:
1818 		free(lock, M_LOCKF);
1819 		return (EINVAL);
1820 	}
1821 	/* NOTREACHED */
1822 }
1823 
1824 /*
1825  * This variable controls the maximum number of processes that will
1826  * be checked in doing deadlock detection.
1827  */
1828 int maxlockdepth = MAXDEPTH;
1829 
1830 /*
1831  * Set a byte-range lock.
1832  */
1833 ufs_setlock(lock)
1834 	register struct lockf *lock;
1835 {
1836 	register struct inode *ip = lock->lf_inode;
1837 	register struct lockf *block;
1838 	static char lockstr[] = "lockf";
1839 	int priority, error;
1840 
1841 #ifdef LOCKF_DEBUG
1842 	if (lockf_debug & 4)
1843 		lf_print("ufs_setlock", lock);
1844 #endif /* LOCKF_DEBUG */
1845 
1846 	/*
1847 	 * Set the priority
1848 	 */
1849 	priority = PLOCK;
1850 	if ((lock->lf_type & F_WRLCK) == 0)
1851 		priority += 4;
1852 	priority |= PCATCH;
1853 	/*
1854 	 * Scan lock list for this file looking for locks that would block us.
1855 	 */
1856 	while (block = lf_getblock(lock)) {
1857 		/*
1858 		 * Free the structure and return if nonblocking.
1859 		 */
1860 		if ((lock->lf_flags & F_WAIT) == 0) {
1861 			free(lock, M_LOCKF);
1862 			return (EAGAIN);
1863 		}
1864 		/*
1865 		 * We are blocked. Since flock style locks cover
1866 		 * the whole file, there is no chance for deadlock.
1867 		 * For byte-range locks we must check for deadlock.
1868 		 *
1869 		 * Deadlock detection is done by looking through the
1870 		 * wait channels to see if there are any cycles that
1871 		 * involve us. MAXDEPTH is set just to make sure we
1872 		 * do not go off into neverland.
1873 		 */
1874 		if ((lock->lf_flags & F_POSIX) &&
1875 		    (block->lf_flags & F_POSIX)) {
1876 			register struct proc *wproc;
1877 			register struct lockf *waitblock;
1878 			int i = 0;
1879 
1880 			/* The block is waiting on something */
1881 			wproc = (struct proc *)block->lf_id;
1882 			while (wproc->p_wchan &&
1883 			       (wproc->p_wmesg == lockstr) &&
1884 			       (i++ < maxlockdepth)) {
1885 				waitblock = (struct lockf *)wproc->p_wchan;
1886 				/* Get the owner of the blocking lock */
1887 				waitblock = waitblock->lf_next;
1888 				if ((waitblock->lf_flags & F_POSIX) == 0)
1889 					break;
1890 				wproc = (struct proc *)waitblock->lf_id;
1891 				if (wproc == (struct proc *)lock->lf_id) {
1892 					free(lock, M_LOCKF);
1893 					return (EDEADLK);
1894 				}
1895 			}
1896 		}
1897 		/*
1898 		 * Add our lock to the blocked
1899 		 * list and sleep until we're free.
1900 		 */
1901 #ifdef LOCKF_DEBUG
1902 		if (lockf_debug & 4)
1903 			lf_print("ufs_advlock: blocking on", block);
1904 #endif /* LOCKF_DEBUG */
1905 		/*
1906 		 * Remember who blocked us (for deadlock detection)
1907 		 */
1908 		lock->lf_next = block;
1909 		lf_addblock(block, lock);
1910 		if (error = tsleep((caddr_t *)lock, priority, lockstr, 0)) {
1911 			free(lock, M_LOCKF);
1912 			return (error);
1913 		}
1914 	}
1915 	/*
1916 	 * No blocks!!  Add the lock.  Note that addlock will
1917 	 * downgrade or upgrade any overlapping locks this
1918 	 * process already owns.
1919 	 */
1920 	lf_addlock(lock);
1921 #ifdef LOCKF_DEBUG
1922 	if (lockf_debug & 4) {
1923 		lf_print("ufs_advlock: got the lock", lock);
1924 		lf_printlist(lock);
1925 	}
1926 #endif /* LOCKF_DEBUG */
1927 	return (0);
1928 }
1929 
1930 /*
1931  * Remove a byte-range lock on an inode.
1932  */
1933 ufs_advunlock(lock)
1934 	struct lockf *lock;
1935 {
1936 	struct lockf *blocklist;
1937 
1938 	if (lock->lf_inode->i_lockf == (struct lockf *)0)
1939 		return (0);
1940 #ifdef LOCKF_DEBUG
1941 	if (lockf_debug & 4)
1942 		lf_print("ufs_advunlock", lock);
1943 #endif /* LOCKF_DEBUG */
1944 	/*
1945 	 * Generally, find the lock (or an overlap to that lock)
1946 	 * and remove it (or shrink it), then wakeup anyone we can.
1947 	 */
1948 	blocklist = lf_remove(lock);
1949 #ifdef LOCKF_DEBUG
1950 	lf_printlist(lock);
1951 #endif /* LOCKF_DEBUG */
1952 	FREE(lock, M_LOCKF);
1953 	lf_wakelock(blocklist);
1954 	return (0);
1955 }
1956 
1957 /*
1958  * Return the blocking pid
1959  */
1960 ufs_advgetlock(lock, fl)
1961 	register struct lockf *lock;
1962 	register struct flock *fl;
1963 {
1964 	register struct lockf *block;
1965 	off_t start, end;
1966 
1967 #ifdef LOCKF_DEBUG
1968 	if (lockf_debug & 4)
1969 		lf_print("ufs_advgetlock", lock);
1970 #endif /* LOCKF_DEBUG */
1971 
1972 	if (block = lf_getblock(lock)) {
1973 		fl->l_type = block->lf_type;
1974 		fl->l_whence = SEEK_SET;
1975 		fl->l_start = block->lf_start;
1976 		if (block->lf_end == -1)
1977 			fl->l_len = 0;
1978 		else
1979 			fl->l_len = block->lf_end - block->lf_start + 1;
1980 		if (block->lf_flags & F_POSIX)
1981 			fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
1982 		else
1983 			fl->l_pid = -1;
1984 	}
1985 	FREE(lock, M_LOCKF);
1986 	return (0);
1987 }
1988