xref: /dragonfly/sys/vfs/nfs/nfs_vnops.c (revision 47492050)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
33  * $FreeBSD: src/sys/nfs/nfs_vnops.c,v 1.150.2.5 2001/12/20 19:56:28 dillon Exp $
34  */
35 
36 
37 /*
38  * vnode op calls for Sun NFS version 2 and 3
39  */
40 
41 #include "opt_inet.h"
42 
43 #include <sys/param.h>
44 #include <sys/kernel.h>
45 #include <sys/systm.h>
46 #include <sys/resourcevar.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/namei.h>
53 #include <sys/nlookup.h>
54 #include <sys/socket.h>
55 #include <sys/vnode.h>
56 #include <sys/dirent.h>
57 #include <sys/fcntl.h>
58 #include <sys/lockf.h>
59 #include <sys/stat.h>
60 #include <sys/sysctl.h>
61 #include <sys/conf.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 
66 #include <sys/buf2.h>
67 
68 #include <vfs/fifofs/fifo.h>
69 #include <vfs/ufs/dir.h>
70 
71 #undef DIRBLKSIZ
72 
73 #include "rpcv2.h"
74 #include "nfsproto.h"
75 #include "nfs.h"
76 #include "nfsmount.h"
77 #include "nfsnode.h"
78 #include "xdr_subs.h"
79 #include "nfsm_subs.h"
80 
81 #include <net/if.h>
82 #include <netinet/in.h>
83 #include <netinet/in_var.h>
84 
85 /* Defs */
86 #define	TRUE	1
87 #define	FALSE	0
88 
89 static int	nfsfifo_read (struct vop_read_args *);
90 static int	nfsfifo_write (struct vop_write_args *);
91 static int	nfsfifo_close (struct vop_close_args *);
92 static int	nfs_setattrrpc (struct vnode *,struct vattr *,struct ucred *,struct thread *);
93 static	int	nfs_lookup (struct vop_old_lookup_args *);
94 static	int	nfs_create (struct vop_old_create_args *);
95 static	int	nfs_mknod (struct vop_old_mknod_args *);
96 static	int	nfs_open (struct vop_open_args *);
97 static	int	nfs_close (struct vop_close_args *);
98 static	int	nfs_access (struct vop_access_args *);
99 static	int	nfs_getattr (struct vop_getattr_args *);
100 static	int	nfs_setattr (struct vop_setattr_args *);
101 static	int	nfs_read (struct vop_read_args *);
102 static	int	nfs_fsync (struct vop_fsync_args *);
103 static	int	nfs_remove (struct vop_old_remove_args *);
104 static	int	nfs_link (struct vop_old_link_args *);
105 static	int	nfs_rename (struct vop_old_rename_args *);
106 static	int	nfs_mkdir (struct vop_old_mkdir_args *);
107 static	int	nfs_rmdir (struct vop_old_rmdir_args *);
108 static	int	nfs_symlink (struct vop_old_symlink_args *);
109 static	int	nfs_readdir (struct vop_readdir_args *);
110 static	int	nfs_bmap (struct vop_bmap_args *);
111 static	int	nfs_strategy (struct vop_strategy_args *);
112 static	int	nfs_lookitup (struct vnode *, const char *, int,
113 			struct ucred *, struct thread *, struct nfsnode **);
114 static	int	nfs_sillyrename (struct vnode *,struct vnode *,struct componentname *);
115 static int	nfs_laccess (struct vop_access_args *);
116 static int	nfs_readlink (struct vop_readlink_args *);
117 static int	nfs_print (struct vop_print_args *);
118 static int	nfs_advlock (struct vop_advlock_args *);
119 static int	nfs_kqfilter (struct vop_kqfilter_args *ap);
120 
121 static	int	nfs_nresolve (struct vop_nresolve_args *);
122 /*
123  * Global vfs data structures for nfs
124  */
125 struct vop_ops nfsv2_vnode_vops = {
126 	.vop_default =		vop_defaultop,
127 	.vop_access =		nfs_access,
128 	.vop_advlock =		nfs_advlock,
129 	.vop_bmap =		nfs_bmap,
130 	.vop_close =		nfs_close,
131 	.vop_old_create =	nfs_create,
132 	.vop_fsync =		nfs_fsync,
133 	.vop_getattr =		nfs_getattr,
134 	.vop_getpages =		vop_stdgetpages,
135 	.vop_putpages =		vop_stdputpages,
136 	.vop_inactive =		nfs_inactive,
137 	.vop_old_link =		nfs_link,
138 	.vop_old_lookup =	nfs_lookup,
139 	.vop_old_mkdir =	nfs_mkdir,
140 	.vop_old_mknod =	nfs_mknod,
141 	.vop_open =		nfs_open,
142 	.vop_print =		nfs_print,
143 	.vop_read =		nfs_read,
144 	.vop_readdir =		nfs_readdir,
145 	.vop_readlink =		nfs_readlink,
146 	.vop_reclaim =		nfs_reclaim,
147 	.vop_old_remove =	nfs_remove,
148 	.vop_old_rename =	nfs_rename,
149 	.vop_old_rmdir =	nfs_rmdir,
150 	.vop_setattr =		nfs_setattr,
151 	.vop_strategy =		nfs_strategy,
152 	.vop_old_symlink =	nfs_symlink,
153 	.vop_write =		nfs_write,
154 	.vop_nresolve =		nfs_nresolve,
155 	.vop_kqfilter =		nfs_kqfilter
156 };
157 
158 /*
159  * Special device vnode ops
160  */
161 struct vop_ops nfsv2_spec_vops = {
162 	.vop_default =		vop_defaultop,
163 	.vop_access =		nfs_laccess,
164 	.vop_close =		nfs_close,
165 	.vop_fsync =		nfs_fsync,
166 	.vop_getattr =		nfs_getattr,
167 	.vop_inactive =		nfs_inactive,
168 	.vop_print =		nfs_print,
169 	.vop_read =		vop_stdnoread,
170 	.vop_reclaim =		nfs_reclaim,
171 	.vop_setattr =		nfs_setattr,
172 	.vop_write =		vop_stdnowrite
173 };
174 
175 struct vop_ops nfsv2_fifo_vops = {
176 	.vop_default =		fifo_vnoperate,
177 	.vop_access =		nfs_laccess,
178 	.vop_close =		nfsfifo_close,
179 	.vop_fsync =		nfs_fsync,
180 	.vop_getattr =		nfs_getattr,
181 	.vop_inactive =		nfs_inactive,
182 	.vop_print =		nfs_print,
183 	.vop_read =		nfsfifo_read,
184 	.vop_reclaim =		nfs_reclaim,
185 	.vop_setattr =		nfs_setattr,
186 	.vop_write =		nfsfifo_write
187 };
188 
189 static int	nfs_mknodrpc (struct vnode *dvp, struct vnode **vpp,
190 				  struct componentname *cnp,
191 				  struct vattr *vap);
192 static int	nfs_removerpc (struct vnode *dvp, const char *name,
193 				   int namelen,
194 				   struct ucred *cred, struct thread *td);
195 static int	nfs_renamerpc (struct vnode *fdvp, const char *fnameptr,
196 				   int fnamelen, struct vnode *tdvp,
197 				   const char *tnameptr, int tnamelen,
198 				   struct ucred *cred, struct thread *td);
199 static int	nfs_renameit (struct vnode *sdvp,
200 				  struct componentname *scnp,
201 				  struct sillyrename *sp);
202 
203 SYSCTL_DECL(_vfs_nfs);
204 
205 static int nfs_flush_on_rename = 1;
206 SYSCTL_INT(_vfs_nfs, OID_AUTO, flush_on_rename, CTLFLAG_RW,
207 	   &nfs_flush_on_rename, 0, "flush fvp prior to rename");
208 static int nfs_flush_on_hlink = 0;
209 SYSCTL_INT(_vfs_nfs, OID_AUTO, flush_on_hlink, CTLFLAG_RW,
210 	   &nfs_flush_on_hlink, 0, "flush fvp prior to hard link");
211 
212 static int	nfsaccess_cache_timeout = NFS_DEFATTRTIMO;
213 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
214 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
215 
216 static int	nfsneg_cache_timeout = NFS_MINATTRTIMO;
217 SYSCTL_INT(_vfs_nfs, OID_AUTO, neg_cache_timeout, CTLFLAG_RW,
218 	   &nfsneg_cache_timeout, 0, "NFS NEGATIVE NAMECACHE timeout");
219 
220 static int	nfspos_cache_timeout = NFS_MINATTRTIMO;
221 SYSCTL_INT(_vfs_nfs, OID_AUTO, pos_cache_timeout, CTLFLAG_RW,
222 	   &nfspos_cache_timeout, 0, "NFS POSITIVE NAMECACHE timeout");
223 
224 static int	nfsv3_commit_on_close = 0;
225 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
226 	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
227 #if 0
228 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
229 	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
230 
231 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
232 	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
233 #endif
234 
235 #define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
236 			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
237 			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
238 
239 static __inline
240 void
241 nfs_knote(struct vnode *vp, int flags)
242 {
243 	if (flags)
244 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
245 }
246 
247 /*
248  * Returns whether a name component is a degenerate '.' or '..'.
249  */
250 static __inline
251 int
252 nlcdegenerate(struct nlcomponent *nlc)
253 {
254 	if (nlc->nlc_namelen == 1 && nlc->nlc_nameptr[0] == '.')
255 		return(1);
256 	if (nlc->nlc_namelen == 2 &&
257 	    nlc->nlc_nameptr[0] == '.' && nlc->nlc_nameptr[1] == '.')
258 		return(1);
259 	return(0);
260 }
261 
262 static int
263 nfs3_access_otw(struct vnode *vp, int wmode,
264 		struct thread *td, struct ucred *cred)
265 {
266 	struct nfsnode *np = VTONFS(vp);
267 	int attrflag;
268 	int error = 0;
269 	u_int32_t *tl;
270 	u_int32_t rmode;
271 	struct nfsm_info info;
272 
273 	info.mrep = NULL;
274 	info.v3 = 1;
275 
276 	nfsstats.rpccnt[NFSPROC_ACCESS]++;
277 	nfsm_reqhead(&info, vp, NFSPROC_ACCESS,
278 		     NFSX_FH(info.v3) + NFSX_UNSIGNED);
279 	ERROROUT(nfsm_fhtom(&info, vp));
280 	tl = nfsm_build(&info, NFSX_UNSIGNED);
281 	*tl = txdr_unsigned(wmode);
282 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_ACCESS, td, cred, &error));
283 	ERROROUT(nfsm_postop_attr(&info, vp, &attrflag, NFS_LATTR_NOSHRINK));
284 	if (error == 0) {
285 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
286 		rmode = fxdr_unsigned(u_int32_t, *tl);
287 		np->n_mode = rmode;
288 		np->n_modeuid = cred->cr_uid;
289 		np->n_modestamp = mycpu->gd_time_seconds;
290 	}
291 	m_freem(info.mrep);
292 	info.mrep = NULL;
293 nfsmout:
294 	return error;
295 }
296 
297 /*
298  * nfs access vnode op.
299  * For nfs version 2, just return ok. File accesses may fail later.
300  * For nfs version 3, use the access rpc to check accessibility. If file modes
301  * are changed on the server, accesses might still fail later.
302  *
303  * nfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
304  */
305 static int
306 nfs_access(struct vop_access_args *ap)
307 {
308 	struct ucred *cred;
309 	struct vnode *vp = ap->a_vp;
310 	thread_t td = curthread;
311 	int error = 0;
312 	u_int32_t mode, wmode;
313 	struct nfsnode *np = VTONFS(vp);
314 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
315 	int v3 = NFS_ISV3(vp);
316 
317 	lwkt_gettoken(&nmp->nm_token);
318 
319 	/*
320 	 * Disallow write attempts on filesystems mounted read-only;
321 	 * unless the file is a socket, fifo, or a block or character
322 	 * device resident on the filesystem.
323 	 */
324 	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
325 		switch (vp->v_type) {
326 		case VREG:
327 		case VDIR:
328 		case VLNK:
329 			lwkt_reltoken(&nmp->nm_token);
330 			return (EROFS);
331 		default:
332 			break;
333 		}
334 	}
335 
336 	/*
337 	 * The NFS protocol passes only the effective uid/gid over the wire but
338 	 * we need to check access against real ids if AT_EACCESS not set.
339 	 * Handle this case by cloning the credentials and setting the
340 	 * effective ids to the real ones.
341 	 *
342 	 * The crdup() here can cause a lot of ucred structures to build-up
343 	 * (up to maxvnodes), so do our best to avoid it.
344 	 */
345 	if (ap->a_flags & AT_EACCESS) {
346 		cred = crhold(ap->a_cred);
347 	} else {
348 		cred = ap->a_cred;
349 		if (cred->cr_uid == cred->cr_ruid &&
350 		    cred->cr_gid == cred->cr_rgid) {
351 			cred = crhold(ap->a_cred);
352 		} else {
353 			cred = crdup(ap->a_cred);
354 			cred->cr_uid = cred->cr_ruid;
355 			cred->cr_gid = cred->cr_rgid;
356 		}
357 	}
358 
359 	/*
360 	 * For nfs v3, check to see if we have done this recently, and if
361 	 * so return our cached result instead of making an ACCESS call.
362 	 * If not, do an access rpc, otherwise you are stuck emulating
363 	 * ufs_access() locally using the vattr. This may not be correct,
364 	 * since the server may apply other access criteria such as
365 	 * client uid-->server uid mapping that we do not know about.
366 	 */
367 	if (v3) {
368 		if (ap->a_mode & VREAD)
369 			mode = NFSV3ACCESS_READ;
370 		else
371 			mode = 0;
372 		if (vp->v_type != VDIR) {
373 			if (ap->a_mode & VWRITE)
374 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
375 			if (ap->a_mode & VEXEC)
376 				mode |= NFSV3ACCESS_EXECUTE;
377 		} else {
378 			if (ap->a_mode & VWRITE)
379 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
380 					 NFSV3ACCESS_DELETE);
381 			if (ap->a_mode & VEXEC)
382 				mode |= NFSV3ACCESS_LOOKUP;
383 		}
384 		/* XXX safety belt, only make blanket request if caching */
385 		if (nfsaccess_cache_timeout > 0) {
386 			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
387 				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
388 				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
389 		} else {
390 			wmode = mode;
391 		}
392 
393 		/*
394 		 * Does our cached result allow us to give a definite yes to
395 		 * this request?
396 		 */
397 		if (np->n_modestamp &&
398 		   (mycpu->gd_time_seconds < (np->n_modestamp + nfsaccess_cache_timeout)) &&
399 		   (cred->cr_uid == np->n_modeuid) &&
400 		   ((np->n_mode & mode) == mode)) {
401 			nfsstats.accesscache_hits++;
402 		} else {
403 			/*
404 			 * Either a no, or a don't know.  Go to the wire.
405 			 */
406 			nfsstats.accesscache_misses++;
407 		        error = nfs3_access_otw(vp, wmode, td, cred);
408 			if (!error) {
409 				if ((np->n_mode & mode) != mode) {
410 					error = EACCES;
411 				}
412 			}
413 		}
414 	} else {
415 		if ((error = nfs_laccess(ap)) != 0) {
416 			crfree(cred);
417 			lwkt_reltoken(&nmp->nm_token);
418 			return (error);
419 		}
420 
421 		/*
422 		 * Attempt to prevent a mapped root from accessing a file
423 		 * which it shouldn't.  We try to read a byte from the file
424 		 * if the user is root and the file is not zero length.
425 		 * After calling nfs_laccess, we should have the correct
426 		 * file size cached.
427 		 */
428 		if (cred->cr_uid == 0 && (ap->a_mode & VREAD)
429 		    && VTONFS(vp)->n_size > 0) {
430 			struct iovec aiov;
431 			struct uio auio;
432 			char buf[1];
433 
434 			aiov.iov_base = buf;
435 			aiov.iov_len = 1;
436 			auio.uio_iov = &aiov;
437 			auio.uio_iovcnt = 1;
438 			auio.uio_offset = 0;
439 			auio.uio_resid = 1;
440 			auio.uio_segflg = UIO_SYSSPACE;
441 			auio.uio_rw = UIO_READ;
442 			auio.uio_td = td;
443 
444 			if (vp->v_type == VREG) {
445 				error = nfs_readrpc_uio(vp, &auio);
446 			} else if (vp->v_type == VDIR) {
447 				char* bp;
448 				bp = kmalloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
449 				aiov.iov_base = bp;
450 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
451 				error = nfs_readdirrpc_uio(vp, &auio);
452 				kfree(bp, M_TEMP);
453 			} else if (vp->v_type == VLNK) {
454 				error = nfs_readlinkrpc_uio(vp, &auio);
455 			} else {
456 				error = EACCES;
457 			}
458 		}
459 	}
460 	/*
461 	 * [re]record creds for reading and/or writing if access
462 	 * was granted.  Assume the NFS server will grant read access
463 	 * for execute requests.
464 	 */
465 	if (error == 0) {
466 		if ((ap->a_mode & (VREAD|VEXEC)) && cred != np->n_rucred) {
467 			crhold(cred);
468 			if (np->n_rucred)
469 				crfree(np->n_rucred);
470 			np->n_rucred = cred;
471 		}
472 		if ((ap->a_mode & VWRITE) && cred != np->n_wucred) {
473 			crhold(cred);
474 			if (np->n_wucred)
475 				crfree(np->n_wucred);
476 			np->n_wucred = cred;
477 		}
478 	}
479 	lwkt_reltoken(&nmp->nm_token);
480 	crfree(cred);
481 
482 	return(error);
483 }
484 
485 /*
486  * nfs open vnode op
487  * Check to see if the type is ok
488  * and that deletion is not in progress.
489  * For paged in text files, you will need to flush the page cache
490  * if consistency is lost.
491  *
492  * nfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
493  *	    struct file *a_fp)
494  */
495 /* ARGSUSED */
496 static int
497 nfs_open(struct vop_open_args *ap)
498 {
499 	struct vnode *vp = ap->a_vp;
500 	struct nfsnode *np = VTONFS(vp);
501 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
502 	struct vattr vattr;
503 	int error;
504 
505 	lwkt_gettoken(&nmp->nm_token);
506 
507 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
508 #ifdef DIAGNOSTIC
509 		kprintf("open eacces vtyp=%d\n",vp->v_type);
510 #endif
511 		lwkt_reltoken(&nmp->nm_token);
512 		return (EOPNOTSUPP);
513 	}
514 
515 	/*
516 	 * Save valid creds for reading and writing for later RPCs.
517 	 */
518 	if ((ap->a_mode & FREAD) && ap->a_cred != np->n_rucred) {
519 		crhold(ap->a_cred);
520 		if (np->n_rucred)
521 			crfree(np->n_rucred);
522 		np->n_rucred = ap->a_cred;
523 	}
524 	if ((ap->a_mode & FWRITE) && ap->a_cred != np->n_wucred) {
525 		crhold(ap->a_cred);
526 		if (np->n_wucred)
527 			crfree(np->n_wucred);
528 		np->n_wucred = ap->a_cred;
529 	}
530 
531 	/*
532 	 * Clear the attribute cache only if opening with write access.  It
533 	 * is unclear if we should do this at all here, but we certainly
534 	 * should not clear the cache unconditionally simply because a file
535 	 * is being opened.
536 	 */
537 	if (ap->a_mode & FWRITE)
538 		np->n_attrstamp = 0;
539 
540 	/*
541 	 * For normal NFS, reconcile changes made locally verses
542 	 * changes made remotely.  Note that VOP_GETATTR only goes
543 	 * to the wire if the cached attribute has timed out or been
544 	 * cleared.
545 	 *
546 	 * If local modifications have been made clear the attribute
547 	 * cache to force an attribute and modified time check.  If
548 	 * GETATTR detects that the file has been changed by someone
549 	 * other then us it will set NRMODIFIED.
550 	 *
551 	 * If we are opening a directory and local changes have been
552 	 * made we have to invalidate the cache in order to ensure
553 	 * that we get the most up-to-date information from the
554 	 * server.  XXX
555 	 */
556 	if (np->n_flag & NLMODIFIED) {
557 		np->n_attrstamp = 0;
558 		if (vp->v_type == VDIR) {
559 			error = nfs_vinvalbuf(vp, V_SAVE, 1);
560 			if (error == EINTR) {
561 				lwkt_reltoken(&nmp->nm_token);
562 				return (error);
563 			}
564 			nfs_invaldir(vp);
565 		}
566 	}
567 	error = VOP_GETATTR(vp, &vattr);
568 	if (error) {
569 		lwkt_reltoken(&nmp->nm_token);
570 		return (error);
571 	}
572 	if (np->n_flag & NRMODIFIED) {
573 		if (vp->v_type == VDIR)
574 			nfs_invaldir(vp);
575 		error = nfs_vinvalbuf(vp, V_SAVE, 1);
576 		if (error == EINTR) {
577 			lwkt_reltoken(&nmp->nm_token);
578 			return (error);
579 		}
580 		np->n_flag &= ~NRMODIFIED;
581 	}
582 	error = vop_stdopen(ap);
583 	lwkt_reltoken(&nmp->nm_token);
584 
585 	return error;
586 }
587 
588 /*
589  * nfs close vnode op
590  * What an NFS client should do upon close after writing is a debatable issue.
591  * Most NFS clients push delayed writes to the server upon close, basically for
592  * two reasons:
593  * 1 - So that any write errors may be reported back to the client process
594  *     doing the close system call. By far the two most likely errors are
595  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
596  * 2 - To put a worst case upper bound on cache inconsistency between
597  *     multiple clients for the file.
598  * There is also a consistency problem for Version 2 of the protocol w.r.t.
599  * not being able to tell if other clients are writing a file concurrently,
600  * since there is no way of knowing if the changed modify time in the reply
601  * is only due to the write for this client.
602  * (NFS Version 3 provides weak cache consistency data in the reply that
603  *  should be sufficient to detect and handle this case.)
604  *
605  * The current code does the following:
606  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
607  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
608  *                     or commit them (this satisfies 1 and 2 except for the
609  *                     case where the server crashes after this close but
610  *                     before the commit RPC, which is felt to be "good
611  *                     enough". Changing the last argument to nfs_flush() to
612  *                     a 1 would force a commit operation, if it is felt a
613  *                     commit is necessary now.
614  * for NQNFS         - do nothing now, since 2 is dealt with via leases and
615  *                     1 should be dealt with via an fsync() system call for
616  *                     cases where write errors are important.
617  *
618  * nfs_close(struct vnode *a_vp, int a_fflag)
619  */
620 /* ARGSUSED */
621 static int
622 nfs_close(struct vop_close_args *ap)
623 {
624 	struct vnode *vp = ap->a_vp;
625 	struct nfsnode *np = VTONFS(vp);
626 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
627 	int error = 0;
628 	thread_t td = curthread;
629 
630 	vn_lock(vp, LK_UPGRADE | LK_RETRY); /* XXX */
631 	lwkt_gettoken(&nmp->nm_token);
632 
633 	if (vp->v_type == VREG) {
634 	    if (np->n_flag & NLMODIFIED) {
635 		if (NFS_ISV3(vp)) {
636 		    /*
637 		     * Under NFSv3 we have dirty buffers to dispose of.  We
638 		     * must flush them to the NFS server.  We have the option
639 		     * of waiting all the way through the commit rpc or just
640 		     * waiting for the initial write.  The default is to only
641 		     * wait through the initial write so the data is in the
642 		     * server's cache, which is roughly similar to the state
643 		     * a standard disk subsystem leaves the file in on close().
644 		     *
645 		     * We cannot clear the NLMODIFIED bit in np->n_flag due to
646 		     * potential races with other processes, and certainly
647 		     * cannot clear it if we don't commit.
648 		     */
649 		    int cm = nfsv3_commit_on_close ? 1 : 0;
650 		    error = nfs_flush(vp, MNT_WAIT, td, cm);
651 		    /* np->n_flag &= ~NLMODIFIED; */
652 		} else {
653 		    error = nfs_vinvalbuf(vp, V_SAVE, 1);
654 		}
655 		np->n_attrstamp = 0;
656 	    }
657 	    if (np->n_flag & NWRITEERR) {
658 		np->n_flag &= ~NWRITEERR;
659 		error = np->n_error;
660 	    }
661 	}
662 	vop_stdclose(ap);
663 	lwkt_reltoken(&nmp->nm_token);
664 
665 	return (error);
666 }
667 
668 /*
669  * nfs getattr call from vfs.
670  *
671  * nfs_getattr(struct vnode *a_vp, struct vattr *a_vap)
672  */
673 static int
674 nfs_getattr(struct vop_getattr_args *ap)
675 {
676 	struct vnode *vp = ap->a_vp;
677 	struct nfsnode *np = VTONFS(vp);
678 	struct nfsmount *nmp;
679 	int error = 0;
680 	thread_t td = curthread;
681 	struct nfsm_info info;
682 
683 	info.mrep = NULL;
684 	info.v3 = NFS_ISV3(vp);
685 	nmp = VFSTONFS(vp->v_mount);
686 
687 	lwkt_gettoken(&nmp->nm_token);
688 
689 	/*
690 	 * Update local times for special files.
691 	 */
692 	if (np->n_flag & (NACC | NUPD))
693 		np->n_flag |= NCHG;
694 	/*
695 	 * First look in the cache.
696 	 */
697 	if (nfs_getattrcache(vp, ap->a_vap) == 0)
698 		goto done;
699 
700 	if (info.v3 && nfsaccess_cache_timeout > 0) {
701 		nfsstats.accesscache_misses++;
702 		nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, nfs_vpcred(vp, ND_CHECK));
703 		if (nfs_getattrcache(vp, ap->a_vap) == 0)
704 			goto done;
705 	}
706 
707 	nfsstats.rpccnt[NFSPROC_GETATTR]++;
708 	nfsm_reqhead(&info, vp, NFSPROC_GETATTR, NFSX_FH(info.v3));
709 	ERROROUT(nfsm_fhtom(&info, vp));
710 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_GETATTR, td,
711 				nfs_vpcred(vp, ND_CHECK), &error));
712 	if (error == 0) {
713 		ERROROUT(nfsm_loadattr(&info, vp, ap->a_vap));
714 	}
715 	m_freem(info.mrep);
716 	info.mrep = NULL;
717 done:
718 	/*
719 	 * NFS doesn't support chflags flags.  If the nfs mount was
720 	 * made -o cache set the UF_CACHE bit for swapcache.
721 	 */
722 	if ((nmp->nm_flag & NFSMNT_CACHE) && (vp->v_flag & VROOT))
723 		ap->a_vap->va_flags |= UF_CACHE;
724 nfsmout:
725 	lwkt_reltoken(&nmp->nm_token);
726 	return (error);
727 }
728 
729 /*
730  * nfs setattr call.
731  *
732  * nfs_setattr(struct vnode *a_vp, struct vattr *a_vap, struct ucred *a_cred)
733  */
734 static int
735 nfs_setattr(struct vop_setattr_args *ap)
736 {
737 	struct vnode *vp = ap->a_vp;
738 	struct nfsnode *np = VTONFS(vp);
739 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
740 	struct vattr *vap = ap->a_vap;
741 	int error = 0;
742 	int kflags = 0;
743 	off_t tsize;
744 	thread_t td = curthread;
745 
746 #ifndef nolint
747 	tsize = (off_t)0;
748 #endif
749 	/*
750 	 * Setting of flags is not supported.
751 	 */
752 	if (vap->va_flags != VNOVAL)
753 		return (EOPNOTSUPP);
754 
755 	/*
756 	 * Disallow write attempts if the filesystem is mounted read-only.
757 	 */
758   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
759 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
760 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
761 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
762 		return (EROFS);
763 
764 	lwkt_gettoken(&nmp->nm_token);
765 
766 	/*
767 	 * Handle size changes
768 	 */
769 	if (vap->va_size != VNOVAL) {
770 		/*
771 		 * truncation requested
772 		 */
773  		switch (vp->v_type) {
774  		case VDIR:
775 			lwkt_reltoken(&nmp->nm_token);
776  			return (EISDIR);
777  		case VCHR:
778  		case VBLK:
779  		case VSOCK:
780  		case VFIFO:
781 			if (vap->va_mtime.tv_sec == VNOVAL &&
782 			    vap->va_atime.tv_sec == VNOVAL &&
783 			    vap->va_mode == (mode_t)VNOVAL &&
784 			    vap->va_uid == (uid_t)VNOVAL &&
785 			    vap->va_gid == (gid_t)VNOVAL) {
786 				lwkt_reltoken(&nmp->nm_token);
787 				return (0);
788 			}
789  			vap->va_size = VNOVAL;
790  			break;
791  		default:
792 			/*
793 			 * Disallow write attempts if the filesystem is
794 			 * mounted read-only.
795 			 */
796 			if (vp->v_mount->mnt_flag & MNT_RDONLY) {
797 				lwkt_reltoken(&nmp->nm_token);
798 				return (EROFS);
799 			}
800 
801 			tsize = np->n_size;
802 again:
803 			error = nfs_meta_setsize(vp, td, vap->va_size, 0);
804 
805 #if 0
806  			if (np->n_flag & NLMODIFIED) {
807  			    if (vap->va_size == 0)
808  				error = nfs_vinvalbuf(vp, 0, 1);
809  			    else
810  				error = nfs_vinvalbuf(vp, V_SAVE, 1);
811  			}
812 #endif
813 			/*
814 			 * note: this loop case almost always happens at
815 			 * least once per truncation.
816 			 */
817 			if (error == 0 && np->n_size != vap->va_size)
818 				goto again;
819 			np->n_vattr.va_size = vap->va_size;
820 			kflags |= NOTE_WRITE;
821 			if (tsize < vap->va_size)
822 				kflags |= NOTE_EXTEND;
823 			break;
824 		}
825 	}
826 
827 	/*
828 	 * If setting the mtime or if server/other-client modifications have
829 	 * been detected, we must fully flush any pending writes.
830 	 *
831 	 * This will slow down cp/cpdup/rdist/rsync and other operations which
832 	 * might call [l]utimes() to set the mtime after writing to a file,
833 	 * but honestly there is no way to properly defer the write flush
834 	 * and still get reasonably accurate/dependable synchronization of
835 	 * [l]utimes().
836 	 */
837 	if ((np->n_flag & NLMODIFIED) && vp->v_type == VREG) {
838 		if ((np->n_flag & NRMODIFIED) ||
839 		    (vap->va_mtime.tv_sec != VNOVAL)) {
840 			error = nfs_vinvalbuf(vp, V_SAVE, 1);
841 			if (error == EINTR) {
842 				lwkt_reltoken(&nmp->nm_token);
843 				return (error);
844 			}
845 		}
846 	}
847 
848 	/*
849 	 * Get the blasted mtime to report properly.
850 	 */
851 	if (vap->va_mtime.tv_sec != VNOVAL) {
852 		np->n_mtime = vap->va_mtime.tv_sec;
853 		np->n_flag &= ~NUPD;
854 		np->n_vattr.va_mtime = vap->va_mtime;
855 	}
856 
857 	/*
858 	 * Issue the setattr rpc, adjust our mtime and make sure NUPD
859 	 * has been cleared so it does not get overridden.
860 	 */
861 	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
862 	if (error == 0)
863 		kflags |= NOTE_EXTEND;
864 
865 	/*
866 	 * Sanity check if a truncation was issued.  This should only occur
867 	 * if multiple processes are racing on the same file.
868 	 */
869 	if (error == 0 && vap->va_size != VNOVAL &&
870 	    np->n_size != vap->va_size) {
871 		kprintf("NFS ftruncate: server disagrees on the file size: "
872 			"%jd/%jd/%jd\n",
873 			(intmax_t)tsize,
874 			(intmax_t)vap->va_size,
875 			(intmax_t)np->n_size);
876 		goto again;
877 	}
878 	if (error && vap->va_size != VNOVAL) {
879 		np->n_size = np->n_vattr.va_size = tsize;
880 		nfs_meta_setsize(vp, td, np->n_size, 0);
881 	}
882 	lwkt_reltoken(&nmp->nm_token);
883 	nfs_knote(vp, kflags);
884 
885 	return (error);
886 }
887 
888 /*
889  * Do an nfs setattr rpc.
890  */
891 static int
892 nfs_setattrrpc(struct vnode *vp, struct vattr *vap,
893 	       struct ucred *cred, struct thread *td)
894 {
895 	struct nfsv2_sattr *sp;
896 	struct nfsnode *np = VTONFS(vp);
897 	u_int32_t *tl;
898 	int error = 0, wccflag = NFSV3_WCCRATTR;
899 	struct nfsm_info info;
900 
901 	info.mrep = NULL;
902 	info.v3 = NFS_ISV3(vp);
903 
904 	nfsstats.rpccnt[NFSPROC_SETATTR]++;
905 	nfsm_reqhead(&info, vp, NFSPROC_SETATTR,
906 		     NFSX_FH(info.v3) + NFSX_SATTR(info.v3));
907 	ERROROUT(nfsm_fhtom(&info, vp));
908 	if (info.v3) {
909 		nfsm_v3attrbuild(&info, vap, TRUE);
910 		tl = nfsm_build(&info, NFSX_UNSIGNED);
911 		*tl = nfs_false;
912 	} else {
913 		sp = nfsm_build(&info, NFSX_V2SATTR);
914 		if (vap->va_mode == (mode_t)VNOVAL)
915 			sp->sa_mode = nfs_xdrneg1;
916 		else
917 			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
918 		if (vap->va_uid == (uid_t)VNOVAL)
919 			sp->sa_uid = nfs_xdrneg1;
920 		else
921 			sp->sa_uid = txdr_unsigned(vap->va_uid);
922 		if (vap->va_gid == (gid_t)VNOVAL)
923 			sp->sa_gid = nfs_xdrneg1;
924 		else
925 			sp->sa_gid = txdr_unsigned(vap->va_gid);
926 		sp->sa_size = txdr_unsigned(vap->va_size);
927 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
928 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
929 	}
930 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_SETATTR, td, cred, &error));
931 	if (info.v3) {
932 		np->n_modestamp = 0;
933 		ERROROUT(nfsm_wcc_data(&info, vp, &wccflag));
934 	} else {
935 		ERROROUT(nfsm_loadattr(&info, vp, NULL));
936 	}
937 	m_freem(info.mrep);
938 	info.mrep = NULL;
939 nfsmout:
940 	return (error);
941 }
942 
943 static
944 void
945 nfs_cache_setvp(struct nchandle *nch, struct vnode *vp, int nctimeout)
946 {
947 	if (nctimeout == 0)
948 		nctimeout = 1;
949 	else
950 		nctimeout *= hz;
951 	cache_setvp(nch, vp);
952 	cache_settimeout(nch, nctimeout);
953 }
954 
955 /*
956  * NEW API CALL - replaces nfs_lookup().  However, we cannot remove
957  * nfs_lookup() until all remaining new api calls are implemented.
958  *
959  * Resolve a namecache entry.  This function is passed a locked ncp and
960  * must call nfs_cache_setvp() on it as appropriate to resolve the entry.
961  */
962 static int
963 nfs_nresolve(struct vop_nresolve_args *ap)
964 {
965 	struct thread *td = curthread;
966 	struct namecache *ncp;
967 	struct nfsmount *nmp;
968 	struct nfsnode *np;
969 	struct vnode *dvp;
970 	struct vnode *nvp;
971 	nfsfh_t *fhp;
972 	int attrflag;
973 	int fhsize;
974 	int error;
975 	int tmp_error;
976 	int len;
977 	struct nfsm_info info;
978 
979 	dvp = ap->a_dvp;
980 	nmp = VFSTONFS(dvp->v_mount);
981 
982 	lwkt_gettoken(&nmp->nm_token);
983 
984 	if ((error = vget(dvp, LK_SHARED)) != 0) {
985 		lwkt_reltoken(&nmp->nm_token);
986 		return (error);
987 	}
988 
989 	info.mrep = NULL;
990 	info.v3 = NFS_ISV3(dvp);
991 
992 	nvp = NULL;
993 	nfsstats.lookupcache_misses++;
994 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
995 	ncp = ap->a_nch->ncp;
996 	len = ncp->nc_nlen;
997 	nfsm_reqhead(&info, dvp, NFSPROC_LOOKUP,
998 		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(len));
999 	ERROROUT(nfsm_fhtom(&info, dvp));
1000 	ERROROUT(nfsm_strtom(&info, ncp->nc_name, len, NFS_MAXNAMLEN));
1001 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_LOOKUP, td,
1002 				ap->a_cred, &error));
1003 	if (error) {
1004 		/*
1005 		 * Cache negatve lookups to reduce NFS traffic, but use
1006 		 * a fast timeout.  Otherwise use a timeout of 1 tick.
1007 		 * XXX we should add a namecache flag for no-caching
1008 		 * to uncache the negative hit as soon as possible, but
1009 		 * we cannot simply destroy the entry because it is used
1010 		 * as a placeholder by the caller.
1011 		 *
1012 		 * The refactored nfs code will overwrite a non-zero error
1013 		 * with 0 when we use ERROROUT(), so don't here.
1014 		 */
1015 		if (error == ENOENT)
1016 			nfs_cache_setvp(ap->a_nch, NULL, nfsneg_cache_timeout);
1017 		tmp_error = nfsm_postop_attr(&info, dvp, &attrflag,
1018 					     NFS_LATTR_NOSHRINK);
1019 		if (tmp_error) {
1020 			error = tmp_error;
1021 			goto nfsmout;
1022 		}
1023 		m_freem(info.mrep);
1024 		info.mrep = NULL;
1025 		goto nfsmout;
1026 	}
1027 
1028 	/*
1029 	 * Success, get the file handle, do various checks, and load
1030 	 * post-operation data from the reply packet.  Theoretically
1031 	 * we should never be looking up "." so, theoretically, we
1032 	 * should never get the same file handle as our directory.  But
1033 	 * we check anyway. XXX
1034 	 *
1035 	 * Note that no timeout is set for the positive cache hit.  We
1036 	 * assume, theoretically, that ESTALE returns will be dealt with
1037 	 * properly to handle NFS races and in anycase we cannot depend
1038 	 * on a timeout to deal with NFS open/create/excl issues so instead
1039 	 * of a bad hack here the rest of the NFS client code needs to do
1040 	 * the right thing.
1041 	 */
1042 	NEGATIVEOUT(fhsize = nfsm_getfh(&info, &fhp));
1043 
1044 	np = VTONFS(dvp);
1045 	if (NFS_CMPFH(np, fhp, fhsize)) {
1046 		vref(dvp);
1047 		nvp = dvp;
1048 	} else {
1049 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, NULL);
1050 		if (error) {
1051 			m_freem(info.mrep);
1052 			info.mrep = NULL;
1053 			vput(dvp);
1054 			lwkt_reltoken(&nmp->nm_token);
1055 			return (error);
1056 		}
1057 		nvp = NFSTOV(np);
1058 	}
1059 	if (info.v3) {
1060 		ERROROUT(nfsm_postop_attr(&info, nvp, &attrflag,
1061 					  NFS_LATTR_NOSHRINK));
1062 		ERROROUT(nfsm_postop_attr(&info, dvp, &attrflag,
1063 					  NFS_LATTR_NOSHRINK));
1064 	} else {
1065 		ERROROUT(nfsm_loadattr(&info, nvp, NULL));
1066 	}
1067 	nfs_cache_setvp(ap->a_nch, nvp, nfspos_cache_timeout);
1068 	m_freem(info.mrep);
1069 	info.mrep = NULL;
1070 nfsmout:
1071 	lwkt_reltoken(&nmp->nm_token);
1072 	vput(dvp);
1073 	if (nvp) {
1074 		if (nvp == dvp)
1075 			vrele(nvp);
1076 		else
1077 			vput(nvp);
1078 	}
1079 	return (error);
1080 }
1081 
1082 /*
1083  * 'cached' nfs directory lookup
1084  *
1085  * NOTE: cannot be removed until NFS implements all the new n*() API calls.
1086  *
1087  * nfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
1088  *	      struct componentname *a_cnp)
1089  */
1090 static int
1091 nfs_lookup(struct vop_old_lookup_args *ap)
1092 {
1093 	struct componentname *cnp = ap->a_cnp;
1094 	struct vnode *dvp = ap->a_dvp;
1095 	struct vnode **vpp = ap->a_vpp;
1096 	int flags = cnp->cn_flags;
1097 	struct vnode *newvp;
1098 	struct vnode *notvp;
1099 	struct nfsmount *nmp;
1100 	long len;
1101 	nfsfh_t *fhp;
1102 	struct nfsnode *np;
1103 	int lockparent, wantparent, attrflag, fhsize;
1104 	int error;
1105 	int tmp_error;
1106 	struct nfsm_info info;
1107 
1108 	info.mrep = NULL;
1109 	info.v3 = NFS_ISV3(dvp);
1110 	error = 0;
1111 
1112 	notvp = (cnp->cn_flags & CNP_NOTVP) ? cnp->cn_notvp : NULL;
1113 
1114 	/*
1115 	 * Read-only mount check and directory check.
1116 	 */
1117 	*vpp = NULLVP;
1118 	if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
1119 	    (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME))
1120 		return (EROFS);
1121 
1122 	if (dvp->v_type != VDIR)
1123 		return (ENOTDIR);
1124 
1125 	/*
1126 	 * Look it up in the cache.  Note that ENOENT is only returned if we
1127 	 * previously entered a negative hit (see later on).  The additional
1128 	 * nfsneg_cache_timeout check causes previously cached results to
1129 	 * be instantly ignored if the negative caching is turned off.
1130 	 */
1131 	lockparent = flags & CNP_LOCKPARENT;
1132 	wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT);
1133 	nmp = VFSTONFS(dvp->v_mount);
1134 	np = VTONFS(dvp);
1135 
1136 	lwkt_gettoken(&nmp->nm_token);
1137 
1138 	/*
1139 	 * Go to the wire.
1140 	 */
1141 	error = 0;
1142 	newvp = NULLVP;
1143 	nfsstats.lookupcache_misses++;
1144 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
1145 	len = cnp->cn_namelen;
1146 	nfsm_reqhead(&info, dvp, NFSPROC_LOOKUP,
1147 		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(len));
1148 	ERROROUT(nfsm_fhtom(&info, dvp));
1149 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, len, NFS_MAXNAMLEN));
1150 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_LOOKUP, cnp->cn_td,
1151 				cnp->cn_cred, &error));
1152 	if (error) {
1153 		tmp_error = nfsm_postop_attr(&info, dvp, &attrflag,
1154 					     NFS_LATTR_NOSHRINK);
1155 		if (tmp_error) {
1156 			error = tmp_error;
1157 			goto nfsmout;
1158 		}
1159 
1160 		m_freem(info.mrep);
1161 		info.mrep = NULL;
1162 		goto nfsmout;
1163 	}
1164 	NEGATIVEOUT(fhsize = nfsm_getfh(&info, &fhp));
1165 
1166 	/*
1167 	 * Handle RENAME case...
1168 	 */
1169 	if (cnp->cn_nameiop == NAMEI_RENAME && wantparent) {
1170 		if (NFS_CMPFH(np, fhp, fhsize)) {
1171 			m_freem(info.mrep);
1172 			info.mrep = NULL;
1173 			lwkt_reltoken(&nmp->nm_token);
1174 			return (EISDIR);
1175 		}
1176 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, notvp);
1177 		if (error) {
1178 			m_freem(info.mrep);
1179 			info.mrep = NULL;
1180 			lwkt_reltoken(&nmp->nm_token);
1181 			return (error);
1182 		}
1183 		newvp = NFSTOV(np);
1184 		if (info.v3) {
1185 			ERROROUT(nfsm_postop_attr(&info, newvp, &attrflag,
1186 						  NFS_LATTR_NOSHRINK));
1187 			ERROROUT(nfsm_postop_attr(&info, dvp, &attrflag,
1188 						  NFS_LATTR_NOSHRINK));
1189 		} else {
1190 			ERROROUT(nfsm_loadattr(&info, newvp, NULL));
1191 		}
1192 		*vpp = newvp;
1193 		m_freem(info.mrep);
1194 		info.mrep = NULL;
1195 		if (!lockparent) {
1196 			vn_unlock(dvp);
1197 			cnp->cn_flags |= CNP_PDIRUNLOCK;
1198 		}
1199 		lwkt_reltoken(&nmp->nm_token);
1200 		return (0);
1201 	}
1202 
1203 	if (flags & CNP_ISDOTDOT) {
1204 		vn_unlock(dvp);
1205 		cnp->cn_flags |= CNP_PDIRUNLOCK;
1206 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, notvp);
1207 		if (error) {
1208 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1209 			cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1210 			lwkt_reltoken(&nmp->nm_token);
1211 			return (error); /* NOTE: return error from nget */
1212 		}
1213 		newvp = NFSTOV(np);
1214 		if (lockparent) {
1215 			error = vn_lock(dvp, LK_EXCLUSIVE | LK_FAILRECLAIM);
1216 			if (error) {
1217 				vput(newvp);
1218 				lwkt_reltoken(&nmp->nm_token);
1219 				return (error);
1220 			}
1221 			cnp->cn_flags |= CNP_PDIRUNLOCK;
1222 		}
1223 	} else if (NFS_CMPFH(np, fhp, fhsize)) {
1224 		vref(dvp);
1225 		newvp = dvp;
1226 	} else {
1227 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, notvp);
1228 		if (error) {
1229 			m_freem(info.mrep);
1230 			info.mrep = NULL;
1231 			lwkt_reltoken(&nmp->nm_token);
1232 			return (error);
1233 		}
1234 		if (!lockparent) {
1235 			vn_unlock(dvp);
1236 			cnp->cn_flags |= CNP_PDIRUNLOCK;
1237 		}
1238 		newvp = NFSTOV(np);
1239 	}
1240 	if (info.v3) {
1241 		ERROROUT(nfsm_postop_attr(&info, newvp, &attrflag,
1242 					  NFS_LATTR_NOSHRINK));
1243 		ERROROUT(nfsm_postop_attr(&info, dvp, &attrflag,
1244 					  NFS_LATTR_NOSHRINK));
1245 	} else {
1246 		ERROROUT(nfsm_loadattr(&info, newvp, NULL));
1247 	}
1248 #if 0
1249 	/* XXX MOVE TO nfs_nremove() */
1250 	if ((cnp->cn_flags & CNP_MAKEENTRY) &&
1251 	    cnp->cn_nameiop != NAMEI_DELETE) {
1252 		np->n_ctime = np->n_vattr.va_ctime.tv_sec; /* XXX */
1253 	}
1254 #endif
1255 	*vpp = newvp;
1256 	m_freem(info.mrep);
1257 	info.mrep = NULL;
1258 nfsmout:
1259 	if (error) {
1260 		if (newvp != NULLVP) {
1261 			vrele(newvp);
1262 			*vpp = NULLVP;
1263 		}
1264 		if ((cnp->cn_nameiop == NAMEI_CREATE ||
1265 		     cnp->cn_nameiop == NAMEI_RENAME) &&
1266 		    error == ENOENT) {
1267 			if (!lockparent) {
1268 				vn_unlock(dvp);
1269 				cnp->cn_flags |= CNP_PDIRUNLOCK;
1270 			}
1271 			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
1272 				error = EROFS;
1273 			else
1274 				error = EJUSTRETURN;
1275 		}
1276 	}
1277 	lwkt_reltoken(&nmp->nm_token);
1278 	return (error);
1279 }
1280 
1281 /*
1282  * nfs read call.
1283  * Just call nfs_bioread() to do the work.
1284  *
1285  * nfs_read(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1286  *	    struct ucred *a_cred)
1287  */
1288 static int
1289 nfs_read(struct vop_read_args *ap)
1290 {
1291 	struct vnode *vp = ap->a_vp;
1292 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1293 	int error;
1294 
1295 	lwkt_gettoken(&nmp->nm_token);
1296 	error = nfs_bioread(vp, ap->a_uio, ap->a_ioflag);
1297 	lwkt_reltoken(&nmp->nm_token);
1298 
1299 	return error;
1300 }
1301 
1302 /*
1303  * nfs readlink call
1304  *
1305  * nfs_readlink(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
1306  */
1307 static int
1308 nfs_readlink(struct vop_readlink_args *ap)
1309 {
1310 	struct vnode *vp = ap->a_vp;
1311 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1312 	int error;
1313 
1314 	if (vp->v_type != VLNK)
1315 		return (EINVAL);
1316 
1317 	lwkt_gettoken(&nmp->nm_token);
1318 	error = nfs_bioread(vp, ap->a_uio, 0);
1319 	lwkt_reltoken(&nmp->nm_token);
1320 
1321 	return error;
1322 }
1323 
1324 /*
1325  * Do a readlink rpc.
1326  * Called by nfs_doio() from below the buffer cache.
1327  */
1328 int
1329 nfs_readlinkrpc_uio(struct vnode *vp, struct uio *uiop)
1330 {
1331 	int error = 0, len, attrflag;
1332 	struct nfsm_info info;
1333 
1334 	info.mrep = NULL;
1335 	info.v3 = NFS_ISV3(vp);
1336 
1337 	nfsstats.rpccnt[NFSPROC_READLINK]++;
1338 	nfsm_reqhead(&info, vp, NFSPROC_READLINK, NFSX_FH(info.v3));
1339 	ERROROUT(nfsm_fhtom(&info, vp));
1340 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READLINK, uiop->uio_td,
1341 				nfs_vpcred(vp, ND_CHECK), &error));
1342 	if (info.v3) {
1343 		ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
1344 					  NFS_LATTR_NOSHRINK));
1345 	}
1346 	if (!error) {
1347 		NEGATIVEOUT(len = nfsm_strsiz(&info, NFS_MAXPATHLEN));
1348 		if (len == NFS_MAXPATHLEN) {
1349 			struct nfsnode *np = VTONFS(vp);
1350 			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1351 				len = np->n_size;
1352 		}
1353 		ERROROUT(nfsm_mtouio(&info, uiop, len));
1354 	}
1355 	m_freem(info.mrep);
1356 	info.mrep = NULL;
1357 nfsmout:
1358 	return (error);
1359 }
1360 
1361 /*
1362  * nfs synchronous read rpc using UIO
1363  */
1364 int
1365 nfs_readrpc_uio(struct vnode *vp, struct uio *uiop)
1366 {
1367 	u_int32_t *tl;
1368 	struct nfsmount *nmp;
1369 	int error = 0, len, retlen, tsiz, eof, attrflag;
1370 	struct nfsm_info info;
1371 	off_t tmp_off;
1372 
1373 	info.mrep = NULL;
1374 	info.v3 = NFS_ISV3(vp);
1375 
1376 #ifndef nolint
1377 	eof = 0;
1378 #endif
1379 	nmp = VFSTONFS(vp->v_mount);
1380 
1381 	tsiz = uiop->uio_resid;
1382 	tmp_off = uiop->uio_offset + tsiz;
1383 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset)
1384 		return (EFBIG);
1385 	tmp_off = uiop->uio_offset;
1386 	while (tsiz > 0) {
1387 		nfsstats.rpccnt[NFSPROC_READ]++;
1388 		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
1389 		nfsm_reqhead(&info, vp, NFSPROC_READ,
1390 			     NFSX_FH(info.v3) + NFSX_UNSIGNED * 3);
1391 		ERROROUT(nfsm_fhtom(&info, vp));
1392 		tl = nfsm_build(&info, NFSX_UNSIGNED * 3);
1393 		if (info.v3) {
1394 			txdr_hyper(uiop->uio_offset, tl);
1395 			*(tl + 2) = txdr_unsigned(len);
1396 		} else {
1397 			*tl++ = txdr_unsigned(uiop->uio_offset);
1398 			*tl++ = txdr_unsigned(len);
1399 			*tl = 0;
1400 		}
1401 		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READ, uiop->uio_td,
1402 					nfs_vpcred(vp, ND_READ), &error));
1403 		if (info.v3) {
1404 			ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
1405 						 NFS_LATTR_NOSHRINK));
1406 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1407 			eof = fxdr_unsigned(int, *(tl + 1));
1408 		} else {
1409 			ERROROUT(nfsm_loadattr(&info, vp, NULL));
1410 		}
1411 		NEGATIVEOUT(retlen = nfsm_strsiz(&info, len));
1412 		ERROROUT(nfsm_mtouio(&info, uiop, retlen));
1413 		m_freem(info.mrep);
1414 		info.mrep = NULL;
1415 
1416 		/*
1417 		 * Handle short-read from server (NFSv3).  If EOF is not
1418 		 * flagged (and no error occurred), but retlen is less
1419 		 * then the request size, we must zero-fill the remainder.
1420 		 */
1421 		if (retlen < len && info.v3 && eof == 0) {
1422 			ERROROUT(uiomovez(len - retlen, uiop));
1423 			retlen = len;
1424 		}
1425 		tsiz -= retlen;
1426 
1427 		/*
1428 		 * Terminate loop on EOF or zero-length read.
1429 		 *
1430 		 * For NFSv2 a short-read indicates EOF, not zero-fill,
1431 		 * and also terminates the loop.
1432 		 */
1433 		if (info.v3) {
1434 			if (eof || retlen == 0)
1435 				tsiz = 0;
1436 		} else if (retlen < len) {
1437 			tsiz = 0;
1438 		}
1439 	}
1440 nfsmout:
1441 	return (error);
1442 }
1443 
1444 /*
1445  * nfs write call
1446  */
1447 int
1448 nfs_writerpc_uio(struct vnode *vp, struct uio *uiop,
1449 		 int *iomode, int *must_commit)
1450 {
1451 	u_int32_t *tl;
1452 	int32_t backup;
1453 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1454 	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1455 	int  committed = NFSV3WRITE_FILESYNC;
1456 	struct nfsm_info info;
1457 
1458 	info.mrep = NULL;
1459 	info.v3 = NFS_ISV3(vp);
1460 
1461 #ifndef DIAGNOSTIC
1462 	if (uiop->uio_iovcnt != 1)
1463 		panic("nfs: writerpc iovcnt > 1");
1464 #endif
1465 	*must_commit = 0;
1466 	tsiz = uiop->uio_resid;
1467 	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1468 		return (EFBIG);
1469 	while (tsiz > 0) {
1470 		nfsstats.rpccnt[NFSPROC_WRITE]++;
1471 		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1472 		nfsm_reqhead(&info, vp, NFSPROC_WRITE,
1473 			     NFSX_FH(info.v3) + 5 * NFSX_UNSIGNED +
1474 			     nfsm_rndup(len));
1475 		ERROROUT(nfsm_fhtom(&info, vp));
1476 		if (info.v3) {
1477 			tl = nfsm_build(&info, 5 * NFSX_UNSIGNED);
1478 			txdr_hyper(uiop->uio_offset, tl);
1479 			tl += 2;
1480 			*tl++ = txdr_unsigned(len);
1481 			*tl++ = txdr_unsigned(*iomode);
1482 			*tl = txdr_unsigned(len);
1483 		} else {
1484 			u_int32_t x;
1485 
1486 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1487 			/* Set both "begin" and "current" to non-garbage. */
1488 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1489 			*tl++ = x;	/* "begin offset" */
1490 			*tl++ = x;	/* "current offset" */
1491 			x = txdr_unsigned(len);
1492 			*tl++ = x;	/* total to this offset */
1493 			*tl = x;	/* size of this write */
1494 		}
1495 		ERROROUT(nfsm_uiotom(&info, uiop, len));
1496 		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_WRITE, uiop->uio_td,
1497 					nfs_vpcred(vp, ND_WRITE), &error));
1498 		if (info.v3) {
1499 			/*
1500 			 * The write RPC returns a before and after mtime.  The
1501 			 * nfsm_wcc_data() macro checks the before n_mtime
1502 			 * against the before time and stores the after time
1503 			 * in the nfsnode's cached vattr and n_mtime field.
1504 			 * The NRMODIFIED bit will be set if the before
1505 			 * time did not match the original mtime.
1506 			 */
1507 			wccflag = NFSV3_WCCCHK;
1508 			ERROROUT(nfsm_wcc_data(&info, vp, &wccflag));
1509 			if (error == 0) {
1510 				NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF));
1511 				rlen = fxdr_unsigned(int, *tl++);
1512 				if (rlen == 0) {
1513 					error = NFSERR_IO;
1514 					m_freem(info.mrep);
1515 					info.mrep = NULL;
1516 					break;
1517 				} else if (rlen < len) {
1518 					backup = len - rlen;
1519 					uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - backup;
1520 					uiop->uio_iov->iov_len += backup;
1521 					uiop->uio_offset -= backup;
1522 					uiop->uio_resid += backup;
1523 					len = rlen;
1524 				}
1525 				commit = fxdr_unsigned(int, *tl++);
1526 
1527 				/*
1528 				 * Return the lowest committment level
1529 				 * obtained by any of the RPCs.
1530 				 */
1531 				if (committed == NFSV3WRITE_FILESYNC)
1532 					committed = commit;
1533 				else if (committed == NFSV3WRITE_DATASYNC &&
1534 					commit == NFSV3WRITE_UNSTABLE)
1535 					committed = commit;
1536 				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1537 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1538 					NFSX_V3WRITEVERF);
1539 				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
1540 				} else if (bcmp((caddr_t)tl,
1541 				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1542 				    *must_commit = 1;
1543 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1544 					NFSX_V3WRITEVERF);
1545 				}
1546 			}
1547 		} else {
1548 			ERROROUT(nfsm_loadattr(&info, vp, NULL));
1549 		}
1550 		m_freem(info.mrep);
1551 		info.mrep = NULL;
1552 		if (error)
1553 			break;
1554 		tsiz -= len;
1555 	}
1556 nfsmout:
1557 	if (vp->v_mount->mnt_flag & MNT_ASYNC)
1558 		committed = NFSV3WRITE_FILESYNC;
1559 	*iomode = committed;
1560 	if (error)
1561 		uiop->uio_resid = tsiz;
1562 	return (error);
1563 }
1564 
1565 /*
1566  * nfs mknod rpc
1567  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1568  * mode set to specify the file type and the size field for rdev.
1569  */
1570 static int
1571 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
1572 	     struct vattr *vap)
1573 {
1574 	struct nfsv2_sattr *sp;
1575 	u_int32_t *tl;
1576 	struct vnode *newvp = NULL;
1577 	struct nfsnode *np = NULL;
1578 	struct vattr vattr;
1579 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1580 	int rmajor, rminor;
1581 	struct nfsm_info info;
1582 
1583 	info.mrep = NULL;
1584 	info.v3 = NFS_ISV3(dvp);
1585 
1586 	if (vap->va_type == VCHR || vap->va_type == VBLK) {
1587 		rmajor = txdr_unsigned(vap->va_rmajor);
1588 		rminor = txdr_unsigned(vap->va_rminor);
1589 	} else if (vap->va_type == VFIFO || vap->va_type == VSOCK) {
1590 		rmajor = nfs_xdrneg1;
1591 		rminor = nfs_xdrneg1;
1592 	} else {
1593 		return (EOPNOTSUPP);
1594 	}
1595 	if ((error = VOP_GETATTR(dvp, &vattr)) != 0) {
1596 		return (error);
1597 	}
1598 	nfsstats.rpccnt[NFSPROC_MKNOD]++;
1599 	nfsm_reqhead(&info, dvp, NFSPROC_MKNOD,
1600 		     NFSX_FH(info.v3) + 4 * NFSX_UNSIGNED +
1601 		     nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(info.v3));
1602 	ERROROUT(nfsm_fhtom(&info, dvp));
1603 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
1604 			     NFS_MAXNAMLEN));
1605 	if (info.v3) {
1606 		tl = nfsm_build(&info, NFSX_UNSIGNED);
1607 		*tl++ = vtonfsv3_type(vap->va_type);
1608 		nfsm_v3attrbuild(&info, vap, FALSE);
1609 		if (vap->va_type == VCHR || vap->va_type == VBLK) {
1610 			tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
1611 			*tl++ = txdr_unsigned(vap->va_rmajor);
1612 			*tl = txdr_unsigned(vap->va_rminor);
1613 		}
1614 	} else {
1615 		sp = nfsm_build(&info, NFSX_V2SATTR);
1616 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1617 		sp->sa_uid = nfs_xdrneg1;
1618 		sp->sa_gid = nfs_xdrneg1;
1619 		sp->sa_size = makeudev(rmajor, rminor);
1620 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1621 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1622 	}
1623 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_MKNOD, cnp->cn_td,
1624 				cnp->cn_cred, &error));
1625 	if (!error) {
1626 		ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
1627 		if (!gotvp) {
1628 			if (newvp) {
1629 				vput(newvp);
1630 				newvp = NULL;
1631 			}
1632 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1633 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_td, &np);
1634 			if (!error)
1635 				newvp = NFSTOV(np);
1636 		}
1637 	}
1638 	if (info.v3) {
1639 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
1640 	}
1641 	m_freem(info.mrep);
1642 	info.mrep = NULL;
1643 nfsmout:
1644 	if (error) {
1645 		if (newvp)
1646 			vput(newvp);
1647 	} else {
1648 		*vpp = newvp;
1649 	}
1650 	VTONFS(dvp)->n_flag |= NLMODIFIED;
1651 	if (!wccflag)
1652 		VTONFS(dvp)->n_attrstamp = 0;
1653 	return (error);
1654 }
1655 
1656 /*
1657  * nfs mknod vop
1658  * just call nfs_mknodrpc() to do the work.
1659  *
1660  * nfs_mknod(struct vnode *a_dvp, struct vnode **a_vpp,
1661  *	     struct componentname *a_cnp, struct vattr *a_vap)
1662  */
1663 /* ARGSUSED */
1664 static int
1665 nfs_mknod(struct vop_old_mknod_args *ap)
1666 {
1667 	struct nfsmount *nmp = VFSTONFS(ap->a_dvp->v_mount);
1668 	int error;
1669 
1670 	lwkt_gettoken(&nmp->nm_token);
1671 	error = nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
1672 	lwkt_reltoken(&nmp->nm_token);
1673 	if (error == 0)
1674 		nfs_knote(ap->a_dvp, NOTE_WRITE);
1675 
1676 	return error;
1677 }
1678 
1679 static u_long create_verf;
1680 /*
1681  * nfs file create call
1682  *
1683  * nfs_create(struct vnode *a_dvp, struct vnode **a_vpp,
1684  *	      struct componentname *a_cnp, struct vattr *a_vap)
1685  */
1686 static int
1687 nfs_create(struct vop_old_create_args *ap)
1688 {
1689 	struct vnode *dvp = ap->a_dvp;
1690 	struct vattr *vap = ap->a_vap;
1691 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
1692 	struct componentname *cnp = ap->a_cnp;
1693 	struct nfsv2_sattr *sp;
1694 	u_int32_t *tl;
1695 	struct nfsnode *np = NULL;
1696 	struct vnode *newvp = NULL;
1697 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1698 	struct vattr vattr;
1699 	struct nfsm_info info;
1700 
1701 	info.mrep = NULL;
1702 	info.v3 = NFS_ISV3(dvp);
1703 	lwkt_gettoken(&nmp->nm_token);
1704 
1705 	/*
1706 	 * Oops, not for me..
1707 	 */
1708 	if (vap->va_type == VSOCK) {
1709 		error = nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap);
1710 		lwkt_reltoken(&nmp->nm_token);
1711 		return error;
1712 	}
1713 
1714 	if ((error = VOP_GETATTR(dvp, &vattr)) != 0) {
1715 		lwkt_reltoken(&nmp->nm_token);
1716 		return (error);
1717 	}
1718 	if (vap->va_vaflags & VA_EXCLUSIVE)
1719 		fmode |= O_EXCL;
1720 again:
1721 	nfsstats.rpccnt[NFSPROC_CREATE]++;
1722 	nfsm_reqhead(&info, dvp, NFSPROC_CREATE,
1723 		     NFSX_FH(info.v3) + 2 * NFSX_UNSIGNED +
1724 		     nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(info.v3));
1725 	ERROROUT(nfsm_fhtom(&info, dvp));
1726 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
1727 			     NFS_MAXNAMLEN));
1728 	if (info.v3) {
1729 		tl = nfsm_build(&info, NFSX_UNSIGNED);
1730 		if (fmode & O_EXCL) {
1731 			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1732 			tl = nfsm_build(&info, NFSX_V3CREATEVERF);
1733 #ifdef INET
1734 			if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid]))
1735 				*tl++ = IA_SIN(TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia)->sin_addr.s_addr;
1736 			else
1737 #endif
1738 				*tl++ = create_verf;
1739 			*tl = ++create_verf;
1740 		} else {
1741 			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1742 			nfsm_v3attrbuild(&info, vap, FALSE);
1743 		}
1744 	} else {
1745 		sp = nfsm_build(&info, NFSX_V2SATTR);
1746 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1747 		sp->sa_uid = nfs_xdrneg1;
1748 		sp->sa_gid = nfs_xdrneg1;
1749 		sp->sa_size = 0;
1750 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1751 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1752 	}
1753 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_CREATE, cnp->cn_td,
1754 				cnp->cn_cred, &error));
1755 	if (error == 0) {
1756 		ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
1757 		if (!gotvp) {
1758 			if (newvp) {
1759 				vput(newvp);
1760 				newvp = NULL;
1761 			}
1762 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1763 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_td, &np);
1764 			if (!error)
1765 				newvp = NFSTOV(np);
1766 		}
1767 	}
1768 	if (info.v3) {
1769 		if (error == 0)
1770 			error = nfsm_wcc_data(&info, dvp, &wccflag);
1771 		else
1772 			(void)nfsm_wcc_data(&info, dvp, &wccflag);
1773 	}
1774 	m_freem(info.mrep);
1775 	info.mrep = NULL;
1776 nfsmout:
1777 	if (error) {
1778 		if (info.v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1779 			KKASSERT(newvp == NULL);
1780 			fmode &= ~O_EXCL;
1781 			goto again;
1782 		}
1783 	} else if (info.v3 && (fmode & O_EXCL)) {
1784 		/*
1785 		 * We are normally called with only a partially initialized
1786 		 * VAP.  Since the NFSv3 spec says that server may use the
1787 		 * file attributes to store the verifier, the spec requires
1788 		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
1789 		 * in atime, but we can't really assume that all servers will
1790 		 * so we ensure that our SETATTR sets both atime and mtime.
1791 		 */
1792 		if (vap->va_mtime.tv_sec == VNOVAL)
1793 			vfs_timestamp(&vap->va_mtime);
1794 		if (vap->va_atime.tv_sec == VNOVAL)
1795 			vap->va_atime = vap->va_mtime;
1796 		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_td);
1797 	}
1798 	if (error == 0) {
1799 		/*
1800 		 * The new np may have enough info for access
1801 		 * checks, make sure rucred and wucred are
1802 		 * initialized for read and write rpc's.
1803 		 */
1804 		np = VTONFS(newvp);
1805 		if (np->n_rucred == NULL)
1806 			np->n_rucred = crhold(cnp->cn_cred);
1807 		if (np->n_wucred == NULL)
1808 			np->n_wucred = crhold(cnp->cn_cred);
1809 		*ap->a_vpp = newvp;
1810 		nfs_knote(dvp, NOTE_WRITE);
1811 	} else if (newvp) {
1812 		vput(newvp);
1813 	}
1814 	VTONFS(dvp)->n_flag |= NLMODIFIED;
1815 	if (!wccflag)
1816 		VTONFS(dvp)->n_attrstamp = 0;
1817 	lwkt_reltoken(&nmp->nm_token);
1818 	return (error);
1819 }
1820 
1821 /*
1822  * nfs file remove call
1823  * To try and make nfs semantics closer to ufs semantics, a file that has
1824  * other processes using the vnode is renamed instead of removed and then
1825  * removed later on the last close.
1826  * - If v_refcnt > 1
1827  *	  If a rename is not already in the works
1828  *	     call nfs_sillyrename() to set it up
1829  *     else
1830  *	  do the remove rpc
1831  *
1832  * nfs_remove(struct vnode *a_dvp, struct vnode *a_vp,
1833  *	      struct componentname *a_cnp)
1834  */
1835 static int
1836 nfs_remove(struct vop_old_remove_args *ap)
1837 {
1838 	struct vnode *vp = ap->a_vp;
1839 	struct vnode *dvp = ap->a_dvp;
1840 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
1841 	struct componentname *cnp = ap->a_cnp;
1842 	struct nfsnode *np = VTONFS(vp);
1843 	int error = 0;
1844 	struct vattr vattr;
1845 
1846 	lwkt_gettoken(&nmp->nm_token);
1847 #ifndef DIAGNOSTIC
1848 	if (VREFCNT(vp) < 1)
1849 		panic("nfs_remove: bad v_refcnt");
1850 #endif
1851 	if (vp->v_type == VDIR) {
1852 		error = EPERM;
1853 	} else if (VREFCNT(vp) == 1 || (np->n_sillyrename &&
1854 		   VOP_GETATTR(vp, &vattr) == 0 && vattr.va_nlink > 1)) {
1855 		/*
1856 		 * Force finalization so the VOP_INACTIVE() call is not delayed.
1857 		 * This prevents cred structures from building up in nfsnodes
1858 		 * for deleted files.
1859 		 */
1860 		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
1861 		np->n_flag |= NREMOVED;
1862 
1863 		/*
1864 		 * Throw away biocache buffers, mainly to avoid
1865 		 * unnecessary delayed writes later.
1866 		 */
1867 		error = nfs_vinvalbuf(vp, 0, 1);
1868 		/* Do the rpc */
1869 		if (error != EINTR) {
1870 			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1871 					      cnp->cn_namelen,
1872 					      cnp->cn_cred, cnp->cn_td);
1873 		}
1874 
1875 		/*
1876 		 * Kludge City: If the first reply to the remove rpc is lost..
1877 		 *   the reply to the retransmitted request will be ENOENT
1878 		 *   since the file was in fact removed
1879 		 *   Therefore, we cheat and return success.
1880 		 */
1881 		if (error == ENOENT)
1882 			error = 0;
1883 	} else if (!np->n_sillyrename) {
1884 		error = nfs_sillyrename(dvp, vp, cnp);
1885 	}
1886 	np->n_attrstamp = 0;
1887 	lwkt_reltoken(&nmp->nm_token);
1888 	if (error == 0) {
1889 		nfs_knote(vp, NOTE_DELETE);
1890 		nfs_knote(dvp, NOTE_WRITE);
1891 	}
1892 
1893 	return (error);
1894 }
1895 
1896 /*
1897  * nfs file remove rpc called from nfs_inactive
1898  *
1899  * NOTE: s_dvp can be VBAD during a forced unmount.
1900  */
1901 int
1902 nfs_removeit(struct sillyrename *sp)
1903 {
1904 	if (sp->s_dvp->v_type == VBAD)
1905 		return(0);
1906 	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen,
1907 		sp->s_cred, NULL));
1908 }
1909 
1910 /*
1911  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1912  */
1913 static int
1914 nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
1915 	      struct ucred *cred, struct thread *td)
1916 {
1917 	int error = 0, wccflag = NFSV3_WCCRATTR;
1918 	struct nfsm_info info;
1919 
1920 	info.mrep = NULL;
1921 	info.v3 = NFS_ISV3(dvp);
1922 
1923 	nfsstats.rpccnt[NFSPROC_REMOVE]++;
1924 	nfsm_reqhead(&info, dvp, NFSPROC_REMOVE,
1925 		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1926 	ERROROUT(nfsm_fhtom(&info, dvp));
1927 	ERROROUT(nfsm_strtom(&info, name, namelen, NFS_MAXNAMLEN));
1928 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_REMOVE, td, cred, &error));
1929 	if (info.v3) {
1930 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
1931 	}
1932 	m_freem(info.mrep);
1933 	info.mrep = NULL;
1934 nfsmout:
1935 	VTONFS(dvp)->n_flag |= NLMODIFIED;
1936 	if (!wccflag)
1937 		VTONFS(dvp)->n_attrstamp = 0;
1938 	return (error);
1939 }
1940 
1941 /*
1942  * nfs file rename call
1943  *
1944  * nfs_rename(struct vnode *a_fdvp, struct vnode *a_fvp,
1945  *	      struct componentname *a_fcnp, struct vnode *a_tdvp,
1946  *	      struct vnode *a_tvp, struct componentname *a_tcnp)
1947  */
1948 static int
1949 nfs_rename(struct vop_old_rename_args *ap)
1950 {
1951 	struct vnode *fvp = ap->a_fvp;
1952 	struct vnode *tvp = ap->a_tvp;
1953 	struct vnode *fdvp = ap->a_fdvp;
1954 	struct vnode *tdvp = ap->a_tdvp;
1955 	struct componentname *tcnp = ap->a_tcnp;
1956 	struct componentname *fcnp = ap->a_fcnp;
1957 	struct nfsmount *nmp = VFSTONFS(fdvp->v_mount);
1958 	int error;
1959 
1960 	lwkt_gettoken(&nmp->nm_token);
1961 
1962 	/*
1963 	 * Force finalization so the VOP_INACTIVE() call is not delayed.
1964 	 * This prevents cred structures from building up in nfsnodes
1965 	 * for deleted files.
1966 	 */
1967 	if (tvp) {
1968 		atomic_set_int(&tvp->v_refcnt, VREF_FINALIZE);
1969 		if (VTONFS(tvp))
1970 			VTONFS(tvp)->n_flag |= NREMOVED;
1971 	}
1972 
1973 	/* Check for cross-device rename */
1974 	if ((fvp->v_mount != tdvp->v_mount) ||
1975 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1976 		error = EXDEV;
1977 		goto out;
1978 	}
1979 
1980 	/*
1981 	 * We shouldn't have to flush fvp on rename for most server-side
1982 	 * filesystems as the file handle should not change.  Unfortunately
1983 	 * the inode for some filesystems (msdosfs) might be tied to the
1984 	 * file name or directory position so to be completely safe
1985 	 * vfs.nfs.flush_on_rename is set by default.  Clear to improve
1986 	 * performance.
1987 	 *
1988 	 * We must flush tvp on rename because it might become stale on the
1989 	 * server after the rename.
1990 	 */
1991 	if (nfs_flush_on_rename)
1992 	    VOP_FSYNC(fvp, MNT_WAIT, 0);
1993 	if (tvp)
1994 	    VOP_FSYNC(tvp, MNT_WAIT, 0);
1995 
1996 	/*
1997 	 * If the tvp exists and is in use, sillyrename it before doing the
1998 	 * rename of the new file over it.
1999 	 *
2000 	 * XXX Can't sillyrename a directory.
2001 	 *
2002 	 * We do not attempt to do any namecache purges in this old API
2003 	 * routine.  The new API compat functions have access to the actual
2004 	 * namecache structures and will do it for us.
2005 	 */
2006 	if (tvp && VREFCNT(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
2007 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
2008 		nfs_knote(tvp, NOTE_DELETE);
2009 		vput(tvp);
2010 		tvp = NULL;
2011 	} else if (tvp) {
2012 		nfs_knote(tvp, NOTE_DELETE);
2013 	}
2014 
2015 	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
2016 		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
2017 		tcnp->cn_td);
2018 
2019 out:
2020 	if (error == 0) {
2021 		nfs_knote(fdvp, NOTE_WRITE);
2022 		nfs_knote(tdvp, NOTE_WRITE);
2023 		nfs_knote(fvp, NOTE_RENAME);
2024 	}
2025 	lwkt_reltoken(&nmp->nm_token);
2026 	if (tdvp == tvp)
2027 		vrele(tdvp);
2028 	else
2029 		vput(tdvp);
2030 	if (tvp)
2031 		vput(tvp);
2032 	vrele(fdvp);
2033 	vrele(fvp);
2034 	/*
2035 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
2036 	 */
2037 	if (error == ENOENT)
2038 		error = 0;
2039 	return (error);
2040 }
2041 
2042 /*
2043  * nfs file rename rpc called from nfs_remove() above
2044  */
2045 static int
2046 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
2047 	     struct sillyrename *sp)
2048 {
2049 	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
2050 		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_td));
2051 }
2052 
2053 /*
2054  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
2055  */
2056 static int
2057 nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
2058 	      struct vnode *tdvp, const char *tnameptr, int tnamelen,
2059 	      struct ucred *cred, struct thread *td)
2060 {
2061 	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
2062 	struct nfsm_info info;
2063 
2064 	info.mrep = NULL;
2065 	info.v3 = NFS_ISV3(fdvp);
2066 
2067 	nfsstats.rpccnt[NFSPROC_RENAME]++;
2068 	nfsm_reqhead(&info, fdvp, NFSPROC_RENAME,
2069 		    (NFSX_FH(info.v3) + NFSX_UNSIGNED)*2 +
2070 		    nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
2071 	ERROROUT(nfsm_fhtom(&info, fdvp));
2072 	ERROROUT(nfsm_strtom(&info, fnameptr, fnamelen, NFS_MAXNAMLEN));
2073 	ERROROUT(nfsm_fhtom(&info, tdvp));
2074 	ERROROUT(nfsm_strtom(&info, tnameptr, tnamelen, NFS_MAXNAMLEN));
2075 	NEGKEEPOUT(nfsm_request(&info, fdvp, NFSPROC_RENAME, td, cred, &error));
2076 	if (info.v3) {
2077 		ERROROUT(nfsm_wcc_data(&info, fdvp, &fwccflag));
2078 		ERROROUT(nfsm_wcc_data(&info, tdvp, &twccflag));
2079 	}
2080 	m_freem(info.mrep);
2081 	info.mrep = NULL;
2082 nfsmout:
2083 	VTONFS(fdvp)->n_flag |= NLMODIFIED;
2084 	VTONFS(tdvp)->n_flag |= NLMODIFIED;
2085 	if (!fwccflag)
2086 		VTONFS(fdvp)->n_attrstamp = 0;
2087 	if (!twccflag)
2088 		VTONFS(tdvp)->n_attrstamp = 0;
2089 	return (error);
2090 }
2091 
2092 /*
2093  * nfs hard link create call
2094  *
2095  * nfs_link(struct vnode *a_tdvp, struct vnode *a_vp,
2096  *	    struct componentname *a_cnp)
2097  */
2098 static int
2099 nfs_link(struct vop_old_link_args *ap)
2100 {
2101 	struct vnode *vp = ap->a_vp;
2102 	struct vnode *tdvp = ap->a_tdvp;
2103 	struct nfsmount *nmp = VFSTONFS(tdvp->v_mount);
2104 	struct componentname *cnp = ap->a_cnp;
2105 	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
2106 	struct nfsm_info info;
2107 
2108 	if (vp->v_mount != tdvp->v_mount) {
2109 		return (EXDEV);
2110 	}
2111 	lwkt_gettoken(&nmp->nm_token);
2112 
2113 	/*
2114 	 * The attribute cache may get out of sync with the server on link.
2115 	 * Pushing writes to the server before handle was inherited from
2116 	 * long long ago and it is unclear if we still need to do this.
2117 	 * Defaults to off.
2118 	 */
2119 	if (nfs_flush_on_hlink)
2120 		VOP_FSYNC(vp, MNT_WAIT, 0);
2121 
2122 	info.mrep = NULL;
2123 	info.v3 = NFS_ISV3(vp);
2124 
2125 	nfsstats.rpccnt[NFSPROC_LINK]++;
2126 	nfsm_reqhead(&info, vp, NFSPROC_LINK,
2127 		     NFSX_FH(info.v3) * 2 + NFSX_UNSIGNED +
2128 		     nfsm_rndup(cnp->cn_namelen));
2129 	ERROROUT(nfsm_fhtom(&info, vp));
2130 	ERROROUT(nfsm_fhtom(&info, tdvp));
2131 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
2132 			     NFS_MAXNAMLEN));
2133 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_LINK, cnp->cn_td,
2134 				cnp->cn_cred, &error));
2135 	if (info.v3) {
2136 		ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
2137 					 NFS_LATTR_NOSHRINK));
2138 		ERROROUT(nfsm_wcc_data(&info, tdvp, &wccflag));
2139 	}
2140 	m_freem(info.mrep);
2141 	info.mrep = NULL;
2142 nfsmout:
2143 	VTONFS(tdvp)->n_flag |= NLMODIFIED;
2144 	if (!attrflag)
2145 		VTONFS(vp)->n_attrstamp = 0;
2146 	if (!wccflag)
2147 		VTONFS(tdvp)->n_attrstamp = 0;
2148 	/*
2149 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2150 	 */
2151 	if (error == EEXIST)
2152 		error = 0;
2153 	lwkt_reltoken(&nmp->nm_token);
2154 	if (error == 0) {
2155 		nfs_knote(vp, NOTE_LINK);
2156 		nfs_knote(tdvp, NOTE_WRITE);
2157 	}
2158 
2159 	return (error);
2160 }
2161 
2162 /*
2163  * nfs symbolic link create call
2164  *
2165  * nfs_symlink(struct vnode *a_dvp, struct vnode **a_vpp,
2166  *		struct componentname *a_cnp, struct vattr *a_vap,
2167  *		char *a_target)
2168  */
2169 static int
2170 nfs_symlink(struct vop_old_symlink_args *ap)
2171 {
2172 	struct vnode *dvp = ap->a_dvp;
2173 	struct vattr *vap = ap->a_vap;
2174 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2175 	struct componentname *cnp = ap->a_cnp;
2176 	struct nfsv2_sattr *sp;
2177 	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
2178 	struct vnode *newvp = NULL;
2179 	struct nfsm_info info;
2180 
2181 	info.mrep = NULL;
2182 	info.v3 = NFS_ISV3(dvp);
2183 	lwkt_gettoken(&nmp->nm_token);
2184 
2185 	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
2186 	slen = strlen(ap->a_target);
2187 	nfsm_reqhead(&info, dvp, NFSPROC_SYMLINK,
2188 		     NFSX_FH(info.v3) + 2*NFSX_UNSIGNED +
2189 		     nfsm_rndup(cnp->cn_namelen) +
2190 		     nfsm_rndup(slen) + NFSX_SATTR(info.v3));
2191 	ERROROUT(nfsm_fhtom(&info, dvp));
2192 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
2193 			     NFS_MAXNAMLEN));
2194 	if (info.v3) {
2195 		nfsm_v3attrbuild(&info, vap, FALSE);
2196 	}
2197 	ERROROUT(nfsm_strtom(&info, ap->a_target, slen, NFS_MAXPATHLEN));
2198 	if (info.v3 == 0) {
2199 		sp = nfsm_build(&info, NFSX_V2SATTR);
2200 		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
2201 		sp->sa_uid = nfs_xdrneg1;
2202 		sp->sa_gid = nfs_xdrneg1;
2203 		sp->sa_size = nfs_xdrneg1;
2204 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2205 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2206 	}
2207 
2208 	/*
2209 	 * Issue the NFS request and get the rpc response.
2210 	 *
2211 	 * Only NFSv3 responses returning an error of 0 actually return
2212 	 * a file handle that can be converted into newvp without having
2213 	 * to do an extra lookup rpc.
2214 	 */
2215 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_SYMLINK, cnp->cn_td,
2216 				cnp->cn_cred, &error));
2217 	if (info.v3) {
2218 		if (error == 0) {
2219 		       ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
2220 		}
2221 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
2222 	}
2223 
2224 	/*
2225 	 * out code jumps -> here, mrep is also freed.
2226 	 */
2227 
2228 	m_freem(info.mrep);
2229 	info.mrep = NULL;
2230 nfsmout:
2231 
2232 	/*
2233 	 * If we get an EEXIST error, silently convert it to no-error
2234 	 * in case of an NFS retry.
2235 	 */
2236 	if (error == EEXIST)
2237 		error = 0;
2238 
2239 	/*
2240 	 * If we do not have (or no longer have) an error, and we could
2241 	 * not extract the newvp from the response due to the request being
2242 	 * NFSv2 or the error being EEXIST.  We have to do a lookup in order
2243 	 * to obtain a newvp to return.
2244 	 */
2245 	if (error == 0 && newvp == NULL) {
2246 		struct nfsnode *np = NULL;
2247 
2248 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
2249 				     cnp->cn_cred, cnp->cn_td, &np);
2250 		if (!error)
2251 			newvp = NFSTOV(np);
2252 	}
2253 	if (error) {
2254 		if (newvp)
2255 			vput(newvp);
2256 	} else {
2257 		*ap->a_vpp = newvp;
2258 	}
2259 	VTONFS(dvp)->n_flag |= NLMODIFIED;
2260 	if (!wccflag)
2261 		VTONFS(dvp)->n_attrstamp = 0;
2262 	if (error == 0 && *ap->a_vpp)
2263 		nfs_knote(*ap->a_vpp, NOTE_WRITE);
2264 	lwkt_reltoken(&nmp->nm_token);
2265 
2266 	return (error);
2267 }
2268 
2269 /*
2270  * nfs make dir call
2271  *
2272  * nfs_mkdir(struct vnode *a_dvp, struct vnode **a_vpp,
2273  *	     struct componentname *a_cnp, struct vattr *a_vap)
2274  */
2275 static int
2276 nfs_mkdir(struct vop_old_mkdir_args *ap)
2277 {
2278 	struct vnode *dvp = ap->a_dvp;
2279 	struct vattr *vap = ap->a_vap;
2280 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2281 	struct componentname *cnp = ap->a_cnp;
2282 	struct nfsv2_sattr *sp;
2283 	struct nfsnode *np = NULL;
2284 	struct vnode *newvp = NULL;
2285 	struct vattr vattr;
2286 	int error = 0, wccflag = NFSV3_WCCRATTR;
2287 	int gotvp = 0;
2288 	int len;
2289 	struct nfsm_info info;
2290 
2291 	info.mrep = NULL;
2292 	info.v3 = NFS_ISV3(dvp);
2293 	lwkt_gettoken(&nmp->nm_token);
2294 
2295 	if ((error = VOP_GETATTR(dvp, &vattr)) != 0) {
2296 		lwkt_reltoken(&nmp->nm_token);
2297 		return (error);
2298 	}
2299 	len = cnp->cn_namelen;
2300 	nfsstats.rpccnt[NFSPROC_MKDIR]++;
2301 	nfsm_reqhead(&info, dvp, NFSPROC_MKDIR,
2302 		     NFSX_FH(info.v3) + NFSX_UNSIGNED +
2303 		     nfsm_rndup(len) + NFSX_SATTR(info.v3));
2304 	ERROROUT(nfsm_fhtom(&info, dvp));
2305 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, len, NFS_MAXNAMLEN));
2306 	if (info.v3) {
2307 		nfsm_v3attrbuild(&info, vap, FALSE);
2308 	} else {
2309 		sp = nfsm_build(&info, NFSX_V2SATTR);
2310 		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
2311 		sp->sa_uid = nfs_xdrneg1;
2312 		sp->sa_gid = nfs_xdrneg1;
2313 		sp->sa_size = nfs_xdrneg1;
2314 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2315 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2316 	}
2317 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_MKDIR, cnp->cn_td,
2318 		    cnp->cn_cred, &error));
2319 	if (error == 0) {
2320 		ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
2321 	}
2322 	if (info.v3) {
2323 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
2324 	}
2325 	m_freem(info.mrep);
2326 	info.mrep = NULL;
2327 nfsmout:
2328 	VTONFS(dvp)->n_flag |= NLMODIFIED;
2329 	if (!wccflag)
2330 		VTONFS(dvp)->n_attrstamp = 0;
2331 	/*
2332 	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
2333 	 * if we can succeed in looking up the directory.
2334 	 */
2335 	if (error == EEXIST || (!error && !gotvp)) {
2336 		if (newvp) {
2337 			vrele(newvp);
2338 			newvp = NULL;
2339 		}
2340 		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
2341 			cnp->cn_td, &np);
2342 		if (!error) {
2343 			newvp = NFSTOV(np);
2344 			if (newvp->v_type != VDIR)
2345 				error = EEXIST;
2346 		}
2347 	}
2348 	if (error) {
2349 		if (newvp)
2350 			vrele(newvp);
2351 	} else {
2352 		nfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
2353 		*ap->a_vpp = newvp;
2354 	}
2355 	lwkt_reltoken(&nmp->nm_token);
2356 	return (error);
2357 }
2358 
2359 /*
2360  * nfs remove directory call
2361  *
2362  * nfs_rmdir(struct vnode *a_dvp, struct vnode *a_vp,
2363  *	     struct componentname *a_cnp)
2364  */
2365 static int
2366 nfs_rmdir(struct vop_old_rmdir_args *ap)
2367 {
2368 	struct vnode *vp = ap->a_vp;
2369 	struct vnode *dvp = ap->a_dvp;
2370 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2371 	struct componentname *cnp = ap->a_cnp;
2372 	int error = 0, wccflag = NFSV3_WCCRATTR;
2373 	struct nfsm_info info;
2374 
2375 	info.mrep = NULL;
2376 	info.v3 = NFS_ISV3(dvp);
2377 
2378 	if (dvp == vp)
2379 		return (EINVAL);
2380 
2381 	lwkt_gettoken(&nmp->nm_token);
2382 
2383 	nfsstats.rpccnt[NFSPROC_RMDIR]++;
2384 	nfsm_reqhead(&info, dvp, NFSPROC_RMDIR,
2385 		     NFSX_FH(info.v3) + NFSX_UNSIGNED +
2386 		     nfsm_rndup(cnp->cn_namelen));
2387 	ERROROUT(nfsm_fhtom(&info, dvp));
2388 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
2389 		 NFS_MAXNAMLEN));
2390 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_RMDIR, cnp->cn_td,
2391 				cnp->cn_cred, &error));
2392 	if (info.v3) {
2393 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
2394 	}
2395 	m_freem(info.mrep);
2396 	info.mrep = NULL;
2397 nfsmout:
2398 	VTONFS(dvp)->n_flag |= NLMODIFIED;
2399 	if (!wccflag)
2400 		VTONFS(dvp)->n_attrstamp = 0;
2401 	/*
2402 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2403 	 */
2404 	if (error == ENOENT)
2405 		error = 0;
2406 	else
2407 		nfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
2408 	lwkt_reltoken(&nmp->nm_token);
2409 
2410 	return (error);
2411 }
2412 
2413 /*
2414  * nfs readdir call
2415  *
2416  * nfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
2417  */
2418 static int
2419 nfs_readdir(struct vop_readdir_args *ap)
2420 {
2421 	struct vnode *vp = ap->a_vp;
2422 	struct nfsnode *np = VTONFS(vp);
2423 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2424 	struct uio *uio = ap->a_uio;
2425 	int tresid, error;
2426 	struct vattr vattr;
2427 
2428 	if (vp->v_type != VDIR)
2429 		return (EPERM);
2430 
2431 	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
2432 	if (error)
2433 		return (error);
2434 
2435 	lwkt_gettoken(&nmp->nm_token);
2436 
2437 	/*
2438 	 * If we have a valid EOF offset cache we must call VOP_GETATTR()
2439 	 * and then check that is still valid, or if this is an NQNFS mount
2440 	 * we call NQNFS_CKCACHEABLE() instead of VOP_GETATTR().  Note that
2441 	 * VOP_GETATTR() does not necessarily go to the wire.
2442 	 */
2443 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2444 	    (np->n_flag & (NLMODIFIED|NRMODIFIED)) == 0) {
2445 		if (VOP_GETATTR(vp, &vattr) == 0 &&
2446 		    (np->n_flag & (NLMODIFIED|NRMODIFIED)) == 0
2447 		) {
2448 			nfsstats.direofcache_hits++;
2449 			goto done;
2450 		}
2451 	}
2452 
2453 	/*
2454 	 * Call nfs_bioread() to do the real work.  nfs_bioread() does its
2455 	 * own cache coherency checks so we do not have to.
2456 	 */
2457 	tresid = uio->uio_resid;
2458 	error = nfs_bioread(vp, uio, 0);
2459 
2460 	if (!error && uio->uio_resid == tresid)
2461 		nfsstats.direofcache_misses++;
2462 done:
2463 	lwkt_reltoken(&nmp->nm_token);
2464 	vn_unlock(vp);
2465 
2466 	return (error);
2467 }
2468 
2469 /*
2470  * Readdir rpc call.  nfs_bioread->nfs_doio->nfs_readdirrpc.
2471  *
2472  * Note that for directories, nfs_bioread maintains the underlying nfs-centric
2473  * offset/block and converts the nfs formatted directory entries for userland
2474  * consumption as well as deals with offsets into the middle of blocks.
2475  * nfs_doio only deals with logical blocks.  In particular, uio_offset will
2476  * be block-bounded.  It must convert to cookies for the actual RPC.
2477  */
2478 int
2479 nfs_readdirrpc_uio(struct vnode *vp, struct uio *uiop)
2480 {
2481 	int len, left;
2482 	struct nfs_dirent *dp = NULL;
2483 	u_int32_t *tl;
2484 	nfsuint64 *cookiep;
2485 	caddr_t cp;
2486 	nfsuint64 cookie;
2487 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2488 	struct nfsnode *dnp = VTONFS(vp);
2489 	u_quad_t fileno;
2490 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2491 	int attrflag;
2492 	struct nfsm_info info;
2493 
2494 	info.mrep = NULL;
2495 	info.v3 = NFS_ISV3(vp);
2496 
2497 #ifndef DIAGNOSTIC
2498 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2499 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2500 		panic("nfs readdirrpc bad uio");
2501 #endif
2502 
2503 	/*
2504 	 * If there is no cookie, assume directory was stale.
2505 	 */
2506 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2507 	if (cookiep)
2508 		cookie = *cookiep;
2509 	else
2510 		return (NFSERR_BAD_COOKIE);
2511 	/*
2512 	 * Loop around doing readdir rpc's of size nm_readdirsize
2513 	 * truncated to a multiple of DIRBLKSIZ.
2514 	 * The stopping criteria is EOF or buffer full.
2515 	 */
2516 	while (more_dirs && bigenough) {
2517 		nfsstats.rpccnt[NFSPROC_READDIR]++;
2518 		nfsm_reqhead(&info, vp, NFSPROC_READDIR,
2519 			     NFSX_FH(info.v3) + NFSX_READDIR(info.v3));
2520 		ERROROUT(nfsm_fhtom(&info, vp));
2521 		if (info.v3) {
2522 			tl = nfsm_build(&info, 5 * NFSX_UNSIGNED);
2523 			*tl++ = cookie.nfsuquad[0];
2524 			*tl++ = cookie.nfsuquad[1];
2525 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
2526 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
2527 		} else {
2528 			/*
2529 			 * WARNING!  HAMMER DIRECTORIES WILL NOT WORK WELL
2530 			 * WITH NFSv2!!!  There's nothing I can really do
2531 			 * about it other than to hope the server supports
2532 			 * rdirplus w/NFSv2.
2533 			 */
2534 			tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
2535 			*tl++ = cookie.nfsuquad[0];
2536 		}
2537 		*tl = txdr_unsigned(nmp->nm_readdirsize);
2538 		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READDIR,
2539 					uiop->uio_td,
2540 					nfs_vpcred(vp, ND_READ), &error));
2541 		if (info.v3) {
2542 			ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
2543 						  NFS_LATTR_NOSHRINK));
2544 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2545 			dnp->n_cookieverf.nfsuquad[0] = *tl++;
2546 			dnp->n_cookieverf.nfsuquad[1] = *tl;
2547 		}
2548 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2549 		more_dirs = fxdr_unsigned(int, *tl);
2550 
2551 		/* loop thru the dir entries, converting them to std form */
2552 		while (more_dirs && bigenough) {
2553 			if (info.v3) {
2554 				NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2555 				fileno = fxdr_hyper(tl);
2556 				len = fxdr_unsigned(int, *(tl + 2));
2557 			} else {
2558 				NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2559 				fileno = fxdr_unsigned(u_quad_t, *tl++);
2560 				len = fxdr_unsigned(int, *tl);
2561 			}
2562 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2563 				error = EBADRPC;
2564 				m_freem(info.mrep);
2565 				info.mrep = NULL;
2566 				goto nfsmout;
2567 			}
2568 
2569 			/*
2570 			 * len is the number of bytes in the path element
2571 			 * name, not including the \0 termination.
2572 			 *
2573 			 * tlen is the number of bytes w have to reserve for
2574 			 * the path element name.
2575 			 */
2576 			tlen = nfsm_rndup(len);
2577 			if (tlen == len)
2578 				tlen += 4;	/* To ensure null termination */
2579 
2580 			/*
2581 			 * If the entry would cross a DIRBLKSIZ boundary,
2582 			 * extend the previous nfs_dirent to cover the
2583 			 * remaining space.
2584 			 */
2585 			left = DIRBLKSIZ - blksiz;
2586 			if ((tlen + sizeof(struct nfs_dirent)) > left) {
2587 				dp->nfs_reclen += left;
2588 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2589 				uiop->uio_iov->iov_len -= left;
2590 				uiop->uio_offset += left;
2591 				uiop->uio_resid -= left;
2592 				blksiz = 0;
2593 			}
2594 			if ((tlen + sizeof(struct nfs_dirent)) > uiop->uio_resid)
2595 				bigenough = 0;
2596 			if (bigenough) {
2597 				dp = (struct nfs_dirent *)uiop->uio_iov->iov_base;
2598 				dp->nfs_ino = fileno;
2599 				dp->nfs_namlen = len;
2600 				dp->nfs_reclen = tlen + sizeof(struct nfs_dirent);
2601 				dp->nfs_type = DT_UNKNOWN;
2602 				blksiz += dp->nfs_reclen;
2603 				if (blksiz == DIRBLKSIZ)
2604 					blksiz = 0;
2605 				uiop->uio_offset += sizeof(struct nfs_dirent);
2606 				uiop->uio_resid -= sizeof(struct nfs_dirent);
2607 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + sizeof(struct nfs_dirent);
2608 				uiop->uio_iov->iov_len -= sizeof(struct nfs_dirent);
2609 				ERROROUT(nfsm_mtouio(&info, uiop, len));
2610 
2611 				/*
2612 				 * The uiop has advanced by nfs_dirent + len
2613 				 * but really needs to advance by
2614 				 * nfs_dirent + tlen
2615 				 */
2616 				cp = uiop->uio_iov->iov_base;
2617 				tlen -= len;
2618 				*cp = '\0';	/* null terminate */
2619 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen;
2620 				uiop->uio_iov->iov_len -= tlen;
2621 				uiop->uio_offset += tlen;
2622 				uiop->uio_resid -= tlen;
2623 			} else {
2624 				/*
2625 				 * NFS strings must be rounded up (nfsm_myouio
2626 				 * handled that in the bigenough case).
2627 				 */
2628 				ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
2629 			}
2630 			if (info.v3) {
2631 				NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2632 			} else {
2633 				NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2634 			}
2635 
2636 			/*
2637 			 * If we were able to accomodate the last entry,
2638 			 * get the cookie for the next one.  Otherwise
2639 			 * hold-over the cookie for the one we were not
2640 			 * able to accomodate.
2641 			 */
2642 			if (bigenough) {
2643 				cookie.nfsuquad[0] = *tl++;
2644 				if (info.v3)
2645 					cookie.nfsuquad[1] = *tl++;
2646 			} else if (info.v3) {
2647 				tl += 2;
2648 			} else {
2649 				tl++;
2650 			}
2651 			more_dirs = fxdr_unsigned(int, *tl);
2652 		}
2653 		/*
2654 		 * If at end of rpc data, get the eof boolean
2655 		 */
2656 		if (!more_dirs) {
2657 			NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2658 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2659 		}
2660 		m_freem(info.mrep);
2661 		info.mrep = NULL;
2662 	}
2663 	/*
2664 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2665 	 * by increasing d_reclen for the last record.
2666 	 */
2667 	if (blksiz > 0) {
2668 		left = DIRBLKSIZ - blksiz;
2669 		dp->nfs_reclen += left;
2670 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2671 		uiop->uio_iov->iov_len -= left;
2672 		uiop->uio_offset += left;
2673 		uiop->uio_resid -= left;
2674 	}
2675 
2676 	if (bigenough) {
2677 		/*
2678 		 * We hit the end of the directory, update direofoffset.
2679 		 */
2680 		dnp->n_direofoffset = uiop->uio_offset;
2681 	} else {
2682 		/*
2683 		 * There is more to go, insert the link cookie so the
2684 		 * next block can be read.
2685 		 */
2686 		if (uiop->uio_resid > 0)
2687 			kprintf("EEK! readdirrpc resid > 0\n");
2688 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2689 		*cookiep = cookie;
2690 	}
2691 nfsmout:
2692 	return (error);
2693 }
2694 
2695 /*
2696  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2697  */
2698 int
2699 nfs_readdirplusrpc_uio(struct vnode *vp, struct uio *uiop)
2700 {
2701 	int len, left;
2702 	struct nfs_dirent *dp;
2703 	u_int32_t *tl;
2704 	struct vnode *newvp;
2705 	nfsuint64 *cookiep;
2706 	caddr_t dpossav1, dpossav2;
2707 	caddr_t cp;
2708 	struct mbuf *mdsav1, *mdsav2;
2709 	nfsuint64 cookie;
2710 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2711 	struct nfsnode *dnp = VTONFS(vp), *np;
2712 	nfsfh_t *fhp;
2713 	u_quad_t fileno;
2714 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2715 	int attrflag, fhsize;
2716 	struct nchandle nch;
2717 	struct nchandle dnch;
2718 	struct nlcomponent nlc;
2719 	struct nfsm_info info;
2720 
2721 	info.mrep = NULL;
2722 	info.v3 = 1;
2723 
2724 #ifndef nolint
2725 	dp = NULL;
2726 #endif
2727 #ifndef DIAGNOSTIC
2728 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2729 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2730 		panic("nfs readdirplusrpc bad uio");
2731 #endif
2732 	/*
2733 	 * Obtain the namecache record for the directory so we have something
2734 	 * to use as a basis for creating the entries.  This function will
2735 	 * return a held (but not locked) ncp.  The ncp may be disconnected
2736 	 * from the tree and cannot be used for upward traversals, and the
2737 	 * ncp may be unnamed.  Note that other unrelated operations may
2738 	 * cause the ncp to be named at any time.
2739 	 *
2740 	 * We have to lock the ncp to prevent a lock order reversal when
2741 	 * rdirplus does nlookups of the children, because the vnode is
2742 	 * locked and has to stay that way.
2743 	 */
2744 	cache_fromdvp(vp, NULL, 0, &dnch);
2745 	bzero(&nlc, sizeof(nlc));
2746 	newvp = NULLVP;
2747 
2748 	/*
2749 	 * If there is no cookie, assume directory was stale.
2750 	 */
2751 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2752 	if (cookiep) {
2753 		cookie = *cookiep;
2754 	} else {
2755 		if (dnch.ncp)
2756 			cache_drop(&dnch);
2757 		return (NFSERR_BAD_COOKIE);
2758 	}
2759 
2760 	/*
2761 	 * Loop around doing readdir rpc's of size nm_readdirsize
2762 	 * truncated to a multiple of DIRBLKSIZ.
2763 	 * The stopping criteria is EOF or buffer full.
2764 	 */
2765 	while (more_dirs && bigenough) {
2766 		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2767 		nfsm_reqhead(&info, vp, NFSPROC_READDIRPLUS,
2768 			     NFSX_FH(info.v3) + 6 * NFSX_UNSIGNED);
2769 		ERROROUT(nfsm_fhtom(&info, vp));
2770 		tl = nfsm_build(&info, 6 * NFSX_UNSIGNED);
2771 		*tl++ = cookie.nfsuquad[0];
2772 		*tl++ = cookie.nfsuquad[1];
2773 		*tl++ = dnp->n_cookieverf.nfsuquad[0];
2774 		*tl++ = dnp->n_cookieverf.nfsuquad[1];
2775 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
2776 		*tl = txdr_unsigned(nmp->nm_rsize);
2777 		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READDIRPLUS,
2778 					uiop->uio_td,
2779 					nfs_vpcred(vp, ND_READ), &error));
2780 		ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
2781 					  NFS_LATTR_NOSHRINK));
2782 		NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2783 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
2784 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
2785 		more_dirs = fxdr_unsigned(int, *tl);
2786 
2787 		/* loop thru the dir entries, doctoring them to 4bsd form */
2788 		while (more_dirs && bigenough) {
2789 			NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2790 			fileno = fxdr_hyper(tl);
2791 			len = fxdr_unsigned(int, *(tl + 2));
2792 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2793 				error = EBADRPC;
2794 				m_freem(info.mrep);
2795 				info.mrep = NULL;
2796 				goto nfsmout;
2797 			}
2798 			tlen = nfsm_rndup(len);
2799 			if (tlen == len)
2800 				tlen += 4;	/* To ensure null termination*/
2801 			left = DIRBLKSIZ - blksiz;
2802 			if ((tlen + sizeof(struct nfs_dirent)) > left) {
2803 				dp->nfs_reclen += left;
2804 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2805 				uiop->uio_iov->iov_len -= left;
2806 				uiop->uio_offset += left;
2807 				uiop->uio_resid -= left;
2808 				blksiz = 0;
2809 			}
2810 			if ((tlen + sizeof(struct nfs_dirent)) > uiop->uio_resid)
2811 				bigenough = 0;
2812 			if (bigenough) {
2813 				dp = (struct nfs_dirent *)uiop->uio_iov->iov_base;
2814 				dp->nfs_ino = fileno;
2815 				dp->nfs_namlen = len;
2816 				dp->nfs_reclen = tlen + sizeof(struct nfs_dirent);
2817 				dp->nfs_type = DT_UNKNOWN;
2818 				blksiz += dp->nfs_reclen;
2819 				if (blksiz == DIRBLKSIZ)
2820 					blksiz = 0;
2821 				uiop->uio_offset += sizeof(struct nfs_dirent);
2822 				uiop->uio_resid -= sizeof(struct nfs_dirent);
2823 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + sizeof(struct nfs_dirent);
2824 				uiop->uio_iov->iov_len -= sizeof(struct nfs_dirent);
2825 				nlc.nlc_nameptr = uiop->uio_iov->iov_base;
2826 				nlc.nlc_namelen = len;
2827 				ERROROUT(nfsm_mtouio(&info, uiop, len));
2828 				cp = uiop->uio_iov->iov_base;
2829 				tlen -= len;
2830 				*cp = '\0';
2831 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen;
2832 				uiop->uio_iov->iov_len -= tlen;
2833 				uiop->uio_offset += tlen;
2834 				uiop->uio_resid -= tlen;
2835 			} else {
2836 				ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
2837 			}
2838 			NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2839 			if (bigenough) {
2840 				cookie.nfsuquad[0] = *tl++;
2841 				cookie.nfsuquad[1] = *tl++;
2842 			} else {
2843 				tl += 2;
2844 			}
2845 
2846 			/*
2847 			 * Since the attributes are before the file handle
2848 			 * (sigh), we must skip over the attributes and then
2849 			 * come back and get them.
2850 			 */
2851 			attrflag = fxdr_unsigned(int, *tl);
2852 			if (attrflag) {
2853 			    dpossav1 = info.dpos;
2854 			    mdsav1 = info.md;
2855 			    ERROROUT(nfsm_adv(&info, NFSX_V3FATTR));
2856 			    NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2857 			    doit = fxdr_unsigned(int, *tl);
2858 			    if (doit) {
2859 				NEGATIVEOUT(fhsize = nfsm_getfh(&info, &fhp));
2860 			    }
2861 			    if (doit && bigenough && !nlcdegenerate(&nlc) &&
2862 				!NFS_CMPFH(dnp, fhp, fhsize)
2863 			    ) {
2864 				if (dnch.ncp) {
2865 #if 0
2866 				    kprintf("NFS/READDIRPLUS, ENTER %*.*s\n",
2867 					nlc.nlc_namelen, nlc.nlc_namelen,
2868 					nlc.nlc_nameptr);
2869 #endif
2870 				    /*
2871 				     * This is a bit hokey but there isn't
2872 				     * much we can do about it.  We can't
2873 				     * hold the directory vp locked while
2874 				     * doing lookups and gets.
2875 				     */
2876 				    nch = cache_nlookup_nonblock(&dnch, &nlc);
2877 				    if (nch.ncp == NULL)
2878 					goto rdfail;
2879 				    cache_setunresolved(&nch);
2880 				    error = nfs_nget_nonblock(vp->v_mount, fhp,
2881 							      fhsize, &np,
2882 							      NULL);
2883 				    if (error) {
2884 					cache_put(&nch);
2885 					goto rdfail;
2886 				    }
2887 				    newvp = NFSTOV(np);
2888 				    dpossav2 = info.dpos;
2889 				    info.dpos = dpossav1;
2890 				    mdsav2 = info.md;
2891 				    info.md = mdsav1;
2892 				    ERROROUT(nfsm_loadattr(&info, newvp, NULL));
2893 				    info.dpos = dpossav2;
2894 				    info.md = mdsav2;
2895 				    dp->nfs_type =
2896 					    IFTODT(VTTOIF(np->n_vattr.va_type));
2897 				    nfs_cache_setvp(&nch, newvp,
2898 						    nfspos_cache_timeout);
2899 				    vput(newvp);
2900 				    newvp = NULLVP;
2901 				    cache_put(&nch);
2902 				} else {
2903 rdfail:
2904 				    ;
2905 #if 0
2906 				    kprintf("Warning: NFS/rddirplus, "
2907 					    "UNABLE TO ENTER %*.*s\n",
2908 					nlc.nlc_namelen, nlc.nlc_namelen,
2909 					nlc.nlc_nameptr);
2910 #endif
2911 				}
2912 			    }
2913 			} else {
2914 			    /* Just skip over the file handle */
2915 			    NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2916 			    i = fxdr_unsigned(int, *tl);
2917 			    ERROROUT(nfsm_adv(&info, nfsm_rndup(i)));
2918 			}
2919 			NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2920 			more_dirs = fxdr_unsigned(int, *tl);
2921 		}
2922 		/*
2923 		 * If at end of rpc data, get the eof boolean
2924 		 */
2925 		if (!more_dirs) {
2926 			NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2927 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2928 		}
2929 		m_freem(info.mrep);
2930 		info.mrep = NULL;
2931 	}
2932 	/*
2933 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2934 	 * by increasing d_reclen for the last record.
2935 	 */
2936 	if (blksiz > 0) {
2937 		left = DIRBLKSIZ - blksiz;
2938 		dp->nfs_reclen += left;
2939 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2940 		uiop->uio_iov->iov_len -= left;
2941 		uiop->uio_offset += left;
2942 		uiop->uio_resid -= left;
2943 	}
2944 
2945 	/*
2946 	 * We are now either at the end of the directory or have filled the
2947 	 * block.
2948 	 */
2949 	if (bigenough) {
2950 		dnp->n_direofoffset = uiop->uio_offset;
2951 	} else {
2952 		if (uiop->uio_resid > 0)
2953 			kprintf("EEK! readdirplusrpc resid > 0\n");
2954 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2955 		*cookiep = cookie;
2956 	}
2957 nfsmout:
2958 	if (newvp != NULLVP) {
2959 	        if (newvp == vp)
2960 			vrele(newvp);
2961 		else
2962 			vput(newvp);
2963 		newvp = NULLVP;
2964 	}
2965 	if (dnch.ncp)
2966 		cache_drop(&dnch);
2967 	return (error);
2968 }
2969 
2970 /*
2971  * Silly rename. To make the NFS filesystem that is stateless look a little
2972  * more like the "ufs" a remove of an active vnode is translated to a rename
2973  * to a funny looking filename that is removed by nfs_inactive on the
2974  * nfsnode. There is the potential for another process on a different client
2975  * to create the same funny name between the nfs_lookitup() fails and the
2976  * nfs_rename() completes, but...
2977  */
2978 static int
2979 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2980 {
2981 	struct sillyrename *sp;
2982 	struct nfsnode *np;
2983 	int error;
2984 
2985 	/*
2986 	 * Force finalization so the VOP_INACTIVE() call is not delayed.
2987 	 * This prevents cred structures from building up in nfsnodes
2988 	 * for deleted files.
2989 	 */
2990 	atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
2991 	np = VTONFS(vp);
2992 	np->n_flag |= NREMOVED;
2993 
2994 	/*
2995 	 * We previously purged dvp instead of vp.  I don't know why, it
2996 	 * completely destroys performance.  We can't do it anyway with the
2997 	 * new VFS API since we would be breaking the namecache topology.
2998 	 */
2999 	cache_purge(vp);	/* XXX */
3000 #ifndef DIAGNOSTIC
3001 	if (vp->v_type == VDIR)
3002 		panic("nfs: sillyrename dir");
3003 #endif
3004 	sp = kmalloc(sizeof(struct sillyrename), M_NFSREQ, M_WAITOK);
3005 	sp->s_cred = crdup(cnp->cn_cred);
3006 	sp->s_dvp = dvp;
3007 	vref(dvp);
3008 
3009 	/* Fudge together a funny name */
3010 	sp->s_namlen = ksprintf(sp->s_name, ".nfsA%08x4.4",
3011 				(int)(intptr_t)cnp->cn_td);
3012 
3013 	/* Try lookitups until we get one that isn't there */
3014 	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
3015 		cnp->cn_td, NULL) == 0) {
3016 		sp->s_name[4]++;
3017 		if (sp->s_name[4] > 'z') {
3018 			error = EINVAL;
3019 			goto bad;
3020 		}
3021 	}
3022 	error = nfs_renameit(dvp, cnp, sp);
3023 	if (error)
3024 		goto bad;
3025 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
3026 		cnp->cn_td, &np);
3027 	np->n_sillyrename = sp;
3028 	return (0);
3029 bad:
3030 	vrele(sp->s_dvp);
3031 	crfree(sp->s_cred);
3032 	kfree((caddr_t)sp, M_NFSREQ);
3033 
3034 	return (error);
3035 }
3036 
3037 /*
3038  * Look up a file name and optionally either update the file handle or
3039  * allocate an nfsnode, depending on the value of npp.
3040  * npp == NULL	--> just do the lookup
3041  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
3042  *			handled too
3043  * *npp != NULL --> update the file handle in the vnode
3044  */
3045 static int
3046 nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
3047 	     struct thread *td, struct nfsnode **npp)
3048 {
3049 	struct vnode *newvp = NULL;
3050 	struct nfsnode *np, *dnp = VTONFS(dvp);
3051 	int error = 0, fhlen, attrflag;
3052 	nfsfh_t *nfhp;
3053 	struct nfsm_info info;
3054 
3055 	info.mrep = NULL;
3056 	info.v3 = NFS_ISV3(dvp);
3057 
3058 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
3059 	nfsm_reqhead(&info, dvp, NFSPROC_LOOKUP,
3060 		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(len));
3061 	ERROROUT(nfsm_fhtom(&info, dvp));
3062 	ERROROUT(nfsm_strtom(&info, name, len, NFS_MAXNAMLEN));
3063 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_LOOKUP, td, cred, &error));
3064 	if (npp && !error) {
3065 		NEGATIVEOUT(fhlen = nfsm_getfh(&info, &nfhp));
3066 		if (*npp) {
3067 		    np = *npp;
3068 		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
3069 			kfree((caddr_t)np->n_fhp, M_NFSBIGFH);
3070 			np->n_fhp = &np->n_fh;
3071 		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
3072 			np->n_fhp =(nfsfh_t *)kmalloc(fhlen,M_NFSBIGFH,M_WAITOK);
3073 		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
3074 		    np->n_fhsize = fhlen;
3075 		    newvp = NFSTOV(np);
3076 		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
3077 		    vref(dvp);
3078 		    newvp = dvp;
3079 		} else {
3080 		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, NULL);
3081 		    if (error) {
3082 			m_freem(info.mrep);
3083 			info.mrep = NULL;
3084 			return (error);
3085 		    }
3086 		    newvp = NFSTOV(np);
3087 		}
3088 		if (info.v3) {
3089 			ERROROUT(nfsm_postop_attr(&info, newvp, &attrflag,
3090 						  NFS_LATTR_NOSHRINK));
3091 			if (!attrflag && *npp == NULL) {
3092 				m_freem(info.mrep);
3093 				info.mrep = NULL;
3094 				if (newvp == dvp)
3095 					vrele(newvp);
3096 				else
3097 					vput(newvp);
3098 				return (ENOENT);
3099 			}
3100 		} else {
3101 			ERROROUT(nfsm_loadattr(&info, newvp, NULL));
3102 		}
3103 	}
3104 	m_freem(info.mrep);
3105 	info.mrep = NULL;
3106 nfsmout:
3107 	if (npp && *npp == NULL) {
3108 		if (error) {
3109 			if (newvp) {
3110 				if (newvp == dvp)
3111 					vrele(newvp);
3112 				else
3113 					vput(newvp);
3114 			}
3115 		} else
3116 			*npp = np;
3117 	}
3118 	return (error);
3119 }
3120 
3121 /*
3122  * Nfs Version 3 commit rpc
3123  *
3124  * We call it 'uio' to distinguish it from 'bio' but there is no real uio
3125  * involved.
3126  */
3127 int
3128 nfs_commitrpc_uio(struct vnode *vp, u_quad_t offset, int cnt, struct thread *td)
3129 {
3130 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3131 	int error = 0, wccflag = NFSV3_WCCRATTR;
3132 	struct nfsm_info info;
3133 	u_int32_t *tl;
3134 
3135 	info.mrep = NULL;
3136 	info.v3 = 1;
3137 
3138 	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
3139 		return (0);
3140 	nfsstats.rpccnt[NFSPROC_COMMIT]++;
3141 	nfsm_reqhead(&info, vp, NFSPROC_COMMIT, NFSX_FH(1));
3142 	ERROROUT(nfsm_fhtom(&info, vp));
3143 	tl = nfsm_build(&info, 3 * NFSX_UNSIGNED);
3144 	txdr_hyper(offset, tl);
3145 	tl += 2;
3146 	*tl = txdr_unsigned(cnt);
3147 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_COMMIT, td,
3148 				nfs_vpcred(vp, ND_WRITE), &error));
3149 	ERROROUT(nfsm_wcc_data(&info, vp, &wccflag));
3150 	if (!error) {
3151 		NULLOUT(tl = nfsm_dissect(&info, NFSX_V3WRITEVERF));
3152 		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
3153 			NFSX_V3WRITEVERF)) {
3154 			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
3155 				NFSX_V3WRITEVERF);
3156 			error = NFSERR_STALEWRITEVERF;
3157 		}
3158 	}
3159 	m_freem(info.mrep);
3160 	info.mrep = NULL;
3161 nfsmout:
3162 	return (error);
3163 }
3164 
3165 /*
3166  * Kludge City..
3167  * - make nfs_bmap() essentially a no-op that does no translation
3168  * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
3169  *   (Maybe I could use the process's page mapping, but I was concerned that
3170  *    Kernel Write might not be enabled and also figured copyout() would do
3171  *    a lot more work than bcopy() and also it currently happens in the
3172  *    context of the swapper process (2).
3173  *
3174  * nfs_bmap(struct vnode *a_vp, off_t a_loffset,
3175  *	    off_t *a_doffsetp, int *a_runp, int *a_runb)
3176  */
3177 static int
3178 nfs_bmap(struct vop_bmap_args *ap)
3179 {
3180 	/* no token lock required */
3181 	if (ap->a_doffsetp != NULL)
3182 		*ap->a_doffsetp = ap->a_loffset;
3183 	if (ap->a_runp != NULL)
3184 		*ap->a_runp = 0;
3185 	if (ap->a_runb != NULL)
3186 		*ap->a_runb = 0;
3187 	return (0);
3188 }
3189 
3190 /*
3191  * Strategy routine.
3192  */
3193 static int
3194 nfs_strategy(struct vop_strategy_args *ap)
3195 {
3196 	struct bio *bio = ap->a_bio;
3197 	struct bio *nbio;
3198 	struct buf *bp __debugvar = bio->bio_buf;
3199 	struct nfsmount *nmp = VFSTONFS(ap->a_vp->v_mount);
3200 	struct thread *td;
3201 	int error;
3202 
3203 	KASSERT(bp->b_cmd != BUF_CMD_DONE,
3204 		("nfs_strategy: buffer %p unexpectedly marked done", bp));
3205 	KASSERT(BUF_LOCKINUSE(bp),
3206 		("nfs_strategy: buffer %p not locked", bp));
3207 
3208 	if (bio->bio_flags & BIO_SYNC)
3209 		td = curthread;	/* XXX */
3210 	else
3211 		td = NULL;
3212 
3213 	lwkt_gettoken(&nmp->nm_token);
3214 
3215         /*
3216 	 * We probably don't need to push an nbio any more since no
3217 	 * block conversion is required due to the use of 64 bit byte
3218 	 * offsets, but do it anyway.
3219 	 *
3220 	 * NOTE: When NFS callers itself via this strategy routines and
3221 	 *	 sets up a synchronous I/O, it expects the I/O to run
3222 	 *	 synchronously (its bio_done routine just assumes it),
3223 	 *	 so for now we have to honor the bit.
3224          */
3225 	nbio = push_bio(bio);
3226 	nbio->bio_offset = bio->bio_offset;
3227 	nbio->bio_flags = bio->bio_flags & BIO_SYNC;
3228 
3229 	/*
3230 	 * If the op is asynchronous and an i/o daemon is waiting
3231 	 * queue the request, wake it up and wait for completion
3232 	 * otherwise just do it ourselves.
3233 	 */
3234 	if (bio->bio_flags & BIO_SYNC) {
3235 		error = nfs_doio(ap->a_vp, nbio, td);
3236 	} else {
3237 		nfs_asyncio(ap->a_vp, nbio);
3238 		error = 0;
3239 	}
3240 	lwkt_reltoken(&nmp->nm_token);
3241 
3242 	return (error);
3243 }
3244 
3245 /*
3246  * fsync vnode op. Just call nfs_flush() with commit == 1.
3247  *
3248  * nfs_fsync(struct vnode *a_vp, int a_waitfor)
3249  */
3250 /* ARGSUSED */
3251 static int
3252 nfs_fsync(struct vop_fsync_args *ap)
3253 {
3254 	struct nfsmount *nmp = VFSTONFS(ap->a_vp->v_mount);
3255 	int error;
3256 
3257 	lwkt_gettoken(&nmp->nm_token);
3258 
3259 	/*
3260 	 * NOTE: Because attributes are set synchronously we currently
3261 	 *	 do not have to implement vsetisdirty()/vclrisdirty().
3262 	 */
3263 	error = nfs_flush(ap->a_vp, ap->a_waitfor, curthread, 1);
3264 
3265 	lwkt_reltoken(&nmp->nm_token);
3266 
3267 	return error;
3268 }
3269 
3270 /*
3271  * Flush all the blocks associated with a vnode.   Dirty NFS buffers may be
3272  * in one of two states:  If B_NEEDCOMMIT is clear then the buffer contains
3273  * new NFS data which needs to be written to the server.  If B_NEEDCOMMIT is
3274  * set the buffer contains data that has already been written to the server
3275  * and which now needs a commit RPC.
3276  *
3277  * If commit is 0 we only take one pass and only flush buffers containing new
3278  * dirty data.
3279  *
3280  * If commit is 1 we take two passes, issuing a commit RPC in the second
3281  * pass.
3282  *
3283  * If waitfor is MNT_WAIT and commit is 1, we loop as many times as required
3284  * to completely flush all pending data.
3285  *
3286  * Note that the RB_SCAN code properly handles the case where the
3287  * callback might block and directly or indirectly (another thread) cause
3288  * the RB tree to change.
3289  */
3290 
3291 #ifndef NFS_COMMITBVECSIZ
3292 #define NFS_COMMITBVECSIZ	16
3293 #endif
3294 
3295 struct nfs_flush_info {
3296 	enum { NFI_FLUSHNEW, NFI_COMMIT } mode;
3297 	struct thread *td;
3298 	struct vnode *vp;
3299 	int waitfor;
3300 	int slpflag;
3301 	int slptimeo;
3302 	int loops;
3303 	struct buf *bvary[NFS_COMMITBVECSIZ];
3304 	int bvsize;
3305 	off_t beg_off;
3306 	off_t end_off;
3307 };
3308 
3309 static int nfs_flush_bp(struct buf *bp, void *data);
3310 static int nfs_flush_docommit(struct nfs_flush_info *info, int error);
3311 
3312 int
3313 nfs_flush(struct vnode *vp, int waitfor, struct thread *td, int commit)
3314 {
3315 	struct nfsnode *np = VTONFS(vp);
3316 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3317 	struct nfs_flush_info info;
3318 	int error;
3319 
3320 	bzero(&info, sizeof(info));
3321 	info.td = td;
3322 	info.vp = vp;
3323 	info.waitfor = waitfor;
3324 	info.slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
3325 	info.loops = 0;
3326 	lwkt_gettoken(&vp->v_token);
3327 
3328 	do {
3329 		/*
3330 		 * Flush mode
3331 		 */
3332 		info.mode = NFI_FLUSHNEW;
3333 		error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
3334 				nfs_flush_bp, &info);
3335 
3336 		/*
3337 		 * Take a second pass if committing and no error occured.
3338 		 * Clean up any left over collection (whether an error
3339 		 * occurs or not).
3340 		 */
3341 		if (commit && error == 0) {
3342 			info.mode = NFI_COMMIT;
3343 			error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
3344 					nfs_flush_bp, &info);
3345 			if (info.bvsize)
3346 				error = nfs_flush_docommit(&info, error);
3347 		}
3348 
3349 		/*
3350 		 * Wait for pending I/O to complete before checking whether
3351 		 * any further dirty buffers exist.
3352 		 */
3353 		while (waitfor == MNT_WAIT &&
3354 		       bio_track_active(&vp->v_track_write)) {
3355 			error = bio_track_wait(&vp->v_track_write,
3356 					       info.slpflag, info.slptimeo);
3357 			if (error) {
3358 				/*
3359 				 * We have to be able to break out if this
3360 				 * is an 'intr' mount.
3361 				 */
3362 				if (nfs_sigintr(nmp, NULL, td)) {
3363 					error = -EINTR;
3364 					break;
3365 				}
3366 
3367 				/*
3368 				 * Since we do not process pending signals,
3369 				 * once we get a PCATCH our tsleep() will no
3370 				 * longer sleep, switch to a fixed timeout
3371 				 * instead.
3372 				 */
3373 				if (info.slpflag == PCATCH) {
3374 					info.slpflag = 0;
3375 					info.slptimeo = 2 * hz;
3376 				}
3377 				error = 0;
3378 			}
3379 		}
3380 		++info.loops;
3381 		/*
3382 		 * Loop if we are flushing synchronous as well as committing,
3383 		 * and dirty buffers are still present.  Otherwise we might livelock.
3384 		 */
3385 	} while (waitfor == MNT_WAIT && commit &&
3386 		 error == 0 && !RB_EMPTY(&vp->v_rbdirty_tree));
3387 
3388 	/*
3389 	 * The callbacks have to return a negative error to terminate the
3390 	 * RB scan.
3391 	 */
3392 	if (error < 0)
3393 		error = -error;
3394 
3395 	/*
3396 	 * Deal with any error collection
3397 	 */
3398 	if (np->n_flag & NWRITEERR) {
3399 		error = np->n_error;
3400 		np->n_flag &= ~NWRITEERR;
3401 	}
3402 	lwkt_reltoken(&vp->v_token);
3403 	return (error);
3404 }
3405 
3406 static
3407 int
3408 nfs_flush_bp(struct buf *bp, void *data)
3409 {
3410 	struct nfs_flush_info *info = data;
3411 	int lkflags;
3412 	int error;
3413 	off_t toff;
3414 
3415 	error = 0;
3416 	switch(info->mode) {
3417 	case NFI_FLUSHNEW:
3418 		error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT);
3419 		if (error && info->loops && info->waitfor == MNT_WAIT) {
3420 			error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT);
3421 			if (error) {
3422 				lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL;
3423 				if (info->slpflag & PCATCH)
3424 					lkflags |= LK_PCATCH;
3425 				error = BUF_TIMELOCK(bp, lkflags, "nfsfsync",
3426 						     info->slptimeo);
3427 			}
3428 		}
3429 
3430 		/*
3431 		 * Ignore locking errors
3432 		 */
3433 		if (error) {
3434 			error = 0;
3435 			break;
3436 		}
3437 
3438 		/*
3439 		 * The buffer may have changed out from under us, even if
3440 		 * we did not block (MPSAFE).  Check again now that it is
3441 		 * locked.
3442 		 */
3443 		if (bp->b_vp == info->vp &&
3444 		    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == B_DELWRI) {
3445 			bremfree(bp);
3446 			bawrite(bp);
3447 		} else {
3448 			BUF_UNLOCK(bp);
3449 		}
3450 		break;
3451 	case NFI_COMMIT:
3452 		/*
3453 		 * Only process buffers in need of a commit which we can
3454 		 * immediately lock.  This may prevent a buffer from being
3455 		 * committed, but the normal flush loop will block on the
3456 		 * same buffer so we shouldn't get into an endless loop.
3457 		 */
3458 		if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
3459 		    (B_DELWRI | B_NEEDCOMMIT)) {
3460 			break;
3461 		}
3462 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
3463 			break;
3464 
3465 		/*
3466 		 * We must recheck after successfully locking the buffer.
3467 		 */
3468 		if (bp->b_vp != info->vp ||
3469 		    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
3470 		    (B_DELWRI | B_NEEDCOMMIT)) {
3471 			BUF_UNLOCK(bp);
3472 			break;
3473 		}
3474 
3475 		/*
3476 		 * NOTE: storing the bp in the bvary[] basically sets
3477 		 * it up for a commit operation.
3478 		 *
3479 		 * We must call vfs_busy_pages() now so the commit operation
3480 		 * is interlocked with user modifications to memory mapped
3481 		 * pages.  The b_dirtyoff/b_dirtyend range is not correct
3482 		 * until after the pages have been busied.
3483 		 *
3484 		 * Note: to avoid loopback deadlocks, we do not
3485 		 * assign b_runningbufspace.
3486 		 */
3487 		bremfree(bp);
3488 		bp->b_cmd = BUF_CMD_WRITE;
3489 		vfs_busy_pages(bp->b_vp, bp);
3490 		info->bvary[info->bvsize] = bp;
3491 		toff = bp->b_bio2.bio_offset + bp->b_dirtyoff;
3492 		if (info->bvsize == 0 || toff < info->beg_off)
3493 			info->beg_off = toff;
3494 		toff += (off_t)(bp->b_dirtyend - bp->b_dirtyoff);
3495 		if (info->bvsize == 0 || toff > info->end_off)
3496 			info->end_off = toff;
3497 		++info->bvsize;
3498 		if (info->bvsize == NFS_COMMITBVECSIZ) {
3499 			error = nfs_flush_docommit(info, 0);
3500 			KKASSERT(info->bvsize == 0);
3501 		}
3502 	}
3503 	return (error);
3504 }
3505 
3506 static
3507 int
3508 nfs_flush_docommit(struct nfs_flush_info *info, int error)
3509 {
3510 	struct vnode *vp;
3511 	struct buf *bp;
3512 	off_t bytes;
3513 	int retv;
3514 	int i;
3515 
3516 	vp = info->vp;
3517 
3518 	if (info->bvsize > 0) {
3519 		/*
3520 		 * Commit data on the server, as required.  Note that
3521 		 * nfs_commit will use the vnode's cred for the commit.
3522 		 * The NFSv3 commit RPC is limited to a 32 bit byte count.
3523 		 */
3524 		bytes = info->end_off - info->beg_off;
3525 		if (bytes > 0x40000000)
3526 			bytes = 0x40000000;
3527 		if (error) {
3528 			retv = -error;
3529 		} else {
3530 			retv = nfs_commitrpc_uio(vp, info->beg_off,
3531 						 (int)bytes, info->td);
3532 			if (retv == NFSERR_STALEWRITEVERF)
3533 				nfs_clearcommit(vp->v_mount);
3534 		}
3535 
3536 		/*
3537 		 * Now, either mark the blocks I/O done or mark the
3538 		 * blocks dirty, depending on whether the commit
3539 		 * succeeded.
3540 		 */
3541 		for (i = 0; i < info->bvsize; ++i) {
3542 			bp = info->bvary[i];
3543 			if (retv || (bp->b_flags & B_NEEDCOMMIT) == 0) {
3544 				/*
3545 				 * Either an error or the original
3546 				 * vfs_busy_pages() cleared B_NEEDCOMMIT
3547 				 * due to finding new dirty VM pages in
3548 				 * the buffer.
3549 				 *
3550 				 * Leave B_DELWRI intact.
3551 				 */
3552 				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
3553 				vfs_unbusy_pages(bp);
3554 				bp->b_cmd = BUF_CMD_DONE;
3555 				bqrelse(bp);
3556 			} else {
3557 				/*
3558 				 * Success, remove B_DELWRI ( bundirty() ).
3559 				 *
3560 				 * b_dirtyoff/b_dirtyend seem to be NFS
3561 				 * specific.  We should probably move that
3562 				 * into bundirty(). XXX
3563 				 *
3564 				 * We are faking an I/O write, we have to
3565 				 * start the transaction in order to
3566 				 * immediately biodone() it.
3567 				 */
3568 				bundirty(bp);
3569 				bp->b_flags &= ~B_ERROR;
3570 				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
3571 				bp->b_dirtyoff = bp->b_dirtyend = 0;
3572 				biodone(&bp->b_bio1);
3573 			}
3574 		}
3575 		info->bvsize = 0;
3576 	}
3577 	return (error);
3578 }
3579 
3580 /*
3581  * NFS advisory byte-level locks.
3582  * Currently unsupported.
3583  *
3584  * nfs_advlock(struct vnode *a_vp, caddr_t a_id, int a_op, struct flock *a_fl,
3585  *		int a_flags)
3586  */
3587 static int
3588 nfs_advlock(struct vop_advlock_args *ap)
3589 {
3590 	struct nfsnode *np = VTONFS(ap->a_vp);
3591 
3592 	/* no token lock currently required */
3593 	/*
3594 	 * The following kludge is to allow diskless support to work
3595 	 * until a real NFS lockd is implemented. Basically, just pretend
3596 	 * that this is a local lock.
3597 	 */
3598 	return (lf_advlock(ap, &(np->n_lockf), np->n_size));
3599 }
3600 
3601 /*
3602  * Print out the contents of an nfsnode.
3603  *
3604  * nfs_print(struct vnode *a_vp)
3605  */
3606 static int
3607 nfs_print(struct vop_print_args *ap)
3608 {
3609 	struct vnode *vp = ap->a_vp;
3610 	struct nfsnode *np = VTONFS(vp);
3611 
3612 	kprintf("tag VT_NFS, fileid %lld fsid 0x%x",
3613 		(long long)np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3614 	if (vp->v_type == VFIFO)
3615 		fifo_printinfo(vp);
3616 	kprintf("\n");
3617 	return (0);
3618 }
3619 
3620 /*
3621  * nfs special file access vnode op.
3622  *
3623  * nfs_laccess(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
3624  */
3625 static int
3626 nfs_laccess(struct vop_access_args *ap)
3627 {
3628 	struct nfsmount *nmp = VFSTONFS(ap->a_vp->v_mount);
3629 	struct vattr vattr;
3630 	int error;
3631 
3632 	lwkt_gettoken(&nmp->nm_token);
3633 	error = VOP_GETATTR(ap->a_vp, &vattr);
3634 	if (error == 0) {
3635 		error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid,
3636 					  vattr.va_mode, 0);
3637 	}
3638 	lwkt_reltoken(&nmp->nm_token);
3639 
3640 	return (error);
3641 }
3642 
3643 /*
3644  * Read wrapper for fifos.
3645  *
3646  * nfsfifo_read(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
3647  *		struct ucred *a_cred)
3648  */
3649 static int
3650 nfsfifo_read(struct vop_read_args *ap)
3651 {
3652 	struct nfsnode *np = VTONFS(ap->a_vp);
3653 
3654 	/* no token access required */
3655 	/*
3656 	 * Set access flag.
3657 	 */
3658 	np->n_flag |= NACC;
3659 	getnanotime(&np->n_atim);
3660 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
3661 }
3662 
3663 /*
3664  * Write wrapper for fifos.
3665  *
3666  * nfsfifo_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
3667  *		 struct ucred *a_cred)
3668  */
3669 static int
3670 nfsfifo_write(struct vop_write_args *ap)
3671 {
3672 	struct nfsnode *np = VTONFS(ap->a_vp);
3673 
3674 	/* no token access required */
3675 	/*
3676 	 * Set update flag.
3677 	 */
3678 	np->n_flag |= NUPD;
3679 	getnanotime(&np->n_mtim);
3680 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
3681 }
3682 
3683 /*
3684  * Close wrapper for fifos.
3685  *
3686  * Update the times on the nfsnode then do fifo close.
3687  *
3688  * nfsfifo_close(struct vnode *a_vp, int a_fflag)
3689  */
3690 static int
3691 nfsfifo_close(struct vop_close_args *ap)
3692 {
3693 	struct vnode *vp = ap->a_vp;
3694 	struct nfsnode *np = VTONFS(vp);
3695 	struct vattr vattr;
3696 	struct timespec ts;
3697 
3698 	/* no token access required */
3699 
3700 	vn_lock(vp, LK_UPGRADE | LK_RETRY); /* XXX */
3701 	if (np->n_flag & (NACC | NUPD)) {
3702 		getnanotime(&ts);
3703 		if (np->n_flag & NACC)
3704 			np->n_atim = ts;
3705 		if (np->n_flag & NUPD)
3706 			np->n_mtim = ts;
3707 		np->n_flag |= NCHG;
3708 		if (VREFCNT(vp) == 1 &&
3709 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3710 			VATTR_NULL(&vattr);
3711 			if (np->n_flag & NACC)
3712 				vattr.va_atime = np->n_atim;
3713 			if (np->n_flag & NUPD)
3714 				vattr.va_mtime = np->n_mtim;
3715 			(void)VOP_SETATTR(vp, &vattr, nfs_vpcred(vp, ND_WRITE));
3716 		}
3717 	}
3718 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
3719 }
3720 
3721 /************************************************************************
3722  *                          KQFILTER OPS                                *
3723  ************************************************************************/
3724 
3725 static void filt_nfsdetach(struct knote *kn);
3726 static int filt_nfsread(struct knote *kn, long hint);
3727 static int filt_nfswrite(struct knote *kn, long hint);
3728 static int filt_nfsvnode(struct knote *kn, long hint);
3729 
3730 static struct filterops nfsread_filtops =
3731 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
3732 	  NULL, filt_nfsdetach, filt_nfsread };
3733 static struct filterops nfswrite_filtops =
3734 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
3735 	  NULL, filt_nfsdetach, filt_nfswrite };
3736 static struct filterops nfsvnode_filtops =
3737 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
3738 	  NULL, filt_nfsdetach, filt_nfsvnode };
3739 
3740 static int
3741 nfs_kqfilter (struct vop_kqfilter_args *ap)
3742 {
3743 	struct vnode *vp = ap->a_vp;
3744 	struct knote *kn = ap->a_kn;
3745 
3746 	switch (kn->kn_filter) {
3747 	case EVFILT_READ:
3748 		kn->kn_fop = &nfsread_filtops;
3749 		break;
3750 	case EVFILT_WRITE:
3751 		kn->kn_fop = &nfswrite_filtops;
3752 		break;
3753 	case EVFILT_VNODE:
3754 		kn->kn_fop = &nfsvnode_filtops;
3755 		break;
3756 	default:
3757 		return (EOPNOTSUPP);
3758 	}
3759 
3760 	kn->kn_hook = (caddr_t)vp;
3761 
3762 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
3763 
3764 	return(0);
3765 }
3766 
3767 static void
3768 filt_nfsdetach(struct knote *kn)
3769 {
3770 	struct vnode *vp = (void *)kn->kn_hook;
3771 
3772 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
3773 }
3774 
3775 static int
3776 filt_nfsread(struct knote *kn, long hint)
3777 {
3778 	struct vnode *vp = (void *)kn->kn_hook;
3779 	struct nfsnode *node = VTONFS(vp);
3780 	off_t off;
3781 
3782 	if (hint == NOTE_REVOKE) {
3783 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
3784 		return(1);
3785 	}
3786 
3787 	/*
3788 	 * Interlock against MP races when performing this function. XXX
3789 	 */
3790 	/* TMPFS_NODE_LOCK_SH(node); */
3791 	off = node->n_size - kn->kn_fp->f_offset;
3792 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
3793 	if (kn->kn_sfflags & NOTE_OLDAPI) {
3794 		/* TMPFS_NODE_UNLOCK(node); */
3795 		return(1);
3796 	}
3797 	if (kn->kn_data == 0) {
3798 		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
3799 	}
3800 	/* TMPFS_NODE_UNLOCK(node); */
3801 	return (kn->kn_data != 0);
3802 }
3803 
3804 static int
3805 filt_nfswrite(struct knote *kn, long hint)
3806 {
3807 	if (hint == NOTE_REVOKE)
3808 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
3809 	kn->kn_data = 0;
3810 	return (1);
3811 }
3812 
3813 static int
3814 filt_nfsvnode(struct knote *kn, long hint)
3815 {
3816 	if (kn->kn_sfflags & hint)
3817 		kn->kn_fflags |= hint;
3818 	if (hint == NOTE_REVOKE) {
3819 		kn->kn_flags |= (EV_EOF | EV_NODATA);
3820 		return (1);
3821 	}
3822 	return (kn->kn_fflags != 0);
3823 }
3824