xref: /dragonfly/sys/vfs/nfs/nfs_vnops.c (revision b187502f)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
33  * $FreeBSD: src/sys/nfs/nfs_vnops.c,v 1.150.2.5 2001/12/20 19:56:28 dillon Exp $
34  */
35 
36 
37 /*
38  * vnode op calls for Sun NFS version 2 and 3
39  */
40 
41 #include "opt_inet.h"
42 
43 #include <sys/param.h>
44 #include <sys/kernel.h>
45 #include <sys/systm.h>
46 #include <sys/resourcevar.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/namei.h>
53 #include <sys/nlookup.h>
54 #include <sys/socket.h>
55 #include <sys/vnode.h>
56 #include <sys/dirent.h>
57 #include <sys/fcntl.h>
58 #include <sys/lockf.h>
59 #include <sys/stat.h>
60 #include <sys/sysctl.h>
61 #include <sys/conf.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 
66 #include <sys/buf2.h>
67 
68 #include <vfs/fifofs/fifo.h>
69 #include <vfs/ufs/dir.h>
70 
71 #undef DIRBLKSIZ
72 
73 #include "rpcv2.h"
74 #include "nfsproto.h"
75 #include "nfs.h"
76 #include "nfsmount.h"
77 #include "nfsnode.h"
78 #include "xdr_subs.h"
79 #include "nfsm_subs.h"
80 
81 #include <net/if.h>
82 #include <netinet/in.h>
83 #include <netinet/in_var.h>
84 
/* Local boolean constants */
#define	TRUE	1
#define	FALSE	0

/*
 * Forward declarations for the file-local vnode operations and helpers.
 */

/* Operations installed in the fifo vop table */
static int	nfsfifo_read(struct vop_read_args *ap);
static int	nfsfifo_write(struct vop_write_args *ap);
static int	nfsfifo_close(struct vop_close_args *ap);

/* Old-API namespace operations */
static int	nfs_lookup(struct vop_old_lookup_args *ap);
static int	nfs_create(struct vop_old_create_args *ap);
static int	nfs_mknod(struct vop_old_mknod_args *ap);
static int	nfs_remove(struct vop_old_remove_args *ap);
static int	nfs_link(struct vop_old_link_args *ap);
static int	nfs_rename(struct vop_old_rename_args *ap);
static int	nfs_mkdir(struct vop_old_mkdir_args *ap);
static int	nfs_rmdir(struct vop_old_rmdir_args *ap);
static int	nfs_symlink(struct vop_old_symlink_args *ap);

/* New-API namespace operations */
static int	nfs_nresolve(struct vop_nresolve_args *ap);

/* Remaining vnode operations */
static int	nfs_open(struct vop_open_args *ap);
static int	nfs_close(struct vop_close_args *ap);
static int	nfs_access(struct vop_access_args *ap);
static int	nfs_laccess(struct vop_access_args *ap);
static int	nfs_getattr(struct vop_getattr_args *ap);
static int	nfs_setattr(struct vop_setattr_args *ap);
static int	nfs_read(struct vop_read_args *ap);
static int	nfs_fsync(struct vop_fsync_args *ap);
static int	nfs_readdir(struct vop_readdir_args *ap);
static int	nfs_readlink(struct vop_readlink_args *ap);
static int	nfs_bmap(struct vop_bmap_args *ap);
static int	nfs_strategy(struct vop_strategy_args *ap);
static int	nfs_print(struct vop_print_args *ap);
static int	nfs_advlock(struct vop_advlock_args *ap);
static int	nfs_kqfilter(struct vop_kqfilter_args *ap);

/* Internal helpers */
static int	nfs_setattrrpc(struct vnode *vp, struct vattr *vap,
			       struct ucred *cred, struct thread *td);
static int	nfs_lookitup(struct vnode *, const char *, int,
			     struct ucred *, struct thread *,
			     struct nfsnode **);
static int	nfs_sillyrename(struct vnode *, struct vnode *,
				struct componentname *);
122 /*
123  * Global vfs data structures for nfs
124  */
125 struct vop_ops nfsv2_vnode_vops = {
126 	.vop_default =		vop_defaultop,
127 	.vop_access =		nfs_access,
128 	.vop_advlock =		nfs_advlock,
129 	.vop_bmap =		nfs_bmap,
130 	.vop_close =		nfs_close,
131 	.vop_old_create =	nfs_create,
132 	.vop_fsync =		nfs_fsync,
133 	.vop_getattr =		nfs_getattr,
134 	.vop_getpages =		vop_stdgetpages,
135 	.vop_putpages =		vop_stdputpages,
136 	.vop_inactive =		nfs_inactive,
137 	.vop_old_link =		nfs_link,
138 	.vop_old_lookup =	nfs_lookup,
139 	.vop_old_mkdir =	nfs_mkdir,
140 	.vop_old_mknod =	nfs_mknod,
141 	.vop_open =		nfs_open,
142 	.vop_print =		nfs_print,
143 	.vop_read =		nfs_read,
144 	.vop_readdir =		nfs_readdir,
145 	.vop_readlink =		nfs_readlink,
146 	.vop_reclaim =		nfs_reclaim,
147 	.vop_old_remove =	nfs_remove,
148 	.vop_old_rename =	nfs_rename,
149 	.vop_old_rmdir =	nfs_rmdir,
150 	.vop_setattr =		nfs_setattr,
151 	.vop_strategy =		nfs_strategy,
152 	.vop_old_symlink =	nfs_symlink,
153 	.vop_write =		nfs_write,
154 	.vop_nresolve =		nfs_nresolve,
155 	.vop_kqfilter =		nfs_kqfilter
156 };
157 
158 /*
159  * Special device vnode ops
160  */
161 struct vop_ops nfsv2_spec_vops = {
162 	.vop_default =		vop_defaultop,
163 	.vop_access =		nfs_laccess,
164 	.vop_close =		nfs_close,
165 	.vop_fsync =		nfs_fsync,
166 	.vop_getattr =		nfs_getattr,
167 	.vop_inactive =		nfs_inactive,
168 	.vop_print =		nfs_print,
169 	.vop_read =		vop_stdnoread,
170 	.vop_reclaim =		nfs_reclaim,
171 	.vop_setattr =		nfs_setattr,
172 	.vop_write =		vop_stdnowrite
173 };
174 
175 struct vop_ops nfsv2_fifo_vops = {
176 	.vop_default =		fifo_vnoperate,
177 	.vop_access =		nfs_laccess,
178 	.vop_close =		nfsfifo_close,
179 	.vop_fsync =		nfs_fsync,
180 	.vop_getattr =		nfs_getattr,
181 	.vop_inactive =		nfs_inactive,
182 	.vop_print =		nfs_print,
183 	.vop_read =		nfsfifo_read,
184 	.vop_reclaim =		nfs_reclaim,
185 	.vop_setattr =		nfs_setattr,
186 	.vop_write =		nfsfifo_write
187 };
188 
/*
 * Forward declarations for the rpc-level helpers.
 */
static int	nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
			     struct componentname *cnp, struct vattr *vap);
static int	nfs_removerpc(struct vnode *dvp,
			      const char *name, int namelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renamerpc(struct vnode *fdvp,
			      const char *fnameptr, int fnamelen,
			      struct vnode *tdvp,
			      const char *tnameptr, int tnamelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
			     struct sillyrename *sp);
202 
203 SYSCTL_DECL(_vfs_nfs);
204 
205 static int nfs_flush_on_rename = 1;
206 SYSCTL_INT(_vfs_nfs, OID_AUTO, flush_on_rename, CTLFLAG_RW,
207 	   &nfs_flush_on_rename, 0, "flush fvp prior to rename");
208 static int nfs_flush_on_hlink = 0;
209 SYSCTL_INT(_vfs_nfs, OID_AUTO, flush_on_hlink, CTLFLAG_RW,
210 	   &nfs_flush_on_hlink, 0, "flush fvp prior to hard link");
211 
212 static int	nfsaccess_cache_timeout = NFS_DEFATTRTIMO;
213 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
214 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
215 
216 static int	nfsneg_cache_timeout = NFS_MINATTRTIMO;
217 SYSCTL_INT(_vfs_nfs, OID_AUTO, neg_cache_timeout, CTLFLAG_RW,
218 	   &nfsneg_cache_timeout, 0, "NFS NEGATIVE NAMECACHE timeout");
219 
220 static int	nfspos_cache_timeout = NFS_MINATTRTIMO;
221 SYSCTL_INT(_vfs_nfs, OID_AUTO, pos_cache_timeout, CTLFLAG_RW,
222 	   &nfspos_cache_timeout, 0, "NFS POSITIVE NAMECACHE timeout");
223 
224 static int	nfsv3_commit_on_close = 0;
225 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
226 	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
227 #if 0
228 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
229 	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
230 
231 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
232 	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
233 #endif
234 
/* Union of every NFSv3 ACCESS bit this file asks the server about. */
#define	NFSV3ACCESS_ALL							\
	(NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |	\
	 NFSV3ACCESS_EXECUTE | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
238 
239 static __inline
240 void
241 nfs_knote(struct vnode *vp, int flags)
242 {
243 	if (flags)
244 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
245 }
246 
247 /*
248  * Returns whether a name component is a degenerate '.' or '..'.
249  */
250 static __inline
251 int
252 nlcdegenerate(struct nlcomponent *nlc)
253 {
254 	if (nlc->nlc_namelen == 1 && nlc->nlc_nameptr[0] == '.')
255 		return(1);
256 	if (nlc->nlc_namelen == 2 &&
257 	    nlc->nlc_nameptr[0] == '.' && nlc->nlc_nameptr[1] == '.')
258 		return(1);
259 	return(0);
260 }
261 
262 static int
263 nfs3_access_otw(struct vnode *vp, int wmode,
264 		struct thread *td, struct ucred *cred)
265 {
266 	struct nfsnode *np = VTONFS(vp);
267 	int attrflag;
268 	int error = 0;
269 	u_int32_t *tl;
270 	u_int32_t rmode;
271 	struct nfsm_info info;
272 
273 	info.mrep = NULL;
274 	info.v3 = 1;
275 
276 	nfsstats.rpccnt[NFSPROC_ACCESS]++;
277 	nfsm_reqhead(&info, vp, NFSPROC_ACCESS,
278 		     NFSX_FH(info.v3) + NFSX_UNSIGNED);
279 	ERROROUT(nfsm_fhtom(&info, vp));
280 	tl = nfsm_build(&info, NFSX_UNSIGNED);
281 	*tl = txdr_unsigned(wmode);
282 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_ACCESS, td, cred, &error));
283 	ERROROUT(nfsm_postop_attr(&info, vp, &attrflag, NFS_LATTR_NOSHRINK));
284 	if (error == 0) {
285 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
286 		rmode = fxdr_unsigned(u_int32_t, *tl);
287 		np->n_mode = rmode;
288 		np->n_modeuid = cred->cr_uid;
289 		np->n_modestamp = mycpu->gd_time_seconds;
290 	}
291 	m_freem(info.mrep);
292 	info.mrep = NULL;
293 nfsmout:
294 	return error;
295 }
296 
297 /*
298  * nfs access vnode op.
299  * For nfs version 2, just return ok. File accesses may fail later.
300  * For nfs version 3, use the access rpc to check accessibility. If file modes
301  * are changed on the server, accesses might still fail later.
302  *
303  * nfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
304  */
305 static int
306 nfs_access(struct vop_access_args *ap)
307 {
308 	struct ucred *cred;
309 	struct vnode *vp = ap->a_vp;
310 	thread_t td = curthread;
311 	int error = 0;
312 	u_int32_t mode, wmode;
313 	struct nfsnode *np = VTONFS(vp);
314 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
315 	int v3 = NFS_ISV3(vp);
316 
317 	lwkt_gettoken(&nmp->nm_token);
318 
319 	/*
320 	 * Disallow write attempts on filesystems mounted read-only;
321 	 * unless the file is a socket, fifo, or a block or character
322 	 * device resident on the filesystem.
323 	 */
324 	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
325 		switch (vp->v_type) {
326 		case VREG:
327 		case VDIR:
328 		case VLNK:
329 			lwkt_reltoken(&nmp->nm_token);
330 			return (EROFS);
331 		default:
332 			break;
333 		}
334 	}
335 
336 	/*
337 	 * The NFS protocol passes only the effective uid/gid over the wire but
338 	 * we need to check access against real ids if AT_EACCESS not set.
339 	 * Handle this case by cloning the credentials and setting the
340 	 * effective ids to the real ones.
341 	 *
342 	 * The crdup() here can cause a lot of ucred structures to build-up
343 	 * (up to maxvnodes), so do our best to avoid it.
344 	 */
345 	if (ap->a_flags & AT_EACCESS) {
346 		cred = crhold(ap->a_cred);
347 	} else {
348 		cred = ap->a_cred;
349 		if (cred->cr_uid == cred->cr_ruid &&
350 		    cred->cr_gid == cred->cr_rgid) {
351 			cred = crhold(ap->a_cred);
352 		} else {
353 			cred = crdup(ap->a_cred);
354 			cred->cr_uid = cred->cr_ruid;
355 			cred->cr_gid = cred->cr_rgid;
356 		}
357 	}
358 
359 	/*
360 	 * For nfs v3, check to see if we have done this recently, and if
361 	 * so return our cached result instead of making an ACCESS call.
362 	 * If not, do an access rpc, otherwise you are stuck emulating
363 	 * ufs_access() locally using the vattr. This may not be correct,
364 	 * since the server may apply other access criteria such as
365 	 * client uid-->server uid mapping that we do not know about.
366 	 */
367 	if (v3) {
368 		if (ap->a_mode & VREAD)
369 			mode = NFSV3ACCESS_READ;
370 		else
371 			mode = 0;
372 		if (vp->v_type != VDIR) {
373 			if (ap->a_mode & VWRITE)
374 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
375 			if (ap->a_mode & VEXEC)
376 				mode |= NFSV3ACCESS_EXECUTE;
377 		} else {
378 			if (ap->a_mode & VWRITE)
379 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
380 					 NFSV3ACCESS_DELETE);
381 			if (ap->a_mode & VEXEC)
382 				mode |= NFSV3ACCESS_LOOKUP;
383 		}
384 		/* XXX safety belt, only make blanket request if caching */
385 		if (nfsaccess_cache_timeout > 0) {
386 			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
387 				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
388 				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
389 		} else {
390 			wmode = mode;
391 		}
392 
393 		/*
394 		 * Does our cached result allow us to give a definite yes to
395 		 * this request?
396 		 */
397 		if (np->n_modestamp &&
398 		   (mycpu->gd_time_seconds < (np->n_modestamp + nfsaccess_cache_timeout)) &&
399 		   (cred->cr_uid == np->n_modeuid) &&
400 		   ((np->n_mode & mode) == mode)) {
401 			nfsstats.accesscache_hits++;
402 		} else {
403 			/*
404 			 * Either a no, or a don't know.  Go to the wire.
405 			 */
406 			nfsstats.accesscache_misses++;
407 		        error = nfs3_access_otw(vp, wmode, td, cred);
408 			if (!error) {
409 				if ((np->n_mode & mode) != mode) {
410 					error = EACCES;
411 				}
412 			}
413 		}
414 	} else {
415 		if ((error = nfs_laccess(ap)) != 0) {
416 			crfree(cred);
417 			lwkt_reltoken(&nmp->nm_token);
418 			return (error);
419 		}
420 
421 		/*
422 		 * Attempt to prevent a mapped root from accessing a file
423 		 * which it shouldn't.  We try to read a byte from the file
424 		 * if the user is root and the file is not zero length.
425 		 * After calling nfs_laccess, we should have the correct
426 		 * file size cached.
427 		 */
428 		if (cred->cr_uid == 0 && (ap->a_mode & VREAD)
429 		    && VTONFS(vp)->n_size > 0) {
430 			struct iovec aiov;
431 			struct uio auio;
432 			char buf[1];
433 
434 			aiov.iov_base = buf;
435 			aiov.iov_len = 1;
436 			auio.uio_iov = &aiov;
437 			auio.uio_iovcnt = 1;
438 			auio.uio_offset = 0;
439 			auio.uio_resid = 1;
440 			auio.uio_segflg = UIO_SYSSPACE;
441 			auio.uio_rw = UIO_READ;
442 			auio.uio_td = td;
443 
444 			if (vp->v_type == VREG) {
445 				error = nfs_readrpc_uio(vp, &auio);
446 			} else if (vp->v_type == VDIR) {
447 				char* bp;
448 				bp = kmalloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
449 				aiov.iov_base = bp;
450 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
451 				error = nfs_readdirrpc_uio(vp, &auio);
452 				kfree(bp, M_TEMP);
453 			} else if (vp->v_type == VLNK) {
454 				error = nfs_readlinkrpc_uio(vp, &auio);
455 			} else {
456 				error = EACCES;
457 			}
458 		}
459 	}
460 	/*
461 	 * [re]record creds for reading and/or writing if access
462 	 * was granted.  Assume the NFS server will grant read access
463 	 * for execute requests.
464 	 */
465 	if (error == 0) {
466 		if ((ap->a_mode & (VREAD|VEXEC)) && cred != np->n_rucred) {
467 			crhold(cred);
468 			if (np->n_rucred)
469 				crfree(np->n_rucred);
470 			np->n_rucred = cred;
471 		}
472 		if ((ap->a_mode & VWRITE) && cred != np->n_wucred) {
473 			crhold(cred);
474 			if (np->n_wucred)
475 				crfree(np->n_wucred);
476 			np->n_wucred = cred;
477 		}
478 	}
479 	lwkt_reltoken(&nmp->nm_token);
480 	crfree(cred);
481 
482 	return(error);
483 }
484 
485 /*
486  * nfs open vnode op
487  * Check to see if the type is ok
488  * and that deletion is not in progress.
489  * For paged in text files, you will need to flush the page cache
490  * if consistency is lost.
491  *
492  * nfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
493  *	    struct file *a_fp)
494  */
495 /* ARGSUSED */
496 static int
497 nfs_open(struct vop_open_args *ap)
498 {
499 	struct vnode *vp = ap->a_vp;
500 	struct nfsnode *np = VTONFS(vp);
501 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
502 	struct vattr vattr;
503 	int error;
504 
505 	lwkt_gettoken(&nmp->nm_token);
506 
507 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
508 #ifdef DIAGNOSTIC
509 		kprintf("open eacces vtyp=%d\n",vp->v_type);
510 #endif
511 		lwkt_reltoken(&nmp->nm_token);
512 		return (EOPNOTSUPP);
513 	}
514 
515 	/*
516 	 * Save valid creds for reading and writing for later RPCs.
517 	 */
518 	if ((ap->a_mode & FREAD) && ap->a_cred != np->n_rucred) {
519 		crhold(ap->a_cred);
520 		if (np->n_rucred)
521 			crfree(np->n_rucred);
522 		np->n_rucred = ap->a_cred;
523 	}
524 	if ((ap->a_mode & FWRITE) && ap->a_cred != np->n_wucred) {
525 		crhold(ap->a_cred);
526 		if (np->n_wucred)
527 			crfree(np->n_wucred);
528 		np->n_wucred = ap->a_cred;
529 	}
530 
531 	/*
532 	 * Clear the attribute cache only if opening with write access.  It
533 	 * is unclear if we should do this at all here, but we certainly
534 	 * should not clear the cache unconditionally simply because a file
535 	 * is being opened.
536 	 */
537 	if (ap->a_mode & FWRITE)
538 		np->n_attrstamp = 0;
539 
540 	/*
541 	 * For normal NFS, reconcile changes made locally verses
542 	 * changes made remotely.  Note that VOP_GETATTR only goes
543 	 * to the wire if the cached attribute has timed out or been
544 	 * cleared.
545 	 *
546 	 * If local modifications have been made clear the attribute
547 	 * cache to force an attribute and modified time check.  If
548 	 * GETATTR detects that the file has been changed by someone
549 	 * other then us it will set NRMODIFIED.
550 	 *
551 	 * If we are opening a directory and local changes have been
552 	 * made we have to invalidate the cache in order to ensure
553 	 * that we get the most up-to-date information from the
554 	 * server.  XXX
555 	 */
556 	if (np->n_flag & NLMODIFIED) {
557 		np->n_attrstamp = 0;
558 		if (vp->v_type == VDIR) {
559 			error = nfs_vinvalbuf(vp, V_SAVE, 1);
560 			if (error == EINTR) {
561 				lwkt_reltoken(&nmp->nm_token);
562 				return (error);
563 			}
564 			nfs_invaldir(vp);
565 		}
566 	}
567 	error = VOP_GETATTR(vp, &vattr);
568 	if (error) {
569 		lwkt_reltoken(&nmp->nm_token);
570 		return (error);
571 	}
572 	if (np->n_flag & NRMODIFIED) {
573 		if (vp->v_type == VDIR)
574 			nfs_invaldir(vp);
575 		error = nfs_vinvalbuf(vp, V_SAVE, 1);
576 		if (error == EINTR) {
577 			lwkt_reltoken(&nmp->nm_token);
578 			return (error);
579 		}
580 		np->n_flag &= ~NRMODIFIED;
581 	}
582 	error = vop_stdopen(ap);
583 	lwkt_reltoken(&nmp->nm_token);
584 
585 	return error;
586 }
587 
588 /*
589  * nfs close vnode op
590  * What an NFS client should do upon close after writing is a debatable issue.
591  * Most NFS clients push delayed writes to the server upon close, basically for
592  * two reasons:
593  * 1 - So that any write errors may be reported back to the client process
594  *     doing the close system call. By far the two most likely errors are
595  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
596  * 2 - To put a worst case upper bound on cache inconsistency between
597  *     multiple clients for the file.
598  * There is also a consistency problem for Version 2 of the protocol w.r.t.
599  * not being able to tell if other clients are writing a file concurrently,
600  * since there is no way of knowing if the changed modify time in the reply
601  * is only due to the write for this client.
602  * (NFS Version 3 provides weak cache consistency data in the reply that
603  *  should be sufficient to detect and handle this case.)
604  *
605  * The current code does the following:
606  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
607  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
608  *                     or commit them (this satisfies 1 and 2 except for the
609  *                     case where the server crashes after this close but
610  *                     before the commit RPC, which is felt to be "good
611  *                     enough". Changing the last argument to nfs_flush() to
612  *                     a 1 would force a commit operation, if it is felt a
613  *                     commit is necessary now.
614  * for NQNFS         - do nothing now, since 2 is dealt with via leases and
615  *                     1 should be dealt with via an fsync() system call for
616  *                     cases where write errors are important.
617  *
618  * nfs_close(struct vnode *a_vp, int a_fflag)
619  */
620 /* ARGSUSED */
621 static int
622 nfs_close(struct vop_close_args *ap)
623 {
624 	struct vnode *vp = ap->a_vp;
625 	struct nfsnode *np = VTONFS(vp);
626 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
627 	int error = 0;
628 	thread_t td = curthread;
629 
630 	vn_lock(vp, LK_UPGRADE | LK_RETRY); /* XXX */
631 	lwkt_gettoken(&nmp->nm_token);
632 
633 	if (vp->v_type == VREG) {
634 	    if (np->n_flag & NLMODIFIED) {
635 		if (NFS_ISV3(vp)) {
636 		    /*
637 		     * Under NFSv3 we have dirty buffers to dispose of.  We
638 		     * must flush them to the NFS server.  We have the option
639 		     * of waiting all the way through the commit rpc or just
640 		     * waiting for the initial write.  The default is to only
641 		     * wait through the initial write so the data is in the
642 		     * server's cache, which is roughly similar to the state
643 		     * a standard disk subsystem leaves the file in on close().
644 		     *
645 		     * We cannot clear the NLMODIFIED bit in np->n_flag due to
646 		     * potential races with other processes, and certainly
647 		     * cannot clear it if we don't commit.
648 		     */
649 		    int cm = nfsv3_commit_on_close ? 1 : 0;
650 		    error = nfs_flush(vp, MNT_WAIT, td, cm);
651 		    /* np->n_flag &= ~NLMODIFIED; */
652 		} else {
653 		    error = nfs_vinvalbuf(vp, V_SAVE, 1);
654 		}
655 		np->n_attrstamp = 0;
656 	    }
657 	    if (np->n_flag & NWRITEERR) {
658 		np->n_flag &= ~NWRITEERR;
659 		error = np->n_error;
660 	    }
661 	}
662 	vop_stdclose(ap);
663 	lwkt_reltoken(&nmp->nm_token);
664 
665 	return (error);
666 }
667 
668 /*
669  * nfs getattr call from vfs.
670  *
671  * nfs_getattr(struct vnode *a_vp, struct vattr *a_vap)
672  */
673 static int
674 nfs_getattr(struct vop_getattr_args *ap)
675 {
676 	struct vnode *vp = ap->a_vp;
677 	struct nfsnode *np = VTONFS(vp);
678 	struct nfsmount *nmp;
679 	int error = 0;
680 	thread_t td = curthread;
681 	struct nfsm_info info;
682 
683 	info.mrep = NULL;
684 	info.v3 = NFS_ISV3(vp);
685 	nmp = VFSTONFS(vp->v_mount);
686 
687 	lwkt_gettoken(&nmp->nm_token);
688 
689 	/*
690 	 * Update local times for special files.
691 	 */
692 	if (np->n_flag & (NACC | NUPD))
693 		np->n_flag |= NCHG;
694 	/*
695 	 * First look in the cache.
696 	 */
697 	if (nfs_getattrcache(vp, ap->a_vap) == 0)
698 		goto done;
699 
700 	if (info.v3 && nfsaccess_cache_timeout > 0) {
701 		nfsstats.accesscache_misses++;
702 		nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, nfs_vpcred(vp, ND_CHECK));
703 		if (nfs_getattrcache(vp, ap->a_vap) == 0)
704 			goto done;
705 	}
706 
707 	nfsstats.rpccnt[NFSPROC_GETATTR]++;
708 	nfsm_reqhead(&info, vp, NFSPROC_GETATTR, NFSX_FH(info.v3));
709 	ERROROUT(nfsm_fhtom(&info, vp));
710 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_GETATTR, td,
711 				nfs_vpcred(vp, ND_CHECK), &error));
712 	if (error == 0) {
713 		ERROROUT(nfsm_loadattr(&info, vp, ap->a_vap));
714 	}
715 	m_freem(info.mrep);
716 	info.mrep = NULL;
717 done:
718 	/*
719 	 * NFS doesn't support chflags flags.  If the nfs mount was
720 	 * made -o cache set the UF_CACHE bit for swapcache.
721 	 */
722 	if ((nmp->nm_flag & NFSMNT_CACHE) && (vp->v_flag & VROOT))
723 		ap->a_vap->va_flags |= UF_CACHE;
724 nfsmout:
725 	lwkt_reltoken(&nmp->nm_token);
726 	return (error);
727 }
728 
729 /*
730  * nfs setattr call.
731  *
732  * nfs_setattr(struct vnode *a_vp, struct vattr *a_vap, struct ucred *a_cred)
733  */
734 static int
735 nfs_setattr(struct vop_setattr_args *ap)
736 {
737 	struct vnode *vp = ap->a_vp;
738 	struct nfsnode *np = VTONFS(vp);
739 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
740 	struct vattr *vap = ap->a_vap;
741 	int error = 0;
742 	int kflags = 0;
743 	off_t tsize;
744 	thread_t td = curthread;
745 
746 #ifndef nolint
747 	tsize = (off_t)0;
748 #endif
749 	/*
750 	 * Setting of flags is not supported.
751 	 */
752 	if (vap->va_flags != VNOVAL)
753 		return (EOPNOTSUPP);
754 
755 	/*
756 	 * Disallow write attempts if the filesystem is mounted read-only.
757 	 */
758   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
759 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
760 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
761 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
762 		return (EROFS);
763 
764 	lwkt_gettoken(&nmp->nm_token);
765 
766 	if (vap->va_size != VNOVAL) {
767 		/*
768 		 * truncation requested
769 		 */
770  		switch (vp->v_type) {
771  		case VDIR:
772 			lwkt_reltoken(&nmp->nm_token);
773  			return (EISDIR);
774  		case VCHR:
775  		case VBLK:
776  		case VSOCK:
777  		case VFIFO:
778 			if (vap->va_mtime.tv_sec == VNOVAL &&
779 			    vap->va_atime.tv_sec == VNOVAL &&
780 			    vap->va_mode == (mode_t)VNOVAL &&
781 			    vap->va_uid == (uid_t)VNOVAL &&
782 			    vap->va_gid == (gid_t)VNOVAL) {
783 				lwkt_reltoken(&nmp->nm_token);
784 				return (0);
785 			}
786  			vap->va_size = VNOVAL;
787  			break;
788  		default:
789 			/*
790 			 * Disallow write attempts if the filesystem is
791 			 * mounted read-only.
792 			 */
793 			if (vp->v_mount->mnt_flag & MNT_RDONLY) {
794 				lwkt_reltoken(&nmp->nm_token);
795 				return (EROFS);
796 			}
797 
798 			tsize = np->n_size;
799 again:
800 			error = nfs_meta_setsize(vp, td, vap->va_size, 0);
801 
802 #if 0
803  			if (np->n_flag & NLMODIFIED) {
804  			    if (vap->va_size == 0)
805  				error = nfs_vinvalbuf(vp, 0, 1);
806  			    else
807  				error = nfs_vinvalbuf(vp, V_SAVE, 1);
808  			}
809 #endif
810 			/*
811 			 * note: this loop case almost always happens at
812 			 * least once per truncation.
813 			 */
814 			if (error == 0 && np->n_size != vap->va_size)
815 				goto again;
816 			np->n_vattr.va_size = vap->va_size;
817 			kflags |= NOTE_WRITE;
818 			if (tsize < vap->va_size)
819 				kflags |= NOTE_EXTEND;
820 			break;
821 		}
822 	} else if ((np->n_flag & NLMODIFIED) && vp->v_type == VREG) {
823 		/*
824 		 * What to do.  If we are modifying the mtime we lose
825 		 * mtime detection of changes made by the server or other
826 		 * clients.  But programs like rsync/rdist/cpdup are going
827 		 * to call utimes a lot.  We don't want to piecemeal sync.
828 		 *
829 		 * For now sync if any prior remote changes were detected,
830 		 * but allow us to lose track of remote changes made during
831 		 * the utimes operation.
832 		 */
833 		if (np->n_flag & NRMODIFIED)
834 			error = nfs_vinvalbuf(vp, V_SAVE, 1);
835 		if (error == EINTR) {
836 			lwkt_reltoken(&nmp->nm_token);
837 			return (error);
838 		}
839 		if (error == 0) {
840 			if (vap->va_mtime.tv_sec != VNOVAL) {
841 				np->n_mtime = vap->va_mtime.tv_sec;
842 			}
843 		}
844 	}
845 	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
846 	if (error == 0)
847 		kflags |= NOTE_EXTEND;
848 
849 	/*
850 	 * Sanity check if a truncation was issued.  This should only occur
851 	 * if multiple processes are racing on the same file.
852 	 */
853 	if (error == 0 && vap->va_size != VNOVAL &&
854 	    np->n_size != vap->va_size) {
855 		kprintf("NFS ftruncate: server disagrees on the file size: "
856 			"%jd/%jd/%jd\n",
857 			(intmax_t)tsize,
858 			(intmax_t)vap->va_size,
859 			(intmax_t)np->n_size);
860 		goto again;
861 	}
862 	if (error && vap->va_size != VNOVAL) {
863 		np->n_size = np->n_vattr.va_size = tsize;
864 		nfs_meta_setsize(vp, td, np->n_size, 0);
865 	}
866 	lwkt_reltoken(&nmp->nm_token);
867 	nfs_knote(vp, kflags);
868 
869 	return (error);
870 }
871 
872 /*
873  * Do an nfs setattr rpc.
874  */
875 static int
876 nfs_setattrrpc(struct vnode *vp, struct vattr *vap,
877 	       struct ucred *cred, struct thread *td)
878 {
879 	struct nfsv2_sattr *sp;
880 	struct nfsnode *np = VTONFS(vp);
881 	u_int32_t *tl;
882 	int error = 0, wccflag = NFSV3_WCCRATTR;
883 	struct nfsm_info info;
884 
885 	info.mrep = NULL;
886 	info.v3 = NFS_ISV3(vp);
887 
888 	nfsstats.rpccnt[NFSPROC_SETATTR]++;
889 	nfsm_reqhead(&info, vp, NFSPROC_SETATTR,
890 		     NFSX_FH(info.v3) + NFSX_SATTR(info.v3));
891 	ERROROUT(nfsm_fhtom(&info, vp));
892 	if (info.v3) {
893 		nfsm_v3attrbuild(&info, vap, TRUE);
894 		tl = nfsm_build(&info, NFSX_UNSIGNED);
895 		*tl = nfs_false;
896 	} else {
897 		sp = nfsm_build(&info, NFSX_V2SATTR);
898 		if (vap->va_mode == (mode_t)VNOVAL)
899 			sp->sa_mode = nfs_xdrneg1;
900 		else
901 			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
902 		if (vap->va_uid == (uid_t)VNOVAL)
903 			sp->sa_uid = nfs_xdrneg1;
904 		else
905 			sp->sa_uid = txdr_unsigned(vap->va_uid);
906 		if (vap->va_gid == (gid_t)VNOVAL)
907 			sp->sa_gid = nfs_xdrneg1;
908 		else
909 			sp->sa_gid = txdr_unsigned(vap->va_gid);
910 		sp->sa_size = txdr_unsigned(vap->va_size);
911 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
912 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
913 	}
914 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_SETATTR, td, cred, &error));
915 	if (info.v3) {
916 		np->n_modestamp = 0;
917 		ERROROUT(nfsm_wcc_data(&info, vp, &wccflag));
918 	} else {
919 		ERROROUT(nfsm_loadattr(&info, vp, NULL));
920 	}
921 	m_freem(info.mrep);
922 	info.mrep = NULL;
923 nfsmout:
924 	return (error);
925 }
926 
927 static
928 void
929 nfs_cache_setvp(struct nchandle *nch, struct vnode *vp, int nctimeout)
930 {
931 	if (nctimeout == 0)
932 		nctimeout = 1;
933 	else
934 		nctimeout *= hz;
935 	cache_setvp(nch, vp);
936 	cache_settimeout(nch, nctimeout);
937 }
938 
/*
 * NEW API CALL - replaces nfs_lookup().  However, we cannot remove
 * nfs_lookup() until all remaining new api calls are implemented.
 *
 * Resolve a namecache entry.  This function is passed a locked ncp and
 * must call nfs_cache_setvp() on it as appropriate to resolve the entry.
 *
 * The name to look up is taken from ap->a_nch->ncp and a LOOKUP rpc is
 * issued against the directory ap->a_dvp using ap->a_cred.  The mount's
 * nm_token is held for the duration of the call, and dvp is obtained with
 * vget(LK_SHARED) and released with vput() on every exit path.
 *
 * Returns 0 on success or an errno value.  On ENOENT the entry is resolved
 * negatively before the error is returned.
 */
static int
nfs_nresolve(struct vop_nresolve_args *ap)
{
	struct thread *td = curthread;
	struct namecache *ncp;
	struct nfsmount *nmp;
	struct nfsnode *np;
	struct vnode *dvp;
	struct vnode *nvp;
	nfsfh_t *fhp;
	int attrflag;
	int fhsize;
	int error;
	int tmp_error;
	int len;
	struct nfsm_info info;

	dvp = ap->a_dvp;
	nmp = VFSTONFS(dvp->v_mount);

	lwkt_gettoken(&nmp->nm_token);

	if ((error = vget(dvp, LK_SHARED)) != 0) {
		lwkt_reltoken(&nmp->nm_token);
		return (error);
	}

	info.mrep = NULL;
	info.v3 = NFS_ISV3(dvp);

	/*
	 * Build and issue the LOOKUP request: directory file handle
	 * followed by the component name.
	 */
	nvp = NULL;
	nfsstats.lookupcache_misses++;
	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	ncp = ap->a_nch->ncp;
	len = ncp->nc_nlen;
	nfsm_reqhead(&info, dvp, NFSPROC_LOOKUP,
		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(len));
	ERROROUT(nfsm_fhtom(&info, dvp));
	ERROROUT(nfsm_strtom(&info, ncp->nc_name, len, NFS_MAXNAMLEN));
	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_LOOKUP, td,
				ap->a_cred, &error));
	if (error) {
		/*
		 * Cache negative lookups to reduce NFS traffic, but use
		 * a fast timeout.  Otherwise use a timeout of 1 tick.
		 * XXX we should add a namecache flag for no-caching
		 * to uncache the negative hit as soon as possible, but
		 * we cannot simply destroy the entry because it is used
		 * as a placeholder by the caller.
		 *
		 * The refactored nfs code will overwrite a non-zero error
		 * with 0 when we use ERROROUT(), so don't here.
		 */
		if (error == ENOENT)
			nfs_cache_setvp(ap->a_nch, NULL, nfsneg_cache_timeout);
		tmp_error = nfsm_postop_attr(&info, dvp, &attrflag,
					     NFS_LATTR_NOSHRINK);
		if (tmp_error) {
			/* a reply parsing failure replaces the rpc error */
			error = tmp_error;
			goto nfsmout;
		}
		m_freem(info.mrep);
		info.mrep = NULL;
		goto nfsmout;
	}

	/*
	 * Success, get the file handle, do various checks, and load
	 * post-operation data from the reply packet.  Theoretically
	 * we should never be looking up "." so, theoretically, we
	 * should never get the same file handle as our directory.  But
	 * we check anyway. XXX
	 *
	 * The positive cache hit is entered with nfspos_cache_timeout.
	 * We still assume, theoretically, that ESTALE returns will be
	 * dealt with properly to handle NFS races and in any case we
	 * cannot depend on a timeout to deal with NFS open/create/excl
	 * issues so instead of a bad hack here the rest of the NFS client
	 * code needs to do the right thing.
	 */
	NEGATIVEOUT(fhsize = nfsm_getfh(&info, &fhp));

	np = VTONFS(dvp);
	if (NFS_CMPFH(np, fhp, fhsize)) {
		/* same handle as the directory: take an extra ref on dvp */
		vref(dvp);
		nvp = dvp;
	} else {
		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, NULL);
		if (error) {
			/* nvp is still NULL, only the reply and dvp to undo */
			m_freem(info.mrep);
			info.mrep = NULL;
			vput(dvp);
			lwkt_reltoken(&nmp->nm_token);
			return (error);
		}
		nvp = NFSTOV(np);
	}
	if (info.v3) {
		ERROROUT(nfsm_postop_attr(&info, nvp, &attrflag,
					  NFS_LATTR_NOSHRINK));
		ERROROUT(nfsm_postop_attr(&info, dvp, &attrflag,
					  NFS_LATTR_NOSHRINK));
	} else {
		ERROROUT(nfsm_loadattr(&info, nvp, NULL));
	}
	nfs_cache_setvp(ap->a_nch, nvp, nfspos_cache_timeout);
	m_freem(info.mrep);
	info.mrep = NULL;
nfsmout:
	lwkt_reltoken(&nmp->nm_token);
	vput(dvp);
	if (nvp) {
		/*
		 * When nvp aliases dvp only the extra vref() is left to
		 * drop (the vput() above already unlocked it); otherwise
		 * nvp came from nfs_nget() and is released with vput().
		 */
		if (nvp == dvp)
			vrele(nvp);
		else
			vput(nvp);
	}
	return (error);
}
1065 
1066 /*
1067  * 'cached' nfs directory lookup
1068  *
1069  * NOTE: cannot be removed until NFS implements all the new n*() API calls.
1070  *
1071  * nfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
1072  *	      struct componentname *a_cnp)
1073  */
1074 static int
1075 nfs_lookup(struct vop_old_lookup_args *ap)
1076 {
1077 	struct componentname *cnp = ap->a_cnp;
1078 	struct vnode *dvp = ap->a_dvp;
1079 	struct vnode **vpp = ap->a_vpp;
1080 	int flags = cnp->cn_flags;
1081 	struct vnode *newvp;
1082 	struct vnode *notvp;
1083 	struct nfsmount *nmp;
1084 	long len;
1085 	nfsfh_t *fhp;
1086 	struct nfsnode *np;
1087 	int lockparent, wantparent, attrflag, fhsize;
1088 	int error;
1089 	int tmp_error;
1090 	struct nfsm_info info;
1091 
1092 	info.mrep = NULL;
1093 	info.v3 = NFS_ISV3(dvp);
1094 	error = 0;
1095 
1096 	notvp = (cnp->cn_flags & CNP_NOTVP) ? cnp->cn_notvp : NULL;
1097 
1098 	/*
1099 	 * Read-only mount check and directory check.
1100 	 */
1101 	*vpp = NULLVP;
1102 	if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
1103 	    (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME))
1104 		return (EROFS);
1105 
1106 	if (dvp->v_type != VDIR)
1107 		return (ENOTDIR);
1108 
1109 	/*
1110 	 * Look it up in the cache.  Note that ENOENT is only returned if we
1111 	 * previously entered a negative hit (see later on).  The additional
1112 	 * nfsneg_cache_timeout check causes previously cached results to
1113 	 * be instantly ignored if the negative caching is turned off.
1114 	 */
1115 	lockparent = flags & CNP_LOCKPARENT;
1116 	wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT);
1117 	nmp = VFSTONFS(dvp->v_mount);
1118 	np = VTONFS(dvp);
1119 
1120 	lwkt_gettoken(&nmp->nm_token);
1121 
1122 	/*
1123 	 * Go to the wire.
1124 	 */
1125 	error = 0;
1126 	newvp = NULLVP;
1127 	nfsstats.lookupcache_misses++;
1128 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
1129 	len = cnp->cn_namelen;
1130 	nfsm_reqhead(&info, dvp, NFSPROC_LOOKUP,
1131 		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(len));
1132 	ERROROUT(nfsm_fhtom(&info, dvp));
1133 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, len, NFS_MAXNAMLEN));
1134 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_LOOKUP, cnp->cn_td,
1135 				cnp->cn_cred, &error));
1136 	if (error) {
1137 		tmp_error = nfsm_postop_attr(&info, dvp, &attrflag,
1138 					     NFS_LATTR_NOSHRINK);
1139 		if (tmp_error) {
1140 			error = tmp_error;
1141 			goto nfsmout;
1142 		}
1143 
1144 		m_freem(info.mrep);
1145 		info.mrep = NULL;
1146 		goto nfsmout;
1147 	}
1148 	NEGATIVEOUT(fhsize = nfsm_getfh(&info, &fhp));
1149 
1150 	/*
1151 	 * Handle RENAME case...
1152 	 */
1153 	if (cnp->cn_nameiop == NAMEI_RENAME && wantparent) {
1154 		if (NFS_CMPFH(np, fhp, fhsize)) {
1155 			m_freem(info.mrep);
1156 			info.mrep = NULL;
1157 			lwkt_reltoken(&nmp->nm_token);
1158 			return (EISDIR);
1159 		}
1160 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, notvp);
1161 		if (error) {
1162 			m_freem(info.mrep);
1163 			info.mrep = NULL;
1164 			lwkt_reltoken(&nmp->nm_token);
1165 			return (error);
1166 		}
1167 		newvp = NFSTOV(np);
1168 		if (info.v3) {
1169 			ERROROUT(nfsm_postop_attr(&info, newvp, &attrflag,
1170 						  NFS_LATTR_NOSHRINK));
1171 			ERROROUT(nfsm_postop_attr(&info, dvp, &attrflag,
1172 						  NFS_LATTR_NOSHRINK));
1173 		} else {
1174 			ERROROUT(nfsm_loadattr(&info, newvp, NULL));
1175 		}
1176 		*vpp = newvp;
1177 		m_freem(info.mrep);
1178 		info.mrep = NULL;
1179 		if (!lockparent) {
1180 			vn_unlock(dvp);
1181 			cnp->cn_flags |= CNP_PDIRUNLOCK;
1182 		}
1183 		lwkt_reltoken(&nmp->nm_token);
1184 		return (0);
1185 	}
1186 
1187 	if (flags & CNP_ISDOTDOT) {
1188 		vn_unlock(dvp);
1189 		cnp->cn_flags |= CNP_PDIRUNLOCK;
1190 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, notvp);
1191 		if (error) {
1192 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1193 			cnp->cn_flags &= ~CNP_PDIRUNLOCK;
1194 			lwkt_reltoken(&nmp->nm_token);
1195 			return (error); /* NOTE: return error from nget */
1196 		}
1197 		newvp = NFSTOV(np);
1198 		if (lockparent) {
1199 			error = vn_lock(dvp, LK_EXCLUSIVE | LK_FAILRECLAIM);
1200 			if (error) {
1201 				vput(newvp);
1202 				lwkt_reltoken(&nmp->nm_token);
1203 				return (error);
1204 			}
1205 			cnp->cn_flags |= CNP_PDIRUNLOCK;
1206 		}
1207 	} else if (NFS_CMPFH(np, fhp, fhsize)) {
1208 		vref(dvp);
1209 		newvp = dvp;
1210 	} else {
1211 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, notvp);
1212 		if (error) {
1213 			m_freem(info.mrep);
1214 			info.mrep = NULL;
1215 			lwkt_reltoken(&nmp->nm_token);
1216 			return (error);
1217 		}
1218 		if (!lockparent) {
1219 			vn_unlock(dvp);
1220 			cnp->cn_flags |= CNP_PDIRUNLOCK;
1221 		}
1222 		newvp = NFSTOV(np);
1223 	}
1224 	if (info.v3) {
1225 		ERROROUT(nfsm_postop_attr(&info, newvp, &attrflag,
1226 					  NFS_LATTR_NOSHRINK));
1227 		ERROROUT(nfsm_postop_attr(&info, dvp, &attrflag,
1228 					  NFS_LATTR_NOSHRINK));
1229 	} else {
1230 		ERROROUT(nfsm_loadattr(&info, newvp, NULL));
1231 	}
1232 #if 0
1233 	/* XXX MOVE TO nfs_nremove() */
1234 	if ((cnp->cn_flags & CNP_MAKEENTRY) &&
1235 	    cnp->cn_nameiop != NAMEI_DELETE) {
1236 		np->n_ctime = np->n_vattr.va_ctime.tv_sec; /* XXX */
1237 	}
1238 #endif
1239 	*vpp = newvp;
1240 	m_freem(info.mrep);
1241 	info.mrep = NULL;
1242 nfsmout:
1243 	if (error) {
1244 		if (newvp != NULLVP) {
1245 			vrele(newvp);
1246 			*vpp = NULLVP;
1247 		}
1248 		if ((cnp->cn_nameiop == NAMEI_CREATE ||
1249 		     cnp->cn_nameiop == NAMEI_RENAME) &&
1250 		    error == ENOENT) {
1251 			if (!lockparent) {
1252 				vn_unlock(dvp);
1253 				cnp->cn_flags |= CNP_PDIRUNLOCK;
1254 			}
1255 			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
1256 				error = EROFS;
1257 			else
1258 				error = EJUSTRETURN;
1259 		}
1260 	}
1261 	lwkt_reltoken(&nmp->nm_token);
1262 	return (error);
1263 }
1264 
1265 /*
1266  * nfs read call.
1267  * Just call nfs_bioread() to do the work.
1268  *
1269  * nfs_read(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1270  *	    struct ucred *a_cred)
1271  */
1272 static int
1273 nfs_read(struct vop_read_args *ap)
1274 {
1275 	struct vnode *vp = ap->a_vp;
1276 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1277 	int error;
1278 
1279 	lwkt_gettoken(&nmp->nm_token);
1280 	error = nfs_bioread(vp, ap->a_uio, ap->a_ioflag);
1281 	lwkt_reltoken(&nmp->nm_token);
1282 
1283 	return error;
1284 }
1285 
1286 /*
1287  * nfs readlink call
1288  *
1289  * nfs_readlink(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
1290  */
1291 static int
1292 nfs_readlink(struct vop_readlink_args *ap)
1293 {
1294 	struct vnode *vp = ap->a_vp;
1295 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1296 	int error;
1297 
1298 	if (vp->v_type != VLNK)
1299 		return (EINVAL);
1300 
1301 	lwkt_gettoken(&nmp->nm_token);
1302 	error = nfs_bioread(vp, ap->a_uio, 0);
1303 	lwkt_reltoken(&nmp->nm_token);
1304 
1305 	return error;
1306 }
1307 
1308 /*
1309  * Do a readlink rpc.
1310  * Called by nfs_doio() from below the buffer cache.
1311  */
1312 int
1313 nfs_readlinkrpc_uio(struct vnode *vp, struct uio *uiop)
1314 {
1315 	int error = 0, len, attrflag;
1316 	struct nfsm_info info;
1317 
1318 	info.mrep = NULL;
1319 	info.v3 = NFS_ISV3(vp);
1320 
1321 	nfsstats.rpccnt[NFSPROC_READLINK]++;
1322 	nfsm_reqhead(&info, vp, NFSPROC_READLINK, NFSX_FH(info.v3));
1323 	ERROROUT(nfsm_fhtom(&info, vp));
1324 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READLINK, uiop->uio_td,
1325 				nfs_vpcred(vp, ND_CHECK), &error));
1326 	if (info.v3) {
1327 		ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
1328 					  NFS_LATTR_NOSHRINK));
1329 	}
1330 	if (!error) {
1331 		NEGATIVEOUT(len = nfsm_strsiz(&info, NFS_MAXPATHLEN));
1332 		if (len == NFS_MAXPATHLEN) {
1333 			struct nfsnode *np = VTONFS(vp);
1334 			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1335 				len = np->n_size;
1336 		}
1337 		ERROROUT(nfsm_mtouio(&info, uiop, len));
1338 	}
1339 	m_freem(info.mrep);
1340 	info.mrep = NULL;
1341 nfsmout:
1342 	return (error);
1343 }
1344 
/*
 * nfs synchronous read rpc using UIO
 *
 * Reads uiop->uio_resid bytes starting at uiop->uio_offset from vp,
 * splitting the transfer into READ rpcs of at most nm_rsize bytes each.
 * Returns EFBIG if the end of the range exceeds nm_maxfilesize or the
 * offset arithmetic wraps, otherwise 0 or the first error encountered.
 */
int
nfs_readrpc_uio(struct vnode *vp, struct uio *uiop)
{
	u_int32_t *tl;
	struct nfsmount *nmp;
	int error = 0, len, retlen, tsiz, eof, attrflag;
	struct nfsm_info info;
	off_t tmp_off;

	info.mrep = NULL;
	info.v3 = NFS_ISV3(vp);

#ifndef nolint
	eof = 0;
#endif
	nmp = VFSTONFS(vp->v_mount);

	tsiz = uiop->uio_resid;
	tmp_off = uiop->uio_offset + tsiz;
	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset)
		return (EFBIG);
	tmp_off = uiop->uio_offset;	/* not referenced again below */
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_READ]++;
		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
		nfsm_reqhead(&info, vp, NFSPROC_READ,
			     NFSX_FH(info.v3) + NFSX_UNSIGNED * 3);
		ERROROUT(nfsm_fhtom(&info, vp));
		tl = nfsm_build(&info, NFSX_UNSIGNED * 3);
		if (info.v3) {
			/* v3: 64 bit offset followed by the count */
			txdr_hyper(uiop->uio_offset, tl);
			*(tl + 2) = txdr_unsigned(len);
		} else {
			/* v2: 32 bit offset, count, and a zeroed third word */
			*tl++ = txdr_unsigned(uiop->uio_offset);
			*tl++ = txdr_unsigned(len);
			*tl = 0;
		}
		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READ, uiop->uio_td,
					nfs_vpcred(vp, ND_READ), &error));
		/*
		 * NOTE(review): nfs_nresolve() documents that ERROROUT()
		 * overwrites a non-zero error with 0.  If nfsm_request()
		 * can return here with 'error' set, the ERROROUT() below
		 * would discard it -- confirm against the macro.
		 */
		if (info.v3) {
			ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
						 NFS_LATTR_NOSHRINK));
			/* second of the two words dissected is the eof flag */
			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
			eof = fxdr_unsigned(int, *(tl + 1));
		} else {
			ERROROUT(nfsm_loadattr(&info, vp, NULL));
		}
		NEGATIVEOUT(retlen = nfsm_strsiz(&info, len));
		ERROROUT(nfsm_mtouio(&info, uiop, retlen));
		m_freem(info.mrep);
		info.mrep = NULL;

		/*
		 * Handle short-read from server (NFSv3).  If EOF is not
		 * flagged (and no error occurred), but retlen is less
		 * than the request size, we must zero-fill the remainder.
		 */
		if (retlen < len && info.v3 && eof == 0) {
			ERROROUT(uiomovez(len - retlen, uiop));
			retlen = len;
		}
		tsiz -= retlen;

		/*
		 * Terminate loop on EOF or zero-length read.
		 *
		 * For NFSv2 a short-read indicates EOF, not zero-fill,
		 * and also terminates the loop.
		 */
		if (info.v3) {
			if (eof || retlen == 0)
				tsiz = 0;
		} else if (retlen < len) {
			tsiz = 0;
		}
	}
nfsmout:
	return (error);
}
1427 
/*
 * nfs write call
 *
 * Writes uiop->uio_resid bytes at uiop->uio_offset to vp, splitting the
 * transfer into WRITE rpcs of at most nm_wsize bytes each.
 *
 * *iomode supplies the stability level requested from the server (NFSv3)
 * and on return holds the lowest commitment level any of the rpcs
 * obtained (forced to NFSV3WRITE_FILESYNC on MNT_ASYNC mounts).
 * *must_commit is set to 1 if the server's write verifier differs from
 * the one saved in the mount, otherwise it is left 0.
 *
 * Returns 0 or an errno value; on error uio_resid is set to the amount
 * not yet accounted as written.
 */
int
nfs_writerpc_uio(struct vnode *vp, struct uio *uiop,
		 int *iomode, int *must_commit)
{
	u_int32_t *tl;
	int32_t backup;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
	int  committed = NFSV3WRITE_FILESYNC;
	struct nfsm_info info;

	info.mrep = NULL;
	info.v3 = NFS_ISV3(vp);

	/*
	 * NOTE(review): this check is compiled in when DIAGNOSTIC is NOT
	 * defined, and the message says "> 1" while the test is "!= 1" --
	 * confirm both are intended.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1)
		panic("nfs: writerpc iovcnt > 1");
#endif
	*must_commit = 0;
	tsiz = uiop->uio_resid;
	/*
	 * NOTE(review): unlike nfs_readrpc_uio() there is no check for
	 * the addition wrapping -- confirm whether callers guarantee it.
	 */
	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
		return (EFBIG);
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_WRITE]++;
		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
		nfsm_reqhead(&info, vp, NFSPROC_WRITE,
			     NFSX_FH(info.v3) + 5 * NFSX_UNSIGNED +
			     nfsm_rndup(len));
		ERROROUT(nfsm_fhtom(&info, vp));
		if (info.v3) {
			/* v3: 64 bit offset, count, stable-how, data length */
			tl = nfsm_build(&info, 5 * NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(len);
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else {
			u_int32_t x;

			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
			/* Set both "begin" and "current" to non-garbage. */
			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
			*tl++ = x;	/* "begin offset" */
			*tl++ = x;	/* "current offset" */
			x = txdr_unsigned(len);
			*tl++ = x;	/* total to this offset */
			*tl = x;	/* size of this write */
		}
		/* nfsm_uiotom() consumes len bytes from uiop into the request */
		ERROROUT(nfsm_uiotom(&info, uiop, len));
		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_WRITE, uiop->uio_td,
					nfs_vpcred(vp, ND_WRITE), &error));
		if (info.v3) {
			/*
			 * The write RPC returns a before and after mtime.  The
			 * nfsm_wcc_data() macro checks the before n_mtime
			 * against the before time and stores the after time
			 * in the nfsnode's cached vattr and n_mtime field.
			 * The NRMODIFIED bit will be set if the before
			 * time did not match the original mtime.
			 *
			 * NOTE(review): nfs_nresolve() documents that
			 * ERROROUT() overwrites a non-zero error with 0; if
			 * so the "error == 0" test below is always true after
			 * a successful nfsm_wcc_data() -- confirm.
			 */
			wccflag = NFSV3_WCCCHK;
			ERROROUT(nfsm_wcc_data(&info, vp, &wccflag));
			if (error == 0) {
				NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF));
				rlen = fxdr_unsigned(int, *tl++);
				if (rlen == 0) {
					/* server accepted nothing, fail the write */
					error = NFSERR_IO;
					m_freem(info.mrep);
					info.mrep = NULL;
					break;
				} else if (rlen < len) {
					/*
					 * Short write: rewind the uio by the
					 * unwritten amount so the next pass
					 * resends it.
					 */
					backup = len - rlen;
					uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - backup;
					uiop->uio_iov->iov_len += backup;
					uiop->uio_offset -= backup;
					uiop->uio_resid += backup;
					len = rlen;
				}
				commit = fxdr_unsigned(int, *tl++);

				/*
				 * Return the lowest commitment level
				 * obtained by any of the RPCs.
				 */
				if (committed == NFSV3WRITE_FILESYNC)
					committed = commit;
				else if (committed == NFSV3WRITE_DATASYNC &&
					commit == NFSV3WRITE_UNSTABLE)
					committed = commit;
				/*
				 * Record the first write verifier seen on
				 * this mount; on any later mismatch flag
				 * must_commit and adopt the new verifier.
				 */
				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					NFSX_V3WRITEVERF);
				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
				} else if (bcmp((caddr_t)tl,
				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
				    *must_commit = 1;
				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					NFSX_V3WRITEVERF);
				}
			}
		} else {
			ERROROUT(nfsm_loadattr(&info, vp, NULL));
		}
		m_freem(info.mrep);
		info.mrep = NULL;
		if (error)
			break;
		tsiz -= len;
	}
nfsmout:
	if (vp->v_mount->mnt_flag & MNT_ASYNC)
		committed = NFSV3WRITE_FILESYNC;
	*iomode = committed;
	if (error)
		uiop->uio_resid = tsiz;
	return (error);
}
1548 
1549 /*
1550  * nfs mknod rpc
1551  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1552  * mode set to specify the file type and the size field for rdev.
1553  */
1554 static int
1555 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
1556 	     struct vattr *vap)
1557 {
1558 	struct nfsv2_sattr *sp;
1559 	u_int32_t *tl;
1560 	struct vnode *newvp = NULL;
1561 	struct nfsnode *np = NULL;
1562 	struct vattr vattr;
1563 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1564 	int rmajor, rminor;
1565 	struct nfsm_info info;
1566 
1567 	info.mrep = NULL;
1568 	info.v3 = NFS_ISV3(dvp);
1569 
1570 	if (vap->va_type == VCHR || vap->va_type == VBLK) {
1571 		rmajor = txdr_unsigned(vap->va_rmajor);
1572 		rminor = txdr_unsigned(vap->va_rminor);
1573 	} else if (vap->va_type == VFIFO || vap->va_type == VSOCK) {
1574 		rmajor = nfs_xdrneg1;
1575 		rminor = nfs_xdrneg1;
1576 	} else {
1577 		return (EOPNOTSUPP);
1578 	}
1579 	if ((error = VOP_GETATTR(dvp, &vattr)) != 0) {
1580 		return (error);
1581 	}
1582 	nfsstats.rpccnt[NFSPROC_MKNOD]++;
1583 	nfsm_reqhead(&info, dvp, NFSPROC_MKNOD,
1584 		     NFSX_FH(info.v3) + 4 * NFSX_UNSIGNED +
1585 		     nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(info.v3));
1586 	ERROROUT(nfsm_fhtom(&info, dvp));
1587 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
1588 			     NFS_MAXNAMLEN));
1589 	if (info.v3) {
1590 		tl = nfsm_build(&info, NFSX_UNSIGNED);
1591 		*tl++ = vtonfsv3_type(vap->va_type);
1592 		nfsm_v3attrbuild(&info, vap, FALSE);
1593 		if (vap->va_type == VCHR || vap->va_type == VBLK) {
1594 			tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
1595 			*tl++ = txdr_unsigned(vap->va_rmajor);
1596 			*tl = txdr_unsigned(vap->va_rminor);
1597 		}
1598 	} else {
1599 		sp = nfsm_build(&info, NFSX_V2SATTR);
1600 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1601 		sp->sa_uid = nfs_xdrneg1;
1602 		sp->sa_gid = nfs_xdrneg1;
1603 		sp->sa_size = makeudev(rmajor, rminor);
1604 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1605 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1606 	}
1607 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_MKNOD, cnp->cn_td,
1608 				cnp->cn_cred, &error));
1609 	if (!error) {
1610 		ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
1611 		if (!gotvp) {
1612 			if (newvp) {
1613 				vput(newvp);
1614 				newvp = NULL;
1615 			}
1616 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1617 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_td, &np);
1618 			if (!error)
1619 				newvp = NFSTOV(np);
1620 		}
1621 	}
1622 	if (info.v3) {
1623 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
1624 	}
1625 	m_freem(info.mrep);
1626 	info.mrep = NULL;
1627 nfsmout:
1628 	if (error) {
1629 		if (newvp)
1630 			vput(newvp);
1631 	} else {
1632 		*vpp = newvp;
1633 	}
1634 	VTONFS(dvp)->n_flag |= NLMODIFIED;
1635 	if (!wccflag)
1636 		VTONFS(dvp)->n_attrstamp = 0;
1637 	return (error);
1638 }
1639 
1640 /*
1641  * nfs mknod vop
1642  * just call nfs_mknodrpc() to do the work.
1643  *
1644  * nfs_mknod(struct vnode *a_dvp, struct vnode **a_vpp,
1645  *	     struct componentname *a_cnp, struct vattr *a_vap)
1646  */
1647 /* ARGSUSED */
1648 static int
1649 nfs_mknod(struct vop_old_mknod_args *ap)
1650 {
1651 	struct nfsmount *nmp = VFSTONFS(ap->a_dvp->v_mount);
1652 	int error;
1653 
1654 	lwkt_gettoken(&nmp->nm_token);
1655 	error = nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
1656 	lwkt_reltoken(&nmp->nm_token);
1657 	if (error == 0)
1658 		nfs_knote(ap->a_dvp, NOTE_WRITE);
1659 
1660 	return error;
1661 }
1662 
/*
 * Counter used to build the exclusive-create verifier in nfs_create().
 */
static u_long create_verf;
/*
 * nfs file create call
 *
 * Creates the file named by a_cnp in directory a_dvp with attributes
 * a_vap.  VSOCK creation is handed off to nfs_mknodrpc().  For NFSv3 with
 * VA_EXCLUSIVE an exclusive create is attempted first; if the server
 * answers NFSERR_NOTSUPP the request is retried as an unchecked create.
 *
 * On success the new vnode is stored in *a_vpp and 0 is returned,
 * otherwise an errno is returned.  The mount's nm_token is held for the
 * duration of the call.
 *
 * nfs_create(struct vnode *a_dvp, struct vnode **a_vpp,
 *	      struct componentname *a_cnp, struct vattr *a_vap)
 */
static int
nfs_create(struct vop_old_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
	struct vattr vattr;
	struct nfsm_info info;

	info.mrep = NULL;
	info.v3 = NFS_ISV3(dvp);
	lwkt_gettoken(&nmp->nm_token);

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK) {
		error = nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap);
		lwkt_reltoken(&nmp->nm_token);
		return error;
	}

	if ((error = VOP_GETATTR(dvp, &vattr)) != 0) {
		lwkt_reltoken(&nmp->nm_token);
		return (error);
	}
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
again:
	/* re-entered with O_EXCL cleared if the exclusive create is refused */
	nfsstats.rpccnt[NFSPROC_CREATE]++;
	nfsm_reqhead(&info, dvp, NFSPROC_CREATE,
		     NFSX_FH(info.v3) + 2 * NFSX_UNSIGNED +
		     nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(info.v3));
	ERROROUT(nfsm_fhtom(&info, dvp));
	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
			     NFS_MAXNAMLEN));
	if (info.v3) {
		tl = nfsm_build(&info, NFSX_UNSIGNED);
		if (fmode & O_EXCL) {
			/*
			 * Exclusive create sends a two-word verifier
			 * instead of attributes: the first interface
			 * address when one exists (else create_verf),
			 * followed by the incremented create_verf.
			 */
			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
			tl = nfsm_build(&info, NFSX_V3CREATEVERF);
#ifdef INET
			if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid]))
				*tl++ = IA_SIN(TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia)->sin_addr.s_addr;
			else
#endif
				*tl++ = create_verf;
			*tl = ++create_verf;
		} else {
			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
			nfsm_v3attrbuild(&info, vap, FALSE);
		}
	} else {
		sp = nfsm_build(&info, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = 0;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_CREATE, cnp->cn_td,
				cnp->cn_cred, &error));
	if (error == 0) {
		ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
		if (!gotvp) {
			/* reply carried no usable handle, look the name up */
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_td, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (info.v3) {
		/*
		 * Parse the directory wcc data without letting a
		 * successful parse overwrite an error already recorded.
		 */
		if (error == 0)
			error = nfsm_wcc_data(&info, dvp, &wccflag);
		else
			(void)nfsm_wcc_data(&info, dvp, &wccflag);
	}
	m_freem(info.mrep);
	info.mrep = NULL;
nfsmout:
	if (error) {
		if (info.v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
			KKASSERT(newvp == NULL);
			fmode &= ~O_EXCL;
			goto again;
		}
	} else if (info.v3 && (fmode & O_EXCL)) {
		/*
		 * We are normally called with only a partially initialized
		 * VAP.  Since the NFSv3 spec says that server may use the
		 * file attributes to store the verifier, the spec requires
		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
		 * in atime, but we can't really assume that all servers will
		 * so we ensure that our SETATTR sets both atime and mtime.
		 */
		if (vap->va_mtime.tv_sec == VNOVAL)
			vfs_timestamp(&vap->va_mtime);
		if (vap->va_atime.tv_sec == VNOVAL)
			vap->va_atime = vap->va_mtime;
		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_td);
	}
	if (error == 0) {
		/*
		 * The new np may have enough info for access
		 * checks, make sure rucred and wucred are
		 * initialized for read and write rpc's.
		 */
		np = VTONFS(newvp);
		if (np->n_rucred == NULL)
			np->n_rucred = crhold(cnp->cn_cred);
		if (np->n_wucred == NULL)
			np->n_wucred = crhold(cnp->cn_cred);
		*ap->a_vpp = newvp;
		nfs_knote(dvp, NOTE_WRITE);
	} else if (newvp) {
		/* covers a failed SETATTR after a successful create */
		vput(newvp);
	}
	VTONFS(dvp)->n_flag |= NLMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	lwkt_reltoken(&nmp->nm_token);
	return (error);
}
1804 
1805 /*
1806  * nfs file remove call
1807  * To try and make nfs semantics closer to ufs semantics, a file that has
1808  * other processes using the vnode is renamed instead of removed and then
1809  * removed later on the last close.
1810  * - If v_refcnt > 1
1811  *	  If a rename is not already in the works
1812  *	     call nfs_sillyrename() to set it up
1813  *     else
1814  *	  do the remove rpc
1815  *
1816  * nfs_remove(struct vnode *a_dvp, struct vnode *a_vp,
1817  *	      struct componentname *a_cnp)
1818  */
1819 static int
1820 nfs_remove(struct vop_old_remove_args *ap)
1821 {
1822 	struct vnode *vp = ap->a_vp;
1823 	struct vnode *dvp = ap->a_dvp;
1824 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
1825 	struct componentname *cnp = ap->a_cnp;
1826 	struct nfsnode *np = VTONFS(vp);
1827 	int error = 0;
1828 	struct vattr vattr;
1829 
1830 	lwkt_gettoken(&nmp->nm_token);
1831 #ifndef DIAGNOSTIC
1832 	if (VREFCNT(vp) < 1)
1833 		panic("nfs_remove: bad v_refcnt");
1834 #endif
1835 	if (vp->v_type == VDIR) {
1836 		error = EPERM;
1837 	} else if (VREFCNT(vp) == 1 || (np->n_sillyrename &&
1838 		   VOP_GETATTR(vp, &vattr) == 0 && vattr.va_nlink > 1)) {
1839 		/*
1840 		 * Force finalization so the VOP_INACTIVE() call is not delayed.
1841 		 * This prevents cred structures from building up in nfsnodes
1842 		 * for deleted files.
1843 		 */
1844 		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
1845 		np->n_flag |= NREMOVED;
1846 
1847 		/*
1848 		 * Throw away biocache buffers, mainly to avoid
1849 		 * unnecessary delayed writes later.
1850 		 */
1851 		error = nfs_vinvalbuf(vp, 0, 1);
1852 		/* Do the rpc */
1853 		if (error != EINTR) {
1854 			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1855 					      cnp->cn_namelen,
1856 					      cnp->cn_cred, cnp->cn_td);
1857 		}
1858 
1859 		/*
1860 		 * Kludge City: If the first reply to the remove rpc is lost..
1861 		 *   the reply to the retransmitted request will be ENOENT
1862 		 *   since the file was in fact removed
1863 		 *   Therefore, we cheat and return success.
1864 		 */
1865 		if (error == ENOENT)
1866 			error = 0;
1867 	} else if (!np->n_sillyrename) {
1868 		error = nfs_sillyrename(dvp, vp, cnp);
1869 	}
1870 	np->n_attrstamp = 0;
1871 	lwkt_reltoken(&nmp->nm_token);
1872 	if (error == 0) {
1873 		nfs_knote(vp, NOTE_DELETE);
1874 		nfs_knote(dvp, NOTE_WRITE);
1875 	}
1876 
1877 	return (error);
1878 }
1879 
1880 /*
1881  * nfs file remove rpc called from nfs_inactive
1882  *
1883  * NOTE: s_dvp can be VBAD during a forced unmount.
1884  */
1885 int
1886 nfs_removeit(struct sillyrename *sp)
1887 {
1888 	if (sp->s_dvp->v_type == VBAD)
1889 		return(0);
1890 	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen,
1891 		sp->s_cred, NULL));
1892 }
1893 
1894 /*
1895  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1896  */
1897 static int
1898 nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
1899 	      struct ucred *cred, struct thread *td)
1900 {
1901 	int error = 0, wccflag = NFSV3_WCCRATTR;
1902 	struct nfsm_info info;
1903 
1904 	info.mrep = NULL;
1905 	info.v3 = NFS_ISV3(dvp);
1906 
1907 	nfsstats.rpccnt[NFSPROC_REMOVE]++;
1908 	nfsm_reqhead(&info, dvp, NFSPROC_REMOVE,
1909 		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1910 	ERROROUT(nfsm_fhtom(&info, dvp));
1911 	ERROROUT(nfsm_strtom(&info, name, namelen, NFS_MAXNAMLEN));
1912 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_REMOVE, td, cred, &error));
1913 	if (info.v3) {
1914 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
1915 	}
1916 	m_freem(info.mrep);
1917 	info.mrep = NULL;
1918 nfsmout:
1919 	VTONFS(dvp)->n_flag |= NLMODIFIED;
1920 	if (!wccflag)
1921 		VTONFS(dvp)->n_attrstamp = 0;
1922 	return (error);
1923 }
1924 
/*
 * nfs file rename call
 *
 * Renames a_fvp (named a_fcnp in a_fdvp) to the name a_tcnp in a_tdvp,
 * replacing a_tvp if it is non-NULL.  Returns EXDEV for a cross-mount
 * rename, otherwise the result of the rename rpc with ENOENT mapped to 0.
 *
 * On every path the routine releases its arguments before returning:
 * tdvp via vput() (vrele() if it is the same vnode as tvp), tvp via
 * vput(), and fdvp/fvp via vrele().
 *
 * nfs_rename(struct vnode *a_fdvp, struct vnode *a_fvp,
 *	      struct componentname *a_fcnp, struct vnode *a_tdvp,
 *	      struct vnode *a_tvp, struct componentname *a_tcnp)
 */
static int
nfs_rename(struct vop_old_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct nfsmount *nmp = VFSTONFS(fdvp->v_mount);
	int error;

	lwkt_gettoken(&nmp->nm_token);

	/*
	 * Force finalization so the VOP_INACTIVE() call is not delayed.
	 * This prevents cred structures from building up in nfsnodes
	 * for deleted files.
	 */
	if (tvp) {
		atomic_set_int(&tvp->v_refcnt, VREF_FINALIZE);
		if (VTONFS(tvp))
			VTONFS(tvp)->n_flag |= NREMOVED;
	}

	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	/*
	 * We shouldn't have to flush fvp on rename for most server-side
	 * filesystems as the file handle should not change.  Unfortunately
	 * the inode for some filesystems (msdosfs) might be tied to the
	 * file name or directory position so to be completely safe
	 * vfs.nfs.flush_on_rename is set by default.  Clear to improve
	 * performance.
	 *
	 * We must flush tvp on rename because it might become stale on the
	 * server after the rename.
	 */
	if (nfs_flush_on_rename)
	    VOP_FSYNC(fvp, MNT_WAIT, 0);
	if (tvp)
	    VOP_FSYNC(tvp, MNT_WAIT, 0);

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 *
	 * XXX Can't sillyrename a directory.
	 *
	 * We do not attempt to do any namecache purges in this old API
	 * routine.  The new API compat functions have access to the actual
	 * namecache structures and will do it for us.
	 */
	if (tvp && VREFCNT(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		/*
		 * The sillyrename succeeded; release tvp here and clear
		 * the pointer so the common exit code does not release
		 * it a second time.
		 */
		nfs_knote(tvp, NOTE_DELETE);
		vput(tvp);
		tvp = NULL;
	} else if (tvp) {
		nfs_knote(tvp, NOTE_DELETE);
	}

	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
		tcnp->cn_td);

out:
	if (error == 0) {
		nfs_knote(fdvp, NOTE_WRITE);
		nfs_knote(tdvp, NOTE_WRITE);
		nfs_knote(fvp, NOTE_RENAME);
	}
	lwkt_reltoken(&nmp->nm_token);
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}
2025 
2026 /*
2027  * nfs file rename rpc called from nfs_remove() above
2028  */
2029 static int
2030 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
2031 	     struct sillyrename *sp)
2032 {
2033 	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
2034 		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_td));
2035 }
2036 
2037 /*
2038  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
2039  */
2040 static int
2041 nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
2042 	      struct vnode *tdvp, const char *tnameptr, int tnamelen,
2043 	      struct ucred *cred, struct thread *td)
2044 {
2045 	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
2046 	struct nfsm_info info;
2047 
2048 	info.mrep = NULL;
2049 	info.v3 = NFS_ISV3(fdvp);
2050 
2051 	nfsstats.rpccnt[NFSPROC_RENAME]++;
2052 	nfsm_reqhead(&info, fdvp, NFSPROC_RENAME,
2053 		    (NFSX_FH(info.v3) + NFSX_UNSIGNED)*2 +
2054 		    nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
2055 	ERROROUT(nfsm_fhtom(&info, fdvp));
2056 	ERROROUT(nfsm_strtom(&info, fnameptr, fnamelen, NFS_MAXNAMLEN));
2057 	ERROROUT(nfsm_fhtom(&info, tdvp));
2058 	ERROROUT(nfsm_strtom(&info, tnameptr, tnamelen, NFS_MAXNAMLEN));
2059 	NEGKEEPOUT(nfsm_request(&info, fdvp, NFSPROC_RENAME, td, cred, &error));
2060 	if (info.v3) {
2061 		ERROROUT(nfsm_wcc_data(&info, fdvp, &fwccflag));
2062 		ERROROUT(nfsm_wcc_data(&info, tdvp, &twccflag));
2063 	}
2064 	m_freem(info.mrep);
2065 	info.mrep = NULL;
2066 nfsmout:
2067 	VTONFS(fdvp)->n_flag |= NLMODIFIED;
2068 	VTONFS(tdvp)->n_flag |= NLMODIFIED;
2069 	if (!fwccflag)
2070 		VTONFS(fdvp)->n_attrstamp = 0;
2071 	if (!twccflag)
2072 		VTONFS(tdvp)->n_attrstamp = 0;
2073 	return (error);
2074 }
2075 
2076 /*
2077  * nfs hard link create call
2078  *
2079  * nfs_link(struct vnode *a_tdvp, struct vnode *a_vp,
2080  *	    struct componentname *a_cnp)
2081  */
2082 static int
2083 nfs_link(struct vop_old_link_args *ap)
2084 {
2085 	struct vnode *vp = ap->a_vp;
2086 	struct vnode *tdvp = ap->a_tdvp;
2087 	struct nfsmount *nmp = VFSTONFS(tdvp->v_mount);
2088 	struct componentname *cnp = ap->a_cnp;
2089 	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
2090 	struct nfsm_info info;
2091 
2092 	if (vp->v_mount != tdvp->v_mount) {
2093 		return (EXDEV);
2094 	}
2095 	lwkt_gettoken(&nmp->nm_token);
2096 
2097 	/*
2098 	 * The attribute cache may get out of sync with the server on link.
2099 	 * Pushing writes to the server before handle was inherited from
2100 	 * long long ago and it is unclear if we still need to do this.
2101 	 * Defaults to off.
2102 	 */
2103 	if (nfs_flush_on_hlink)
2104 		VOP_FSYNC(vp, MNT_WAIT, 0);
2105 
2106 	info.mrep = NULL;
2107 	info.v3 = NFS_ISV3(vp);
2108 
2109 	nfsstats.rpccnt[NFSPROC_LINK]++;
2110 	nfsm_reqhead(&info, vp, NFSPROC_LINK,
2111 		     NFSX_FH(info.v3) * 2 + NFSX_UNSIGNED +
2112 		     nfsm_rndup(cnp->cn_namelen));
2113 	ERROROUT(nfsm_fhtom(&info, vp));
2114 	ERROROUT(nfsm_fhtom(&info, tdvp));
2115 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
2116 			     NFS_MAXNAMLEN));
2117 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_LINK, cnp->cn_td,
2118 				cnp->cn_cred, &error));
2119 	if (info.v3) {
2120 		ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
2121 					 NFS_LATTR_NOSHRINK));
2122 		ERROROUT(nfsm_wcc_data(&info, tdvp, &wccflag));
2123 	}
2124 	m_freem(info.mrep);
2125 	info.mrep = NULL;
2126 nfsmout:
2127 	VTONFS(tdvp)->n_flag |= NLMODIFIED;
2128 	if (!attrflag)
2129 		VTONFS(vp)->n_attrstamp = 0;
2130 	if (!wccflag)
2131 		VTONFS(tdvp)->n_attrstamp = 0;
2132 	/*
2133 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2134 	 */
2135 	if (error == EEXIST)
2136 		error = 0;
2137 	lwkt_reltoken(&nmp->nm_token);
2138 	if (error == 0) {
2139 		nfs_knote(vp, NOTE_LINK);
2140 		nfs_knote(tdvp, NOTE_WRITE);
2141 	}
2142 
2143 	return (error);
2144 }
2145 
2146 /*
2147  * nfs symbolic link create call
2148  *
2149  * nfs_symlink(struct vnode *a_dvp, struct vnode **a_vpp,
2150  *		struct componentname *a_cnp, struct vattr *a_vap,
2151  *		char *a_target)
2152  */
2153 static int
2154 nfs_symlink(struct vop_old_symlink_args *ap)
2155 {
2156 	struct vnode *dvp = ap->a_dvp;
2157 	struct vattr *vap = ap->a_vap;
2158 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2159 	struct componentname *cnp = ap->a_cnp;
2160 	struct nfsv2_sattr *sp;
2161 	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
2162 	struct vnode *newvp = NULL;
2163 	struct nfsm_info info;
2164 
2165 	info.mrep = NULL;
2166 	info.v3 = NFS_ISV3(dvp);
2167 	lwkt_gettoken(&nmp->nm_token);
2168 
2169 	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
2170 	slen = strlen(ap->a_target);
2171 	nfsm_reqhead(&info, dvp, NFSPROC_SYMLINK,
2172 		     NFSX_FH(info.v3) + 2*NFSX_UNSIGNED +
2173 		     nfsm_rndup(cnp->cn_namelen) +
2174 		     nfsm_rndup(slen) + NFSX_SATTR(info.v3));
2175 	ERROROUT(nfsm_fhtom(&info, dvp));
2176 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
2177 			     NFS_MAXNAMLEN));
2178 	if (info.v3) {
2179 		nfsm_v3attrbuild(&info, vap, FALSE);
2180 	}
2181 	ERROROUT(nfsm_strtom(&info, ap->a_target, slen, NFS_MAXPATHLEN));
2182 	if (info.v3 == 0) {
2183 		sp = nfsm_build(&info, NFSX_V2SATTR);
2184 		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
2185 		sp->sa_uid = nfs_xdrneg1;
2186 		sp->sa_gid = nfs_xdrneg1;
2187 		sp->sa_size = nfs_xdrneg1;
2188 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2189 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2190 	}
2191 
2192 	/*
2193 	 * Issue the NFS request and get the rpc response.
2194 	 *
2195 	 * Only NFSv3 responses returning an error of 0 actually return
2196 	 * a file handle that can be converted into newvp without having
2197 	 * to do an extra lookup rpc.
2198 	 */
2199 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_SYMLINK, cnp->cn_td,
2200 				cnp->cn_cred, &error));
2201 	if (info.v3) {
2202 		if (error == 0) {
2203 		       ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
2204 		}
2205 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
2206 	}
2207 
2208 	/*
2209 	 * out code jumps -> here, mrep is also freed.
2210 	 */
2211 
2212 	m_freem(info.mrep);
2213 	info.mrep = NULL;
2214 nfsmout:
2215 
2216 	/*
2217 	 * If we get an EEXIST error, silently convert it to no-error
2218 	 * in case of an NFS retry.
2219 	 */
2220 	if (error == EEXIST)
2221 		error = 0;
2222 
2223 	/*
2224 	 * If we do not have (or no longer have) an error, and we could
2225 	 * not extract the newvp from the response due to the request being
2226 	 * NFSv2 or the error being EEXIST.  We have to do a lookup in order
2227 	 * to obtain a newvp to return.
2228 	 */
2229 	if (error == 0 && newvp == NULL) {
2230 		struct nfsnode *np = NULL;
2231 
2232 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
2233 				     cnp->cn_cred, cnp->cn_td, &np);
2234 		if (!error)
2235 			newvp = NFSTOV(np);
2236 	}
2237 	if (error) {
2238 		if (newvp)
2239 			vput(newvp);
2240 	} else {
2241 		*ap->a_vpp = newvp;
2242 	}
2243 	VTONFS(dvp)->n_flag |= NLMODIFIED;
2244 	if (!wccflag)
2245 		VTONFS(dvp)->n_attrstamp = 0;
2246 	if (error == 0 && *ap->a_vpp)
2247 		nfs_knote(*ap->a_vpp, NOTE_WRITE);
2248 	lwkt_reltoken(&nmp->nm_token);
2249 
2250 	return (error);
2251 }
2252 
2253 /*
2254  * nfs make dir call
2255  *
2256  * nfs_mkdir(struct vnode *a_dvp, struct vnode **a_vpp,
2257  *	     struct componentname *a_cnp, struct vattr *a_vap)
2258  */
2259 static int
2260 nfs_mkdir(struct vop_old_mkdir_args *ap)
2261 {
2262 	struct vnode *dvp = ap->a_dvp;
2263 	struct vattr *vap = ap->a_vap;
2264 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2265 	struct componentname *cnp = ap->a_cnp;
2266 	struct nfsv2_sattr *sp;
2267 	struct nfsnode *np = NULL;
2268 	struct vnode *newvp = NULL;
2269 	struct vattr vattr;
2270 	int error = 0, wccflag = NFSV3_WCCRATTR;
2271 	int gotvp = 0;
2272 	int len;
2273 	struct nfsm_info info;
2274 
2275 	info.mrep = NULL;
2276 	info.v3 = NFS_ISV3(dvp);
2277 	lwkt_gettoken(&nmp->nm_token);
2278 
2279 	if ((error = VOP_GETATTR(dvp, &vattr)) != 0) {
2280 		lwkt_reltoken(&nmp->nm_token);
2281 		return (error);
2282 	}
2283 	len = cnp->cn_namelen;
2284 	nfsstats.rpccnt[NFSPROC_MKDIR]++;
2285 	nfsm_reqhead(&info, dvp, NFSPROC_MKDIR,
2286 		     NFSX_FH(info.v3) + NFSX_UNSIGNED +
2287 		     nfsm_rndup(len) + NFSX_SATTR(info.v3));
2288 	ERROROUT(nfsm_fhtom(&info, dvp));
2289 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, len, NFS_MAXNAMLEN));
2290 	if (info.v3) {
2291 		nfsm_v3attrbuild(&info, vap, FALSE);
2292 	} else {
2293 		sp = nfsm_build(&info, NFSX_V2SATTR);
2294 		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
2295 		sp->sa_uid = nfs_xdrneg1;
2296 		sp->sa_gid = nfs_xdrneg1;
2297 		sp->sa_size = nfs_xdrneg1;
2298 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2299 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2300 	}
2301 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_MKDIR, cnp->cn_td,
2302 		    cnp->cn_cred, &error));
2303 	if (error == 0) {
2304 		ERROROUT(nfsm_mtofh(&info, dvp, &newvp, &gotvp));
2305 	}
2306 	if (info.v3) {
2307 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
2308 	}
2309 	m_freem(info.mrep);
2310 	info.mrep = NULL;
2311 nfsmout:
2312 	VTONFS(dvp)->n_flag |= NLMODIFIED;
2313 	if (!wccflag)
2314 		VTONFS(dvp)->n_attrstamp = 0;
2315 	/*
2316 	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
2317 	 * if we can succeed in looking up the directory.
2318 	 */
2319 	if (error == EEXIST || (!error && !gotvp)) {
2320 		if (newvp) {
2321 			vrele(newvp);
2322 			newvp = NULL;
2323 		}
2324 		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
2325 			cnp->cn_td, &np);
2326 		if (!error) {
2327 			newvp = NFSTOV(np);
2328 			if (newvp->v_type != VDIR)
2329 				error = EEXIST;
2330 		}
2331 	}
2332 	if (error) {
2333 		if (newvp)
2334 			vrele(newvp);
2335 	} else {
2336 		nfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
2337 		*ap->a_vpp = newvp;
2338 	}
2339 	lwkt_reltoken(&nmp->nm_token);
2340 	return (error);
2341 }
2342 
2343 /*
2344  * nfs remove directory call
2345  *
2346  * nfs_rmdir(struct vnode *a_dvp, struct vnode *a_vp,
2347  *	     struct componentname *a_cnp)
2348  */
2349 static int
2350 nfs_rmdir(struct vop_old_rmdir_args *ap)
2351 {
2352 	struct vnode *vp = ap->a_vp;
2353 	struct vnode *dvp = ap->a_dvp;
2354 	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2355 	struct componentname *cnp = ap->a_cnp;
2356 	int error = 0, wccflag = NFSV3_WCCRATTR;
2357 	struct nfsm_info info;
2358 
2359 	info.mrep = NULL;
2360 	info.v3 = NFS_ISV3(dvp);
2361 
2362 	if (dvp == vp)
2363 		return (EINVAL);
2364 
2365 	lwkt_gettoken(&nmp->nm_token);
2366 
2367 	nfsstats.rpccnt[NFSPROC_RMDIR]++;
2368 	nfsm_reqhead(&info, dvp, NFSPROC_RMDIR,
2369 		     NFSX_FH(info.v3) + NFSX_UNSIGNED +
2370 		     nfsm_rndup(cnp->cn_namelen));
2371 	ERROROUT(nfsm_fhtom(&info, dvp));
2372 	ERROROUT(nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
2373 		 NFS_MAXNAMLEN));
2374 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_RMDIR, cnp->cn_td,
2375 				cnp->cn_cred, &error));
2376 	if (info.v3) {
2377 		ERROROUT(nfsm_wcc_data(&info, dvp, &wccflag));
2378 	}
2379 	m_freem(info.mrep);
2380 	info.mrep = NULL;
2381 nfsmout:
2382 	VTONFS(dvp)->n_flag |= NLMODIFIED;
2383 	if (!wccflag)
2384 		VTONFS(dvp)->n_attrstamp = 0;
2385 	/*
2386 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2387 	 */
2388 	if (error == ENOENT)
2389 		error = 0;
2390 	else
2391 		nfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
2392 	lwkt_reltoken(&nmp->nm_token);
2393 
2394 	return (error);
2395 }
2396 
2397 /*
2398  * nfs readdir call
2399  *
2400  * nfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
2401  */
2402 static int
2403 nfs_readdir(struct vop_readdir_args *ap)
2404 {
2405 	struct vnode *vp = ap->a_vp;
2406 	struct nfsnode *np = VTONFS(vp);
2407 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2408 	struct uio *uio = ap->a_uio;
2409 	int tresid, error;
2410 	struct vattr vattr;
2411 
2412 	if (vp->v_type != VDIR)
2413 		return (EPERM);
2414 
2415 	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
2416 	if (error)
2417 		return (error);
2418 
2419 	lwkt_gettoken(&nmp->nm_token);
2420 
2421 	/*
2422 	 * If we have a valid EOF offset cache we must call VOP_GETATTR()
2423 	 * and then check that is still valid, or if this is an NQNFS mount
2424 	 * we call NQNFS_CKCACHEABLE() instead of VOP_GETATTR().  Note that
2425 	 * VOP_GETATTR() does not necessarily go to the wire.
2426 	 */
2427 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2428 	    (np->n_flag & (NLMODIFIED|NRMODIFIED)) == 0) {
2429 		if (VOP_GETATTR(vp, &vattr) == 0 &&
2430 		    (np->n_flag & (NLMODIFIED|NRMODIFIED)) == 0
2431 		) {
2432 			nfsstats.direofcache_hits++;
2433 			goto done;
2434 		}
2435 	}
2436 
2437 	/*
2438 	 * Call nfs_bioread() to do the real work.  nfs_bioread() does its
2439 	 * own cache coherency checks so we do not have to.
2440 	 */
2441 	tresid = uio->uio_resid;
2442 	error = nfs_bioread(vp, uio, 0);
2443 
2444 	if (!error && uio->uio_resid == tresid)
2445 		nfsstats.direofcache_misses++;
2446 done:
2447 	lwkt_reltoken(&nmp->nm_token);
2448 	vn_unlock(vp);
2449 
2450 	return (error);
2451 }
2452 
2453 /*
2454  * Readdir rpc call.  nfs_bioread->nfs_doio->nfs_readdirrpc.
2455  *
2456  * Note that for directories, nfs_bioread maintains the underlying nfs-centric
2457  * offset/block and converts the nfs formatted directory entries for userland
2458  * consumption as well as deals with offsets into the middle of blocks.
2459  * nfs_doio only deals with logical blocks.  In particular, uio_offset will
2460  * be block-bounded.  It must convert to cookies for the actual RPC.
2461  */
2462 int
2463 nfs_readdirrpc_uio(struct vnode *vp, struct uio *uiop)
2464 {
2465 	int len, left;
2466 	struct nfs_dirent *dp = NULL;
2467 	u_int32_t *tl;
2468 	nfsuint64 *cookiep;
2469 	caddr_t cp;
2470 	nfsuint64 cookie;
2471 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2472 	struct nfsnode *dnp = VTONFS(vp);
2473 	u_quad_t fileno;
2474 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2475 	int attrflag;
2476 	struct nfsm_info info;
2477 
2478 	info.mrep = NULL;
2479 	info.v3 = NFS_ISV3(vp);
2480 
2481 #ifndef DIAGNOSTIC
2482 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2483 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2484 		panic("nfs readdirrpc bad uio");
2485 #endif
2486 
2487 	/*
2488 	 * If there is no cookie, assume directory was stale.
2489 	 */
2490 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2491 	if (cookiep)
2492 		cookie = *cookiep;
2493 	else
2494 		return (NFSERR_BAD_COOKIE);
2495 	/*
2496 	 * Loop around doing readdir rpc's of size nm_readdirsize
2497 	 * truncated to a multiple of DIRBLKSIZ.
2498 	 * The stopping criteria is EOF or buffer full.
2499 	 */
2500 	while (more_dirs && bigenough) {
2501 		nfsstats.rpccnt[NFSPROC_READDIR]++;
2502 		nfsm_reqhead(&info, vp, NFSPROC_READDIR,
2503 			     NFSX_FH(info.v3) + NFSX_READDIR(info.v3));
2504 		ERROROUT(nfsm_fhtom(&info, vp));
2505 		if (info.v3) {
2506 			tl = nfsm_build(&info, 5 * NFSX_UNSIGNED);
2507 			*tl++ = cookie.nfsuquad[0];
2508 			*tl++ = cookie.nfsuquad[1];
2509 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
2510 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
2511 		} else {
2512 			/*
2513 			 * WARNING!  HAMMER DIRECTORIES WILL NOT WORK WELL
2514 			 * WITH NFSv2!!!  There's nothing I can really do
2515 			 * about it other than to hope the server supports
2516 			 * rdirplus w/NFSv2.
2517 			 */
2518 			tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
2519 			*tl++ = cookie.nfsuquad[0];
2520 		}
2521 		*tl = txdr_unsigned(nmp->nm_readdirsize);
2522 		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READDIR,
2523 					uiop->uio_td,
2524 					nfs_vpcred(vp, ND_READ), &error));
2525 		if (info.v3) {
2526 			ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
2527 						  NFS_LATTR_NOSHRINK));
2528 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2529 			dnp->n_cookieverf.nfsuquad[0] = *tl++;
2530 			dnp->n_cookieverf.nfsuquad[1] = *tl;
2531 		}
2532 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2533 		more_dirs = fxdr_unsigned(int, *tl);
2534 
2535 		/* loop thru the dir entries, converting them to std form */
2536 		while (more_dirs && bigenough) {
2537 			if (info.v3) {
2538 				NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2539 				fileno = fxdr_hyper(tl);
2540 				len = fxdr_unsigned(int, *(tl + 2));
2541 			} else {
2542 				NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2543 				fileno = fxdr_unsigned(u_quad_t, *tl++);
2544 				len = fxdr_unsigned(int, *tl);
2545 			}
2546 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2547 				error = EBADRPC;
2548 				m_freem(info.mrep);
2549 				info.mrep = NULL;
2550 				goto nfsmout;
2551 			}
2552 
2553 			/*
2554 			 * len is the number of bytes in the path element
2555 			 * name, not including the \0 termination.
2556 			 *
2557 			 * tlen is the number of bytes w have to reserve for
2558 			 * the path element name.
2559 			 */
2560 			tlen = nfsm_rndup(len);
2561 			if (tlen == len)
2562 				tlen += 4;	/* To ensure null termination */
2563 
2564 			/*
2565 			 * If the entry would cross a DIRBLKSIZ boundary,
2566 			 * extend the previous nfs_dirent to cover the
2567 			 * remaining space.
2568 			 */
2569 			left = DIRBLKSIZ - blksiz;
2570 			if ((tlen + sizeof(struct nfs_dirent)) > left) {
2571 				dp->nfs_reclen += left;
2572 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2573 				uiop->uio_iov->iov_len -= left;
2574 				uiop->uio_offset += left;
2575 				uiop->uio_resid -= left;
2576 				blksiz = 0;
2577 			}
2578 			if ((tlen + sizeof(struct nfs_dirent)) > uiop->uio_resid)
2579 				bigenough = 0;
2580 			if (bigenough) {
2581 				dp = (struct nfs_dirent *)uiop->uio_iov->iov_base;
2582 				dp->nfs_ino = fileno;
2583 				dp->nfs_namlen = len;
2584 				dp->nfs_reclen = tlen + sizeof(struct nfs_dirent);
2585 				dp->nfs_type = DT_UNKNOWN;
2586 				blksiz += dp->nfs_reclen;
2587 				if (blksiz == DIRBLKSIZ)
2588 					blksiz = 0;
2589 				uiop->uio_offset += sizeof(struct nfs_dirent);
2590 				uiop->uio_resid -= sizeof(struct nfs_dirent);
2591 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + sizeof(struct nfs_dirent);
2592 				uiop->uio_iov->iov_len -= sizeof(struct nfs_dirent);
2593 				ERROROUT(nfsm_mtouio(&info, uiop, len));
2594 
2595 				/*
2596 				 * The uiop has advanced by nfs_dirent + len
2597 				 * but really needs to advance by
2598 				 * nfs_dirent + tlen
2599 				 */
2600 				cp = uiop->uio_iov->iov_base;
2601 				tlen -= len;
2602 				*cp = '\0';	/* null terminate */
2603 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen;
2604 				uiop->uio_iov->iov_len -= tlen;
2605 				uiop->uio_offset += tlen;
2606 				uiop->uio_resid -= tlen;
2607 			} else {
2608 				/*
2609 				 * NFS strings must be rounded up (nfsm_myouio
2610 				 * handled that in the bigenough case).
2611 				 */
2612 				ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
2613 			}
2614 			if (info.v3) {
2615 				NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2616 			} else {
2617 				NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2618 			}
2619 
2620 			/*
2621 			 * If we were able to accomodate the last entry,
2622 			 * get the cookie for the next one.  Otherwise
2623 			 * hold-over the cookie for the one we were not
2624 			 * able to accomodate.
2625 			 */
2626 			if (bigenough) {
2627 				cookie.nfsuquad[0] = *tl++;
2628 				if (info.v3)
2629 					cookie.nfsuquad[1] = *tl++;
2630 			} else if (info.v3) {
2631 				tl += 2;
2632 			} else {
2633 				tl++;
2634 			}
2635 			more_dirs = fxdr_unsigned(int, *tl);
2636 		}
2637 		/*
2638 		 * If at end of rpc data, get the eof boolean
2639 		 */
2640 		if (!more_dirs) {
2641 			NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2642 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2643 		}
2644 		m_freem(info.mrep);
2645 		info.mrep = NULL;
2646 	}
2647 	/*
2648 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2649 	 * by increasing d_reclen for the last record.
2650 	 */
2651 	if (blksiz > 0) {
2652 		left = DIRBLKSIZ - blksiz;
2653 		dp->nfs_reclen += left;
2654 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2655 		uiop->uio_iov->iov_len -= left;
2656 		uiop->uio_offset += left;
2657 		uiop->uio_resid -= left;
2658 	}
2659 
2660 	if (bigenough) {
2661 		/*
2662 		 * We hit the end of the directory, update direofoffset.
2663 		 */
2664 		dnp->n_direofoffset = uiop->uio_offset;
2665 	} else {
2666 		/*
2667 		 * There is more to go, insert the link cookie so the
2668 		 * next block can be read.
2669 		 */
2670 		if (uiop->uio_resid > 0)
2671 			kprintf("EEK! readdirrpc resid > 0\n");
2672 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2673 		*cookiep = cookie;
2674 	}
2675 nfsmout:
2676 	return (error);
2677 }
2678 
2679 /*
2680  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2681  */
2682 int
2683 nfs_readdirplusrpc_uio(struct vnode *vp, struct uio *uiop)
2684 {
2685 	int len, left;
2686 	struct nfs_dirent *dp;
2687 	u_int32_t *tl;
2688 	struct vnode *newvp;
2689 	nfsuint64 *cookiep;
2690 	caddr_t dpossav1, dpossav2;
2691 	caddr_t cp;
2692 	struct mbuf *mdsav1, *mdsav2;
2693 	nfsuint64 cookie;
2694 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2695 	struct nfsnode *dnp = VTONFS(vp), *np;
2696 	nfsfh_t *fhp;
2697 	u_quad_t fileno;
2698 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2699 	int attrflag, fhsize;
2700 	struct nchandle nch;
2701 	struct nchandle dnch;
2702 	struct nlcomponent nlc;
2703 	struct nfsm_info info;
2704 
2705 	info.mrep = NULL;
2706 	info.v3 = 1;
2707 
2708 #ifndef nolint
2709 	dp = NULL;
2710 #endif
2711 #ifndef DIAGNOSTIC
2712 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2713 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2714 		panic("nfs readdirplusrpc bad uio");
2715 #endif
2716 	/*
2717 	 * Obtain the namecache record for the directory so we have something
2718 	 * to use as a basis for creating the entries.  This function will
2719 	 * return a held (but not locked) ncp.  The ncp may be disconnected
2720 	 * from the tree and cannot be used for upward traversals, and the
2721 	 * ncp may be unnamed.  Note that other unrelated operations may
2722 	 * cause the ncp to be named at any time.
2723 	 *
2724 	 * We have to lock the ncp to prevent a lock order reversal when
2725 	 * rdirplus does nlookups of the children, because the vnode is
2726 	 * locked and has to stay that way.
2727 	 */
2728 	cache_fromdvp(vp, NULL, 0, &dnch);
2729 	bzero(&nlc, sizeof(nlc));
2730 	newvp = NULLVP;
2731 
2732 	/*
2733 	 * If there is no cookie, assume directory was stale.
2734 	 */
2735 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2736 	if (cookiep) {
2737 		cookie = *cookiep;
2738 	} else {
2739 		if (dnch.ncp)
2740 			cache_drop(&dnch);
2741 		return (NFSERR_BAD_COOKIE);
2742 	}
2743 
2744 	/*
2745 	 * Loop around doing readdir rpc's of size nm_readdirsize
2746 	 * truncated to a multiple of DIRBLKSIZ.
2747 	 * The stopping criteria is EOF or buffer full.
2748 	 */
2749 	while (more_dirs && bigenough) {
2750 		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2751 		nfsm_reqhead(&info, vp, NFSPROC_READDIRPLUS,
2752 			     NFSX_FH(info.v3) + 6 * NFSX_UNSIGNED);
2753 		ERROROUT(nfsm_fhtom(&info, vp));
2754 		tl = nfsm_build(&info, 6 * NFSX_UNSIGNED);
2755 		*tl++ = cookie.nfsuquad[0];
2756 		*tl++ = cookie.nfsuquad[1];
2757 		*tl++ = dnp->n_cookieverf.nfsuquad[0];
2758 		*tl++ = dnp->n_cookieverf.nfsuquad[1];
2759 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
2760 		*tl = txdr_unsigned(nmp->nm_rsize);
2761 		NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_READDIRPLUS,
2762 					uiop->uio_td,
2763 					nfs_vpcred(vp, ND_READ), &error));
2764 		ERROROUT(nfsm_postop_attr(&info, vp, &attrflag,
2765 					  NFS_LATTR_NOSHRINK));
2766 		NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2767 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
2768 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
2769 		more_dirs = fxdr_unsigned(int, *tl);
2770 
2771 		/* loop thru the dir entries, doctoring them to 4bsd form */
2772 		while (more_dirs && bigenough) {
2773 			NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2774 			fileno = fxdr_hyper(tl);
2775 			len = fxdr_unsigned(int, *(tl + 2));
2776 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2777 				error = EBADRPC;
2778 				m_freem(info.mrep);
2779 				info.mrep = NULL;
2780 				goto nfsmout;
2781 			}
2782 			tlen = nfsm_rndup(len);
2783 			if (tlen == len)
2784 				tlen += 4;	/* To ensure null termination*/
2785 			left = DIRBLKSIZ - blksiz;
2786 			if ((tlen + sizeof(struct nfs_dirent)) > left) {
2787 				dp->nfs_reclen += left;
2788 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2789 				uiop->uio_iov->iov_len -= left;
2790 				uiop->uio_offset += left;
2791 				uiop->uio_resid -= left;
2792 				blksiz = 0;
2793 			}
2794 			if ((tlen + sizeof(struct nfs_dirent)) > uiop->uio_resid)
2795 				bigenough = 0;
2796 			if (bigenough) {
2797 				dp = (struct nfs_dirent *)uiop->uio_iov->iov_base;
2798 				dp->nfs_ino = fileno;
2799 				dp->nfs_namlen = len;
2800 				dp->nfs_reclen = tlen + sizeof(struct nfs_dirent);
2801 				dp->nfs_type = DT_UNKNOWN;
2802 				blksiz += dp->nfs_reclen;
2803 				if (blksiz == DIRBLKSIZ)
2804 					blksiz = 0;
2805 				uiop->uio_offset += sizeof(struct nfs_dirent);
2806 				uiop->uio_resid -= sizeof(struct nfs_dirent);
2807 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + sizeof(struct nfs_dirent);
2808 				uiop->uio_iov->iov_len -= sizeof(struct nfs_dirent);
2809 				nlc.nlc_nameptr = uiop->uio_iov->iov_base;
2810 				nlc.nlc_namelen = len;
2811 				ERROROUT(nfsm_mtouio(&info, uiop, len));
2812 				cp = uiop->uio_iov->iov_base;
2813 				tlen -= len;
2814 				*cp = '\0';
2815 				uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen;
2816 				uiop->uio_iov->iov_len -= tlen;
2817 				uiop->uio_offset += tlen;
2818 				uiop->uio_resid -= tlen;
2819 			} else {
2820 				ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
2821 			}
2822 			NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2823 			if (bigenough) {
2824 				cookie.nfsuquad[0] = *tl++;
2825 				cookie.nfsuquad[1] = *tl++;
2826 			} else {
2827 				tl += 2;
2828 			}
2829 
2830 			/*
2831 			 * Since the attributes are before the file handle
2832 			 * (sigh), we must skip over the attributes and then
2833 			 * come back and get them.
2834 			 */
2835 			attrflag = fxdr_unsigned(int, *tl);
2836 			if (attrflag) {
2837 			    dpossav1 = info.dpos;
2838 			    mdsav1 = info.md;
2839 			    ERROROUT(nfsm_adv(&info, NFSX_V3FATTR));
2840 			    NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2841 			    doit = fxdr_unsigned(int, *tl);
2842 			    if (doit) {
2843 				NEGATIVEOUT(fhsize = nfsm_getfh(&info, &fhp));
2844 			    }
2845 			    if (doit && bigenough && !nlcdegenerate(&nlc) &&
2846 				!NFS_CMPFH(dnp, fhp, fhsize)
2847 			    ) {
2848 				if (dnch.ncp) {
2849 #if 0
2850 				    kprintf("NFS/READDIRPLUS, ENTER %*.*s\n",
2851 					nlc.nlc_namelen, nlc.nlc_namelen,
2852 					nlc.nlc_nameptr);
2853 #endif
2854 				    /*
2855 				     * This is a bit hokey but there isn't
2856 				     * much we can do about it.  We can't
2857 				     * hold the directory vp locked while
2858 				     * doing lookups and gets.
2859 				     */
2860 				    nch = cache_nlookup_nonblock(&dnch, &nlc);
2861 				    if (nch.ncp == NULL)
2862 					goto rdfail;
2863 				    cache_setunresolved(&nch);
2864 				    error = nfs_nget_nonblock(vp->v_mount, fhp,
2865 							      fhsize, &np,
2866 							      NULL);
2867 				    if (error) {
2868 					cache_put(&nch);
2869 					goto rdfail;
2870 				    }
2871 				    newvp = NFSTOV(np);
2872 				    dpossav2 = info.dpos;
2873 				    info.dpos = dpossav1;
2874 				    mdsav2 = info.md;
2875 				    info.md = mdsav1;
2876 				    ERROROUT(nfsm_loadattr(&info, newvp, NULL));
2877 				    info.dpos = dpossav2;
2878 				    info.md = mdsav2;
2879 				    dp->nfs_type =
2880 					    IFTODT(VTTOIF(np->n_vattr.va_type));
2881 				    nfs_cache_setvp(&nch, newvp,
2882 						    nfspos_cache_timeout);
2883 				    vput(newvp);
2884 				    newvp = NULLVP;
2885 				    cache_put(&nch);
2886 				} else {
2887 rdfail:
2888 				    ;
2889 #if 0
2890 				    kprintf("Warning: NFS/rddirplus, "
2891 					    "UNABLE TO ENTER %*.*s\n",
2892 					nlc.nlc_namelen, nlc.nlc_namelen,
2893 					nlc.nlc_nameptr);
2894 #endif
2895 				}
2896 			    }
2897 			} else {
2898 			    /* Just skip over the file handle */
2899 			    NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2900 			    i = fxdr_unsigned(int, *tl);
2901 			    ERROROUT(nfsm_adv(&info, nfsm_rndup(i)));
2902 			}
2903 			NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2904 			more_dirs = fxdr_unsigned(int, *tl);
2905 		}
2906 		/*
2907 		 * If at end of rpc data, get the eof boolean
2908 		 */
2909 		if (!more_dirs) {
2910 			NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2911 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2912 		}
2913 		m_freem(info.mrep);
2914 		info.mrep = NULL;
2915 	}
2916 	/*
2917 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2918 	 * by increasing d_reclen for the last record.
2919 	 */
2920 	if (blksiz > 0) {
2921 		left = DIRBLKSIZ - blksiz;
2922 		dp->nfs_reclen += left;
2923 		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left;
2924 		uiop->uio_iov->iov_len -= left;
2925 		uiop->uio_offset += left;
2926 		uiop->uio_resid -= left;
2927 	}
2928 
2929 	/*
2930 	 * We are now either at the end of the directory or have filled the
2931 	 * block.
2932 	 */
2933 	if (bigenough) {
2934 		dnp->n_direofoffset = uiop->uio_offset;
2935 	} else {
2936 		if (uiop->uio_resid > 0)
2937 			kprintf("EEK! readdirplusrpc resid > 0\n");
2938 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2939 		*cookiep = cookie;
2940 	}
2941 nfsmout:
2942 	if (newvp != NULLVP) {
2943 	        if (newvp == vp)
2944 			vrele(newvp);
2945 		else
2946 			vput(newvp);
2947 		newvp = NULLVP;
2948 	}
2949 	if (dnch.ncp)
2950 		cache_drop(&dnch);
2951 	return (error);
2952 }
2953 
2954 /*
2955  * Silly rename. To make the NFS filesystem that is stateless look a little
2956  * more like the "ufs" a remove of an active vnode is translated to a rename
2957  * to a funny looking filename that is removed by nfs_inactive on the
2958  * nfsnode. There is the potential for another process on a different client
2959  * to create the same funny name between the nfs_lookitup() fails and the
2960  * nfs_rename() completes, but...
2961  */
2962 static int
2963 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2964 {
2965 	struct sillyrename *sp;
2966 	struct nfsnode *np;
2967 	int error;
2968 
2969 	/*
2970 	 * Force finalization so the VOP_INACTIVE() call is not delayed.
2971 	 * This prevents cred structures from building up in nfsnodes
2972 	 * for deleted files.
2973 	 */
2974 	atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
2975 	np = VTONFS(vp);
2976 	np->n_flag |= NREMOVED;
2977 
2978 	/*
2979 	 * We previously purged dvp instead of vp.  I don't know why, it
2980 	 * completely destroys performance.  We can't do it anyway with the
2981 	 * new VFS API since we would be breaking the namecache topology.
2982 	 */
2983 	cache_purge(vp);	/* XXX */
2984 #ifndef DIAGNOSTIC
2985 	if (vp->v_type == VDIR)
2986 		panic("nfs: sillyrename dir");
2987 #endif
2988 	sp = kmalloc(sizeof(struct sillyrename), M_NFSREQ, M_WAITOK);
2989 	sp->s_cred = crdup(cnp->cn_cred);
2990 	sp->s_dvp = dvp;
2991 	vref(dvp);
2992 
2993 	/* Fudge together a funny name */
2994 	sp->s_namlen = ksprintf(sp->s_name, ".nfsA%08x4.4",
2995 				(int)(intptr_t)cnp->cn_td);
2996 
2997 	/* Try lookitups until we get one that isn't there */
2998 	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2999 		cnp->cn_td, NULL) == 0) {
3000 		sp->s_name[4]++;
3001 		if (sp->s_name[4] > 'z') {
3002 			error = EINVAL;
3003 			goto bad;
3004 		}
3005 	}
3006 	error = nfs_renameit(dvp, cnp, sp);
3007 	if (error)
3008 		goto bad;
3009 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
3010 		cnp->cn_td, &np);
3011 	np->n_sillyrename = sp;
3012 	return (0);
3013 bad:
3014 	vrele(sp->s_dvp);
3015 	crfree(sp->s_cred);
3016 	kfree((caddr_t)sp, M_NFSREQ);
3017 
3018 	return (error);
3019 }
3020 
3021 /*
3022  * Look up a file name and optionally either update the file handle or
3023  * allocate an nfsnode, depending on the value of npp.
3024  * npp == NULL	--> just do the lookup
3025  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
3026  *			handled too
3027  * *npp != NULL --> update the file handle in the vnode
3028  */
3029 static int
3030 nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
3031 	     struct thread *td, struct nfsnode **npp)
3032 {
3033 	struct vnode *newvp = NULL;
3034 	struct nfsnode *np, *dnp = VTONFS(dvp);
3035 	int error = 0, fhlen, attrflag;
3036 	nfsfh_t *nfhp;
3037 	struct nfsm_info info;
3038 
3039 	info.mrep = NULL;
3040 	info.v3 = NFS_ISV3(dvp);
3041 
3042 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
3043 	nfsm_reqhead(&info, dvp, NFSPROC_LOOKUP,
3044 		     NFSX_FH(info.v3) + NFSX_UNSIGNED + nfsm_rndup(len));
3045 	ERROROUT(nfsm_fhtom(&info, dvp));
3046 	ERROROUT(nfsm_strtom(&info, name, len, NFS_MAXNAMLEN));
3047 	NEGKEEPOUT(nfsm_request(&info, dvp, NFSPROC_LOOKUP, td, cred, &error));
3048 	if (npp && !error) {
3049 		NEGATIVEOUT(fhlen = nfsm_getfh(&info, &nfhp));
3050 		if (*npp) {
3051 		    np = *npp;
3052 		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
3053 			kfree((caddr_t)np->n_fhp, M_NFSBIGFH);
3054 			np->n_fhp = &np->n_fh;
3055 		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
3056 			np->n_fhp =(nfsfh_t *)kmalloc(fhlen,M_NFSBIGFH,M_WAITOK);
3057 		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
3058 		    np->n_fhsize = fhlen;
3059 		    newvp = NFSTOV(np);
3060 		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
3061 		    vref(dvp);
3062 		    newvp = dvp;
3063 		} else {
3064 		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, NULL);
3065 		    if (error) {
3066 			m_freem(info.mrep);
3067 			info.mrep = NULL;
3068 			return (error);
3069 		    }
3070 		    newvp = NFSTOV(np);
3071 		}
3072 		if (info.v3) {
3073 			ERROROUT(nfsm_postop_attr(&info, newvp, &attrflag,
3074 						  NFS_LATTR_NOSHRINK));
3075 			if (!attrflag && *npp == NULL) {
3076 				m_freem(info.mrep);
3077 				info.mrep = NULL;
3078 				if (newvp == dvp)
3079 					vrele(newvp);
3080 				else
3081 					vput(newvp);
3082 				return (ENOENT);
3083 			}
3084 		} else {
3085 			ERROROUT(nfsm_loadattr(&info, newvp, NULL));
3086 		}
3087 	}
3088 	m_freem(info.mrep);
3089 	info.mrep = NULL;
3090 nfsmout:
3091 	if (npp && *npp == NULL) {
3092 		if (error) {
3093 			if (newvp) {
3094 				if (newvp == dvp)
3095 					vrele(newvp);
3096 				else
3097 					vput(newvp);
3098 			}
3099 		} else
3100 			*npp = np;
3101 	}
3102 	return (error);
3103 }
3104 
3105 /*
3106  * Nfs Version 3 commit rpc
3107  *
3108  * We call it 'uio' to distinguish it from 'bio' but there is no real uio
3109  * involved.
3110  */
3111 int
3112 nfs_commitrpc_uio(struct vnode *vp, u_quad_t offset, int cnt, struct thread *td)
3113 {
3114 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3115 	int error = 0, wccflag = NFSV3_WCCRATTR;
3116 	struct nfsm_info info;
3117 	u_int32_t *tl;
3118 
3119 	info.mrep = NULL;
3120 	info.v3 = 1;
3121 
3122 	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
3123 		return (0);
3124 	nfsstats.rpccnt[NFSPROC_COMMIT]++;
3125 	nfsm_reqhead(&info, vp, NFSPROC_COMMIT, NFSX_FH(1));
3126 	ERROROUT(nfsm_fhtom(&info, vp));
3127 	tl = nfsm_build(&info, 3 * NFSX_UNSIGNED);
3128 	txdr_hyper(offset, tl);
3129 	tl += 2;
3130 	*tl = txdr_unsigned(cnt);
3131 	NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_COMMIT, td,
3132 				nfs_vpcred(vp, ND_WRITE), &error));
3133 	ERROROUT(nfsm_wcc_data(&info, vp, &wccflag));
3134 	if (!error) {
3135 		NULLOUT(tl = nfsm_dissect(&info, NFSX_V3WRITEVERF));
3136 		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
3137 			NFSX_V3WRITEVERF)) {
3138 			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
3139 				NFSX_V3WRITEVERF);
3140 			error = NFSERR_STALEWRITEVERF;
3141 		}
3142 	}
3143 	m_freem(info.mrep);
3144 	info.mrep = NULL;
3145 nfsmout:
3146 	return (error);
3147 }
3148 
3149 /*
3150  * Kludge City..
3151  * - make nfs_bmap() essentially a no-op that does no translation
3152  * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
3153  *   (Maybe I could use the process's page mapping, but I was concerned that
3154  *    Kernel Write might not be enabled and also figured copyout() would do
3155  *    a lot more work than bcopy() and also it currently happens in the
3156  *    context of the swapper process (2).
3157  *
3158  * nfs_bmap(struct vnode *a_vp, off_t a_loffset,
3159  *	    off_t *a_doffsetp, int *a_runp, int *a_runb)
3160  */
3161 static int
3162 nfs_bmap(struct vop_bmap_args *ap)
3163 {
3164 	/* no token lock required */
3165 	if (ap->a_doffsetp != NULL)
3166 		*ap->a_doffsetp = ap->a_loffset;
3167 	if (ap->a_runp != NULL)
3168 		*ap->a_runp = 0;
3169 	if (ap->a_runb != NULL)
3170 		*ap->a_runb = 0;
3171 	return (0);
3172 }
3173 
3174 /*
3175  * Strategy routine.
3176  */
3177 static int
3178 nfs_strategy(struct vop_strategy_args *ap)
3179 {
3180 	struct bio *bio = ap->a_bio;
3181 	struct bio *nbio;
3182 	struct buf *bp __debugvar = bio->bio_buf;
3183 	struct nfsmount *nmp = VFSTONFS(ap->a_vp->v_mount);
3184 	struct thread *td;
3185 	int error;
3186 
3187 	KASSERT(bp->b_cmd != BUF_CMD_DONE,
3188 		("nfs_strategy: buffer %p unexpectedly marked done", bp));
3189 	KASSERT(BUF_LOCKINUSE(bp),
3190 		("nfs_strategy: buffer %p not locked", bp));
3191 
3192 	if (bio->bio_flags & BIO_SYNC)
3193 		td = curthread;	/* XXX */
3194 	else
3195 		td = NULL;
3196 
3197 	lwkt_gettoken(&nmp->nm_token);
3198 
3199         /*
3200 	 * We probably don't need to push an nbio any more since no
3201 	 * block conversion is required due to the use of 64 bit byte
3202 	 * offsets, but do it anyway.
3203 	 *
3204 	 * NOTE: When NFS callers itself via this strategy routines and
3205 	 *	 sets up a synchronous I/O, it expects the I/O to run
3206 	 *	 synchronously (its bio_done routine just assumes it),
3207 	 *	 so for now we have to honor the bit.
3208          */
3209 	nbio = push_bio(bio);
3210 	nbio->bio_offset = bio->bio_offset;
3211 	nbio->bio_flags = bio->bio_flags & BIO_SYNC;
3212 
3213 	/*
3214 	 * If the op is asynchronous and an i/o daemon is waiting
3215 	 * queue the request, wake it up and wait for completion
3216 	 * otherwise just do it ourselves.
3217 	 */
3218 	if (bio->bio_flags & BIO_SYNC) {
3219 		error = nfs_doio(ap->a_vp, nbio, td);
3220 	} else {
3221 		nfs_asyncio(ap->a_vp, nbio);
3222 		error = 0;
3223 	}
3224 	lwkt_reltoken(&nmp->nm_token);
3225 
3226 	return (error);
3227 }
3228 
3229 /*
3230  * fsync vnode op. Just call nfs_flush() with commit == 1.
3231  *
3232  * nfs_fsync(struct vnode *a_vp, int a_waitfor)
3233  */
3234 /* ARGSUSED */
3235 static int
3236 nfs_fsync(struct vop_fsync_args *ap)
3237 {
3238 	struct nfsmount *nmp = VFSTONFS(ap->a_vp->v_mount);
3239 	int error;
3240 
3241 	lwkt_gettoken(&nmp->nm_token);
3242 
3243 	/*
3244 	 * NOTE: Because attributes are set synchronously we currently
3245 	 *	 do not have to implement vsetisdirty()/vclrisdirty().
3246 	 */
3247 	error = nfs_flush(ap->a_vp, ap->a_waitfor, curthread, 1);
3248 
3249 	lwkt_reltoken(&nmp->nm_token);
3250 
3251 	return error;
3252 }
3253 
3254 /*
3255  * Flush all the blocks associated with a vnode.   Dirty NFS buffers may be
3256  * in one of two states:  If B_NEEDCOMMIT is clear then the buffer contains
3257  * new NFS data which needs to be written to the server.  If B_NEEDCOMMIT is
3258  * set the buffer contains data that has already been written to the server
3259  * and which now needs a commit RPC.
3260  *
3261  * If commit is 0 we only take one pass and only flush buffers containing new
3262  * dirty data.
3263  *
3264  * If commit is 1 we take two passes, issuing a commit RPC in the second
3265  * pass.
3266  *
3267  * If waitfor is MNT_WAIT and commit is 1, we loop as many times as required
3268  * to completely flush all pending data.
3269  *
3270  * Note that the RB_SCAN code properly handles the case where the
3271  * callback might block and directly or indirectly (another thread) cause
3272  * the RB tree to change.
3273  */
3274 
3275 #ifndef NFS_COMMITBVECSIZ
3276 #define NFS_COMMITBVECSIZ	16
3277 #endif
3278 
3279 struct nfs_flush_info {
3280 	enum { NFI_FLUSHNEW, NFI_COMMIT } mode;
3281 	struct thread *td;
3282 	struct vnode *vp;
3283 	int waitfor;
3284 	int slpflag;
3285 	int slptimeo;
3286 	int loops;
3287 	struct buf *bvary[NFS_COMMITBVECSIZ];
3288 	int bvsize;
3289 	off_t beg_off;
3290 	off_t end_off;
3291 };
3292 
3293 static int nfs_flush_bp(struct buf *bp, void *data);
3294 static int nfs_flush_docommit(struct nfs_flush_info *info, int error);
3295 
3296 int
3297 nfs_flush(struct vnode *vp, int waitfor, struct thread *td, int commit)
3298 {
3299 	struct nfsnode *np = VTONFS(vp);
3300 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3301 	struct nfs_flush_info info;
3302 	int error;
3303 
3304 	bzero(&info, sizeof(info));
3305 	info.td = td;
3306 	info.vp = vp;
3307 	info.waitfor = waitfor;
3308 	info.slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
3309 	info.loops = 0;
3310 	lwkt_gettoken(&vp->v_token);
3311 
3312 	do {
3313 		/*
3314 		 * Flush mode
3315 		 */
3316 		info.mode = NFI_FLUSHNEW;
3317 		error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
3318 				nfs_flush_bp, &info);
3319 
3320 		/*
3321 		 * Take a second pass if committing and no error occured.
3322 		 * Clean up any left over collection (whether an error
3323 		 * occurs or not).
3324 		 */
3325 		if (commit && error == 0) {
3326 			info.mode = NFI_COMMIT;
3327 			error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
3328 					nfs_flush_bp, &info);
3329 			if (info.bvsize)
3330 				error = nfs_flush_docommit(&info, error);
3331 		}
3332 
3333 		/*
3334 		 * Wait for pending I/O to complete before checking whether
3335 		 * any further dirty buffers exist.
3336 		 */
3337 		while (waitfor == MNT_WAIT &&
3338 		       bio_track_active(&vp->v_track_write)) {
3339 			error = bio_track_wait(&vp->v_track_write,
3340 					       info.slpflag, info.slptimeo);
3341 			if (error) {
3342 				/*
3343 				 * We have to be able to break out if this
3344 				 * is an 'intr' mount.
3345 				 */
3346 				if (nfs_sigintr(nmp, NULL, td)) {
3347 					error = -EINTR;
3348 					break;
3349 				}
3350 
3351 				/*
3352 				 * Since we do not process pending signals,
3353 				 * once we get a PCATCH our tsleep() will no
3354 				 * longer sleep, switch to a fixed timeout
3355 				 * instead.
3356 				 */
3357 				if (info.slpflag == PCATCH) {
3358 					info.slpflag = 0;
3359 					info.slptimeo = 2 * hz;
3360 				}
3361 				error = 0;
3362 			}
3363 		}
3364 		++info.loops;
3365 		/*
3366 		 * Loop if we are flushing synchronous as well as committing,
3367 		 * and dirty buffers are still present.  Otherwise we might livelock.
3368 		 */
3369 	} while (waitfor == MNT_WAIT && commit &&
3370 		 error == 0 && !RB_EMPTY(&vp->v_rbdirty_tree));
3371 
3372 	/*
3373 	 * The callbacks have to return a negative error to terminate the
3374 	 * RB scan.
3375 	 */
3376 	if (error < 0)
3377 		error = -error;
3378 
3379 	/*
3380 	 * Deal with any error collection
3381 	 */
3382 	if (np->n_flag & NWRITEERR) {
3383 		error = np->n_error;
3384 		np->n_flag &= ~NWRITEERR;
3385 	}
3386 	lwkt_reltoken(&vp->v_token);
3387 	return (error);
3388 }
3389 
3390 static
3391 int
3392 nfs_flush_bp(struct buf *bp, void *data)
3393 {
3394 	struct nfs_flush_info *info = data;
3395 	int lkflags;
3396 	int error;
3397 	off_t toff;
3398 
3399 	error = 0;
3400 	switch(info->mode) {
3401 	case NFI_FLUSHNEW:
3402 		error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT);
3403 		if (error && info->loops && info->waitfor == MNT_WAIT) {
3404 			error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT);
3405 			if (error) {
3406 				lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL;
3407 				if (info->slpflag & PCATCH)
3408 					lkflags |= LK_PCATCH;
3409 				error = BUF_TIMELOCK(bp, lkflags, "nfsfsync",
3410 						     info->slptimeo);
3411 			}
3412 		}
3413 
3414 		/*
3415 		 * Ignore locking errors
3416 		 */
3417 		if (error) {
3418 			error = 0;
3419 			break;
3420 		}
3421 
3422 		/*
3423 		 * The buffer may have changed out from under us, even if
3424 		 * we did not block (MPSAFE).  Check again now that it is
3425 		 * locked.
3426 		 */
3427 		if (bp->b_vp == info->vp &&
3428 		    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == B_DELWRI) {
3429 			bremfree(bp);
3430 			bawrite(bp);
3431 		} else {
3432 			BUF_UNLOCK(bp);
3433 		}
3434 		break;
3435 	case NFI_COMMIT:
3436 		/*
3437 		 * Only process buffers in need of a commit which we can
3438 		 * immediately lock.  This may prevent a buffer from being
3439 		 * committed, but the normal flush loop will block on the
3440 		 * same buffer so we shouldn't get into an endless loop.
3441 		 */
3442 		if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
3443 		    (B_DELWRI | B_NEEDCOMMIT)) {
3444 			break;
3445 		}
3446 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
3447 			break;
3448 
3449 		/*
3450 		 * We must recheck after successfully locking the buffer.
3451 		 */
3452 		if (bp->b_vp != info->vp ||
3453 		    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
3454 		    (B_DELWRI | B_NEEDCOMMIT)) {
3455 			BUF_UNLOCK(bp);
3456 			break;
3457 		}
3458 
3459 		/*
3460 		 * NOTE: storing the bp in the bvary[] basically sets
3461 		 * it up for a commit operation.
3462 		 *
3463 		 * We must call vfs_busy_pages() now so the commit operation
3464 		 * is interlocked with user modifications to memory mapped
3465 		 * pages.  The b_dirtyoff/b_dirtyend range is not correct
3466 		 * until after the pages have been busied.
3467 		 *
3468 		 * Note: to avoid loopback deadlocks, we do not
3469 		 * assign b_runningbufspace.
3470 		 */
3471 		bremfree(bp);
3472 		bp->b_cmd = BUF_CMD_WRITE;
3473 		vfs_busy_pages(bp->b_vp, bp);
3474 		info->bvary[info->bvsize] = bp;
3475 		toff = bp->b_bio2.bio_offset + bp->b_dirtyoff;
3476 		if (info->bvsize == 0 || toff < info->beg_off)
3477 			info->beg_off = toff;
3478 		toff += (off_t)(bp->b_dirtyend - bp->b_dirtyoff);
3479 		if (info->bvsize == 0 || toff > info->end_off)
3480 			info->end_off = toff;
3481 		++info->bvsize;
3482 		if (info->bvsize == NFS_COMMITBVECSIZ) {
3483 			error = nfs_flush_docommit(info, 0);
3484 			KKASSERT(info->bvsize == 0);
3485 		}
3486 	}
3487 	return (error);
3488 }
3489 
3490 static
3491 int
3492 nfs_flush_docommit(struct nfs_flush_info *info, int error)
3493 {
3494 	struct vnode *vp;
3495 	struct buf *bp;
3496 	off_t bytes;
3497 	int retv;
3498 	int i;
3499 
3500 	vp = info->vp;
3501 
3502 	if (info->bvsize > 0) {
3503 		/*
3504 		 * Commit data on the server, as required.  Note that
3505 		 * nfs_commit will use the vnode's cred for the commit.
3506 		 * The NFSv3 commit RPC is limited to a 32 bit byte count.
3507 		 */
3508 		bytes = info->end_off - info->beg_off;
3509 		if (bytes > 0x40000000)
3510 			bytes = 0x40000000;
3511 		if (error) {
3512 			retv = -error;
3513 		} else {
3514 			retv = nfs_commitrpc_uio(vp, info->beg_off,
3515 						 (int)bytes, info->td);
3516 			if (retv == NFSERR_STALEWRITEVERF)
3517 				nfs_clearcommit(vp->v_mount);
3518 		}
3519 
3520 		/*
3521 		 * Now, either mark the blocks I/O done or mark the
3522 		 * blocks dirty, depending on whether the commit
3523 		 * succeeded.
3524 		 */
3525 		for (i = 0; i < info->bvsize; ++i) {
3526 			bp = info->bvary[i];
3527 			if (retv || (bp->b_flags & B_NEEDCOMMIT) == 0) {
3528 				/*
3529 				 * Either an error or the original
3530 				 * vfs_busy_pages() cleared B_NEEDCOMMIT
3531 				 * due to finding new dirty VM pages in
3532 				 * the buffer.
3533 				 *
3534 				 * Leave B_DELWRI intact.
3535 				 */
3536 				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
3537 				vfs_unbusy_pages(bp);
3538 				bp->b_cmd = BUF_CMD_DONE;
3539 				bqrelse(bp);
3540 			} else {
3541 				/*
3542 				 * Success, remove B_DELWRI ( bundirty() ).
3543 				 *
3544 				 * b_dirtyoff/b_dirtyend seem to be NFS
3545 				 * specific.  We should probably move that
3546 				 * into bundirty(). XXX
3547 				 *
3548 				 * We are faking an I/O write, we have to
3549 				 * start the transaction in order to
3550 				 * immediately biodone() it.
3551 				 */
3552 				bundirty(bp);
3553 				bp->b_flags &= ~B_ERROR;
3554 				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
3555 				bp->b_dirtyoff = bp->b_dirtyend = 0;
3556 				biodone(&bp->b_bio1);
3557 			}
3558 		}
3559 		info->bvsize = 0;
3560 	}
3561 	return (error);
3562 }
3563 
3564 /*
3565  * NFS advisory byte-level locks.
3566  * Currently unsupported.
3567  *
3568  * nfs_advlock(struct vnode *a_vp, caddr_t a_id, int a_op, struct flock *a_fl,
3569  *		int a_flags)
3570  */
3571 static int
3572 nfs_advlock(struct vop_advlock_args *ap)
3573 {
3574 	struct nfsnode *np = VTONFS(ap->a_vp);
3575 
3576 	/* no token lock currently required */
3577 	/*
3578 	 * The following kludge is to allow diskless support to work
3579 	 * until a real NFS lockd is implemented. Basically, just pretend
3580 	 * that this is a local lock.
3581 	 */
3582 	return (lf_advlock(ap, &(np->n_lockf), np->n_size));
3583 }
3584 
3585 /*
3586  * Print out the contents of an nfsnode.
3587  *
3588  * nfs_print(struct vnode *a_vp)
3589  */
3590 static int
3591 nfs_print(struct vop_print_args *ap)
3592 {
3593 	struct vnode *vp = ap->a_vp;
3594 	struct nfsnode *np = VTONFS(vp);
3595 
3596 	kprintf("tag VT_NFS, fileid %lld fsid 0x%x",
3597 		(long long)np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3598 	if (vp->v_type == VFIFO)
3599 		fifo_printinfo(vp);
3600 	kprintf("\n");
3601 	return (0);
3602 }
3603 
3604 /*
3605  * nfs special file access vnode op.
3606  *
3607  * nfs_laccess(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
3608  */
3609 static int
3610 nfs_laccess(struct vop_access_args *ap)
3611 {
3612 	struct nfsmount *nmp = VFSTONFS(ap->a_vp->v_mount);
3613 	struct vattr vattr;
3614 	int error;
3615 
3616 	lwkt_gettoken(&nmp->nm_token);
3617 	error = VOP_GETATTR(ap->a_vp, &vattr);
3618 	if (error == 0) {
3619 		error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid,
3620 					  vattr.va_mode, 0);
3621 	}
3622 	lwkt_reltoken(&nmp->nm_token);
3623 
3624 	return (error);
3625 }
3626 
3627 /*
3628  * Read wrapper for fifos.
3629  *
3630  * nfsfifo_read(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
3631  *		struct ucred *a_cred)
3632  */
3633 static int
3634 nfsfifo_read(struct vop_read_args *ap)
3635 {
3636 	struct nfsnode *np = VTONFS(ap->a_vp);
3637 
3638 	/* no token access required */
3639 	/*
3640 	 * Set access flag.
3641 	 */
3642 	np->n_flag |= NACC;
3643 	getnanotime(&np->n_atim);
3644 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
3645 }
3646 
3647 /*
3648  * Write wrapper for fifos.
3649  *
3650  * nfsfifo_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
3651  *		 struct ucred *a_cred)
3652  */
3653 static int
3654 nfsfifo_write(struct vop_write_args *ap)
3655 {
3656 	struct nfsnode *np = VTONFS(ap->a_vp);
3657 
3658 	/* no token access required */
3659 	/*
3660 	 * Set update flag.
3661 	 */
3662 	np->n_flag |= NUPD;
3663 	getnanotime(&np->n_mtim);
3664 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
3665 }
3666 
3667 /*
3668  * Close wrapper for fifos.
3669  *
3670  * Update the times on the nfsnode then do fifo close.
3671  *
3672  * nfsfifo_close(struct vnode *a_vp, int a_fflag)
3673  */
3674 static int
3675 nfsfifo_close(struct vop_close_args *ap)
3676 {
3677 	struct vnode *vp = ap->a_vp;
3678 	struct nfsnode *np = VTONFS(vp);
3679 	struct vattr vattr;
3680 	struct timespec ts;
3681 
3682 	/* no token access required */
3683 
3684 	vn_lock(vp, LK_UPGRADE | LK_RETRY); /* XXX */
3685 	if (np->n_flag & (NACC | NUPD)) {
3686 		getnanotime(&ts);
3687 		if (np->n_flag & NACC)
3688 			np->n_atim = ts;
3689 		if (np->n_flag & NUPD)
3690 			np->n_mtim = ts;
3691 		np->n_flag |= NCHG;
3692 		if (VREFCNT(vp) == 1 &&
3693 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3694 			VATTR_NULL(&vattr);
3695 			if (np->n_flag & NACC)
3696 				vattr.va_atime = np->n_atim;
3697 			if (np->n_flag & NUPD)
3698 				vattr.va_mtime = np->n_mtim;
3699 			(void)VOP_SETATTR(vp, &vattr, nfs_vpcred(vp, ND_WRITE));
3700 		}
3701 	}
3702 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
3703 }
3704 
3705 /************************************************************************
3706  *                          KQFILTER OPS                                *
3707  ************************************************************************/
3708 
3709 static void filt_nfsdetach(struct knote *kn);
3710 static int filt_nfsread(struct knote *kn, long hint);
3711 static int filt_nfswrite(struct knote *kn, long hint);
3712 static int filt_nfsvnode(struct knote *kn, long hint);
3713 
3714 static struct filterops nfsread_filtops =
3715 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
3716 	  NULL, filt_nfsdetach, filt_nfsread };
3717 static struct filterops nfswrite_filtops =
3718 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
3719 	  NULL, filt_nfsdetach, filt_nfswrite };
3720 static struct filterops nfsvnode_filtops =
3721 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
3722 	  NULL, filt_nfsdetach, filt_nfsvnode };
3723 
3724 static int
3725 nfs_kqfilter (struct vop_kqfilter_args *ap)
3726 {
3727 	struct vnode *vp = ap->a_vp;
3728 	struct knote *kn = ap->a_kn;
3729 
3730 	switch (kn->kn_filter) {
3731 	case EVFILT_READ:
3732 		kn->kn_fop = &nfsread_filtops;
3733 		break;
3734 	case EVFILT_WRITE:
3735 		kn->kn_fop = &nfswrite_filtops;
3736 		break;
3737 	case EVFILT_VNODE:
3738 		kn->kn_fop = &nfsvnode_filtops;
3739 		break;
3740 	default:
3741 		return (EOPNOTSUPP);
3742 	}
3743 
3744 	kn->kn_hook = (caddr_t)vp;
3745 
3746 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
3747 
3748 	return(0);
3749 }
3750 
3751 static void
3752 filt_nfsdetach(struct knote *kn)
3753 {
3754 	struct vnode *vp = (void *)kn->kn_hook;
3755 
3756 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
3757 }
3758 
3759 static int
3760 filt_nfsread(struct knote *kn, long hint)
3761 {
3762 	struct vnode *vp = (void *)kn->kn_hook;
3763 	struct nfsnode *node = VTONFS(vp);
3764 	off_t off;
3765 
3766 	if (hint == NOTE_REVOKE) {
3767 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
3768 		return(1);
3769 	}
3770 
3771 	/*
3772 	 * Interlock against MP races when performing this function. XXX
3773 	 */
3774 	/* TMPFS_NODE_LOCK_SH(node); */
3775 	off = node->n_size - kn->kn_fp->f_offset;
3776 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
3777 	if (kn->kn_sfflags & NOTE_OLDAPI) {
3778 		/* TMPFS_NODE_UNLOCK(node); */
3779 		return(1);
3780 	}
3781 	if (kn->kn_data == 0) {
3782 		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
3783 	}
3784 	/* TMPFS_NODE_UNLOCK(node); */
3785 	return (kn->kn_data != 0);
3786 }
3787 
3788 static int
3789 filt_nfswrite(struct knote *kn, long hint)
3790 {
3791 	if (hint == NOTE_REVOKE)
3792 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
3793 	kn->kn_data = 0;
3794 	return (1);
3795 }
3796 
3797 static int
3798 filt_nfsvnode(struct knote *kn, long hint)
3799 {
3800 	if (kn->kn_sfflags & hint)
3801 		kn->kn_fflags |= hint;
3802 	if (hint == NOTE_REVOKE) {
3803 		kn->kn_flags |= (EV_EOF | EV_NODATA);
3804 		return (1);
3805 	}
3806 	return (kn->kn_fflags != 0);
3807 }
3808